Browse Source

Simplified and documented Vec4::SinCos function (#186)

Jorrit Rouwe 3 years ago
parent
commit
6793dcf7b4
1 changed files with 44 additions and 57 deletions
  1. 44 57
      Jolt/Math/Vec4.inl

+ 44 - 57
Jolt/Math/Vec4.inl

@@ -717,73 +717,60 @@ float Vec4::ReduceMax() const
 
 void Vec4::SinCos(Vec4 &outSin, Vec4 &outCos) const
 {
-	// Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function and vectorizes it
-	// Original implementation by Stephen L. Moshier (See: http://www.netlib.org/cephes/)
+	// Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function, changes octants to quadrants and vectorizes it
+	// Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)
 
-	// Make argument positive and remember sign (highest bit set is negative)
+	// Make argument positive and remember sign for sin only since cos is symmetric around x (highest bit of a float is the sign bit)
 	UVec4 sin_sign = UVec4::sAnd(ReinterpretAsInt(), UVec4::sReplicate(0x80000000U));
 	Vec4 x = Vec4::sXor(*this, sin_sign.ReinterpretAsFloat());
 
-	// Integer part of x / (PI / 4)
-	UVec4 int_val = (1.27323954473516f * x).ToInt();
-	Vec4 y = int_val.ToFloat();
-
-	// Integer and fractional part modulo one octant, map zeros to origin
-	// if (int_val & 1) int_val++, y += 1;
-	UVec4 and_1 = int_val.LogicalShiftLeft<31>().ArithmeticShiftRight<31>();
-	int_val += UVec4::sAnd(and_1, UVec4::sReplicate(1));
-	y += Vec4::sAnd(and_1.ReinterpretAsFloat(), Vec4::sReplicate(1.0f));
-
-	// Extended precision modular arithmetic
-	x = ((x - y * 0.78515625f) - y * 2.4187564849853515625e-4f) - y * 3.77489497744594108e-8f;
-
-	// Calculate both results
-	Vec4 z = x * x;
-	Vec4 y1 = ((2.443315711809948e-5f * z - Vec4::sReplicate(1.388731625493765e-3f)) * z + Vec4::sReplicate(4.166664568298827e-2f)) * z * z - 0.5f * z + Vec4::sReplicate(1.0f);
-	Vec4 y2 = ((-1.9515295891e-4f * z + Vec4::sReplicate(8.3321608736e-3f)) * z - Vec4::sReplicate(1.6666654611e-1f)) * z * x + x;
-
-	// From here we deviate form the original cephes code, we would have to write:
-	//
-	// j &= 7;
-	// 
-	// if (j > 3)
-	// {
-	//		j -= 4;
-	//		sin_sign = -sin_sign;
-	//		cos_sign = -cos_sign;
-	// }
-	// 
-	// if (j > 1)
-	//		cos_sign = -cos_sign;
-	//
-	// ...
-	//
-	// if (j == 1 || j == 2) // condition
-	//		...
+	// x / (PI / 2) rounded to nearest int gives us the quadrant closest to x
+	UVec4 quadrant = (0.6366197723675814f * x + Vec4::sReplicate(0.5f)).ToInt();
+
+	// Make x relative to the closest quadrant.
+	// This does x = x - quadrant * PI / 2 using a two step Cody-Waite argument reduction.
+	// This improves the accuracy of the result by avoiding loss of significant bits in the subtraction.
+	// We start with x = x - quadrant * PI / 2, PI / 2 in hexadecimal notation is 0x3fc90fdb, we remove the lowest 16 bits to
+	// get 0x3fc90000 (= 1.5703125) this means we can now multiply with a number of up to 2^16 without losing any bits.
+	// This leaves us with: x = (x - quadrant * 1.5703125) - quadrant * (PI / 2 - 1.5703125).
+	// PI / 2 - 1.5703125 in hexadecimal is 0x39fdaa22, stripping the lowest 12 bits we get 0x39fda000 (= 0.0004837512969970703125)
+	// This leaves uw with: x = ((x - quadrant * 1.5703125) - quadrant * 0.0004837512969970703125) - quadrant * (PI / 2 - 1.5703125 - 0.0004837512969970703125)
+	// See: https://stackoverflow.com/questions/42455143/sine-cosine-modular-extended-precision-arithmetic
+	// After this we have x in the range [-PI / 4, PI / 4].
+	Vec4 float_quadrant = quadrant.ToFloat();
+	x = ((x - float_quadrant * 1.5703125f) - float_quadrant * 0.0004837512969970703125f) - float_quadrant * 7.549789948768648e-8f;
+
+	// Calculate x2 = x^2
+	Vec4 x2 = x * x;
+
+	// Taylor expansion:
+	// Cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! + ... = (((x2/8!- 1/6!) * x2 + 1/4!) * x2 - 1/2!) * x2 + 1
+	Vec4 taylor_cos = ((2.443315711809948e-5f * x2 - Vec4::sReplicate(1.388731625493765e-3f)) * x2 + Vec4::sReplicate(4.166664568298827e-2f)) * x2 * x2 - 0.5f * x2 + Vec4::sReplicate(1.0f);
+	// Sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... = ((-x2/7! + 1/5!) * x2 - 1/3!) * x2 * x + x
+	Vec4 taylor_sin = ((-1.9515295891e-4f * x2 + Vec4::sReplicate(8.3321608736e-3f)) * x2 - Vec4::sReplicate(1.6666654611e-1f)) * x2 * x + x;
+
+	// The lowest 2 bits of quadrant indicate the quadrant that we are in.
+	// Let x be the original input value and x' our value that has been mapped to the range [-PI / 4, PI / 4].
+	// since cos(x) = sin(x - PI / 2) and since we want to use the Taylor expansion as close as possible to 0,
+	// we can alternate between using the Taylor expansion for sin and cos according to the following table:
 	// 
-	// j		sin_sign	cos_sign	condition
-	// 000b     1			1			0
-	// 001b     1			1			1
-	// 010b     1			-1			1
-	// 011b     1			-1			0
-	// 100b     -1			-1			0
-	// 101b     -1			-1			1
-	// 110b     -1			1			1
-	// 111b		-1			1			0
+	// quadrant	 sin(x)		 cos(x)
+	// XXX00b	 sin(x')	 cos(x')
+	// XXX01b	 cos(x')	-sin(x')
+	// XXX10b	-sin(x')	-cos(x')
+	// XXX11b	-cos(x')	 sin(x')
 	//
-	// So: sin_sign = bit3, cos_sign = bit2 ^ bit3, condition = bit1 ^ bit2
-	UVec4 bit1 = int_val.LogicalShiftLeft<31>();
-	UVec4 bit2 = UVec4::sAnd(int_val.LogicalShiftLeft<30>(), UVec4::sReplicate(0x80000000U));
-	UVec4 bit3 = UVec4::sAnd(int_val.LogicalShiftLeft<29>(), UVec4::sReplicate(0x80000000U));
+	// So: sin_sign = bit2, cos_sign = bit1 ^ bit2, bit1 determines if we use sin or cos Taylor expansion
+	UVec4 bit1 = quadrant.LogicalShiftLeft<31>();
+	UVec4 bit2 = UVec4::sAnd(quadrant.LogicalShiftLeft<30>(), UVec4::sReplicate(0x80000000U));
 
 	// Select which one of the results is sin and which one is cos
-	UVec4 xor_1_2 = UVec4::sXor(bit1, bit2);
-	Vec4 s = Vec4::sSelect(y2, y1, xor_1_2);
-	Vec4 c = Vec4::sSelect(y1, y2, xor_1_2);
+	Vec4 s = Vec4::sSelect(taylor_sin, taylor_cos, bit1);
+	Vec4 c = Vec4::sSelect(taylor_cos, taylor_sin, bit1);
 
 	// Update the signs
-	sin_sign = UVec4::sXor(sin_sign, bit3);
-	UVec4 cos_sign = UVec4::sXor(bit2, bit3);
+	sin_sign = UVec4::sXor(sin_sign, bit2);
+	UVec4 cos_sign = UVec4::sXor(bit1, bit2);
 
 	// Correct the signs
 	outSin = Vec4::sXor(s, sin_sign.ReinterpretAsFloat());