Browse Source

cross3 with one less swizzle.

Branimir Karadžić 10 years ago
parent
commit
03c169e5c4
2 changed files with 20 additions and 6 deletions
  1. 15 6
      include/bx/float4_ni.h
  2. 5 0
      tests/float4_t.cpp

+ 15 - 6
include/bx/float4_ni.h

@@ -15,7 +15,7 @@ namespace bx
 		const float4_t xAyB   = float4_shuf_xAyB(_a, _b);
 		const float4_t zCwD   = float4_shuf_zCwD(_a, _b);
 		const float4_t result = float4_shuf_xyAB(xAyB, zCwD);
-		
+
 		return result;
 	}
 
@@ -24,7 +24,7 @@ namespace bx
 		const float4_t xAyB   = float4_shuf_xAyB(_a, _b);
 		const float4_t zCwD   = float4_shuf_zCwD(_a, _b);
 		const float4_t result = float4_shuf_zwCD(xAyB, zCwD);
-		
+
 		return result;
 	}
 
@@ -192,7 +192,7 @@ namespace bx
 		const float4_t one    = float4_splat(1.0f);
 		const float4_t sqrt   = float4_sqrt(_a);
 		const float4_t result = float4_div(one, sqrt);
-		
+
 		return result;
 	}
 
@@ -206,7 +206,7 @@ namespace bx
 		const float4_t three           = float4_splat(3.0f);
 		const float4_t three_sub_iter1 = float4_sub(three, iter1);
 		const float4_t result          = float4_mul(half_rsqrt, three_sub_iter1);
-		
+
 		return result;
 	}
 
@@ -375,7 +375,7 @@ namespace bx
 		const float4_t expfpart = float4_logexp_detail::float4_exppoly(fpart);
 
 		const float4_t result   = float4_mul(expipart, expfpart);
-		
+
 		return result;
 	}
 
@@ -401,12 +401,21 @@ namespace bx
 
 	BX_FLOAT4_INLINE float4_t float4_cross3_ni(float4_t _a, float4_t _b) // cross product of the xyz lanes of _a and _b
 	{
+		// Identity used: a.yzx * b.zxy - a.zxy * b.yzx == (a * b.yzx - a.yzx * b).yzx (saves one swizzle: 3 instead of 4)
+#if 0 // previous four-swizzle form, kept disabled for reference
 		const float4_t a_yzxw = float4_swiz_yzxw(_a);
 		const float4_t a_zxyw = float4_swiz_zxyw(_a);
 		const float4_t b_zxyw = float4_swiz_zxyw(_b);
 		const float4_t b_yzxw = float4_swiz_yzxw(_b);
 		const float4_t tmp    = float4_mul(a_yzxw, b_zxyw);
 		const float4_t result = float4_nmsub(a_zxyw, b_yzxw, tmp);
+#else // three-swizzle form: swizzle each input once, then swizzle the difference once
+		const float4_t a_yzxw = float4_swiz_yzxw(_a);
+		const float4_t b_yzxw = float4_swiz_yzxw(_b);
+		const float4_t tmp0   = float4_mul(_a, b_yzxw); // a * b.yzx
+		const float4_t tmp1   = float4_nmsub(a_yzxw, _b, tmp0); // a * b.yzx - a.yzx * b, assuming nmsub(x, y, z) yields z - x * y as the identity above implies
+		const float4_t result = float4_swiz_yzxw(tmp1); // final .yzx rotation puts the differences into cross-product lane order
+#endif
 
 		return result;
 	}
@@ -416,7 +425,7 @@ namespace bx
 		const float4_t dot3    = float4_dot3(_a, _a);
 		const float4_t invSqrt = float4_rsqrt(dot3);
 		const float4_t result  = float4_mul(_a, invSqrt);
-		
+
 		return result;
 	}
 

+ 5 - 0
tests/float4_t.cpp

@@ -228,6 +228,11 @@ TEST(float4_arithmetic)
 		, float4_madd(float4_ld(0.0f, 1.0f, 2.0f, 3.0f), float4_ld(4.0f, 5.0f, 6.0f, 7.0f), float4_ld(8.0f, 9.0f, 10.0f, 11.0f) )
 		, 8.0f, 14.0f, 22.0f, 32.0f
 		);
+
+	float4_check_float("cross3"
+		, float4_cross3(float4_ld(1.0f, 0.0f, 0.0f, 0.0f), float4_ld(0.0f, 1.0f, 0.0f, 0.0f) )
+		, 0.0f, 0.0f, 1.0f, 0.0f
+		);
 }
 
 TEST(float4)