Переглянути джерело

Added missing functions to float4 reference implementation.

bkaradzic 12 роки тому
батько
коміт
539076a75e
3 змінених файлів з 59 додано та 1 видалено
  1. 1 1
      include/bx/float4_neon.h
  2. 50 0
      include/bx/float4_ref.h
  3. 8 0
      tests/float4_t.cpp

+ 1 - 1
include/bx/float4_neon.h

@@ -349,7 +349,7 @@ namespace bx
 
 	BX_FLOAT4_INLINE float4_t float4_nmsub(float4_t _a, float4_t _b, float4_t _c)
 	{
-		return __builtin_neon_vmlav4sf(_c, _a, _b, 3);
+		return __builtin_neon_vmlsv4sf(_c, _a, _b, 3); // NOTE(review): presumably _c - _a * _b (multiply-subtract) -- confirm against the NEON VMLS documentation
 	}
 
 	BX_FLOAT4_INLINE float4_t float4_icmpeq(float4_t _a, float4_t _b)

+ 50 - 0
include/bx/float4_ref.h

@@ -499,6 +499,56 @@ IMPLEMENT_TEST(xyzw , 0xf);
 		return result;
 	}
 
+	BX_FLOAT4_INLINE float4_t float4_icmpeq(float4_t _a, float4_t _b)
+	{
+		float4_t mask; // each ixyzw lane: 0xffffffff where the _a and _b lanes are equal, else 0x0
+		for (int lane = 0; lane < 4; ++lane)
+		{
+			mask.ixyzw[lane] = (_a.ixyzw[lane] == _b.ixyzw[lane]) ? 0xffffffff : 0x0;
+		}
+		return mask;
+	}
+
+	BX_FLOAT4_INLINE float4_t float4_icmplt(float4_t _a, float4_t _b)
+	{
+		float4_t mask; // each ixyzw lane: 0xffffffff where the _a lane is less than the _b lane, else 0x0
+		for (int lane = 0; lane < 4; ++lane)
+		{
+			mask.ixyzw[lane] = (_a.ixyzw[lane] < _b.ixyzw[lane]) ? 0xffffffff : 0x0;
+		}
+		return mask;
+	}
+
+	BX_FLOAT4_INLINE float4_t float4_icmpgt(float4_t _a, float4_t _b)
+	{
+		float4_t mask; // each ixyzw lane: 0xffffffff where the _a lane is greater than the _b lane, else 0x0
+		for (int lane = 0; lane < 4; ++lane)
+		{
+			mask.ixyzw[lane] = (_a.ixyzw[lane] > _b.ixyzw[lane]) ? 0xffffffff : 0x0;
+		}
+		return mask;
+	}
+
+	BX_FLOAT4_INLINE float4_t float4_imin(float4_t _a, float4_t _b)
+	{
+		float4_t lowest; // each ixyzw lane holds the smaller of the corresponding _a / _b lanes
+		for (int lane = 0; lane < 4; ++lane)
+		{
+			lowest.ixyzw[lane] = (_a.ixyzw[lane] < _b.ixyzw[lane]) ? _a.ixyzw[lane] : _b.ixyzw[lane];
+		}
+		return lowest;
+	}
+
+	BX_FLOAT4_INLINE float4_t float4_imax(float4_t _a, float4_t _b)
+	{
+		float4_t result; // each ixyzw lane holds the larger of the corresponding _a / _b lanes
+		result.ixyzw[0] = _a.ixyzw[0] > _b.ixyzw[0] ? _a.ixyzw[0] : _b.ixyzw[0];
+		result.ixyzw[1] = _a.ixyzw[1] > _b.ixyzw[1] ? _a.ixyzw[1] : _b.ixyzw[1];
+		result.ixyzw[2] = _a.ixyzw[2] > _b.ixyzw[2] ? _a.ixyzw[2] : _b.ixyzw[2];
+		result.ixyzw[3] = _a.ixyzw[3] > _b.ixyzw[3] ? _a.ixyzw[3] : _b.ixyzw[3];
+		return result;
+	}
+
 	BX_FLOAT4_INLINE float4_t float4_iadd(float4_t _a, float4_t _b)
 	{
 		float4_t result;

+ 8 - 0
tests/float4_t.cpp

@@ -183,6 +183,14 @@ TEST(float4_load)
 		);
 }
 
+TEST(float4_arithmetic)
+{
+	float4_check_float("madd"
+		, float4_madd(float4_ld(0.0f, 1.0f, 2.0f, 3.0f), float4_ld(4.0f, 5.0f, 6.0f, 7.0f), float4_ld(8.0f, 9.0f, 10.0f, 11.0f) )
+		, 8.0f, 14.0f, 22.0f, 32.0f // expected lanes equal a*b + c: 0*4+8, 1*5+9, 2*6+10, 3*7+11
+		);
+}
+
 TEST(float4)
 {
 	const float4_t isplat = float4_isplat(0x80000001);