5 năm trước cách đây · a3fd8d384f
--- a/include/bx/inline/simd128_langext.inl
+++ b/include/bx/inline/simd128_langext.inl
@@ -346,6 +346,14 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
 
				 		return result;
			
 
				 	}
			
 
				 
			
 
				+	template<> // simd128_langext_t specialization of simd_cmpneq
				+	BX_SIMD_FORCE_INLINE simd128_langext_t simd_cmpneq(simd128_langext_t _a, simd128_langext_t _b)
				+	{
				+		simd128_langext_t mask; // only the .vi member is written before returning
				+		mask.vi = (_a.vf != _b.vf); // `!=` applied directly to the .vf members, stored through .vi
				+		return mask;
				+	}
			
 
				+
			
 
				 	template<>
			
 
				 	BX_SIMD_FORCE_INLINE simd128_langext_t simd_cmplt(simd128_langext_t _a, simd128_langext_t _b)
			
 
				 	{
			
--- a/include/bx/inline/simd128_neon.inl
+++ b/include/bx/inline/simd128_neon.inl
@@ -9,23 +9,15 @@
 
				 
			
 
				 namespace bx
			
 
				 {
			
 
				-#if BX_COMPILER_CLANG
			
 
				 
			
 
				-#define SHUFFLE_A(_a,  _i0, _i1, _i2, _i3)	\
			
 
				-__builtin_shufflevector(_a, _a, _i0, _i1, _i2, _i3 )
			
 
				-#define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3)	\
			
 
				-__builtin_shufflevector(_a, _b, _i0, _i1, _i2, _i3 )
			
 
				-	
			
 
				+#if BX_COMPILER_CLANG // Clang build: shuffles use __builtin_shufflevector, which takes the lane indices as separate arguments
			
 
				+#	define SHUFFLE_A(_a,  _i0, _i1, _i2, _i3)     __builtin_shufflevector(_a, _a, _i0, _i1, _i2, _i3 ) // single source: _a is passed as both operands
			
 
				+#	define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) __builtin_shufflevector(_a, _b, _i0, _i1, _i2, _i3 ) // two sources: _a and _b
			
 
				 #else
			
 
				-
			
 
				-#define SHUFFLE_A(_a,  _i0, _i1, _i2, _i3)	\
			
 
				-__builtin_shuffle(_a, (uint32x4_t){ _i0, _i1, _i2, _i3 })
			
 
				-#define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3)	\
			
 
				-__builtin_shuffle(_a, _b, (uint32x4_t){ _i0, _i1, _i2, _i3 })
			
 
				-
			
 
				+#	define SHUFFLE_A(_a,  _i0, _i1, _i2, _i3)     __builtin_shuffle(_a, (uint32x4_t){ _i0, _i1, _i2, _i3 }) // non-Clang build: __builtin_shuffle with the indices packed into a uint32x4_t; single source _a
			
 
				+#	define SHUFFLE_AB(_a, _b, _i0, _i1, _i2, _i3) __builtin_shuffle(_a, _b, (uint32x4_t){ _i0, _i1, _i2, _i3 }) // same form, selecting from the two sources _a and _b
			
 
				 #endif
			
 
				 
			
 
				-	
			
 
				 #define ELEMx 0
			
 
				 #define ELEMy 1
			
 
				 #define ELEMz 2
			
@@ -291,11 +283,17 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
 
				 		return result;
			
 
				 	}
			
 
				 
			
 
				+	template<> // simd128_neon_t specialization of simd_cmpneq
			
 
				+	BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpneq(simd128_neon_t _a, simd128_neon_t _b)
			
 
				+	{
			
 
				+		return simd_cmpneq_ni(_a, _b); // forwards to the templated simd_cmpneq_ni helper instead of calling a NEON intrinsic here
			
 
				+	}
			
 
				+
			
 
				 	template<>
			
 
				 	BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmplt(simd128_neon_t _a, simd128_neon_t _b)
			
 
				 	{
			
 
				-		const uint32x4_t tmp    = vcltq_f32(_a, _b);
			
 
				-		const simd128_neon_t   result = vreinterpretq_f32_u32(tmp);
			
 
				+		const uint32x4_t tmp        = vcltq_f32(_a, _b); // NEON lane-wise float compare (_a < _b); the intrinsic yields a uint32x4_t
			
 
				+		const simd128_neon_t result = vreinterpretq_f32_u32(tmp); // reinterpret those bits as the float vector type; no value conversion
			
 
				 
			
 
				 		return result;
			
 
				 	}
			
@@ -303,8 +301,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
 
				 	template<>
			
 
				 	BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmple(simd128_neon_t _a, simd128_neon_t _b)
			
 
				 	{
			
 
				-		const uint32x4_t tmp    = vcleq_f32(_a, _b);
			
 
				-		const simd128_neon_t   result = vreinterpretq_f32_u32(tmp);
			
 
				+		const uint32x4_t tmp        = vcleq_f32(_a, _b); // NEON lane-wise float compare (_a <= _b); the intrinsic yields a uint32x4_t
			
 
				+		const simd128_neon_t result = vreinterpretq_f32_u32(tmp); // reinterpret those bits as the float vector type; no value conversion
			
 
				 
			
 
				 		return result;
			
 
				 	}
			
@@ -312,8 +310,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
 
				 	template<>
			
 
				 	BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpgt(simd128_neon_t _a, simd128_neon_t _b)
			
 
				 	{
			
 
				-		const uint32x4_t tmp    = vcgtq_f32(_a, _b);
			
 
				-		const simd128_neon_t   result = vreinterpretq_f32_u32(tmp);
			
 
				+		const uint32x4_t tmp        = vcgtq_f32(_a, _b); // NEON lane-wise float compare (_a > _b); the intrinsic yields a uint32x4_t
			
 
				+		const simd128_neon_t result = vreinterpretq_f32_u32(tmp); // reinterpret those bits as the float vector type; no value conversion
			
 
				 
			
 
				 		return result;
			
 
				 	}
			
@@ -321,8 +319,8 @@ BX_SIMD128_IMPLEMENT_TEST(yzw, yzww);
 
				 	template<>
			
 
				 	BX_SIMD_FORCE_INLINE simd128_neon_t simd_cmpge(simd128_neon_t _a, simd128_neon_t _b)
			
 
				 	{
			
 
				-		const uint32x4_t tmp    = vcgeq_f32(_a, _b);
			
 
				-		const simd128_neon_t   result = vreinterpretq_f32_u32(tmp);
			
 
				+		const uint32x4_t tmp        = vcgeq_f32(_a, _b); // NEON lane-wise float compare (_a >= _b); the intrinsic yields a uint32x4_t
			
 
				+		const simd128_neon_t result = vreinterpretq_f32_u32(tmp); // reinterpret those bits as the float vector type; no value conversion
			
 
				 
			
 
				 		return result;
			
 
				 	}
			
--- a/include/bx/inline/simd128_ref.inl
+++ b/include/bx/inline/simd128_ref.inl
@@ -396,6 +396,17 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
 
				 		return result;
			
 
				 	}
			
 
				 
			
 
				+	template<> // simd128_ref_t specialization of simd_cmpneq
				+	BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmpneq(simd128_ref_t _a, simd128_ref_t _b)
				+	{
				+		simd128_ref_t mask; // lanes 0..3 are each written exactly once by the loop
				+		for (int lane = 0; lane < 4; ++lane) // 0xffffffff where the floats differ, 0x0 where they compare equal
				+		{
				+			mask.ixyzw[lane] = _a.fxyzw[lane] != _b.fxyzw[lane] ? 0xffffffff : 0x0;
				+		}
				+		return mask;
				+	}
			
 
				+
			
 
				 	template<>
			
 
				 	BX_SIMD_FORCE_INLINE simd128_ref_t simd_cmplt(simd128_ref_t _a, simd128_ref_t _b)
			
 
				 	{
			
--- a/include/bx/inline/simd128_sse.inl
+++ b/include/bx/inline/simd128_sse.inl
@@ -308,6 +308,12 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw , 0xf);
 
				 		return _mm_cmpeq_ps(_a, _b);
			
 
				 	}
			
 
				 
			
 
				+	template<> // simd128_sse_t specialization of simd_cmpneq
			
 
				+	BX_SIMD_FORCE_INLINE simd128_sse_t simd_cmpneq(simd128_sse_t _a, simd128_sse_t _b)
			
 
				+	{
			
 
				+		return _mm_cmpneq_ps(_a, _b); // SSE packed-float "not equal" compare: each lane is all ones when the inputs differ (including NaN operands), zero otherwise
			
 
				+	}
			
 
				+
			
 
				 	template<>
			
 
				 	BX_SIMD_FORCE_INLINE simd128_sse_t simd_cmplt(simd128_sse_t _a, simd128_sse_t _b)
			
 
				 	{
			
--- a/include/bx/inline/simd_ni.inl
+++ b/include/bx/inline/simd_ni.inl
@@ -124,6 +124,15 @@ namespace bx
 
				 		return result;
			
 
				 	}
			
 
				 
			
 
				+	template<typename Ty>
				+	BX_SIMD_INLINE Ty simd_cmpneq_ni(Ty _a, Ty _b)
				+	{
				+		// Written only in terms of two other simd operations, so it works for
				+		// any Ty that provides them: the operands are first compared with
				+		// simd_cmpeq, and that result is then passed through simd_not.
				+		return simd_not(simd_cmpeq(_a, _b) );
				+	}
			
 
				+
			
 
				 	template<typename Ty>
			
 
				 	BX_SIMD_INLINE Ty simd_min_ni(Ty _a, Ty _b)
			
 
				 	{
			
--- a/include/bx/simd_t.h
+++ b/include/bx/simd_t.h
@@ -196,6 +196,9 @@ BX_SIMD128_IMPLEMENT_TEST(xyzw);
 
				 	template<typename Ty>
			
 
				 	Ty simd_cmpeq(Ty _a, Ty _b);
			
 
				 
			
 
				+	template<typename Ty>
			
 
				+	Ty simd_cmpneq(Ty _a, Ty _b); // "not equal" comparison of _a and _b; defined per backend via template<> specializations (the reference one yields 0xffffffff per differing lane, 0 otherwise)
			
 
				+
			
 
				 	template<typename Ty>
			
 
				 	Ty simd_cmplt(Ty _a, Ty _b);