
Added exclusive or to simd.h.

David Piuva, 3 years ago · commit 305389cef6
2 changed files with 19 additions and 0 deletions:
  1. Source/DFPSR/base/simd.h (+16, -0)
  2. Source/test/tests/SimdTest.cpp (+3, -0)

Source/DFPSR/base/simd.h (+16, -0)

@@ -146,6 +146,7 @@
 		// Bitwise
 		#define BITWISE_AND_U32_SIMD(A, B) _mm_and_si128(A, B)
 		#define BITWISE_OR_U32_SIMD(A, B) _mm_or_si128(A, B)
+		#define BITWISE_XOR_U32_SIMD(A, B) _mm_xor_si128(A, B)
 	#endif
 
 	// Everything declared in here handles things specific for NEON.
@@ -255,6 +256,7 @@
 		// Bitwise
 		#define BITWISE_AND_U32_SIMD(A, B) vandq_u32(A, B)
 		#define BITWISE_OR_U32_SIMD(A, B) vorrq_u32(A, B)
+		#define BITWISE_XOR_U32_SIMD(A, B) veorq_u32(A, B)
 	#endif
 
 	/*
@@ -929,6 +931,20 @@
 			return U32x4(left.emulated[0] | mask, left.emulated[1] | mask, left.emulated[2] | mask, left.emulated[3] | mask);
 		#endif
 	}
+	inline U32x4 operator^(const U32x4& left, const U32x4& right) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(BITWISE_XOR_U32_SIMD(left.v, right.v));
+		#else
+			return U32x4(left.emulated[0] ^ right.emulated[0], left.emulated[1] ^ right.emulated[1], left.emulated[2] ^ right.emulated[2], left.emulated[3] ^ right.emulated[3]);
+		#endif
+	}
+	inline U32x4 operator^(const U32x4& left, uint32_t mask) {
+		#ifdef USE_BASIC_SIMD
+			return U32x4(BITWISE_XOR_U32_SIMD(left.v, LOAD_SCALAR_U32_SIMD(mask)));
+		#else
+			return U32x4(left.emulated[0] ^ mask, left.emulated[1] ^ mask, left.emulated[2] ^ mask, left.emulated[3] ^ mask);
+		#endif
+	}
 	inline U32x4 operator<<(const U32x4& left, uint32_t bitOffset) {
 		#ifdef USE_SSE2
 			return U32x4(_mm_slli_epi32(left.v, bitOffset));

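For reference, a minimal usage sketch of the two new overloads. This is not part of the commit; the include path is an assumption and the lane values in the comments are derived from the diff above.

	#include "Source/DFPSR/base/simd.h" // Assumed include path; adjust to your build setup.

	int main() {
		// Vector ^ scalar: the mask is broadcast to all four lanes
		// (via LOAD_SCALAR_U32_SIMD on the SIMD path).
		U32x4 values(0xFFFFFFFF, 0x01234567, 0xF0F0F0F0, 0x00000000);
		U32x4 toggled = values ^ 0x0000FFFF; // (0xFFFF0000, 0x0123BA98, 0xF0F00F0F, 0x0000FFFF)

		// Vector ^ vector: element-wise exclusive or across the four lanes.
		U32x4 left(0xAAAAAAAA, 0x0000FFFF, 0x12345678, 0xDEADBEEF);
		U32x4 right(0x55555555, 0xFFFF0000, 0x12345678, 0x00000000);
		U32x4 result = left ^ right; // (0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xDEADBEEF)
		return 0;
	}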
Source/test/tests/SimdTest.cpp (+3, -0)

@@ -258,6 +258,9 @@ START_TEST(Simd)
 	ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678) & U32x4(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0), U32x4(0xFF00FF00, 0xFFF00000, 0x0000F000, 0x02300670));
 	ASSERT_EQUAL(U32x4(0xF00F000F, 0xFFF000FF, 0x10010011, 0xABC00000) | U32x4(0x0000FF00, 0xFFFF0000, 0x000FF000, 0x000DEF00), U32x4(0xF00FFF0F, 0xFFFF00FF, 0x100FF011, 0xABCDEF00));
 
+	// Exclusive or
+	ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x01234567, 0xF0F0F0F0, 0x00000000) ^ 0x0000FFFF, U32x4(0xFFFF0000, 0x0123BA98, 0xF0F00F0F, 0x0000FFFF));
+
 	// Bit shift
 	ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 1, U32x4(2, 4, 6, 8));
 	ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 2, U32x4(4, 8, 12, 16));
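For context on what the new macros expand to: the SSE2 path uses _mm_xor_si128 and the NEON path uses veorq_u32. Below is a standalone SSE2 sketch, illustrative only and independent of the library, reproducing the same lanes as the test case above.

	#include <emmintrin.h> // SSE2 intrinsics
	#include <cstdint>
	#include <cstdio>
	#include <cinttypes>

	int main() {
		alignas(16) uint32_t a[4]    = {0xFFFFFFFF, 0x01234567, 0xF0F0F0F0, 0x00000000};
		alignas(16) uint32_t mask[4] = {0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF};
		__m128i va = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
		__m128i vm = _mm_load_si128(reinterpret_cast<const __m128i*>(mask));
		__m128i vr = _mm_xor_si128(va, vm); // What BITWISE_XOR_U32_SIMD does on SSE2.
		alignas(16) uint32_t r[4];
		_mm_store_si128(reinterpret_cast<__m128i*>(r), vr);
		// Prints 0xFFFF0000, 0x0123BA98, 0xF0F00F0F, 0x0000FFFF.
		for (int i = 0; i < 4; i++) {
			printf("0x%08" PRIX32 "\n", r[i]);
		}
		return 0;
	}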