Browse Source

Implemented non-immediate uniform offset bit shifts for AVX2.

David Piuva 10 months ago
parent
commit
275b7dbabd
3 changed files with 796 additions and 628 deletions
  1. 133 2
      Source/DFPSR/base/simd.h
  2. 0 3
      Source/test/TestCaller.DsrProj
  3. 663 623
      Source/test/tests/SimdTest.cpp

+ 133 - 2
Source/DFPSR/base/simd.h

@@ -3138,8 +3138,39 @@
 			);
 		#endif
 	}
-
-	// TODO: Implement bit shifts with non-immediate uniform offsets.
+	inline U16x16 operator&(const U16x16& left, const U16x16& right) {
+		#if defined(USE_AVX2)
+			return U16x16(_mm256_and_si256(left.v, right.v));
+		#else
+			IMPL_SCALAR_REFERENCE_INFIX_8_LANES(left, right, U16x16, uint16_t, &)
+		#endif
+	}
+	// Bitwise or
+	inline U16x16 operator|(const U16x16& left, const U16x16& right) {
+		#if defined(USE_AVX2)
+			return U16x16(_mm256_or_si256(left.v, right.v));
+		#else
+			IMPL_SCALAR_REFERENCE_INFIX_8_LANES(left, right, U16x16, uint16_t, |)
+		#endif
+	}
+	// Bitwise xor
+	inline U16x16 operator^(const U16x16& left, const U16x16& right) {
+		#if defined(USE_AVX2)
+			return U16x16(_mm256_xor_si256(left.v, right.v));
+		#else
+			IMPL_SCALAR_REFERENCE_INFIX_8_LANES(left, right, U16x16, uint16_t, ^)
+		#endif
+	}
+	// Bitwise negation
	// Bitwise negation: returns a U16x16 where every bit of value is flipped.
	inline U16x16 operator~(const U16x16& value) {
		#if defined(USE_AVX2)
			// Fall back on xor against all ones.
			// AVX2 has no dedicated not instruction, so xor with 0xFFFF in each lane flips every bit.
			return value ^ U16x16(~uint16_t(0));
		#else
			// Scalar reference path: negate each of the 16 lanes individually.
			// TODO: Perform using 64-bit integers.
			return U16x16(~value.scalars[0], ~value.scalars[1], ~value.scalars[2], ~value.scalars[3], ~value.scalars[4], ~value.scalars[5], ~value.scalars[6], ~value.scalars[7], ~value.scalars[8], ~value.scalars[9], ~value.scalars[10], ~value.scalars[11], ~value.scalars[12], ~value.scalars[13], ~value.scalars[14], ~value.scalars[15]);
		#endif
	}
 
 	inline U16x16 operator<<(const U16x16& left, const U16x16 &bitOffsets) {
 		#ifdef SAFE_POINTER_CHECKS
@@ -3165,6 +3196,56 @@
 			IMPL_SCALAR_REFERENCE_INFIX_16_LANES(left, bitOffsets, U16x16, uint16_t, >>)
 		#endif
 	}
+	inline U16x16 operator<<(const U16x16& left, const uint32_t &bitOffset) {
+		#if defined(USE_AVX2)
+			#ifdef SAFE_POINTER_CHECKS
+				if(bitOffset >= 16u) {
+					throwError(U"Tried to shift ", left, U" by bit offset ", bitOffset, U", which is non-deterministic from being out of bound 0..16!\n");
+				}
+			#endif
+			// Write the content to aligned stack memory.
+			ALIGN32 __m256i values;
+			left.writeAlignedUnsafe((uint16_t*)&values);
+			// Cast a pointer to the data into four 64-bit elements.
+			uint64_t *largeLanes = (uint64_t*)&values;
+			// Shift the 128 bits as two 64-bit values.
+			largeLanes[0] = largeLanes[0] << bitOffset;
+			largeLanes[1] = largeLanes[1] << bitOffset;
+			largeLanes[2] = largeLanes[2] << bitOffset;
+			largeLanes[3] = largeLanes[3] << bitOffset;
+			// Create a mask.
+			U16x16 mask = U16x16(uint16_t(~0u) << bitOffset);
+			// Return the shifted 64-bit elements masked to remove spill across lanes.
+			return U16x16::readAlignedUnsafe((uint16_t*)&values) & mask;
+		#else
+			return left << U16x16(bitOffset);
+		#endif
+	}
+	inline U16x16 operator>>(const U16x16& left, const uint32_t &bitOffset) {
+		#if defined(USE_AVX2)
+			#ifdef SAFE_POINTER_CHECKS
+				if(bitOffset >= 16u) {
+					throwError(U"Tried to shift ", left, U" by bit offset ", bitOffset, U", which is non-deterministic from being out of bound 0..16!\n");
+				}
+			#endif
+			// Write the content to aligned stack memory.
+			ALIGN32 __m256i values;
+			left.writeAlignedUnsafe((uint16_t*)&values);
+			// Cast a pointer to the data into four 64-bit elements.
+			uint64_t *largeLanes = (uint64_t*)&values;
+			// Shift the 128 bits as two 64-bit values.
+			largeLanes[0] = largeLanes[0] >> bitOffset;
+			largeLanes[1] = largeLanes[1] >> bitOffset;
+			largeLanes[2] = largeLanes[2] >> bitOffset;
+			largeLanes[3] = largeLanes[3] >> bitOffset;
+			// Create a mask.
+			U16x16 mask = U16x16(uint16_t(~0u) >> bitOffset);
+			// Return the shifted 64-bit elements masked to remove spill across lanes.
+			return U16x16::readAlignedUnsafe((uint16_t*)&values) & mask;
+		#else
+			return left >> U16x16(bitOffset);
+		#endif
+	}
 	// bitOffset must be an immediate constant from 0 to 31, so a template argument is used.
 	template <uint32_t bitOffset>
 	inline U16x16 bitShiftLeftImmediate(const U16x16& left) {
@@ -3214,6 +3295,56 @@
 			IMPL_SCALAR_REFERENCE_INFIX_8_LANES(left, bitOffsets, U32x8, uint32_t, >>)
 		#endif
 	}
	// Left shift every 32-bit lane of left by the same dynamic bit offset.
	//   left: the value whose lanes are shifted.
	//   bitOffset: uniform shift amount; must be in 0..31 for deterministic results.
	inline U32x8 operator<<(const U32x8& left, const uint32_t &bitOffset) {
		#if defined(USE_AVX2)
			#ifdef SAFE_POINTER_CHECKS
				if(bitOffset >= 32u) {
					throwError(U"Tried to shift ", left, U" by bit offset ", bitOffset, U", which is non-deterministic from being out of bound 0..31!\n");
				}
			#endif
			// Write the content to aligned stack memory.
			ALIGN32 __m256i values;
			left.writeAlignedUnsafe((uint32_t*)&values);
			// Cast a pointer to the data into four 64-bit elements.
			uint64_t *largeLanes = (uint64_t*)&values;
			// Shift the 256 bits as four 64-bit values.
			largeLanes[0] = largeLanes[0] << bitOffset;
			largeLanes[1] = largeLanes[1] << bitOffset;
			largeLanes[2] = largeLanes[2] << bitOffset;
			largeLanes[3] = largeLanes[3] << bitOffset;
			// Create a mask with the lowest bitOffset bits cleared in every 32-bit lane.
			U32x8 mask = U32x8(uint32_t(~0u) << bitOffset);
			// Return the shifted 64-bit elements masked to remove spill across lanes.
			return U32x8::readAlignedUnsafe((uint32_t*)&values) & mask;
		#else
			// Fall back on the per-lane shift, which performs its own bound checks.
			return left << U32x8(bitOffset);
		#endif
	}
	// Right shift every 32-bit lane of left by the same dynamic bit offset.
	//   left: the value whose lanes are shifted.
	//   bitOffset: uniform shift amount; must be in 0..31 for deterministic results.
	inline U32x8 operator>>(const U32x8& left, const uint32_t &bitOffset) {
		#if defined(USE_AVX2)
			#ifdef SAFE_POINTER_CHECKS
				if(bitOffset >= 32u) {
					throwError(U"Tried to shift ", left, U" by bit offset ", bitOffset, U", which is non-deterministic from being out of bound 0..31!\n");
				}
			#endif
			// Write the content to aligned stack memory.
			ALIGN32 __m256i values;
			left.writeAlignedUnsafe((uint32_t*)&values);
			// Cast a pointer to the data into four 64-bit elements.
			uint64_t *largeLanes = (uint64_t*)&values;
			// Shift the 256 bits as four 64-bit values.
			largeLanes[0] = largeLanes[0] >> bitOffset;
			largeLanes[1] = largeLanes[1] >> bitOffset;
			largeLanes[2] = largeLanes[2] >> bitOffset;
			largeLanes[3] = largeLanes[3] >> bitOffset;
			// Create a mask with the highest bitOffset bits cleared in every 32-bit lane.
			U32x8 mask = U32x8(uint32_t(~0u) >> bitOffset);
			// Return the shifted 64-bit elements masked to remove spill across lanes.
			return U32x8::readAlignedUnsafe((uint32_t*)&values) & mask;
		#else
			// Fall back on the per-lane shift, which performs its own bound checks.
			return left >> U32x8(bitOffset);
		#endif
	}
 	// bitOffset must be an immediate constant from 0 to 31, so a template argument is used.
 	template <uint32_t bitOffset>
 	inline U32x8 bitShiftLeftImmediate(const U32x8& left) {

+ 0 - 3
Source/test/TestCaller.DsrProj

@@ -4,9 +4,6 @@ CompilerFlag "-std=c++14"
 # Use all locally available SIMD extensions.
 CompilerFlag "-march=native"
 
-# Enable debug information
-CompilerFlag "-g"
-
 Debug = 1
 Supressed = 1
 Graphics = 0

+ 663 - 623
Source/test/tests/SimdTest.cpp

@@ -11,24 +11,7 @@
 #define ASSERT_EQUAL_SIMD(A, B) ASSERT_COMP(A, B, allLanesEqual, "==")
 #define ASSERT_NOTEQUAL_SIMD(A, B) ASSERT_COMP(A, B, !allLanesEqual, "!=")
 
-START_TEST(Simd)
-	printText("\nSIMD test is compiled using:\n");
-	#ifdef USE_SSE2
-		printText("	* SSE2\n");
-	#endif
-	#ifdef USE_SSSE3
-		printText("	* SSSE3\n");
-	#endif
-	#ifdef USE_AVX
-		printText("	* AVX\n");
-	#endif
-	#ifdef USE_AVX2
-		printText("	* AVX2\n");
-	#endif
-	#ifdef USE_NEON
-		printText("	* NEON\n");
-	#endif
-
+static void testComparisons() {
 	// F32x4 Comparisons
 	ASSERT_EQUAL_SIMD(F32x4(1.5f), F32x4(1.5f, 1.5f, 1.5f, 1.5f));
 	ASSERT_EQUAL_SIMD(F32x4(-1.5f), F32x4(-1.5f, -1.5f, -1.5f, -1.5f));
@@ -144,227 +127,9 @@ START_TEST(Simd)
 	ASSERT_NOTEQUAL_SIMD(U8x32((uint8_t)250), U8x32(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 100, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250));
 	ASSERT_NOTEQUAL_SIMD(U8x32((uint8_t)250), U8x32(0, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250));
 	ASSERT_NOTEQUAL_SIMD(U8x32((uint8_t)250), U8x32(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 0));
+}
 
-	// Macros
-	#ifdef USE_BASIC_SIMD
-		{ // Truncate float to int
-			SIMD_F32x4 f = LOAD_VECTOR_F32_SIMD(-1.01f, -0.99f, 0.99f, 1.01f);
-			SIMD_I32x4 i = LOAD_VECTOR_I32_SIMD(-1, 0, 0, 1);
-			ASSERT_EQUAL_SIMD(I32x4(F32_TO_I32_SIMD(f)), I32x4(i));
-		}
-		{ // Int to float
-			SIMD_I32x4 n = LOAD_VECTOR_I32_SIMD(123   , 456   , 789   , -1000   );
-			SIMD_F32x4 r = LOAD_VECTOR_F32_SIMD(123.0f, 456.0f, 789.0f, -1000.0f);
-			ASSERT_EQUAL_SIMD(F32x4(I32_TO_F32_SIMD(n)), F32x4(r));
-		}
-		{ // Signed-unsigned cast
-			ASSERT_EQUAL_SIMD(I32x4(REINTERPRET_U32_TO_I32_SIMD(U32x4(1, 2, 3, 4).v)), I32x4(1, 2, 3, 4));
-			ASSERT_EQUAL_SIMD(U32x4(REINTERPRET_I32_TO_U32_SIMD(I32x4(1, 2, 3, 4).v)), U32x4(1, 2, 3, 4));
-		}
-		{ // F32x4
-			SIMD_F32x4 a = LOAD_VECTOR_F32_SIMD(-1.3f, 2.5f, -3.4f, 4.7f);
-			SIMD_F32x4 b = LOAD_VECTOR_F32_SIMD(5.2f, -2.0f, 0.1f, 1.9f);
-			SIMD_F32x4 c = LOAD_SCALAR_F32_SIMD(0.5f);
-			ASSERT_EQUAL_SIMD(F32x4(ADD_F32_SIMD(a, b)), F32x4(-1.3f + 5.2f, 2.5f + -2.0f, -3.4f + 0.1f, 4.7f + 1.9f));
-			ASSERT_EQUAL_SIMD(F32x4(SUB_F32_SIMD(a, b)), F32x4(-1.3f - 5.2f, 2.5f - -2.0f, -3.4f - 0.1f, 4.7f - 1.9f));
-			ASSERT_EQUAL_SIMD(F32x4(ADD_F32_SIMD(a, c)), F32x4(-1.3f + 0.5f, 2.5f + 0.5f, -3.4f + 0.5f, 4.7f + 0.5f));
-			ASSERT_EQUAL_SIMD(F32x4(SUB_F32_SIMD(a, c)), F32x4(-1.3f - 0.5f, 2.5f - 0.5f, -3.4f - 0.5f, 4.7f - 0.5f));
-			ASSERT_EQUAL_SIMD(F32x4(MUL_F32_SIMD(a, c)), F32x4(-1.3f * 0.5f, 2.5f * 0.5f, -3.4f * 0.5f, 4.7f * 0.5f));
-			ASSERT_EQUAL_SIMD(F32x4(MIN_F32_SIMD(a, b)), F32x4(-1.3f, -2.0f, -3.4f, 1.9f));
-			ASSERT_EQUAL_SIMD(F32x4(MAX_F32_SIMD(a, b)), F32x4(5.2f, 2.5f, 0.1f, 4.7f));
-		}
-		{ // I32x4
-			SIMD_I32x4 a = LOAD_VECTOR_I32_SIMD(-1, 2, -3, 4);
-			SIMD_I32x4 b = LOAD_VECTOR_I32_SIMD(5, -2, 0, 1);
-			SIMD_I32x4 c = LOAD_SCALAR_I32_SIMD(4);
-			ASSERT_EQUAL_SIMD(I32x4(ADD_I32_SIMD(a, b)), I32x4(4, 0, -3, 5));
-			ASSERT_EQUAL_SIMD(I32x4(SUB_I32_SIMD(a, b)), I32x4(-6, 4, -3, 3));
-			ASSERT_EQUAL_SIMD(I32x4(ADD_I32_SIMD(a, c)), I32x4(3, 6, 1, 8));
-			ASSERT_EQUAL_SIMD(I32x4(SUB_I32_SIMD(a, c)), I32x4(-5, -2, -7, 0));
-		}
-		{ // U32x4
-			SIMD_U32x4 a = LOAD_VECTOR_U32_SIMD(4, 5, 6, 7);
-			SIMD_U32x4 b = LOAD_VECTOR_U32_SIMD(6, 5, 4, 3);
-			SIMD_U32x4 c = LOAD_SCALAR_U32_SIMD(10);
-			ASSERT_EQUAL_SIMD(U32x4(ADD_U32_SIMD(a, b)), U32x4(c));
-			ASSERT_EQUAL_SIMD(U32x4(ADD_U32_SIMD(a, c)), U32x4(14, 15, 16, 17));
-			ASSERT_EQUAL_SIMD(U32x4(SUB_U32_SIMD(c, b)), U32x4(a));
-		}
-		{ // U16x8
-			SIMD_U16x8 a = LOAD_VECTOR_U16_SIMD(1, 2, 3, 4, 5, 6, 7, 8);
-			SIMD_U16x8 b = LOAD_VECTOR_U16_SIMD(9, 8, 7, 6, 5, 4, 3, 2);
-			SIMD_U16x8 c = LOAD_SCALAR_U16_SIMD(10);
-			ASSERT_EQUAL_SIMD(U16x8(ADD_U16_SIMD(a, b)), U16x8(c));
-			ASSERT_EQUAL_SIMD(U16x8(ADD_U16_SIMD(a, c)), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
-			ASSERT_EQUAL_SIMD(U16x8(SUB_U16_SIMD(c, b)), U16x8(a));
-			ASSERT_EQUAL_SIMD(U16x8(MUL_U16_SIMD(a, b)), U16x8(9, 16, 21, 24, 25, 24, 21, 16));
-		}
-	#endif
-
-	// Reciprocal: 1 / x
-	ASSERT_EQUAL_SIMD(reciprocal(F32x4(0.5f, 1.0f, 2.0f, 4.0f)), F32x4(2.0f, 1.0f, 0.5f, 0.25f));
-	ASSERT_EQUAL_SIMD(reciprocal(F32x8(0.5f, 1.0f, 2.0f, 4.0f, 8.0f, 10.0f, 100.0f, 1000.0f)), F32x8(2.0f, 1.0f, 0.5f, 0.25f, 0.125f, 0.1f, 0.01f, 0.001f));
-
-	// Reciprocal square root: 1 / sqrt(x)
-	ASSERT_EQUAL_SIMD(reciprocalSquareRoot(F32x4(1.0f, 4.0f, 16.0f, 100.0f)), F32x4(1.0f, 0.5f, 0.25f, 0.1f));
-	ASSERT_EQUAL_SIMD(reciprocalSquareRoot(F32x8(1.0f, 4.0f, 16.0f, 100.0f, 400.0f, 64.0f, 25.0f, 100.0f)), F32x8(1.0f, 0.5f, 0.25f, 0.1f, 0.05f, 0.125f, 0.2f, 0.1f));
-
-	// Square root: sqrt(x)
-	ASSERT_EQUAL_SIMD(squareRoot(F32x4(1.0f, 4.0f, 9.0f, 100.0f)), F32x4(1.0f, 2.0f, 3.0f, 10.0f));
-	ASSERT_EQUAL_SIMD(squareRoot(F32x8(1.0f, 4.0f, 9.0f, 100.0f, 64.0f, 256.0f, 1024.0f, 4096.0f)), F32x8(1.0f, 2.0f, 3.0f, 10.0f, 8.0f, 16.0f, 32.0f, 64.0f));
-
-	// Minimum
-	ASSERT_EQUAL_SIMD(min(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(1.1f, 2.2f, 1.0f, -1.0f));
-	ASSERT_EQUAL_SIMD(min(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(1.1f, 2.2f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 8.8f));
-
-	// Maximum
-	ASSERT_EQUAL_SIMD(max(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(5.0f, 3.0f, 3.3f, 4.4f));
-	ASSERT_EQUAL_SIMD(max(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(5.0f, 3.0f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 10.0f));
-
-	// Clamp
-	ASSERT_EQUAL_SIMD(clamp(F32x4(-1.5f), F32x4(-35.1f, 1.0f, 2.0f, 45.7f), F32x4(1.5f)), F32x4(-1.5f, 1.0f, 1.5f, 1.5f));
-	ASSERT_EQUAL_SIMD(clampUpper(F32x4(-35.1f, 1.0f, 2.0f, 45.7f), F32x4(1.5f)), F32x4(-35.1f, 1.0f, 1.5f, 1.5f));
-	ASSERT_EQUAL_SIMD(clampLower(F32x4(-1.5f), F32x4(-35.1f, 1.0f, 2.0f, 45.7f)), F32x4(-1.5f, 1.0f, 2.0f, 45.7f));
-	ASSERT_EQUAL_SIMD(clamp(F32x8(-1.5f), F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f), F32x8(1.5f)), F32x8(-1.5f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.5f));
-	ASSERT_EQUAL_SIMD(clampUpper(F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f), F32x8(1.5f)), F32x8(-35.1f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.9f));
-	ASSERT_EQUAL_SIMD(clampLower(F32x8(-1.5f), F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f)), F32x8(-1.5f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.5f));
-
-	// F32x4 operations
-	ASSERT_EQUAL_SIMD(F32x4(1.1f, -2.2f, 3.3f, 4.0f) + F32x4(2.2f, -4.4f, 6.6f, 8.0f), F32x4(3.3f, -6.6f, 9.9f, 12.0f));
-	ASSERT_EQUAL_SIMD(F32x4(-1.5f, -0.5f, 0.5f, 1.5f) + 1.0f, F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
-	ASSERT_EQUAL_SIMD(1.0f + F32x4(-1.5f, -0.5f, 0.5f, 1.5f), F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
-	ASSERT_EQUAL_SIMD(F32x4(1.1f, 2.2f, 3.3f, 4.4f) - F32x4(0.1f, 0.2f, 0.3f, 0.4f), F32x4(1.0f, 2.0f, 3.0f, 4.0f));
-	ASSERT_EQUAL_SIMD(F32x4(1.0f, 2.0f, 3.0f, 4.0f) - 0.5f, F32x4(0.5f, 1.5f, 2.5f, 3.5f));
-	ASSERT_EQUAL_SIMD(0.5f - F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(-0.5f, -1.5f, -2.5f, -3.5f));
-	ASSERT_EQUAL_SIMD(2.0f * F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(2.0f, 4.0f, 6.0f, 8.0f));
-	ASSERT_EQUAL_SIMD(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * -2.0f, F32x4(-2.0f, 4.0f, -6.0f, 8.0f));
-	ASSERT_EQUAL_SIMD(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(1.0f, 4.0f, 9.0f, 16.0f));
-	ASSERT_EQUAL_SIMD(-F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(-1.0f, 2.0f, -3.0f, 4.0f));
-
-	// F32x8 operations
-	ASSERT_EQUAL_SIMD(F32x8(1.1f, -2.2f, 3.3f, 4.0f, 1.4f, 2.3f, 3.2f, 4.1f) + F32x8(2.2f, -4.4f, 6.6f, 8.0f, 4.11f, 3.22f, 2.33f, 1.44f), F32x8(3.3f, -6.6f, 9.9f, 12.0f, 5.51f, 5.52f, 5.53f, 5.54f));
-	ASSERT_EQUAL_SIMD(F32x8(-1.5f, -0.5f, 0.5f, 1.5f, 1000.0f, 2000.0f, -4000.0f, -1500.0f) + 1.0f, F32x8(-0.5f, 0.5f, 1.5f, 2.5f, 1001.0f, 2001.0f, -3999.0f, -1499.0f));
-	ASSERT_EQUAL_SIMD(1.0f + F32x8(-1.5f, -0.5f, 0.5f, 1.5f, 1000.0f, 2000.0f, -4000.0f, -1500.0f), F32x8(-0.5f, 0.5f, 1.5f, 2.5f, 1001.0f, 2001.0f, -3999.0f, -1499.0f));
-	ASSERT_EQUAL_SIMD(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f) - F32x8(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f), F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
-	ASSERT_EQUAL_SIMD(F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f) - 0.5f, F32x8(0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f));
-	ASSERT_EQUAL_SIMD(0.5f - F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f), F32x8(-0.5f, -1.5f, -2.5f, -3.5f, -4.5f, -5.5f, -6.5f, -7.5f));
-	ASSERT_EQUAL_SIMD(2.0f * F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f), F32x8(2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, 16.0f));
-	ASSERT_EQUAL_SIMD(F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f) * -2.0f, F32x8(-2.0f, 4.0f, -6.0f, 8.0f, -10.0f, 12.0f, -14.0f, 16.0f));
-	ASSERT_EQUAL_SIMD(F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f) * F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f), F32x8(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f));
-	ASSERT_EQUAL_SIMD(-F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f), F32x8(-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, 6.0f, -7.0f, 8.0f));
-
-	// I32x4 operations
-	ASSERT_EQUAL_SIMD(I32x4(1, 2, -3, 4) + I32x4(-2, 4, 6, 8), I32x4(-1, 6, 3, 12));
-	ASSERT_EQUAL_SIMD(I32x4(1, -2, 3, 4) - 4, I32x4(-3, -6, -1, 0));
-	ASSERT_EQUAL_SIMD(10 + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
-	ASSERT_EQUAL_SIMD(I32x4(1, 2, 3, 4) + I32x4(4), I32x4(5, 6, 7, 8));
-	ASSERT_EQUAL_SIMD(I32x4(10) + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
-	ASSERT_EQUAL_SIMD(I32x4(-3, 6, -9, 12) * I32x4(1, 2, -3, -4), I32x4(-3, 12, 27, -48));
-	ASSERT_EQUAL_SIMD(-I32x4(1, -2, 3, -4), I32x4(-1, 2, -3, 4));
-
-	// I32x8 operations
-	ASSERT_EQUAL_SIMD(I32x8(1, 2, 3, 4, 5, 6, 7, 8) - 1, I32x8(0, 1, 2, 3, 4, 5, 6, 7));
-	ASSERT_EQUAL_SIMD(1 - I32x8(1, 2, 3, 4, 5, 6, 7, 8), I32x8(0, -1, -2, -3, -4, -5, -6, -7));
-	ASSERT_EQUAL_SIMD(2 * I32x8(1, 2, 3, 4, 5, 6, 7, 8), I32x8(2, 4, 6, 8, 10, 12, 14, 16));
-	ASSERT_EQUAL_SIMD(I32x8(1, -2, 3, -4, 5, -6, 7, -8) * -2, I32x8(-2, 4, -6, 8, -10, 12, -14, 16));
-	ASSERT_EQUAL_SIMD(I32x8(1, -2, 3, -4, 5, -6, 7, -8) * I32x8(1, -2, 3, -4, 5, -6, 7, -8), I32x8(1, 4, 9, 16, 25, 36, 49, 64));
-	ASSERT_EQUAL_SIMD(-I32x8(1, -2, 3, -4, 5, -6, 7, -8), I32x8(-1, 2, -3, 4, -5, 6, -7, 8));
-
-	// U32x4 operations
-	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) + U32x4(2, 4, 6, 8), U32x4(3, 6, 9, 12));
-	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) + 4, U32x4(5, 6, 7, 8));
-	ASSERT_EQUAL_SIMD(10 + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
-	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) + U32x4(4), U32x4(5, 6, 7, 8));
-	ASSERT_EQUAL_SIMD(U32x4(10) + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
-	ASSERT_EQUAL_SIMD(U32x4(3, 6, 9, 12) - U32x4(1, 2, 3, 4), U32x4(2, 4, 6, 8));
-	ASSERT_EQUAL_SIMD(U32x4(3, 6, 9, 12) * U32x4(1, 2, 3, 4), U32x4(3, 12, 27, 48));
-
-	// U32x8 operations
-	ASSERT_EQUAL_SIMD(U32x8(1, 2, 3, 4, 5, 6, 7, 8) - 1, U32x8(0, 1, 2, 3, 4, 5, 6, 7));
-	ASSERT_EQUAL_SIMD(10 - U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(9, 8, 7, 6, 5, 4, 3, 2));
-	ASSERT_EQUAL_SIMD(2 * U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(2, 4, 6, 8, 10, 12, 14, 16));
-	ASSERT_EQUAL_SIMD(U32x8(1, 2, 3, 4, 5, 6, 7, 8) * 2, U32x8(2, 4, 6, 8, 10, 12, 14, 16));
-	ASSERT_EQUAL_SIMD(U32x8(1, 2, 3, 4, 5, 6, 7, 8) * U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(1, 4, 9, 16, 25, 36, 49, 64));
-
-	// U16x8 operations
-	ASSERT_EQUAL_SIMD(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8(2, 4, 6, 8, 10, 12, 14, 16), U16x8(3, 6, 9, 12, 15, 18, 21, 24));
-	ASSERT_EQUAL_SIMD(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + 8, U16x8(9, 10, 11, 12, 13, 14, 15, 16));
-	ASSERT_EQUAL_SIMD(10 + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
-	ASSERT_EQUAL_SIMD(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8((uint16_t)8), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
-	ASSERT_EQUAL_SIMD(U16x8((uint16_t)10) + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
-	ASSERT_EQUAL_SIMD(U16x8(3, 6, 9, 12, 15, 18, 21, 24) - U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(2, 4, 6, 8, 10, 12, 14, 16));
-
-	// U16x16 operations
-	ASSERT_EQUAL_SIMD(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32), U16x16(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48));
-	ASSERT_EQUAL_SIMD(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 8, U16x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
-	ASSERT_EQUAL_SIMD(8 + U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
-	ASSERT_EQUAL_SIMD(U16x16(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48) - U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
-	ASSERT_EQUAL_SIMD(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U16x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
-	ASSERT_EQUAL_SIMD(16 - U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
-	ASSERT_EQUAL_SIMD(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) * 2, U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
-	ASSERT_EQUAL_SIMD(2 * U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
-
-	// U8x16 operations
-	ASSERT_EQUAL_SIMD(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 2, U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18));
-	ASSERT_EQUAL_SIMD(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
-	ASSERT_EQUAL_SIMD(
-	  saturatedAddition(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 255), U8x16((uint8_t)250)),
-	  U8x16(251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255)
-	);
-	ASSERT_EQUAL_SIMD(
-	  saturatedSubtraction(
-	  U8x16(128, 128, 128, 0, 255, 255,   0, 200, 123, 80, 46, 46, 46, 255, 255, 255),
-	  U8x16(  0, 128, 255, 0, 255,   0, 255, 100,  23, 81, 45, 46, 47, 128, 127, 200)),
-	  U8x16(128,   0,   0, 0,   0, 255,   0, 100, 100,  0,  1,  0,  0, 127, 128,  55)
-	);
-
-	// U8x32 operations
-	ASSERT_EQUAL_SIMD(
-	      U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)
-	    + U8x32( 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64),
-	      U8x32( 3,  6,  9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96));
-	ASSERT_EQUAL_SIMD(
-	      U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32) + 5,
-	      U8x32( 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37));
-	ASSERT_EQUAL_SIMD(
-	  5 + U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
-	      U8x32( 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37));
-	ASSERT_EQUAL_SIMD(
-	      U8x32( 3,  6,  9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96)
-	    - U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
-	      U8x32( 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64));
-	ASSERT_EQUAL_SIMD(
-	      U8x32( 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37) - 5,
-	      U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
-	ASSERT_EQUAL_SIMD(
-	 33 - U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
-	      U8x32(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1));
-	ASSERT_EQUAL_SIMD(
-	  saturatedAddition(
-	    U8x32(  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255),
-	    U8x32((uint8_t)240)),
-	    U8x32(241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255)
-	);
-	ASSERT_EQUAL_SIMD(
-	  saturatedSubtraction(
-	    U8x32(  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255),
-	    U8x32((uint8_t)16)),
-	    U8x32(  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,239)
-	);
-
-	// Unsigned integer unpacking
-	ASSERT_EQUAL_SIMD(lowerToU32(U16x8(1,2,3,4,5,6,7,8)), U32x4(1, 2, 3, 4));
-	ASSERT_EQUAL_SIMD(higherToU32(U16x8(1,2,3,4,5,6,7,8)), U32x4(5, 6, 7, 8));
-	ASSERT_EQUAL_SIMD(lowerToU16(U8x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U16x8(1,2,3,4,5,6,7,8));
-	ASSERT_EQUAL_SIMD(higherToU16(U8x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U16x8(9,10,11,12,13,14,15,16));
-	ASSERT_EQUAL_SIMD(lowerToU32(U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U32x8(1,2,3,4,5,6,7,8));
-	ASSERT_EQUAL_SIMD(higherToU32(U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U32x8(9,10,11,12,13,14,15,16));
-	ASSERT_EQUAL_SIMD(lowerToU32(U16x16(1,2,3,4,5,6,65535,8,9,10,11,12,13,1000,15,16)), U32x8(1,2,3,4,5,6,65535,8));
-	ASSERT_EQUAL_SIMD(higherToU32(U16x16(1,2,3,4,5,6,65535,8,9,10,11,12,13,1000,15,16)), U32x8(9,10,11,12,13,1000,15,16));
-	ASSERT_EQUAL_SIMD(lowerToU16(U8x32(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)), U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255));
-	ASSERT_EQUAL_SIMD(higherToU16(U8x32(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255));
-
-	// Bitwise operations
+static void testBitMasks() {
 	ASSERT_EQUAL_SIMD(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0x0000FFFF, U32x4(0x0000FFFF, 0x00005678, 0x0000F0F0, 0x00000000));
 	ASSERT_EQUAL_SIMD(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0xFFFF0000, U32x4(0xFFFF0000, 0x12340000, 0xF0F00000, 0x00000000));
 	ASSERT_EQUAL_SIMD(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0x0000FFFF, U32x4(0xFFFFFFFF, 0x1234FFFF, 0xF0F0FFFF, 0x0000FFFF));
@@ -400,319 +165,402 @@ START_TEST(Simd)
 	    U32x8(0b11001100110000110101010010110011, 0b00101011001011101010001101111001, 0b11001010000110111010010100101100, 0b01010111010001010010101110010110, 0b10101110100110100010101011011001, 0b00101110100111010001101010110000, 0b11101010001011100010101110001111, 0b00101010111100010110010110001000)
 	  ^ U32x8(0b00101101001110100011010010100001, 0b10101110100101000011101001010011, 0b00101011100101001011000010100100, 0b11010011101001000110010110110111, 0b00111100101000101010001101001010, 0b00101110100110000111110011010101, 0b11001010010101010010110010101000, 0b11110000111100001111000011110000),
 	    U32x8(0b11100001111110010110000000010010, 0b10000101101110101001100100101010, 0b11100001100011110001010110001000, 0b10000100111000010100111000100001, 0b10010010001110001000100110010011, 0b00000000000001010110011001100101, 0b00100000011110110000011100100111, 0b11011010000000011001010101111000));
+}
 
+static void testBitShift() {
 	// Bit shift with dynamic uniform offset.
-	uint32_t offset = 0;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 0,
 	                  U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010));
-	offset = 1;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 1,
 	                  U16x8(0b1000110110010110, 0b1010101101001100, 0b1001000101100110, 0b1101001011001010, 0b1011001100101010, 0b0110011000011100, 0b0100101010010110, 0b0101101100100100));
-	offset = 2;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 2,
 	                  U16x8(0b0001101100101100, 0b0101011010011000, 0b0010001011001100, 0b1010010110010100, 0b0110011001010100, 0b1100110000111000, 0b1001010100101100, 0b1011011001001000));
-	offset = 3;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 3,
 	                  U16x8(0b0011011001011000, 0b1010110100110000, 0b0100010110011000, 0b0100101100101000, 0b1100110010101000, 0b1001100001110000, 0b0010101001011000, 0b0110110010010000));
-	offset = 4;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 4,
 	                  U16x8(0b0110110010110000, 0b0101101001100000, 0b1000101100110000, 0b1001011001010000, 0b1001100101010000, 0b0011000011100000, 0b0101010010110000, 0b1101100100100000));
-	offset = 5;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 5,
 	                  U16x8(0b1101100101100000, 0b1011010011000000, 0b0001011001100000, 0b0010110010100000, 0b0011001010100000, 0b0110000111000000, 0b1010100101100000, 0b1011001001000000));
-	offset = 6;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 6,
 	                  U16x8(0b1011001011000000, 0b0110100110000000, 0b0010110011000000, 0b0101100101000000, 0b0110010101000000, 0b1100001110000000, 0b0101001011000000, 0b0110010010000000));
-	offset = 7;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 7,
 	                  U16x8(0b0110010110000000, 0b1101001100000000, 0b0101100110000000, 0b1011001010000000, 0b1100101010000000, 0b1000011100000000, 0b1010010110000000, 0b1100100100000000));
-	offset = 8;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 8,
 	                  U16x8(0b1100101100000000, 0b1010011000000000, 0b1011001100000000, 0b0110010100000000, 0b1001010100000000, 0b0000111000000000, 0b0100101100000000, 0b1001001000000000));
-	offset = 9;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 9,
 	                  U16x8(0b1001011000000000, 0b0100110000000000, 0b0110011000000000, 0b1100101000000000, 0b0010101000000000, 0b0001110000000000, 0b1001011000000000, 0b0010010000000000));
-	offset = 10;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 10,
 	                  U16x8(0b0010110000000000, 0b1001100000000000, 0b1100110000000000, 0b1001010000000000, 0b0101010000000000, 0b0011100000000000, 0b0010110000000000, 0b0100100000000000));
-	offset = 11;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 11,
 	                  U16x8(0b0101100000000000, 0b0011000000000000, 0b1001100000000000, 0b0010100000000000, 0b1010100000000000, 0b0111000000000000, 0b0101100000000000, 0b1001000000000000));
-	offset = 12;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 12,
 	                  U16x8(0b1011000000000000, 0b0110000000000000, 0b0011000000000000, 0b0101000000000000, 0b0101000000000000, 0b1110000000000000, 0b1011000000000000, 0b0010000000000000));
-	offset = 13;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 13,
 	                  U16x8(0b0110000000000000, 0b1100000000000000, 0b0110000000000000, 0b1010000000000000, 0b1010000000000000, 0b1100000000000000, 0b0110000000000000, 0b0100000000000000));
-	offset = 14;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 14,
 	                  U16x8(0b1100000000000000, 0b1000000000000000, 0b1100000000000000, 0b0100000000000000, 0b0100000000000000, 0b1000000000000000, 0b1100000000000000, 0b1000000000000000));
-	offset = 15;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 15,
 	                  U16x8(0b1000000000000000, 0b0000000000000000, 0b1000000000000000, 0b1000000000000000, 0b1000000000000000, 0b0000000000000000, 0b1000000000000000, 0b0000000000000000));
-	offset = 16;
-	ASSERT_CRASH(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << offset, U"Tried to shift ");
-	offset = 0;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_CRASH(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) << 16, U"Tried to shift ");
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 0,
 	                  U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010));
-	offset = 1;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 1,
 	                  U16x8(0b0110001101100101, 0b0010101011010011, 0b0110010001011001, 0b0011010010110010, 0b0010110011001010, 0b0001100110000111, 0b0101001010100101, 0b0001011011001001));
-	offset = 2;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 2,
 	                  U16x8(0b0011000110110010, 0b0001010101101001, 0b0011001000101100, 0b0001101001011001, 0b0001011001100101, 0b0000110011000011, 0b0010100101010010, 0b0000101101100100));
-	offset = 3;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 3,
 	                  U16x8(0b0001100011011001, 0b0000101010110100, 0b0001100100010110, 0b0000110100101100, 0b0000101100110010, 0b0000011001100001, 0b0001010010101001, 0b0000010110110010));
-	offset = 4;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 4,
 	                  U16x8(0b0000110001101100, 0b0000010101011010, 0b0000110010001011, 0b0000011010010110, 0b0000010110011001, 0b0000001100110000, 0b0000101001010100, 0b0000001011011001));
-	offset = 5;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 5,
 	                  U16x8(0b0000011000110110, 0b0000001010101101, 0b0000011001000101, 0b0000001101001011, 0b0000001011001100, 0b0000000110011000, 0b0000010100101010, 0b0000000101101100));
-	offset = 6;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 6,
 	                  U16x8(0b0000001100011011, 0b0000000101010110, 0b0000001100100010, 0b0000000110100101, 0b0000000101100110, 0b0000000011001100, 0b0000001010010101, 0b0000000010110110));
-	offset = 7;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 7,
 	                  U16x8(0b0000000110001101, 0b0000000010101011, 0b0000000110010001, 0b0000000011010010, 0b0000000010110011, 0b0000000001100110, 0b0000000101001010, 0b0000000001011011));
-	offset = 8;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 8,
 	                  U16x8(0b0000000011000110, 0b0000000001010101, 0b0000000011001000, 0b0000000001101001, 0b0000000001011001, 0b0000000000110011, 0b0000000010100101, 0b0000000000101101));
-	offset = 9;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 9,
 	                  U16x8(0b0000000001100011, 0b0000000000101010, 0b0000000001100100, 0b0000000000110100, 0b0000000000101100, 0b0000000000011001, 0b0000000001010010, 0b0000000000010110));
-	offset = 10;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 10,
 	                  U16x8(0b0000000000110001, 0b0000000000010101, 0b0000000000110010, 0b0000000000011010, 0b0000000000010110, 0b0000000000001100, 0b0000000000101001, 0b0000000000001011));
-	offset = 11;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 11,
 	                  U16x8(0b0000000000011000, 0b0000000000001010, 0b0000000000011001, 0b0000000000001101, 0b0000000000001011, 0b0000000000000110, 0b0000000000010100, 0b0000000000000101));
-	offset = 12;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 12,
 	                  U16x8(0b0000000000001100, 0b0000000000000101, 0b0000000000001100, 0b0000000000000110, 0b0000000000000101, 0b0000000000000011, 0b0000000000001010, 0b0000000000000010));
-	offset = 13;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 13,
 	                  U16x8(0b0000000000000110, 0b0000000000000010, 0b0000000000000110, 0b0000000000000011, 0b0000000000000010, 0b0000000000000001, 0b0000000000000101, 0b0000000000000001));
-	offset = 14;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 14,
 	                  U16x8(0b0000000000000011, 0b0000000000000001, 0b0000000000000011, 0b0000000000000001, 0b0000000000000001, 0b0000000000000000, 0b0000000000000010, 0b0000000000000000));
-	offset = 15;
-	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 15,
 	                  U16x8(0b0000000000000001, 0b0000000000000000, 0b0000000000000001, 0b0000000000000000, 0b0000000000000000, 0b0000000000000000, 0b0000000000000001, 0b0000000000000000));
-	offset = 16;
-	ASSERT_CRASH(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> offset, U"Tried to shift ");
-
-	offset = 0;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_CRASH(U16x8(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010) >> 16, U"Tried to shift ");
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 0,
 	                  U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010));
-	offset = 1;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 1,
 	                  U32x4(0b10001101100101101010101101001100, 0b10010001011001101101001011001010, 0b10110011001010100110011000011100, 0b01001010100101100101101100100100));
-	offset = 2;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 2,
 	                  U32x4(0b00011011001011010101011010011000, 0b00100010110011011010010110010100, 0b01100110010101001100110000111000, 0b10010101001011001011011001001000));
-	offset = 3;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 3,
 	                  U32x4(0b00110110010110101010110100110000, 0b01000101100110110100101100101000, 0b11001100101010011001100001110000, 0b00101010010110010110110010010000));
-	offset = 4;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 4,
 	                  U32x4(0b01101100101101010101101001100000, 0b10001011001101101001011001010000, 0b10011001010100110011000011100000, 0b01010100101100101101100100100000));
-	offset = 5;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 5,
 	                  U32x4(0b11011001011010101011010011000000, 0b00010110011011010010110010100000, 0b00110010101001100110000111000000, 0b10101001011001011011001001000000));
-	offset = 6;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 6,
 	                  U32x4(0b10110010110101010110100110000000, 0b00101100110110100101100101000000, 0b01100101010011001100001110000000, 0b01010010110010110110010010000000));
-	offset = 7;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 7,
 	                  U32x4(0b01100101101010101101001100000000, 0b01011001101101001011001010000000, 0b11001010100110011000011100000000, 0b10100101100101101100100100000000));
-	offset = 8;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 8,
 	                  U32x4(0b11001011010101011010011000000000, 0b10110011011010010110010100000000, 0b10010101001100110000111000000000, 0b01001011001011011001001000000000));
-	offset = 9;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 9,
 	                  U32x4(0b10010110101010110100110000000000, 0b01100110110100101100101000000000, 0b00101010011001100001110000000000, 0b10010110010110110010010000000000));
-	offset = 10;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 10,
 	                  U32x4(0b00101101010101101001100000000000, 0b11001101101001011001010000000000, 0b01010100110011000011100000000000, 0b00101100101101100100100000000000));
-	offset = 11;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 11,
 	                  U32x4(0b01011010101011010011000000000000, 0b10011011010010110010100000000000, 0b10101001100110000111000000000000, 0b01011001011011001001000000000000));
-	offset = 12;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 12,
 	                  U32x4(0b10110101010110100110000000000000, 0b00110110100101100101000000000000, 0b01010011001100001110000000000000, 0b10110010110110010010000000000000));
-	offset = 13;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 13,
 	                  U32x4(0b01101010101101001100000000000000, 0b01101101001011001010000000000000, 0b10100110011000011100000000000000, 0b01100101101100100100000000000000));
-	offset = 14;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 14,
 	                  U32x4(0b11010101011010011000000000000000, 0b11011010010110010100000000000000, 0b01001100110000111000000000000000, 0b11001011011001001000000000000000));
-	offset = 15;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 15,
 	                  U32x4(0b10101010110100110000000000000000, 0b10110100101100101000000000000000, 0b10011001100001110000000000000000, 0b10010110110010010000000000000000));
-	offset = 16;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 16,
 	                  U32x4(0b01010101101001100000000000000000, 0b01101001011001010000000000000000, 0b00110011000011100000000000000000, 0b00101101100100100000000000000000));
-	offset = 17;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 17,
 	                  U32x4(0b10101011010011000000000000000000, 0b11010010110010100000000000000000, 0b01100110000111000000000000000000, 0b01011011001001000000000000000000));
-	offset = 18;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 18,
 	                  U32x4(0b01010110100110000000000000000000, 0b10100101100101000000000000000000, 0b11001100001110000000000000000000, 0b10110110010010000000000000000000));
-	offset = 19;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 19,
 	                  U32x4(0b10101101001100000000000000000000, 0b01001011001010000000000000000000, 0b10011000011100000000000000000000, 0b01101100100100000000000000000000));
-	offset = 20;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 20,
 	                  U32x4(0b01011010011000000000000000000000, 0b10010110010100000000000000000000, 0b00110000111000000000000000000000, 0b11011001001000000000000000000000));
-	offset = 21;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 21,
 	                  U32x4(0b10110100110000000000000000000000, 0b00101100101000000000000000000000, 0b01100001110000000000000000000000, 0b10110010010000000000000000000000));
-	offset = 22;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 22,
 	                  U32x4(0b01101001100000000000000000000000, 0b01011001010000000000000000000000, 0b11000011100000000000000000000000, 0b01100100100000000000000000000000));
-	offset = 23;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 23,
 	                  U32x4(0b11010011000000000000000000000000, 0b10110010100000000000000000000000, 0b10000111000000000000000000000000, 0b11001001000000000000000000000000));
-	offset = 24;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 24,
 	                  U32x4(0b10100110000000000000000000000000, 0b01100101000000000000000000000000, 0b00001110000000000000000000000000, 0b10010010000000000000000000000000));
-	offset = 25;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 25,
 	                  U32x4(0b01001100000000000000000000000000, 0b11001010000000000000000000000000, 0b00011100000000000000000000000000, 0b00100100000000000000000000000000));
-	offset = 26;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 26,
 	                  U32x4(0b10011000000000000000000000000000, 0b10010100000000000000000000000000, 0b00111000000000000000000000000000, 0b01001000000000000000000000000000));
-	offset = 27;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 27,
 	                  U32x4(0b00110000000000000000000000000000, 0b00101000000000000000000000000000, 0b01110000000000000000000000000000, 0b10010000000000000000000000000000));
-	offset = 28;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 28,
 	                  U32x4(0b01100000000000000000000000000000, 0b01010000000000000000000000000000, 0b11100000000000000000000000000000, 0b00100000000000000000000000000000));
-	offset = 29;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 29,
 	                  U32x4(0b11000000000000000000000000000000, 0b10100000000000000000000000000000, 0b11000000000000000000000000000000, 0b01000000000000000000000000000000));
-	offset = 30;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 30,
 	                  U32x4(0b10000000000000000000000000000000, 0b01000000000000000000000000000000, 0b10000000000000000000000000000000, 0b10000000000000000000000000000000));
-	offset = 31;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 31,
 	                  U32x4(0b00000000000000000000000000000000, 0b10000000000000000000000000000000, 0b00000000000000000000000000000000, 0b00000000000000000000000000000000));
-	offset = 32;
-	ASSERT_CRASH(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << offset, U"Tried to shift ");
-	offset = 0;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_CRASH(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) << 32, U"Tried to shift ");
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 0,
 	                  U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010));
-	offset = 1;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 1,
 	                  U32x4(0b01100011011001011010101011010011, 0b01100100010110011011010010110010, 0b00101100110010101001100110000111, 0b01010010101001011001011011001001));
-	offset = 2;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 2,
 	                  U32x4(0b00110001101100101101010101101001, 0b00110010001011001101101001011001, 0b00010110011001010100110011000011, 0b00101001010100101100101101100100));
-	offset = 3;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 3,
 	                  U32x4(0b00011000110110010110101010110100, 0b00011001000101100110110100101100, 0b00001011001100101010011001100001, 0b00010100101010010110010110110010));
-	offset = 4;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 4,
 	                  U32x4(0b00001100011011001011010101011010, 0b00001100100010110011011010010110, 0b00000101100110010101001100110000, 0b00001010010101001011001011011001));
-	offset = 5;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 5,
 	                  U32x4(0b00000110001101100101101010101101, 0b00000110010001011001101101001011, 0b00000010110011001010100110011000, 0b00000101001010100101100101101100));
-	offset = 6;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 6,
 	                  U32x4(0b00000011000110110010110101010110, 0b00000011001000101100110110100101, 0b00000001011001100101010011001100, 0b00000010100101010010110010110110));
-	offset = 7;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 7,
 	                  U32x4(0b00000001100011011001011010101011, 0b00000001100100010110011011010010, 0b00000000101100110010101001100110, 0b00000001010010101001011001011011));
-	offset = 8;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 8,
 	                  U32x4(0b00000000110001101100101101010101, 0b00000000110010001011001101101001, 0b00000000010110011001010100110011, 0b00000000101001010100101100101101));
-	offset = 9;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 9,
 	                  U32x4(0b00000000011000110110010110101010, 0b00000000011001000101100110110100, 0b00000000001011001100101010011001, 0b00000000010100101010010110010110));
-	offset = 10;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 10,
 	                  U32x4(0b00000000001100011011001011010101, 0b00000000001100100010110011011010, 0b00000000000101100110010101001100, 0b00000000001010010101001011001011));
-	offset = 11;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 11,
 	                  U32x4(0b00000000000110001101100101101010, 0b00000000000110010001011001101101, 0b00000000000010110011001010100110, 0b00000000000101001010100101100101));
-	offset = 12;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 12,
 	                  U32x4(0b00000000000011000110110010110101, 0b00000000000011001000101100110110, 0b00000000000001011001100101010011, 0b00000000000010100101010010110010));
-	offset = 13;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 13,
 	                  U32x4(0b00000000000001100011011001011010, 0b00000000000001100100010110011011, 0b00000000000000101100110010101001, 0b00000000000001010010101001011001));
-	offset = 14;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 14,
 	                  U32x4(0b00000000000000110001101100101101, 0b00000000000000110010001011001101, 0b00000000000000010110011001010100, 0b00000000000000101001010100101100));
-	offset = 15;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 15,
 	                  U32x4(0b00000000000000011000110110010110, 0b00000000000000011001000101100110, 0b00000000000000001011001100101010, 0b00000000000000010100101010010110));
-	offset = 16;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 16,
 	                  U32x4(0b00000000000000001100011011001011, 0b00000000000000001100100010110011, 0b00000000000000000101100110010101, 0b00000000000000001010010101001011));
-	offset = 17;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 17,
 	                  U32x4(0b00000000000000000110001101100101, 0b00000000000000000110010001011001, 0b00000000000000000010110011001010, 0b00000000000000000101001010100101));
-	offset = 18;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 18,
 	                  U32x4(0b00000000000000000011000110110010, 0b00000000000000000011001000101100, 0b00000000000000000001011001100101, 0b00000000000000000010100101010010));
-	offset = 19;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 19,
 	                  U32x4(0b00000000000000000001100011011001, 0b00000000000000000001100100010110, 0b00000000000000000000101100110010, 0b00000000000000000001010010101001));
-	offset = 20;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 20,
 	                  U32x4(0b00000000000000000000110001101100, 0b00000000000000000000110010001011, 0b00000000000000000000010110011001, 0b00000000000000000000101001010100));
-	offset = 21;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 21,
 	                  U32x4(0b00000000000000000000011000110110, 0b00000000000000000000011001000101, 0b00000000000000000000001011001100, 0b00000000000000000000010100101010));
-	offset = 22;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 22,
 	                  U32x4(0b00000000000000000000001100011011, 0b00000000000000000000001100100010, 0b00000000000000000000000101100110, 0b00000000000000000000001010010101));
-	offset = 23;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 23,
 	                  U32x4(0b00000000000000000000000110001101, 0b00000000000000000000000110010001, 0b00000000000000000000000010110011, 0b00000000000000000000000101001010));
-	offset = 24;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 24,
 	                  U32x4(0b00000000000000000000000011000110, 0b00000000000000000000000011001000, 0b00000000000000000000000001011001, 0b00000000000000000000000010100101));
-	offset = 25;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 25,
 	                  U32x4(0b00000000000000000000000001100011, 0b00000000000000000000000001100100, 0b00000000000000000000000000101100, 0b00000000000000000000000001010010));
-	offset = 26;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 26,
 	                  U32x4(0b00000000000000000000000000110001, 0b00000000000000000000000000110010, 0b00000000000000000000000000010110, 0b00000000000000000000000000101001));
-	offset = 27;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 27,
 	                  U32x4(0b00000000000000000000000000011000, 0b00000000000000000000000000011001, 0b00000000000000000000000000001011, 0b00000000000000000000000000010100));
-	offset = 28;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 28,
 	                  U32x4(0b00000000000000000000000000001100, 0b00000000000000000000000000001100, 0b00000000000000000000000000000101, 0b00000000000000000000000000001010));
-	offset = 29;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 29,
 	                  U32x4(0b00000000000000000000000000000110, 0b00000000000000000000000000000110, 0b00000000000000000000000000000010, 0b00000000000000000000000000000101));
-	offset = 30;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 30,
 	                  U32x4(0b00000000000000000000000000000011, 0b00000000000000000000000000000011, 0b00000000000000000000000000000001, 0b00000000000000000000000000000010));
-	offset = 31;
-	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset,
+	ASSERT_EQUAL_SIMD(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 31,
 	                  U32x4(0b00000000000000000000000000000001, 0b00000000000000000000000000000001, 0b00000000000000000000000000000000, 0b00000000000000000000000000000001));
-	offset = 32;
-	ASSERT_CRASH(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> offset, U"Tried to shift ");
-
-	offset = 1;
-	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) << offset, U32x4(2, 4, 6, 8));
-	offset = 2;
-	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) << offset, U32x4(4, 8, 12, 16));
-	offset = 3;
-	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) << offset, U32x4(8, 16, 24, 32));
-	offset = 4;
-	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) << offset, U32x4(16, 32, 48, 64));
-	offset = 1;
-	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) >> offset, U32x4(0, 1, 1, 2));
-	ASSERT_EQUAL_SIMD(U32x4(2, 4, 6, 8) >> offset, U32x4(1, 2, 3, 4));
-	offset = 2;
-	ASSERT_EQUAL_SIMD(U32x4(2, 4, 6, 8) >> offset, U32x4(0, 1, 1, 2));
+	ASSERT_CRASH(U32x4(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010) >> 32, U"Tried to shift ");
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 0,
+	                  U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 1,
+	                  U16x16(0b1000110110010110, 0b1010101101001100, 0b1001000101100110, 0b1101001011001010, 0b1011001100101010, 0b0110011000011100, 0b0100101010010110, 0b0101101100100100, 0b1110010110100100, 0b0001011010100110, 0b1011001000111010, 0b0101011101001010, 0b0111010100101000, 0b1101001010011000, 0b1010001110001010, 0b0110101010010100));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 2,
+	                  U16x16(0b0001101100101100, 0b0101011010011000, 0b0010001011001100, 0b1010010110010100, 0b0110011001010100, 0b1100110000111000, 0b1001010100101100, 0b1011011001001000, 0b1100101101001000, 0b0010110101001100, 0b0110010001110100, 0b1010111010010100, 0b1110101001010000, 0b1010010100110000, 0b0100011100010100, 0b1101010100101000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 3,
+	                  U16x16(0b0011011001011000, 0b1010110100110000, 0b0100010110011000, 0b0100101100101000, 0b1100110010101000, 0b1001100001110000, 0b0010101001011000, 0b0110110010010000, 0b1001011010010000, 0b0101101010011000, 0b1100100011101000, 0b0101110100101000, 0b1101010010100000, 0b0100101001100000, 0b1000111000101000, 0b1010101001010000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 4,
+	                  U16x16(0b0110110010110000, 0b0101101001100000, 0b1000101100110000, 0b1001011001010000, 0b1001100101010000, 0b0011000011100000, 0b0101010010110000, 0b1101100100100000, 0b0010110100100000, 0b1011010100110000, 0b1001000111010000, 0b1011101001010000, 0b1010100101000000, 0b1001010011000000, 0b0001110001010000, 0b0101010010100000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 5,
+	                  U16x16(0b1101100101100000, 0b1011010011000000, 0b0001011001100000, 0b0010110010100000, 0b0011001010100000, 0b0110000111000000, 0b1010100101100000, 0b1011001001000000, 0b0101101001000000, 0b0110101001100000, 0b0010001110100000, 0b0111010010100000, 0b0101001010000000, 0b0010100110000000, 0b0011100010100000, 0b1010100101000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 6,
+	                  U16x16(0b1011001011000000, 0b0110100110000000, 0b0010110011000000, 0b0101100101000000, 0b0110010101000000, 0b1100001110000000, 0b0101001011000000, 0b0110010010000000, 0b1011010010000000, 0b1101010011000000, 0b0100011101000000, 0b1110100101000000, 0b1010010100000000, 0b0101001100000000, 0b0111000101000000, 0b0101001010000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 7,
+	                  U16x16(0b0110010110000000, 0b1101001100000000, 0b0101100110000000, 0b1011001010000000, 0b1100101010000000, 0b1000011100000000, 0b1010010110000000, 0b1100100100000000, 0b0110100100000000, 0b1010100110000000, 0b1000111010000000, 0b1101001010000000, 0b0100101000000000, 0b1010011000000000, 0b1110001010000000, 0b1010010100000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 8,
+	                  U16x16(0b1100101100000000, 0b1010011000000000, 0b1011001100000000, 0b0110010100000000, 0b1001010100000000, 0b0000111000000000, 0b0100101100000000, 0b1001001000000000, 0b1101001000000000, 0b0101001100000000, 0b0001110100000000, 0b1010010100000000, 0b1001010000000000, 0b0100110000000000, 0b1100010100000000, 0b0100101000000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 9,
+	                  U16x16(0b1001011000000000, 0b0100110000000000, 0b0110011000000000, 0b1100101000000000, 0b0010101000000000, 0b0001110000000000, 0b1001011000000000, 0b0010010000000000, 0b1010010000000000, 0b1010011000000000, 0b0011101000000000, 0b0100101000000000, 0b0010100000000000, 0b1001100000000000, 0b1000101000000000, 0b1001010000000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 10,
+	                  U16x16(0b0010110000000000, 0b1001100000000000, 0b1100110000000000, 0b1001010000000000, 0b0101010000000000, 0b0011100000000000, 0b0010110000000000, 0b0100100000000000, 0b0100100000000000, 0b0100110000000000, 0b0111010000000000, 0b1001010000000000, 0b0101000000000000, 0b0011000000000000, 0b0001010000000000, 0b0010100000000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 11,
+	                  U16x16(0b0101100000000000, 0b0011000000000000, 0b1001100000000000, 0b0010100000000000, 0b1010100000000000, 0b0111000000000000, 0b0101100000000000, 0b1001000000000000, 0b1001000000000000, 0b1001100000000000, 0b1110100000000000, 0b0010100000000000, 0b1010000000000000, 0b0110000000000000, 0b0010100000000000, 0b0101000000000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 12,
+	                  U16x16(0b1011000000000000, 0b0110000000000000, 0b0011000000000000, 0b0101000000000000, 0b0101000000000000, 0b1110000000000000, 0b1011000000000000, 0b0010000000000000, 0b0010000000000000, 0b0011000000000000, 0b1101000000000000, 0b0101000000000000, 0b0100000000000000, 0b1100000000000000, 0b0101000000000000, 0b1010000000000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 13,
+	                  U16x16(0b0110000000000000, 0b1100000000000000, 0b0110000000000000, 0b1010000000000000, 0b1010000000000000, 0b1100000000000000, 0b0110000000000000, 0b0100000000000000, 0b0100000000000000, 0b0110000000000000, 0b1010000000000000, 0b1010000000000000, 0b1000000000000000, 0b1000000000000000, 0b1010000000000000, 0b0100000000000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 14,
+	                  U16x16(0b1100000000000000, 0b1000000000000000, 0b1100000000000000, 0b0100000000000000, 0b0100000000000000, 0b1000000000000000, 0b1100000000000000, 0b1000000000000000, 0b1000000000000000, 0b1100000000000000, 0b0100000000000000, 0b0100000000000000, 0b0000000000000000, 0b0000000000000000, 0b0100000000000000, 0b1000000000000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 15,
+	                  U16x16(0b1000000000000000, 0b0000000000000000, 0b1000000000000000, 0b1000000000000000, 0b1000000000000000, 0b0000000000000000, 0b1000000000000000, 0b0000000000000000, 0b0000000000000000, 0b1000000000000000, 0b1000000000000000, 0b1000000000000000, 0b0000000000000000, 0b0000000000000000, 0b1000000000000000, 0b0000000000000000));
+	ASSERT_CRASH(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) << 16, U"Tried to shift ");
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 0,
+	                  U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 1,
+	                  U16x16(0b0110001101100101, 0b0010101011010011, 0b0110010001011001, 0b0011010010110010, 0b0010110011001010, 0b0001100110000111, 0b0101001010100101, 0b0001011011001001, 0b0011100101101001, 0b0100010110101001, 0b0010110010001110, 0b0001010111010010, 0b0001110101001010, 0b0011010010100110, 0b0110100011100010, 0b0001101010100101));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 2,
+	                  U16x16(0b0011000110110010, 0b0001010101101001, 0b0011001000101100, 0b0001101001011001, 0b0001011001100101, 0b0000110011000011, 0b0010100101010010, 0b0000101101100100, 0b0001110010110100, 0b0010001011010100, 0b0001011001000111, 0b0000101011101001, 0b0000111010100101, 0b0001101001010011, 0b0011010001110001, 0b0000110101010010));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 3,
+	                  U16x16(0b0001100011011001, 0b0000101010110100, 0b0001100100010110, 0b0000110100101100, 0b0000101100110010, 0b0000011001100001, 0b0001010010101001, 0b0000010110110010, 0b0000111001011010, 0b0001000101101010, 0b0000101100100011, 0b0000010101110100, 0b0000011101010010, 0b0000110100101001, 0b0001101000111000, 0b0000011010101001));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 4,
+	                  U16x16(0b0000110001101100, 0b0000010101011010, 0b0000110010001011, 0b0000011010010110, 0b0000010110011001, 0b0000001100110000, 0b0000101001010100, 0b0000001011011001, 0b0000011100101101, 0b0000100010110101, 0b0000010110010001, 0b0000001010111010, 0b0000001110101001, 0b0000011010010100, 0b0000110100011100, 0b0000001101010100));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 5,
+	                  U16x16(0b0000011000110110, 0b0000001010101101, 0b0000011001000101, 0b0000001101001011, 0b0000001011001100, 0b0000000110011000, 0b0000010100101010, 0b0000000101101100, 0b0000001110010110, 0b0000010001011010, 0b0000001011001000, 0b0000000101011101, 0b0000000111010100, 0b0000001101001010, 0b0000011010001110, 0b0000000110101010));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 6,
+	                  U16x16(0b0000001100011011, 0b0000000101010110, 0b0000001100100010, 0b0000000110100101, 0b0000000101100110, 0b0000000011001100, 0b0000001010010101, 0b0000000010110110, 0b0000000111001011, 0b0000001000101101, 0b0000000101100100, 0b0000000010101110, 0b0000000011101010, 0b0000000110100101, 0b0000001101000111, 0b0000000011010101));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 7,
+	                  U16x16(0b0000000110001101, 0b0000000010101011, 0b0000000110010001, 0b0000000011010010, 0b0000000010110011, 0b0000000001100110, 0b0000000101001010, 0b0000000001011011, 0b0000000011100101, 0b0000000100010110, 0b0000000010110010, 0b0000000001010111, 0b0000000001110101, 0b0000000011010010, 0b0000000110100011, 0b0000000001101010));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 8,
+	                  U16x16(0b0000000011000110, 0b0000000001010101, 0b0000000011001000, 0b0000000001101001, 0b0000000001011001, 0b0000000000110011, 0b0000000010100101, 0b0000000000101101, 0b0000000001110010, 0b0000000010001011, 0b0000000001011001, 0b0000000000101011, 0b0000000000111010, 0b0000000001101001, 0b0000000011010001, 0b0000000000110101));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 9,
+	                  U16x16(0b0000000001100011, 0b0000000000101010, 0b0000000001100100, 0b0000000000110100, 0b0000000000101100, 0b0000000000011001, 0b0000000001010010, 0b0000000000010110, 0b0000000000111001, 0b0000000001000101, 0b0000000000101100, 0b0000000000010101, 0b0000000000011101, 0b0000000000110100, 0b0000000001101000, 0b0000000000011010));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 10,
+	                  U16x16(0b0000000000110001, 0b0000000000010101, 0b0000000000110010, 0b0000000000011010, 0b0000000000010110, 0b0000000000001100, 0b0000000000101001, 0b0000000000001011, 0b0000000000011100, 0b0000000000100010, 0b0000000000010110, 0b0000000000001010, 0b0000000000001110, 0b0000000000011010, 0b0000000000110100, 0b0000000000001101));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 11,
+	                  U16x16(0b0000000000011000, 0b0000000000001010, 0b0000000000011001, 0b0000000000001101, 0b0000000000001011, 0b0000000000000110, 0b0000000000010100, 0b0000000000000101, 0b0000000000001110, 0b0000000000010001, 0b0000000000001011, 0b0000000000000101, 0b0000000000000111, 0b0000000000001101, 0b0000000000011010, 0b0000000000000110));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 12,
+	                  U16x16(0b0000000000001100, 0b0000000000000101, 0b0000000000001100, 0b0000000000000110, 0b0000000000000101, 0b0000000000000011, 0b0000000000001010, 0b0000000000000010, 0b0000000000000111, 0b0000000000001000, 0b0000000000000101, 0b0000000000000010, 0b0000000000000011, 0b0000000000000110, 0b0000000000001101, 0b0000000000000011));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 13,
+	                  U16x16(0b0000000000000110, 0b0000000000000010, 0b0000000000000110, 0b0000000000000011, 0b0000000000000010, 0b0000000000000001, 0b0000000000000101, 0b0000000000000001, 0b0000000000000011, 0b0000000000000100, 0b0000000000000010, 0b0000000000000001, 0b0000000000000001, 0b0000000000000011, 0b0000000000000110, 0b0000000000000001));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 14,
+	                  U16x16(0b0000000000000011, 0b0000000000000001, 0b0000000000000011, 0b0000000000000001, 0b0000000000000001, 0b0000000000000000, 0b0000000000000010, 0b0000000000000000, 0b0000000000000001, 0b0000000000000010, 0b0000000000000001, 0b0000000000000000, 0b0000000000000000, 0b0000000000000001, 0b0000000000000011, 0b0000000000000000));
+	ASSERT_EQUAL_SIMD(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 15,
+	                  U16x16(0b0000000000000001, 0b0000000000000000, 0b0000000000000001, 0b0000000000000000, 0b0000000000000000, 0b0000000000000000, 0b0000000000000001, 0b0000000000000000, 0b0000000000000000, 0b0000000000000001, 0b0000000000000000, 0b0000000000000000, 0b0000000000000000, 0b0000000000000000, 0b0000000000000001, 0b0000000000000000));
+	ASSERT_CRASH(U16x16(0b1100011011001011, 0b0101010110100110, 0b1100100010110011, 0b0110100101100101, 0b0101100110010101, 0b0011001100001110, 0b1010010101001011, 0b0010110110010010, 0b0111001011010010, 0b1000101101010011, 0b0101100100011101, 0b0010101110100101, 0b0011101010010100, 0b0110100101001100, 0b1101000111000101, 0b0011010101001010) >> 16, U"Tried to shift ");
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 0,
+	                  U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 1,
+	                  U32x8(0b10001101100101101010101101001100, 0b10010001011001101101001011001010, 0b10110011001010100110011000011100, 0b01001010100101100101101100100100, 0b10110100011011010101001011010110, 0b10110101011011001011010110101010, 0b10100010101010010010010010110100, 0b00101011010101011001010101010110));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 2,
+	                  U32x8(0b00011011001011010101011010011000, 0b00100010110011011010010110010100, 0b01100110010101001100110000111000, 0b10010101001011001011011001001000, 0b01101000110110101010010110101100, 0b01101010110110010110101101010100, 0b01000101010100100100100101101000, 0b01010110101010110010101010101100));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 3,
+	                  U32x8(0b00110110010110101010110100110000, 0b01000101100110110100101100101000, 0b11001100101010011001100001110000, 0b00101010010110010110110010010000, 0b11010001101101010100101101011000, 0b11010101101100101101011010101000, 0b10001010101001001001001011010000, 0b10101101010101100101010101011000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 4,
+	                  U32x8(0b01101100101101010101101001100000, 0b10001011001101101001011001010000, 0b10011001010100110011000011100000, 0b01010100101100101101100100100000, 0b10100011011010101001011010110000, 0b10101011011001011010110101010000, 0b00010101010010010010010110100000, 0b01011010101011001010101010110000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 5,
+	                  U32x8(0b11011001011010101011010011000000, 0b00010110011011010010110010100000, 0b00110010101001100110000111000000, 0b10101001011001011011001001000000, 0b01000110110101010010110101100000, 0b01010110110010110101101010100000, 0b00101010100100100100101101000000, 0b10110101010110010101010101100000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 6,
+	                  U32x8(0b10110010110101010110100110000000, 0b00101100110110100101100101000000, 0b01100101010011001100001110000000, 0b01010010110010110110010010000000, 0b10001101101010100101101011000000, 0b10101101100101101011010101000000, 0b01010101001001001001011010000000, 0b01101010101100101010101011000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 7,
+	                  U32x8(0b01100101101010101101001100000000, 0b01011001101101001011001010000000, 0b11001010100110011000011100000000, 0b10100101100101101100100100000000, 0b00011011010101001011010110000000, 0b01011011001011010110101010000000, 0b10101010010010010010110100000000, 0b11010101011001010101010110000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 8,
+	                  U32x8(0b11001011010101011010011000000000, 0b10110011011010010110010100000000, 0b10010101001100110000111000000000, 0b01001011001011011001001000000000, 0b00110110101010010110101100000000, 0b10110110010110101101010100000000, 0b01010100100100100101101000000000, 0b10101010110010101010101100000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 9,
+	                  U32x8(0b10010110101010110100110000000000, 0b01100110110100101100101000000000, 0b00101010011001100001110000000000, 0b10010110010110110010010000000000, 0b01101101010100101101011000000000, 0b01101100101101011010101000000000, 0b10101001001001001011010000000000, 0b01010101100101010101011000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 10,
+	                  U32x8(0b00101101010101101001100000000000, 0b11001101101001011001010000000000, 0b01010100110011000011100000000000, 0b00101100101101100100100000000000, 0b11011010101001011010110000000000, 0b11011001011010110101010000000000, 0b01010010010010010110100000000000, 0b10101011001010101010110000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 11,
+	                  U32x8(0b01011010101011010011000000000000, 0b10011011010010110010100000000000, 0b10101001100110000111000000000000, 0b01011001011011001001000000000000, 0b10110101010010110101100000000000, 0b10110010110101101010100000000000, 0b10100100100100101101000000000000, 0b01010110010101010101100000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 12,
+	                  U32x8(0b10110101010110100110000000000000, 0b00110110100101100101000000000000, 0b01010011001100001110000000000000, 0b10110010110110010010000000000000, 0b01101010100101101011000000000000, 0b01100101101011010101000000000000, 0b01001001001001011010000000000000, 0b10101100101010101011000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 13,
+	                  U32x8(0b01101010101101001100000000000000, 0b01101101001011001010000000000000, 0b10100110011000011100000000000000, 0b01100101101100100100000000000000, 0b11010101001011010110000000000000, 0b11001011010110101010000000000000, 0b10010010010010110100000000000000, 0b01011001010101010110000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 14,
+	                  U32x8(0b11010101011010011000000000000000, 0b11011010010110010100000000000000, 0b01001100110000111000000000000000, 0b11001011011001001000000000000000, 0b10101010010110101100000000000000, 0b10010110101101010100000000000000, 0b00100100100101101000000000000000, 0b10110010101010101100000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 15,
+	                  U32x8(0b10101010110100110000000000000000, 0b10110100101100101000000000000000, 0b10011001100001110000000000000000, 0b10010110110010010000000000000000, 0b01010100101101011000000000000000, 0b00101101011010101000000000000000, 0b01001001001011010000000000000000, 0b01100101010101011000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 16,
+	                  U32x8(0b01010101101001100000000000000000, 0b01101001011001010000000000000000, 0b00110011000011100000000000000000, 0b00101101100100100000000000000000, 0b10101001011010110000000000000000, 0b01011010110101010000000000000000, 0b10010010010110100000000000000000, 0b11001010101010110000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 17,
+	                  U32x8(0b10101011010011000000000000000000, 0b11010010110010100000000000000000, 0b01100110000111000000000000000000, 0b01011011001001000000000000000000, 0b01010010110101100000000000000000, 0b10110101101010100000000000000000, 0b00100100101101000000000000000000, 0b10010101010101100000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 18,
+	                  U32x8(0b01010110100110000000000000000000, 0b10100101100101000000000000000000, 0b11001100001110000000000000000000, 0b10110110010010000000000000000000, 0b10100101101011000000000000000000, 0b01101011010101000000000000000000, 0b01001001011010000000000000000000, 0b00101010101011000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 19,
+	                  U32x8(0b10101101001100000000000000000000, 0b01001011001010000000000000000000, 0b10011000011100000000000000000000, 0b01101100100100000000000000000000, 0b01001011010110000000000000000000, 0b11010110101010000000000000000000, 0b10010010110100000000000000000000, 0b01010101010110000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 20,
+	                  U32x8(0b01011010011000000000000000000000, 0b10010110010100000000000000000000, 0b00110000111000000000000000000000, 0b11011001001000000000000000000000, 0b10010110101100000000000000000000, 0b10101101010100000000000000000000, 0b00100101101000000000000000000000, 0b10101010101100000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 21,
+	                  U32x8(0b10110100110000000000000000000000, 0b00101100101000000000000000000000, 0b01100001110000000000000000000000, 0b10110010010000000000000000000000, 0b00101101011000000000000000000000, 0b01011010101000000000000000000000, 0b01001011010000000000000000000000, 0b01010101011000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 22,
+	                  U32x8(0b01101001100000000000000000000000, 0b01011001010000000000000000000000, 0b11000011100000000000000000000000, 0b01100100100000000000000000000000, 0b01011010110000000000000000000000, 0b10110101010000000000000000000000, 0b10010110100000000000000000000000, 0b10101010110000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 23,
+	                  U32x8(0b11010011000000000000000000000000, 0b10110010100000000000000000000000, 0b10000111000000000000000000000000, 0b11001001000000000000000000000000, 0b10110101100000000000000000000000, 0b01101010100000000000000000000000, 0b00101101000000000000000000000000, 0b01010101100000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 24,
+	                  U32x8(0b10100110000000000000000000000000, 0b01100101000000000000000000000000, 0b00001110000000000000000000000000, 0b10010010000000000000000000000000, 0b01101011000000000000000000000000, 0b11010101000000000000000000000000, 0b01011010000000000000000000000000, 0b10101011000000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 25,
+	                  U32x8(0b01001100000000000000000000000000, 0b11001010000000000000000000000000, 0b00011100000000000000000000000000, 0b00100100000000000000000000000000, 0b11010110000000000000000000000000, 0b10101010000000000000000000000000, 0b10110100000000000000000000000000, 0b01010110000000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 26,
+	                  U32x8(0b10011000000000000000000000000000, 0b10010100000000000000000000000000, 0b00111000000000000000000000000000, 0b01001000000000000000000000000000, 0b10101100000000000000000000000000, 0b01010100000000000000000000000000, 0b01101000000000000000000000000000, 0b10101100000000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 27,
+	                  U32x8(0b00110000000000000000000000000000, 0b00101000000000000000000000000000, 0b01110000000000000000000000000000, 0b10010000000000000000000000000000, 0b01011000000000000000000000000000, 0b10101000000000000000000000000000, 0b11010000000000000000000000000000, 0b01011000000000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 28,
+	                  U32x8(0b01100000000000000000000000000000, 0b01010000000000000000000000000000, 0b11100000000000000000000000000000, 0b00100000000000000000000000000000, 0b10110000000000000000000000000000, 0b01010000000000000000000000000000, 0b10100000000000000000000000000000, 0b10110000000000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 29,
+	                  U32x8(0b11000000000000000000000000000000, 0b10100000000000000000000000000000, 0b11000000000000000000000000000000, 0b01000000000000000000000000000000, 0b01100000000000000000000000000000, 0b10100000000000000000000000000000, 0b01000000000000000000000000000000, 0b01100000000000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 30,
+	                  U32x8(0b10000000000000000000000000000000, 0b01000000000000000000000000000000, 0b10000000000000000000000000000000, 0b10000000000000000000000000000000, 0b11000000000000000000000000000000, 0b01000000000000000000000000000000, 0b10000000000000000000000000000000, 0b11000000000000000000000000000000));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 31,
+	                  U32x8(0b00000000000000000000000000000000, 0b10000000000000000000000000000000, 0b00000000000000000000000000000000, 0b00000000000000000000000000000000, 0b10000000000000000000000000000000, 0b10000000000000000000000000000000, 0b00000000000000000000000000000000, 0b10000000000000000000000000000000));
+	ASSERT_CRASH(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) << 32, U"Tried to shift ");
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 0,
+	                  U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 1,
+	                  U32x8(0b01100011011001011010101011010011, 0b01100100010110011011010010110010, 0b00101100110010101001100110000111, 0b01010010101001011001011011001001, 0b00101101000110110101010010110101, 0b00101101010110110010110101101010, 0b01101000101010100100100100101101, 0b01001010110101010110010101010101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 2,
+	                  U32x8(0b00110001101100101101010101101001, 0b00110010001011001101101001011001, 0b00010110011001010100110011000011, 0b00101001010100101100101101100100, 0b00010110100011011010101001011010, 0b00010110101011011001011010110101, 0b00110100010101010010010010010110, 0b00100101011010101011001010101010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 3,
+	                  U32x8(0b00011000110110010110101010110100, 0b00011001000101100110110100101100, 0b00001011001100101010011001100001, 0b00010100101010010110010110110010, 0b00001011010001101101010100101101, 0b00001011010101101100101101011010, 0b00011010001010101001001001001011, 0b00010010101101010101100101010101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 4,
+	                  U32x8(0b00001100011011001011010101011010, 0b00001100100010110011011010010110, 0b00000101100110010101001100110000, 0b00001010010101001011001011011001, 0b00000101101000110110101010010110, 0b00000101101010110110010110101101, 0b00001101000101010100100100100101, 0b00001001010110101010110010101010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 5,
+	                  U32x8(0b00000110001101100101101010101101, 0b00000110010001011001101101001011, 0b00000010110011001010100110011000, 0b00000101001010100101100101101100, 0b00000010110100011011010101001011, 0b00000010110101011011001011010110, 0b00000110100010101010010010010010, 0b00000100101011010101011001010101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 6,
+	                  U32x8(0b00000011000110110010110101010110, 0b00000011001000101100110110100101, 0b00000001011001100101010011001100, 0b00000010100101010010110010110110, 0b00000001011010001101101010100101, 0b00000001011010101101100101101011, 0b00000011010001010101001001001001, 0b00000010010101101010101100101010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 7,
+	                  U32x8(0b00000001100011011001011010101011, 0b00000001100100010110011011010010, 0b00000000101100110010101001100110, 0b00000001010010101001011001011011, 0b00000000101101000110110101010010, 0b00000000101101010110110010110101, 0b00000001101000101010100100100100, 0b00000001001010110101010110010101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 8,
+	                  U32x8(0b00000000110001101100101101010101, 0b00000000110010001011001101101001, 0b00000000010110011001010100110011, 0b00000000101001010100101100101101, 0b00000000010110100011011010101001, 0b00000000010110101011011001011010, 0b00000000110100010101010010010010, 0b00000000100101011010101011001010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 9,
+	                  U32x8(0b00000000011000110110010110101010, 0b00000000011001000101100110110100, 0b00000000001011001100101010011001, 0b00000000010100101010010110010110, 0b00000000001011010001101101010100, 0b00000000001011010101101100101101, 0b00000000011010001010101001001001, 0b00000000010010101101010101100101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 10,
+	                  U32x8(0b00000000001100011011001011010101, 0b00000000001100100010110011011010, 0b00000000000101100110010101001100, 0b00000000001010010101001011001011, 0b00000000000101101000110110101010, 0b00000000000101101010110110010110, 0b00000000001101000101010100100100, 0b00000000001001010110101010110010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 11,
+	                  U32x8(0b00000000000110001101100101101010, 0b00000000000110010001011001101101, 0b00000000000010110011001010100110, 0b00000000000101001010100101100101, 0b00000000000010110100011011010101, 0b00000000000010110101011011001011, 0b00000000000110100010101010010010, 0b00000000000100101011010101011001));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 12,
+	                  U32x8(0b00000000000011000110110010110101, 0b00000000000011001000101100110110, 0b00000000000001011001100101010011, 0b00000000000010100101010010110010, 0b00000000000001011010001101101010, 0b00000000000001011010101101100101, 0b00000000000011010001010101001001, 0b00000000000010010101101010101100));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 13,
+	                  U32x8(0b00000000000001100011011001011010, 0b00000000000001100100010110011011, 0b00000000000000101100110010101001, 0b00000000000001010010101001011001, 0b00000000000000101101000110110101, 0b00000000000000101101010110110010, 0b00000000000001101000101010100100, 0b00000000000001001010110101010110));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 14,
+	                  U32x8(0b00000000000000110001101100101101, 0b00000000000000110010001011001101, 0b00000000000000010110011001010100, 0b00000000000000101001010100101100, 0b00000000000000010110100011011010, 0b00000000000000010110101011011001, 0b00000000000000110100010101010010, 0b00000000000000100101011010101011));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 15,
+	                  U32x8(0b00000000000000011000110110010110, 0b00000000000000011001000101100110, 0b00000000000000001011001100101010, 0b00000000000000010100101010010110, 0b00000000000000001011010001101101, 0b00000000000000001011010101101100, 0b00000000000000011010001010101001, 0b00000000000000010010101101010101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 16,
+	                  U32x8(0b00000000000000001100011011001011, 0b00000000000000001100100010110011, 0b00000000000000000101100110010101, 0b00000000000000001010010101001011, 0b00000000000000000101101000110110, 0b00000000000000000101101010110110, 0b00000000000000001101000101010100, 0b00000000000000001001010110101010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 17,
+	                  U32x8(0b00000000000000000110001101100101, 0b00000000000000000110010001011001, 0b00000000000000000010110011001010, 0b00000000000000000101001010100101, 0b00000000000000000010110100011011, 0b00000000000000000010110101011011, 0b00000000000000000110100010101010, 0b00000000000000000100101011010101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 18,
+	                  U32x8(0b00000000000000000011000110110010, 0b00000000000000000011001000101100, 0b00000000000000000001011001100101, 0b00000000000000000010100101010010, 0b00000000000000000001011010001101, 0b00000000000000000001011010101101, 0b00000000000000000011010001010101, 0b00000000000000000010010101101010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 19,
+	                  U32x8(0b00000000000000000001100011011001, 0b00000000000000000001100100010110, 0b00000000000000000000101100110010, 0b00000000000000000001010010101001, 0b00000000000000000000101101000110, 0b00000000000000000000101101010110, 0b00000000000000000001101000101010, 0b00000000000000000001001010110101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 20,
+	                  U32x8(0b00000000000000000000110001101100, 0b00000000000000000000110010001011, 0b00000000000000000000010110011001, 0b00000000000000000000101001010100, 0b00000000000000000000010110100011, 0b00000000000000000000010110101011, 0b00000000000000000000110100010101, 0b00000000000000000000100101011010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 21,
+	                  U32x8(0b00000000000000000000011000110110, 0b00000000000000000000011001000101, 0b00000000000000000000001011001100, 0b00000000000000000000010100101010, 0b00000000000000000000001011010001, 0b00000000000000000000001011010101, 0b00000000000000000000011010001010, 0b00000000000000000000010010101101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 22,
+	                  U32x8(0b00000000000000000000001100011011, 0b00000000000000000000001100100010, 0b00000000000000000000000101100110, 0b00000000000000000000001010010101, 0b00000000000000000000000101101000, 0b00000000000000000000000101101010, 0b00000000000000000000001101000101, 0b00000000000000000000001001010110));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 23,
+	                  U32x8(0b00000000000000000000000110001101, 0b00000000000000000000000110010001, 0b00000000000000000000000010110011, 0b00000000000000000000000101001010, 0b00000000000000000000000010110100, 0b00000000000000000000000010110101, 0b00000000000000000000000110100010, 0b00000000000000000000000100101011));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 24,
+	                  U32x8(0b00000000000000000000000011000110, 0b00000000000000000000000011001000, 0b00000000000000000000000001011001, 0b00000000000000000000000010100101, 0b00000000000000000000000001011010, 0b00000000000000000000000001011010, 0b00000000000000000000000011010001, 0b00000000000000000000000010010101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 25,
+	                  U32x8(0b00000000000000000000000001100011, 0b00000000000000000000000001100100, 0b00000000000000000000000000101100, 0b00000000000000000000000001010010, 0b00000000000000000000000000101101, 0b00000000000000000000000000101101, 0b00000000000000000000000001101000, 0b00000000000000000000000001001010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 26,
+	                  U32x8(0b00000000000000000000000000110001, 0b00000000000000000000000000110010, 0b00000000000000000000000000010110, 0b00000000000000000000000000101001, 0b00000000000000000000000000010110, 0b00000000000000000000000000010110, 0b00000000000000000000000000110100, 0b00000000000000000000000000100101));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 27,
+	                  U32x8(0b00000000000000000000000000011000, 0b00000000000000000000000000011001, 0b00000000000000000000000000001011, 0b00000000000000000000000000010100, 0b00000000000000000000000000001011, 0b00000000000000000000000000001011, 0b00000000000000000000000000011010, 0b00000000000000000000000000010010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 28,
+	                  U32x8(0b00000000000000000000000000001100, 0b00000000000000000000000000001100, 0b00000000000000000000000000000101, 0b00000000000000000000000000001010, 0b00000000000000000000000000000101, 0b00000000000000000000000000000101, 0b00000000000000000000000000001101, 0b00000000000000000000000000001001));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 29,
+	                  U32x8(0b00000000000000000000000000000110, 0b00000000000000000000000000000110, 0b00000000000000000000000000000010, 0b00000000000000000000000000000101, 0b00000000000000000000000000000010, 0b00000000000000000000000000000010, 0b00000000000000000000000000000110, 0b00000000000000000000000000000100));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 30,
+	                  U32x8(0b00000000000000000000000000000011, 0b00000000000000000000000000000011, 0b00000000000000000000000000000001, 0b00000000000000000000000000000010, 0b00000000000000000000000000000001, 0b00000000000000000000000000000001, 0b00000000000000000000000000000011, 0b00000000000000000000000000000010));
+	ASSERT_EQUAL_SIMD(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 31,
+	                  U32x8(0b00000000000000000000000000000001, 0b00000000000000000000000000000001, 0b00000000000000000000000000000000, 0b00000000000000000000000000000001, 0b00000000000000000000000000000000, 0b00000000000000000000000000000000, 0b00000000000000000000000000000001, 0b00000000000000000000000000000001));
+	ASSERT_CRASH(U32x8(0b11000110110010110101010110100110, 0b11001000101100110110100101100101, 0b01011001100101010011001100001110, 0b10100101010010110010110110010010, 0b01011010001101101010100101101011, 0b01011010101101100101101011010101, 0b11010001010101001001001001011010, 0b10010101101010101100101010101011) >> 32, U"Tried to shift ");
 
 	// Bit shift with multiple offsets.
 	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) << U32x4(0, 3, 1, 2), U32x4(1, 16, 6, 16));
@@ -752,82 +600,9 @@ START_TEST(Simd)
 	ASSERT_EQUAL_SIMD(
 	    bitShiftRightImmediate<4>(U32x8(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0x0000000F, 0x87654321, 0x48484848, 0x76437643, 0x11111111)),
 	                              U32x8(0x00AB12CD, 0x0FFFFFFF, 0x01234567, 0x00000000, 0x08765432, 0x04848484, 0x07643764, 0x01111111));
+}
 
-	// Reinterpret cast.
-	ASSERT_EQUAL_SIMD(
-	  reinterpret_U8FromU32(U32x4(ENDIAN32_BYTE_0, ENDIAN32_BYTE_1, ENDIAN32_BYTE_2, ENDIAN32_BYTE_3)),
-	  U8x16(
-		255, 0, 0, 0,
-		0, 255, 0, 0,
-		0, 0, 255, 0,
-		0, 0, 0, 255
-	  )
-	);
-	ASSERT_EQUAL_SIMD(
-	  reinterpret_U8FromU32(U32x4(
-		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
-		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
-		ENDIAN32_BYTE_1,
-		ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
-	  )),
-	  U8x16(
-		255, 0, 255, 0,
-		255, 0, 0, 255,
-		0, 255, 0, 0,
-		0, 255, 0, 255
-	  )
-	);
-	ASSERT_EQUAL_SIMD(
-	  reinterpret_U32FromU8(U8x16(
-		255, 0, 255, 0,
-		255, 0, 0, 255,
-		0, 255, 0, 0,
-		0, 255, 0, 255
-	  )),
-	  U32x4(
-		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
-		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
-		ENDIAN32_BYTE_1,
-		ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
-	  )
-	);
-	#ifdef DSR_BIG_ENDIAN
-		ASSERT_EQUAL_SIMD(
-		  reinterpret_U32FromU16(U16x8(1, 2, 3, 4, 5, 6, 7, 8)),
-		  U32x4(1 * 65536 + 2, 3 * 65536 + 4, 5 * 65536 + 6, 7 * 65536 + 8)
-		);
-		ASSERT_EQUAL_SIMD(
-		  reinterpret_U32FromU16(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)),
-		  U32x8(1 * 65536 + 2, 3 * 65536 + 4, 5 * 65536 + 6, 7 * 65536 + 8, 9 * 65536 + 10, 11 * 65536 + 12, 13 * 65536 + 14, 15 * 65536 + 16)
-		);
-		ASSERT_EQUAL_SIMD(
-		  reinterpret_U16FromU32(U32x4(1 * 65536 + 2, 3 * 65536 + 4, 5 * 65536 + 6, 7 * 65536 + 8)),
-		  U16x8(1, 2, 3, 4, 5, 6, 7, 8)
-		);
-		ASSERT_EQUAL_SIMD(
-		  reinterpret_U16FromU32(U32x8(1 * 65536 + 2, 3 * 65536 + 4, 5 * 65536 + 6, 7 * 65536 + 8, 9 * 65536 + 10, 11 * 65536 + 12, 13 * 65536 + 14, 15 * 65536 + 16)),
-		  U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
-		);
-	#else
-		ASSERT_EQUAL_SIMD(
-		  reinterpret_U32FromU16(U16x8(1, 2, 3, 4, 5, 6, 7, 8)),
-		  U32x4(1 + 2 * 65536, 3 + 4 * 65536, 5 + 6 * 65536, 7 + 8 * 65536)
-		);
-		ASSERT_EQUAL_SIMD(
-		  reinterpret_U32FromU16(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)),
-		  U32x8(1 + 2 * 65536, 3 + 4 * 65536, 5 + 6 * 65536, 7 + 8 * 65536, 9 + 10 * 65536, 11 + 12 * 65536, 13 + 14 * 65536, 15 + 16 * 65536)
-		);
-		ASSERT_EQUAL_SIMD(
-		  reinterpret_U16FromU32(U32x4(1 + 2 * 65536, 3 + 4 * 65536, 5 + 6 * 65536, 7 + 8 * 65536)),
-		  U16x8(1, 2, 3, 4, 5, 6, 7, 8)
-		);
-		ASSERT_EQUAL_SIMD(
-		  reinterpret_U16FromU32(U32x8(1 + 2 * 65536, 3 + 4 * 65536, 5 + 6 * 65536, 7 + 8 * 65536, 9 + 10 * 65536, 11 + 12 * 65536, 13 + 14 * 65536, 15 + 16 * 65536)),
-		  U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
-		);
-	#endif
-
-	// Element shift with insert
+static void testVectorExtract() {
 	ASSERT_EQUAL_SIMD(vectorExtract_0(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(1, 2, 3, 4));
 	ASSERT_EQUAL_SIMD(vectorExtract_1(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(2, 3, 4, 5));
 	ASSERT_EQUAL_SIMD(vectorExtract_2(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(3, 4, 5, 6));
@@ -880,7 +655,6 @@ START_TEST(Simd)
 	ASSERT_EQUAL_SIMD(vectorExtract_15(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
 	ASSERT_EQUAL_SIMD(vectorExtract_16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
 
-	// Element shift with insert
 	ASSERT_EQUAL_SIMD(vectorExtract_0(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
 	                                     U32x8( 1, 2, 3, 4, 5, 6, 7, 8));
 	ASSERT_EQUAL_SIMD(vectorExtract_1(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
@@ -920,162 +694,428 @@ START_TEST(Simd)
 	ASSERT_EQUAL_SIMD(vectorExtract_8(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
 	                                                                     I32x8( 9,10,11,-12,13,14,15,-16));
 	ASSERT_EQUAL_SIMD(vectorExtract_0(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                     F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f));
+	                                  F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_1(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                          F32x8( -2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f,         9.0f));
+	                                       F32x8( -2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f,         9.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_2(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                                 F32x8( 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f,         9.0f, 10.0f));
+	                                              F32x8( 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f,         9.0f, 10.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_3(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                                       F32x8( 4.0f,-5.0f, 6.0f, 7.0f, 8.0f,         9.0f, 10.0f, 11.0f));
+	                                                    F32x8( 4.0f,-5.0f, 6.0f, 7.0f, 8.0f,         9.0f, 10.0f, 11.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_4(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                                             F32x8(-5.0f, 6.0f, 7.0f, 8.0f,         9.0f, 10.0f, 11.0f,-12.0f));
+	                                                          F32x8(-5.0f, 6.0f, 7.0f, 8.0f,         9.0f, 10.0f, 11.0f,-12.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_5(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                                                   F32x8( 6.0f, 7.0f, 8.0f,         9.0f, 10.0f, 11.0f,-12.0f, 13.0f));
+	                                                                F32x8( 6.0f, 7.0f, 8.0f,         9.0f, 10.0f, 11.0f,-12.0f, 13.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_6(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                                                         F32x8( 7.0f, 8.0f,         9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f));
+	                                                                      F32x8( 7.0f, 8.0f,         9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_7(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                                                               F32x8( 8.0f,         9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f));
+	                                                                            F32x8( 8.0f,         9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_8(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
-	                                                                                             F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f));
+	                                                                                          F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f));
 	ASSERT_EQUAL_SIMD(vectorExtract_0 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                      U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16));
+	                                   U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16));
 	ASSERT_EQUAL_SIMD(vectorExtract_1 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                         U16x16( 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,         17));
+	                                      U16x16( 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,         17));
 	ASSERT_EQUAL_SIMD(vectorExtract_2 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                            U16x16( 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,         17,18));
+	                                         U16x16( 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,         17,18));
 	ASSERT_EQUAL_SIMD(vectorExtract_3 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                               U16x16( 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,         17,18,19));
+	                                            U16x16( 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,         17,18,19));
 	ASSERT_EQUAL_SIMD(vectorExtract_4 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                  U16x16( 5, 6, 7, 8, 9,10,11,12,13,14,15,16,         17,18,19,20));
+	                                               U16x16( 5, 6, 7, 8, 9,10,11,12,13,14,15,16,         17,18,19,20));
 	ASSERT_EQUAL_SIMD(vectorExtract_5 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                     U16x16( 6, 7, 8, 9,10,11,12,13,14,15,16,         17,18,19,20,21));
+	                                                  U16x16( 6, 7, 8, 9,10,11,12,13,14,15,16,         17,18,19,20,21));
 	ASSERT_EQUAL_SIMD(vectorExtract_6 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                        U16x16( 7, 8, 9,10,11,12,13,14,15,16,         17,18,19,20,21,22));
+	                                                     U16x16( 7, 8, 9,10,11,12,13,14,15,16,         17,18,19,20,21,22));
 	ASSERT_EQUAL_SIMD(vectorExtract_7 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                           U16x16( 8, 9,10,11,12,13,14,15,16,         17,18,19,20,21,22,23));
+	                                                        U16x16( 8, 9,10,11,12,13,14,15,16,         17,18,19,20,21,22,23));
 	ASSERT_EQUAL_SIMD(vectorExtract_8 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                              U16x16( 9,10,11,12,13,14,15,16,         17,18,19,20,21,22,23,24));
+	                                                           U16x16( 9,10,11,12,13,14,15,16,         17,18,19,20,21,22,23,24));
 	ASSERT_EQUAL_SIMD(vectorExtract_9 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                                 U16x16(10,11,12,13,14,15,16,         17,18,19,20,21,22,23,24,25));
+	                                                              U16x16(10,11,12,13,14,15,16,         17,18,19,20,21,22,23,24,25));
 	ASSERT_EQUAL_SIMD(vectorExtract_10(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                                    U16x16(11,12,13,14,15,16,         17,18,19,20,21,22,23,24,25,26));
+	                                                                 U16x16(11,12,13,14,15,16,         17,18,19,20,21,22,23,24,25,26));
 	ASSERT_EQUAL_SIMD(vectorExtract_11(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                                       U16x16(12,13,14,15,16,         17,18,19,20,21,22,23,24,25,26,27));
+	                                                                    U16x16(12,13,14,15,16,         17,18,19,20,21,22,23,24,25,26,27));
 	ASSERT_EQUAL_SIMD(vectorExtract_12(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                                          U16x16(13,14,15,16,         17,18,19,20,21,22,23,24,25,26,27,28));
+	                                                                       U16x16(13,14,15,16,         17,18,19,20,21,22,23,24,25,26,27,28));
 	ASSERT_EQUAL_SIMD(vectorExtract_13(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                                             U16x16(14,15,16,         17,18,19,20,21,22,23,24,25,26,27,28,29));
+	                                                                          U16x16(14,15,16,         17,18,19,20,21,22,23,24,25,26,27,28,29));
 	ASSERT_EQUAL_SIMD(vectorExtract_14(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                                                U16x16(15,16,         17,18,19,20,21,22,23,24,25,26,27,28,29,30));
+	                                                                             U16x16(15,16,         17,18,19,20,21,22,23,24,25,26,27,28,29,30));
 	ASSERT_EQUAL_SIMD(vectorExtract_15(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                                                   U16x16(16,         17,18,19,20,21,22,23,24,25,26,27,28,29,30,31));
+	                                                                                U16x16(16,         17,18,19,20,21,22,23,24,25,26,27,28,29,30,31));
 	ASSERT_EQUAL_SIMD(vectorExtract_16(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
-	                                                                                               U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32));
+	                                                                                            U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32));
 	ASSERT_EQUAL_SIMD(vectorExtract_0 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                      U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32));
+	                                   U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32));
 	ASSERT_EQUAL_SIMD(vectorExtract_1 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                      U8x32( 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33));
+	                                      U8x32( 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33));
 	ASSERT_EQUAL_SIMD(vectorExtract_2 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                            U8x32( 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34));
+	                                         U8x32( 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34));
 	ASSERT_EQUAL_SIMD(vectorExtract_3 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                               U8x32( 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35));
+	                                            U8x32( 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35));
 	ASSERT_EQUAL_SIMD(vectorExtract_4 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                  U8x32( 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36));
+	                                               U8x32( 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36));
 	ASSERT_EQUAL_SIMD(vectorExtract_5 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                     U8x32( 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37));
+	                                                  U8x32( 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37));
 	ASSERT_EQUAL_SIMD(vectorExtract_6 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                        U8x32( 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38));
+	                                                     U8x32( 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38));
 	ASSERT_EQUAL_SIMD(vectorExtract_7 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                           U8x32( 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39));
+	                                                        U8x32( 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39));
 	ASSERT_EQUAL_SIMD(vectorExtract_8 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                              U8x32( 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40));
+	                                                           U8x32( 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40));
 	ASSERT_EQUAL_SIMD(vectorExtract_9 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                 U8x32(10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41));
+	                                                              U8x32(10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41));
 	ASSERT_EQUAL_SIMD(vectorExtract_10(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                    U8x32(11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42));
+	                                                                 U8x32(11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42));
 	ASSERT_EQUAL_SIMD(vectorExtract_11(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                       U8x32(12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43));
+	                                                                    U8x32(12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43));
 	ASSERT_EQUAL_SIMD(vectorExtract_12(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                          U8x32(13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44));
+	                                                                       U8x32(13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44));
 	ASSERT_EQUAL_SIMD(vectorExtract_13(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                             U8x32(14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45));
+	                                                                          U8x32(14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45));
 	ASSERT_EQUAL_SIMD(vectorExtract_14(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                U8x32(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46));
+	                                                                             U8x32(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46));
 	ASSERT_EQUAL_SIMD(vectorExtract_15(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                   U8x32(16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47));
+	                                                                                U8x32(16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47));
 	ASSERT_EQUAL_SIMD(vectorExtract_16(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                      U8x32(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48));
+	                                                                                   U8x32(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48));
 	ASSERT_EQUAL_SIMD(vectorExtract_17(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                         U8x32(18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49));
+	                                                                                      U8x32(18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49));
 	ASSERT_EQUAL_SIMD(vectorExtract_18(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                            U8x32(19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50));
+	                                                                                         U8x32(19,20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50));
 	ASSERT_EQUAL_SIMD(vectorExtract_19(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                               U8x32(20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51));
+	                                                                                            U8x32(20,21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51));
 	ASSERT_EQUAL_SIMD(vectorExtract_20(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                  U8x32(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52));
+	                                                                                               U8x32(21,22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52));
 	ASSERT_EQUAL_SIMD(vectorExtract_21(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                     U8x32(22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53));
+	                                                                                                  U8x32(22,23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53));
 	ASSERT_EQUAL_SIMD(vectorExtract_22(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                        U8x32(23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54));
+	                                                                                                     U8x32(23,24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54));
 	ASSERT_EQUAL_SIMD(vectorExtract_23(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                           U8x32(24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55));
+	                                                                                                        U8x32(24,25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55));
 	ASSERT_EQUAL_SIMD(vectorExtract_24(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                              U8x32(25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56));
+	                                                                                                           U8x32(25,26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56));
 	ASSERT_EQUAL_SIMD(vectorExtract_25(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                                 U8x32(26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57));
+	                                                                                                              U8x32(26,27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57));
 	ASSERT_EQUAL_SIMD(vectorExtract_26(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                                    U8x32(27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58));
+	                                                                                                                 U8x32(27,28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58));
 	ASSERT_EQUAL_SIMD(vectorExtract_27(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                                       U8x32(28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59));
+	                                                                                                                    U8x32(28,29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59));
 	ASSERT_EQUAL_SIMD(vectorExtract_28(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                                          U8x32(29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60));
+	                                                                                                                       U8x32(29,30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60));
 	ASSERT_EQUAL_SIMD(vectorExtract_29(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                                             U8x32(30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61));
+	                                                                                                                          U8x32(30,31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61));
 	ASSERT_EQUAL_SIMD(vectorExtract_30(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                                                U8x32(31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62));
+	                                                                                                                             U8x32(31,32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62));
 	ASSERT_EQUAL_SIMD(vectorExtract_31(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                                                           U8x32(32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63));
+	                                                                                                                                U8x32(32,        33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63));
 	ASSERT_EQUAL_SIMD(vectorExtract_32(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
-	                                                                                                                                              U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64));
+	                                                                                                                                           U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64));
+}
 
-	{ // Gather test
-		// The Buffer must be kept alive during the pointer's lifetime to prevent freeing the memory too early with reference counting.
-		//   Because SafePointer exists only to be faster than Buffer but safer than a raw pointer.
-		Buffer gatherTestBuffer = buffer_create(sizeof(int32_t) * 32);
-		{
-			// 32-bit floating-point gather
-			SafePointer<float> pointerF = buffer_getSafeData<float>(gatherTestBuffer, "float gather test data");
-			for (int i = 0; i < 32; i++) { // -32.0f, -30.0f, -28.0f, -26.0f ... 24.0f, 26.0f, 28.0f, 30.0f
-				pointerF[i] = i * 2.0f - 32.0f;
-			}
-			ASSERT_EQUAL_SIMD(gather_F32(pointerF     , U32x4(2, 1, 30, 31)), F32x4(-28.0f, -30.0f, 28.0f, 30.0f));
-			ASSERT_EQUAL_SIMD(gather_F32(pointerF + 10, U32x4(0, 1, 2, 3)), F32x4(-12.0f, -10.0f, -8.0f, -6.0f));
-			ASSERT_EQUAL_SIMD(gather_F32(pointerF     , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), F32x8(-28.0f, -30.0f, 24.0f, 26.0f, -26.0f, -32.0f, 28.0f, 30.0f));
-			ASSERT_EQUAL_SIMD(gather_F32(pointerF + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), F32x8(-12.0f, -10.0f, -8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 2.0f));
+// Tests gather_F32, gather_I32 and gather_U32 with both U32x4 and U32x8 element offsets.
+static void testGather() {
+	// The Buffer must be kept alive during the pointer's lifetime to prevent freeing the memory too early with reference counting.
+	//   Because SafePointer exists only to be faster than Buffer but safer than a raw pointer.
+	Buffer gatherTestBuffer = buffer_create(sizeof(int32_t) * 32);
+	{
+		// 32-bit floating-point gather
+		SafePointer<float> pointerF = buffer_getSafeData<float>(gatherTestBuffer, "float gather test data");
+		for (int i = 0; i < 32; i++) { // -32.0f, -30.0f, -28.0f, -26.0f ... 24.0f, 26.0f, 28.0f, 30.0f
+			pointerF[i] = i * 2.0f - 32.0f;
 		}
-		{
-			// Signed 32-bit integer gather
-			SafePointer<int32_t> pointerU = buffer_getSafeData<int32_t>(gatherTestBuffer, "int32_t gather test data");
-			for (int i = 0; i < 32; i++) { // -32, -30, -28, -26 ... 24, 26, 28, 30
-				pointerU[i] = i * 2 - 32;
-			}
-			ASSERT_EQUAL_SIMD(gather_I32(pointerU     , U32x4(2, 1, 30, 31)), I32x4(-28, -30, 28, 30));
-			ASSERT_EQUAL_SIMD(gather_I32(pointerU + 10, U32x4(0, 1, 2, 3)), I32x4(-12, -10, -8, -6));
-			ASSERT_EQUAL_SIMD(gather_I32(pointerU     , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), I32x8(-28, -30, 24, 26, -26, -32, 28, 30));
-			ASSERT_EQUAL_SIMD(gather_I32(pointerU + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), I32x8(-12, -10, -8, -6, -4, -2, 0, 2));
+		ASSERT_EQUAL_SIMD(gather_F32(pointerF     , U32x4(2, 1, 30, 31)), F32x4(-28.0f, -30.0f, 28.0f, 30.0f));
+		ASSERT_EQUAL_SIMD(gather_F32(pointerF + 10, U32x4(0, 1, 2, 3)), F32x4(-12.0f, -10.0f, -8.0f, -6.0f));
+		ASSERT_EQUAL_SIMD(gather_F32(pointerF     , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), F32x8(-28.0f, -30.0f, 24.0f, 26.0f, -26.0f, -32.0f, 28.0f, 30.0f));
+		ASSERT_EQUAL_SIMD(gather_F32(pointerF + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), F32x8(-12.0f, -10.0f, -8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 2.0f));
+	}
+	{
+		// Signed 32-bit integer gather
+		SafePointer<int32_t> pointerI = buffer_getSafeData<int32_t>(gatherTestBuffer, "int32_t gather test data");
+		for (int i = 0; i < 32; i++) { // -32, -30, -28, -26 ... 24, 26, 28, 30
+			pointerI[i] = i * 2 - 32;
 		}
-		{
-			// Unsigned 32-bit integer gather
-			SafePointer<uint32_t> pointerI = buffer_getSafeData<uint32_t>(gatherTestBuffer, "uint32_t gather test data");
-			for (int i = 0; i < 32; i++) { // 100, 102, 104, 106 ... 156, 158, 160, 162
-				pointerI[i] = 100 + i * 2;
-			}
-			// Signed 32-bit integer gather
-			ASSERT_EQUAL_SIMD(gather_U32(pointerI     , U32x4(2, 1, 30, 31)), U32x4(104, 102, 160, 162));
-			ASSERT_EQUAL_SIMD(gather_U32(pointerI + 10, U32x4(0, 1, 2, 3)), U32x4(120, 122, 124, 126));
-			ASSERT_EQUAL_SIMD(gather_U32(pointerI     , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), U32x8(104, 102, 156, 158, 106, 100, 160, 162));
-			ASSERT_EQUAL_SIMD(gather_U32(pointerI + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), U32x8(120, 122, 124, 126, 128, 130, 132, 134));
+		ASSERT_EQUAL_SIMD(gather_I32(pointerI     , U32x4(2, 1, 30, 31)), I32x4(-28, -30, 28, 30));
+		ASSERT_EQUAL_SIMD(gather_I32(pointerI + 10, U32x4(0, 1, 2, 3)), I32x4(-12, -10, -8, -6));
+		ASSERT_EQUAL_SIMD(gather_I32(pointerI     , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), I32x8(-28, -30, 24, 26, -26, -32, 28, 30));
+		ASSERT_EQUAL_SIMD(gather_I32(pointerI + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), I32x8(-12, -10, -8, -6, -4, -2, 0, 2));
+	}
+	{
+		// Unsigned 32-bit integer gather
+		SafePointer<uint32_t> pointerU = buffer_getSafeData<uint32_t>(gatherTestBuffer, "uint32_t gather test data");
+		for (int i = 0; i < 32; i++) { // 100, 102, 104, 106 ... 156, 158, 160, 162
+			pointerU[i] = 100 + i * 2;
 		}
+		// Gather both directly from the start and with a base offset added to the pointer.
+		ASSERT_EQUAL_SIMD(gather_U32(pointerU     , U32x4(2, 1, 30, 31)), U32x4(104, 102, 160, 162));
+		ASSERT_EQUAL_SIMD(gather_U32(pointerU + 10, U32x4(0, 1, 2, 3)), U32x4(120, 122, 124, 126));
+		ASSERT_EQUAL_SIMD(gather_U32(pointerU     , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), U32x8(104, 102, 156, 158, 106, 100, 160, 162));
+		ASSERT_EQUAL_SIMD(gather_U32(pointerU + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), U32x8(120, 122, 124, 126, 128, 130, 132, 134));
 	}
+}
+
+START_TEST(Simd)
+	printText("\nSIMD test is compiled using:\n");
+	#ifdef USE_SSE2
+		printText("	* SSE2\n");
+	#endif
+	#ifdef USE_SSSE3
+		printText("	* SSSE3\n");
+	#endif
+	#ifdef USE_AVX
+		printText("	* AVX\n");
+	#endif
+	#ifdef USE_AVX2
+		printText("	* AVX2\n");
+	#endif
+	#ifdef USE_NEON
+		printText("	* NEON\n");
+	#endif
+
+	testComparisons();
+
+	// Reciprocal: 1 / x
+	ASSERT_EQUAL_SIMD(reciprocal(F32x4(0.5f, 1.0f, 2.0f, 4.0f)), F32x4(2.0f, 1.0f, 0.5f, 0.25f));
+	ASSERT_EQUAL_SIMD(reciprocal(F32x8(0.5f, 1.0f, 2.0f, 4.0f, 8.0f, 10.0f, 100.0f, 1000.0f)), F32x8(2.0f, 1.0f, 0.5f, 0.25f, 0.125f, 0.1f, 0.01f, 0.001f));
+
+	// Reciprocal square root: 1 / sqrt(x)
+	ASSERT_EQUAL_SIMD(reciprocalSquareRoot(F32x4(1.0f, 4.0f, 16.0f, 100.0f)), F32x4(1.0f, 0.5f, 0.25f, 0.1f));
+	ASSERT_EQUAL_SIMD(reciprocalSquareRoot(F32x8(1.0f, 4.0f, 16.0f, 100.0f, 400.0f, 64.0f, 25.0f, 100.0f)), F32x8(1.0f, 0.5f, 0.25f, 0.1f, 0.05f, 0.125f, 0.2f, 0.1f));
+
+	// Square root: sqrt(x)
+	ASSERT_EQUAL_SIMD(squareRoot(F32x4(1.0f, 4.0f, 9.0f, 100.0f)), F32x4(1.0f, 2.0f, 3.0f, 10.0f));
+	ASSERT_EQUAL_SIMD(squareRoot(F32x8(1.0f, 4.0f, 9.0f, 100.0f, 64.0f, 256.0f, 1024.0f, 4096.0f)), F32x8(1.0f, 2.0f, 3.0f, 10.0f, 8.0f, 16.0f, 32.0f, 64.0f));
+
+	// Minimum
+	ASSERT_EQUAL_SIMD(min(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(1.1f, 2.2f, 1.0f, -1.0f));
+	ASSERT_EQUAL_SIMD(min(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(1.1f, 2.2f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 8.8f));
+
+	// Maximum
+	ASSERT_EQUAL_SIMD(max(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(5.0f, 3.0f, 3.3f, 4.4f));
+	ASSERT_EQUAL_SIMD(max(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(5.0f, 3.0f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 10.0f));
+
+	// Clamp
+	ASSERT_EQUAL_SIMD(clamp(F32x4(-1.5f), F32x4(-35.1f, 1.0f, 2.0f, 45.7f), F32x4(1.5f)), F32x4(-1.5f, 1.0f, 1.5f, 1.5f));
+	ASSERT_EQUAL_SIMD(clampUpper(F32x4(-35.1f, 1.0f, 2.0f, 45.7f), F32x4(1.5f)), F32x4(-35.1f, 1.0f, 1.5f, 1.5f));
+	ASSERT_EQUAL_SIMD(clampLower(F32x4(-1.5f), F32x4(-35.1f, 1.0f, 2.0f, 45.7f)), F32x4(-1.5f, 1.0f, 2.0f, 45.7f));
+	ASSERT_EQUAL_SIMD(clamp(F32x8(-1.5f), F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f), F32x8(1.5f)), F32x8(-1.5f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.5f));
+	ASSERT_EQUAL_SIMD(clampUpper(F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f), F32x8(1.5f)), F32x8(-35.1f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.9f));
+	ASSERT_EQUAL_SIMD(clampLower(F32x8(-1.5f), F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f)), F32x8(-1.5f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.5f));
+
+	// F32x4 operations
+	ASSERT_EQUAL_SIMD(F32x4(1.1f, -2.2f, 3.3f, 4.0f) + F32x4(2.2f, -4.4f, 6.6f, 8.0f), F32x4(3.3f, -6.6f, 9.9f, 12.0f));
+	ASSERT_EQUAL_SIMD(F32x4(-1.5f, -0.5f, 0.5f, 1.5f) + 1.0f, F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
+	ASSERT_EQUAL_SIMD(1.0f + F32x4(-1.5f, -0.5f, 0.5f, 1.5f), F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
+	ASSERT_EQUAL_SIMD(F32x4(1.1f, 2.2f, 3.3f, 4.4f) - F32x4(0.1f, 0.2f, 0.3f, 0.4f), F32x4(1.0f, 2.0f, 3.0f, 4.0f));
+	ASSERT_EQUAL_SIMD(F32x4(1.0f, 2.0f, 3.0f, 4.0f) - 0.5f, F32x4(0.5f, 1.5f, 2.5f, 3.5f));
+	ASSERT_EQUAL_SIMD(0.5f - F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(-0.5f, -1.5f, -2.5f, -3.5f));
+	ASSERT_EQUAL_SIMD(2.0f * F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(2.0f, 4.0f, 6.0f, 8.0f));
+	ASSERT_EQUAL_SIMD(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * -2.0f, F32x4(-2.0f, 4.0f, -6.0f, 8.0f));
+	ASSERT_EQUAL_SIMD(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(1.0f, 4.0f, 9.0f, 16.0f));
+	ASSERT_EQUAL_SIMD(-F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(-1.0f, 2.0f, -3.0f, 4.0f));
+
+	// F32x8 operations
+	ASSERT_EQUAL_SIMD(F32x8(1.1f, -2.2f, 3.3f, 4.0f, 1.4f, 2.3f, 3.2f, 4.1f) + F32x8(2.2f, -4.4f, 6.6f, 8.0f, 4.11f, 3.22f, 2.33f, 1.44f), F32x8(3.3f, -6.6f, 9.9f, 12.0f, 5.51f, 5.52f, 5.53f, 5.54f));
+	ASSERT_EQUAL_SIMD(F32x8(-1.5f, -0.5f, 0.5f, 1.5f, 1000.0f, 2000.0f, -4000.0f, -1500.0f) + 1.0f, F32x8(-0.5f, 0.5f, 1.5f, 2.5f, 1001.0f, 2001.0f, -3999.0f, -1499.0f));
+	ASSERT_EQUAL_SIMD(1.0f + F32x8(-1.5f, -0.5f, 0.5f, 1.5f, 1000.0f, 2000.0f, -4000.0f, -1500.0f), F32x8(-0.5f, 0.5f, 1.5f, 2.5f, 1001.0f, 2001.0f, -3999.0f, -1499.0f));
+	ASSERT_EQUAL_SIMD(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f) - F32x8(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f), F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
+	ASSERT_EQUAL_SIMD(F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f) - 0.5f, F32x8(0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f));
+	ASSERT_EQUAL_SIMD(0.5f - F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f), F32x8(-0.5f, -1.5f, -2.5f, -3.5f, -4.5f, -5.5f, -6.5f, -7.5f));
+	ASSERT_EQUAL_SIMD(2.0f * F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f), F32x8(2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, 16.0f));
+	ASSERT_EQUAL_SIMD(F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f) * -2.0f, F32x8(-2.0f, 4.0f, -6.0f, 8.0f, -10.0f, 12.0f, -14.0f, 16.0f));
+	ASSERT_EQUAL_SIMD(F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f) * F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f), F32x8(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f));
+	ASSERT_EQUAL_SIMD(-F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f), F32x8(-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, 6.0f, -7.0f, 8.0f));
+
+	// I32x4 operations
+	ASSERT_EQUAL_SIMD(I32x4(1, 2, -3, 4) + I32x4(-2, 4, 6, 8), I32x4(-1, 6, 3, 12));
+	ASSERT_EQUAL_SIMD(I32x4(1, -2, 3, 4) - 4, I32x4(-3, -6, -1, 0));
+	ASSERT_EQUAL_SIMD(10 + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
+	ASSERT_EQUAL_SIMD(I32x4(1, 2, 3, 4) + I32x4(4), I32x4(5, 6, 7, 8));
+	ASSERT_EQUAL_SIMD(I32x4(10) + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
+	ASSERT_EQUAL_SIMD(I32x4(-3, 6, -9, 12) * I32x4(1, 2, -3, -4), I32x4(-3, 12, 27, -48));
+	ASSERT_EQUAL_SIMD(-I32x4(1, -2, 3, -4), I32x4(-1, 2, -3, 4));
+
+	// I32x8 operations
+	ASSERT_EQUAL_SIMD(I32x8(1, 2, 3, 4, 5, 6, 7, 8) - 1, I32x8(0, 1, 2, 3, 4, 5, 6, 7));
+	ASSERT_EQUAL_SIMD(1 - I32x8(1, 2, 3, 4, 5, 6, 7, 8), I32x8(0, -1, -2, -3, -4, -5, -6, -7));
+	ASSERT_EQUAL_SIMD(2 * I32x8(1, 2, 3, 4, 5, 6, 7, 8), I32x8(2, 4, 6, 8, 10, 12, 14, 16));
+	ASSERT_EQUAL_SIMD(I32x8(1, -2, 3, -4, 5, -6, 7, -8) * -2, I32x8(-2, 4, -6, 8, -10, 12, -14, 16));
+	ASSERT_EQUAL_SIMD(I32x8(1, -2, 3, -4, 5, -6, 7, -8) * I32x8(1, -2, 3, -4, 5, -6, 7, -8), I32x8(1, 4, 9, 16, 25, 36, 49, 64));
+	ASSERT_EQUAL_SIMD(-I32x8(1, -2, 3, -4, 5, -6, 7, -8), I32x8(-1, 2, -3, 4, -5, 6, -7, 8));
+
+	// U32x4 operations
+	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) + U32x4(2, 4, 6, 8), U32x4(3, 6, 9, 12));
+	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) + 4, U32x4(5, 6, 7, 8));
+	ASSERT_EQUAL_SIMD(10 + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
+	ASSERT_EQUAL_SIMD(U32x4(1, 2, 3, 4) + U32x4(4), U32x4(5, 6, 7, 8));
+	ASSERT_EQUAL_SIMD(U32x4(10) + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
+	ASSERT_EQUAL_SIMD(U32x4(3, 6, 9, 12) - U32x4(1, 2, 3, 4), U32x4(2, 4, 6, 8));
+	ASSERT_EQUAL_SIMD(U32x4(3, 6, 9, 12) * U32x4(1, 2, 3, 4), U32x4(3, 12, 27, 48));
+
+	// U32x8 operations
+	ASSERT_EQUAL_SIMD(U32x8(1, 2, 3, 4, 5, 6, 7, 8) - 1, U32x8(0, 1, 2, 3, 4, 5, 6, 7));
+	ASSERT_EQUAL_SIMD(10 - U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(9, 8, 7, 6, 5, 4, 3, 2));
+	ASSERT_EQUAL_SIMD(2 * U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(2, 4, 6, 8, 10, 12, 14, 16));
+	ASSERT_EQUAL_SIMD(U32x8(1, 2, 3, 4, 5, 6, 7, 8) * 2, U32x8(2, 4, 6, 8, 10, 12, 14, 16));
+	ASSERT_EQUAL_SIMD(U32x8(1, 2, 3, 4, 5, 6, 7, 8) * U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(1, 4, 9, 16, 25, 36, 49, 64));
+
+	// U16x8 operations
+	ASSERT_EQUAL_SIMD(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8(2, 4, 6, 8, 10, 12, 14, 16), U16x8(3, 6, 9, 12, 15, 18, 21, 24));
+	ASSERT_EQUAL_SIMD(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + 8, U16x8(9, 10, 11, 12, 13, 14, 15, 16));
+	ASSERT_EQUAL_SIMD(10 + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
+	ASSERT_EQUAL_SIMD(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8((uint16_t)8), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
+	ASSERT_EQUAL_SIMD(U16x8((uint16_t)10) + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
+	ASSERT_EQUAL_SIMD(U16x8(3, 6, 9, 12, 15, 18, 21, 24) - U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(2, 4, 6, 8, 10, 12, 14, 16));
+
+	// U16x16 operations
+	ASSERT_EQUAL_SIMD(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32), U16x16(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48));
+	ASSERT_EQUAL_SIMD(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 8, U16x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
+	ASSERT_EQUAL_SIMD(8 + U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
+	ASSERT_EQUAL_SIMD(U16x16(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48) - U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
+	ASSERT_EQUAL_SIMD(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U16x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+	ASSERT_EQUAL_SIMD(16 - U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
+	ASSERT_EQUAL_SIMD(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) * 2, U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
+	ASSERT_EQUAL_SIMD(2 * U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
+
+	// U8x16 operations
+	ASSERT_EQUAL_SIMD(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 2, U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18));
+	ASSERT_EQUAL_SIMD(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
+	ASSERT_EQUAL_SIMD(
+	  saturatedAddition(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 255), U8x16((uint8_t)250)),
+	  U8x16(251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255)
+	);
+	ASSERT_EQUAL_SIMD(
+	  saturatedSubtraction(
+	  U8x16(128, 128, 128, 0, 255, 255,   0, 200, 123, 80, 46, 46, 46, 255, 255, 255),
+	  U8x16(  0, 128, 255, 0, 255,   0, 255, 100,  23, 81, 45, 46, 47, 128, 127, 200)),
+	  U8x16(128,   0,   0, 0,   0, 255,   0, 100, 100,  0,  1,  0,  0, 127, 128,  55)
+	);
+
+	// U8x32 operations
+	ASSERT_EQUAL_SIMD(
+	      U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)
+	    + U8x32( 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64),
+	      U8x32( 3,  6,  9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96));
+	ASSERT_EQUAL_SIMD(
+	      U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32) + 5,
+	      U8x32( 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37));
+	ASSERT_EQUAL_SIMD(
+	  5 + U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
+	      U8x32( 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37));
+	ASSERT_EQUAL_SIMD(
+	      U8x32( 3,  6,  9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96)
+	    - U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
+	      U8x32( 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64));
+	ASSERT_EQUAL_SIMD(
+	      U8x32( 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37) - 5,
+	      U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
+	ASSERT_EQUAL_SIMD(
+	 33 - U8x32( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
+	      U8x32(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1));
+	ASSERT_EQUAL_SIMD(
+	  saturatedAddition(
+	    U8x32(  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255),
+	    U8x32((uint8_t)240)),
+	    U8x32(241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255)
+	);
+	ASSERT_EQUAL_SIMD(
+	  saturatedSubtraction(
+	    U8x32(  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255),
+	    U8x32((uint8_t)16)),
+	    U8x32(  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,239)
+	);
+
+	// Unsigned integer unpacking: lowerTo*/higherTo* widen the low/high half of the input lanes into wider lanes.
+	ASSERT_EQUAL_SIMD(lowerToU32(U16x8(1,2,3,4,5,6,7,8)), U32x4(1, 2, 3, 4));
+	ASSERT_EQUAL_SIMD(higherToU32(U16x8(1,2,3,4,5,6,7,8)), U32x4(5, 6, 7, 8));
+	ASSERT_EQUAL_SIMD(lowerToU16(U8x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U16x8(1,2,3,4,5,6,7,8));
+	ASSERT_EQUAL_SIMD(higherToU16(U8x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U16x8(9,10,11,12,13,14,15,16));
+	ASSERT_EQUAL_SIMD(lowerToU32(U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U32x8(1,2,3,4,5,6,7,8));
+	ASSERT_EQUAL_SIMD(higherToU32(U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U32x8(9,10,11,12,13,14,15,16));
+	ASSERT_EQUAL_SIMD(lowerToU32(U16x16(1,2,3,4,5,6,65535,8,9,10,11,12,13,1000,15,16)), U32x8(1,2,3,4,5,6,65535,8));
+	ASSERT_EQUAL_SIMD(higherToU32(U16x16(1,2,3,4,5,6,65535,8,9,10,11,12,13,1000,15,16)), U32x8(9,10,11,12,13,1000,15,16));
+	ASSERT_EQUAL_SIMD(lowerToU16(U8x32(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)), U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255));
+	ASSERT_EQUAL_SIMD(higherToU16(U8x32(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255));
+
+	testBitMasks();
+
+	testBitShift();
+
+	// Reinterpret cast: the same bits viewed with another lane width, so expected results depend on the memory byte order.
+	ASSERT_EQUAL_SIMD(
+	  reinterpret_U8FromU32(U32x4(ENDIAN32_BYTE_0, ENDIAN32_BYTE_1, ENDIAN32_BYTE_2, ENDIAN32_BYTE_3)),
+	  U8x16(
+		255, 0, 0, 0,
+		0, 255, 0, 0,
+		0, 0, 255, 0,
+		0, 0, 0, 255
+	  )
+	);
+	ASSERT_EQUAL_SIMD(
+	  reinterpret_U8FromU32(U32x4(
+		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
+		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
+		ENDIAN32_BYTE_1,
+		ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
+	  )),
+	  U8x16(
+		255, 0, 255, 0,
+		255, 0, 0, 255,
+		0, 255, 0, 0,
+		0, 255, 0, 255
+	  )
+	);
+	ASSERT_EQUAL_SIMD(
+	  reinterpret_U32FromU8(U8x16(
+		255, 0, 255, 0,
+		255, 0, 0, 255,
+		0, 255, 0, 0,
+		0, 255, 0, 255
+	  )),
+	  U32x4(
+		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
+		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
+		ENDIAN32_BYTE_1,
+		ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
+	  )
+	);
+	#ifdef DSR_BIG_ENDIAN
+		ASSERT_EQUAL_SIMD(
+		  reinterpret_U32FromU16(U16x8(1, 2, 3, 4, 5, 6, 7, 8)),
+		  U32x4(1 * 65536 + 2, 3 * 65536 + 4, 5 * 65536 + 6, 7 * 65536 + 8)
+		);
+		ASSERT_EQUAL_SIMD(
+		  reinterpret_U32FromU16(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)),
+		  U32x8(1 * 65536 + 2, 3 * 65536 + 4, 5 * 65536 + 6, 7 * 65536 + 8, 9 * 65536 + 10, 11 * 65536 + 12, 13 * 65536 + 14, 15 * 65536 + 16)
+		);
+		ASSERT_EQUAL_SIMD(
+		  reinterpret_U16FromU32(U32x4(1 * 65536 + 2, 3 * 65536 + 4, 5 * 65536 + 6, 7 * 65536 + 8)),
+		  U16x8(1, 2, 3, 4, 5, 6, 7, 8)
+		);
+		ASSERT_EQUAL_SIMD(
+		  reinterpret_U16FromU32(U32x8(1 * 65536 + 2, 3 * 65536 + 4, 5 * 65536 + 6, 7 * 65536 + 8, 9 * 65536 + 10, 11 * 65536 + 12, 13 * 65536 + 14, 15 * 65536 + 16)),
+		  U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+		);
+	#else // little-endian: the first 16-bit lane occupies the least significant half of each 32-bit lane.
+		ASSERT_EQUAL_SIMD(
+		  reinterpret_U32FromU16(U16x8(1, 2, 3, 4, 5, 6, 7, 8)),
+		  U32x4(1 + 2 * 65536, 3 + 4 * 65536, 5 + 6 * 65536, 7 + 8 * 65536)
+		);
+		ASSERT_EQUAL_SIMD(
+		  reinterpret_U32FromU16(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)),
+		  U32x8(1 + 2 * 65536, 3 + 4 * 65536, 5 + 6 * 65536, 7 + 8 * 65536, 9 + 10 * 65536, 11 + 12 * 65536, 13 + 14 * 65536, 15 + 16 * 65536)
+		);
+		ASSERT_EQUAL_SIMD(
+		  reinterpret_U16FromU32(U32x4(1 + 2 * 65536, 3 + 4 * 65536, 5 + 6 * 65536, 7 + 8 * 65536)),
+		  U16x8(1, 2, 3, 4, 5, 6, 7, 8)
+		);
+		ASSERT_EQUAL_SIMD(
+		  reinterpret_U16FromU32(U32x8(1 + 2 * 65536, 3 + 4 * 65536, 5 + 6 * 65536, 7 + 8 * 65536, 9 + 10 * 65536, 11 + 12 * 65536, 13 + 14 * 65536, 15 + 16 * 65536)),
+		  U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+		);
+	#endif
+
+	testVectorExtract();
+
+	testGather();
 
 END_TEST