Parcourir la source

Explicitly defining the copy constructor, copy assignment and move assignment operators to work around the compiler bug.

David Piuva il y a 10 mois
Parent
commit
b22011f774
2 fichiers modifiés avec 165 ajouts et 45 suppressions
  1. 6 6
      Source/DFPSR/api/filterAPI.cpp
  2. 159 39
      Source/DFPSR/base/simd.h

+ 6 - 6
Source/DFPSR/api/filterAPI.cpp

@@ -52,13 +52,13 @@ static inline U32x4 mixColorsUniform(const U32x4 &colorA, const U32x4 &colorB, u
 	U16x8 weightA = U16x8(invRatio);
 	U16x8 weightA = U16x8(invRatio);
 	U16x8 weightB = U16x8(ratio);
 	U16x8 weightB = U16x8(ratio);
 	U32x4 lowMask(0x00FF00FFu);
 	U32x4 lowMask(0x00FF00FFu);
-	U16x8 lowColorA = U16x8(colorA & lowMask);
-	U16x8 lowColorB = U16x8(colorB & lowMask);
+	U16x8 lowColorA = reinterpret_U16FromU32(U32x4(colorA & lowMask));
+	U16x8 lowColorB = reinterpret_U16FromU32(U32x4(colorB & lowMask));
 	U32x4 highMask(0xFF00FF00u);
 	U32x4 highMask(0xFF00FF00u);
-	U16x8 highColorA = bitShiftRightImmediate<8>(U16x8((colorA & highMask)));
-	U16x8 highColorB = bitShiftRightImmediate<8>(U16x8((colorB & highMask)));
-	U32x4 lowColor = (((lowColorA * weightA) + (lowColorB * weightB))).get_U32();
-	U32x4 highColor = (((highColorA * weightA) + (highColorB * weightB))).get_U32();
+	U16x8 highColorA = bitShiftRightImmediate<8>(reinterpret_U16FromU32(colorA & highMask));
+	U16x8 highColorB = bitShiftRightImmediate<8>(reinterpret_U16FromU32(colorB & highMask));
+	U32x4 lowColor = reinterpret_U32FromU16(((lowColorA * weightA) + (lowColorB * weightB)));
+	U32x4 highColor = reinterpret_U32FromU16(((highColorA * weightA) + (highColorB * weightB)));
 	return ((bitShiftRightImmediate<8>(lowColor) & lowMask) | (highColor & highMask));
 	return ((bitShiftRightImmediate<8>(lowColor) & lowMask) | (highColor & highMask));
 }
 }
 
 

+ 159 - 39
Source/DFPSR/base/simd.h

@@ -434,6 +434,22 @@
 			F32x4(float a1, float a2, float a3, float a4) : v(LOAD_VECTOR_F32_SIMD(a1, a2, a3, a4)) {}
 			F32x4(float a1, float a2, float a3, float a4) : v(LOAD_VECTOR_F32_SIMD(a1, a2, a3, a4)) {}
 			// Construct a portable vector from a single duplicated scalar
 			// Construct a portable vector from a single duplicated scalar
 			explicit F32x4(float scalar) : v(LOAD_SCALAR_F32_SIMD(scalar)) {}
 			explicit F32x4(float scalar) : v(LOAD_SCALAR_F32_SIMD(scalar)) {}
+			// Copy constructor.
+			F32x4(const F32x4& other) {
+				v = other.v;
+			}
+			// Assignment operator.
+			F32x4& operator=(const F32x4& other) {
+				if (this != &other) {
+					v = other.v;
+				}
+				return *this;
+			}
+			// Move operator.
+			F32x4& operator=(F32x4&& other) noexcept {
+				v = other.v;
+				return *this;
+			}
 		#else
 		#else
 			public:
 			public:
 			// Emulate a SIMD vector as an array of scalars without hardware support.
 			// Emulate a SIMD vector as an array of scalars without hardware support.
@@ -541,6 +557,22 @@
 			I32x4(int32_t a1, int32_t a2, int32_t a3, int32_t a4) : v(LOAD_VECTOR_I32_SIMD(a1, a2, a3, a4)) {}
 			I32x4(int32_t a1, int32_t a2, int32_t a3, int32_t a4) : v(LOAD_VECTOR_I32_SIMD(a1, a2, a3, a4)) {}
 			// Construct a portable vector from a single duplicated scalar
 			// Construct a portable vector from a single duplicated scalar
 			explicit I32x4(int32_t scalar) : v(LOAD_SCALAR_I32_SIMD(scalar)) {}
 			explicit I32x4(int32_t scalar) : v(LOAD_SCALAR_I32_SIMD(scalar)) {}
+			// Copy constructor.
+			I32x4(const I32x4& other) {
+				v = other.v;
+			}
+			// Assignment operator.
+			I32x4& operator=(const I32x4& other) {
+				if (this != &other) {
+					v = other.v;
+				}
+				return *this;
+			}
+			// Move operator.
+			I32x4& operator=(I32x4&& other) noexcept {
+				v = other.v;
+				return *this;
+			}
 		#else
 		#else
 			public:
 			public:
 			// Emulate a SIMD vector as an array of scalars without hardware support.
 			// Emulate a SIMD vector as an array of scalars without hardware support.
@@ -648,6 +680,22 @@
 			U32x4(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4) : v(LOAD_VECTOR_U32_SIMD(a1, a2, a3, a4)) {}
 			U32x4(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4) : v(LOAD_VECTOR_U32_SIMD(a1, a2, a3, a4)) {}
 			// Construct a portable vector from a single duplicated scalar
 			// Construct a portable vector from a single duplicated scalar
 			explicit U32x4(uint32_t scalar) : v(LOAD_SCALAR_U32_SIMD(scalar)) {}
 			explicit U32x4(uint32_t scalar) : v(LOAD_SCALAR_U32_SIMD(scalar)) {}
+			// Copy constructor.
+			U32x4(const U32x4& other) {
+				v = other.v;
+			}
+			// Assignment operator.
+			U32x4& operator=(const U32x4& other) {
+				if (this != &other) {
+					v = other.v;
+				}
+				return *this;
+			}
+			// Move operator.
+			U32x4& operator=(U32x4&& other) noexcept {
+				v = other.v;
+				return *this;
+			}
 		#else
 		#else
 			public:
 			public:
 			// Emulate a SIMD vector as an array of scalars without hardware support.
 			// Emulate a SIMD vector as an array of scalars without hardware support.
@@ -751,34 +799,31 @@
 			SIMD_U16x8 v;
 			SIMD_U16x8 v;
 			// Construct a portable vector from a native SIMD vector
 			// Construct a portable vector from a native SIMD vector
 			explicit U16x8(const SIMD_U16x8& v) : v(v) {}
 			explicit U16x8(const SIMD_U16x8& v) : v(v) {}
-			// Construct a vector of 8 x 16-bit unsigned integers from a vector of 4 x 32-bit unsigned integers
-			//   Reinterpret casting is used
-			explicit U16x8(const U32x4& vector) : v(REINTERPRET_U32_TO_U16_SIMD(vector.v)) {}
 			// Construct a portable vector from a set of scalars
 			// Construct a portable vector from a set of scalars
 			U16x8(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4, uint16_t a5, uint16_t a6, uint16_t a7, uint16_t a8) : v(LOAD_VECTOR_U16_SIMD(a1, a2, a3, a4, a5, a6, a7, a8)) {}
 			U16x8(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4, uint16_t a5, uint16_t a6, uint16_t a7, uint16_t a8) : v(LOAD_VECTOR_U16_SIMD(a1, a2, a3, a4, a5, a6, a7, a8)) {}
-			// Construct a vector of 8 x 16-bit unsigned integers from a single duplicated 32-bit unsigned integer
-			//   Reinterpret casting is used
-			// TODO: Remove all reintreprets from constructors to improve readability
-			//explicit U16x8(uint32_t scalar) : v(REINTERPRET_U32_TO_U16_SIMD(LOAD_SCALAR_U32_SIMD(scalar))) {}
 			// Construct a portable vector from a single duplicated scalar
 			// Construct a portable vector from a single duplicated scalar
 			explicit U16x8(uint16_t scalar) : v(LOAD_SCALAR_U16_SIMD(scalar)) {}
 			explicit U16x8(uint16_t scalar) : v(LOAD_SCALAR_U16_SIMD(scalar)) {}
-			// Reinterpret cast to a vector of 4 x 32-bit unsigned integers
-			U32x4 get_U32() const {
-				return U32x4(REINTERPRET_U16_TO_U32_SIMD(this->v));
+			// Copy constructor.
+			U16x8(const U16x8& other) {
+				v = other.v;
+			}
+			// Assignment operator.
+			U16x8& operator=(const U16x8& other) {
+				if (this != &other) {
+					v = other.v;
+				}
+				return *this;
+			}
+			// Move operator.
+			U16x8& operator=(U16x8&& other) noexcept {
+				v = other.v;
+				return *this;
 			}
 			}
 		#else
 		#else
 			public:
 			public:
 			// Emulate a SIMD vector as an array of scalars without hardware support.
 			// Emulate a SIMD vector as an array of scalars without hardware support.
 			// Only accessible while emulating!
 			// Only accessible while emulating!
 			uint16_t scalars[8];
 			uint16_t scalars[8];
-			// Construct a vector of 8 x 16-bit unsigned integers from a vector of 4 x 32-bit unsigned integers
-			//   Reinterpret casting is used
-			explicit U16x8(const U32x4& vector) {
-				uint64_t *target = (uint64_t*)this->scalars;
-				uint64_t *source = (uint64_t*)vector.scalars;
-				target[0] = source[0];
-				target[1] = source[1];
-			}
 			// Construct a portable vector from a set of scalars
 			// Construct a portable vector from a set of scalars
 			U16x8(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4, uint16_t a5, uint16_t a6, uint16_t a7, uint16_t a8) {
 			U16x8(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4, uint16_t a5, uint16_t a6, uint16_t a7, uint16_t a8) {
 				this->scalars[0] = a1;
 				this->scalars[0] = a1;
@@ -790,15 +835,6 @@
 				this->scalars[6] = a7;
 				this->scalars[6] = a7;
 				this->scalars[7] = a8;
 				this->scalars[7] = a8;
 			}
 			}
-			// Construct a vector of 8 x 16-bit unsigned integers from a single duplicated 32-bit unsigned integer
-			//   Reinterpret casting is used
-			explicit U16x8(uint32_t scalar) {
-				uint32_t *target = (uint32_t*)this->scalars;
-				target[0] = scalar;
-				target[1] = scalar;
-				target[2] = scalar;
-				target[3] = scalar;
-			}
 			// Construct a portable vector from a single duplicated scalar
 			// Construct a portable vector from a single duplicated scalar
 			explicit U16x8(uint16_t scalar) {
 			explicit U16x8(uint16_t scalar) {
 				this->scalars[0] = scalar;
 				this->scalars[0] = scalar;
@@ -810,15 +846,6 @@
 				this->scalars[6] = scalar;
 				this->scalars[6] = scalar;
 				this->scalars[7] = scalar;
 				this->scalars[7] = scalar;
 			}
 			}
-			// Reinterpret cast to a vector of 4 x 32-bit unsigned integers
-			U32x4 get_U32() const {
-				U32x4 result(0);
-				uint64_t *target = (uint64_t*)result.scalars;
-				uint64_t *source = (uint64_t*)this->scalars;
-				target[0] = source[0];
-				target[1] = source[1];
-				return result;
-			}
 		#endif
 		#endif
 		// Create a gradient vector using start and increment, so that arbitrary length vectors have a way to initialize linear iterations.
 		// Create a gradient vector using start and increment, so that arbitrary length vectors have a way to initialize linear iterations.
 		static inline U16x8 createGradient(uint16_t start, uint16_t increment) {
 		static inline U16x8 createGradient(uint16_t start, uint16_t increment) {
@@ -909,6 +936,22 @@
 			: v(LOAD_VECTOR_U8_SIMD(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16)) {}
 			: v(LOAD_VECTOR_U8_SIMD(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16)) {}
 			// Construct a portable vector from a single duplicated scalar
 			// Construct a portable vector from a single duplicated scalar
 			explicit U8x16(uint8_t scalar) : v(LOAD_SCALAR_U8_SIMD(scalar)) {}
 			explicit U8x16(uint8_t scalar) : v(LOAD_SCALAR_U8_SIMD(scalar)) {}
+			// Copy constructor.
+			U8x16(const U8x16& other) {
+				v = other.v;
+			}
+			// Assignment operator.
+			U8x16& operator=(const U8x16& other) {
+				if (this != &other) {
+					v = other.v;
+				}
+				return *this;
+			}
+			// Move operator.
+			U8x16& operator=(U8x16&& other) noexcept {
+				v = other.v;
+				return *this;
+			}
 		#else
 		#else
 			public:
 			public:
 			// Emulate a SIMD vector as an array of scalars without hardware support.
 			// Emulate a SIMD vector as an array of scalars without hardware support.
@@ -1067,6 +1110,22 @@
 					ALIGN32 __m256 target = _mm256_set1_ps(scalar);
 					ALIGN32 __m256 target = _mm256_set1_ps(scalar);
 					this->v = target;
 					this->v = target;
 				}
 				}
+				// Copy constructor.
+				F32x8(const F32x8& other) {
+					v = other.v;
+				}
+				// Assignment operator.
+				F32x8& operator=(const F32x8& other) {
+					if (this != &other) {
+						v = other.v;
+					}
+					return *this;
+				}
+				// Move operator.
+				F32x8& operator=(F32x8&& other) noexcept {
+					v = other.v;
+					return *this;
+				}
 			#else
 			#else
 				#error "Missing constructors for the F32x8 type!\n"
 				#error "Missing constructors for the F32x8 type!\n"
 			#endif
 			#endif
@@ -1184,6 +1243,22 @@
 					ALIGN32 __m256i target = _mm256_set1_epi32(scalar);
 					ALIGN32 __m256i target = _mm256_set1_epi32(scalar);
 					this->v = target;
 					this->v = target;
 				}
 				}
+				// Copy constructor.
+				I32x8(const I32x8& other) {
+					v = other.v;
+				}
+				// Assignment operator.
+				I32x8& operator=(const I32x8& other) {
+					if (this != &other) {
+						v = other.v;
+					}
+					return *this;
+				}
+				// Move operator.
+				I32x8& operator=(I32x8&& other) noexcept {
+					v = other.v;
+					return *this;
+				}
 			#else
 			#else
 				#error "Missing constructors for the I32x8 type!\n"
 				#error "Missing constructors for the I32x8 type!\n"
 			#endif
 			#endif
@@ -1301,6 +1376,22 @@
 					ALIGN32 __m256i target = _mm256_set1_epi32(scalar);
 					ALIGN32 __m256i target = _mm256_set1_epi32(scalar);
 					this->v = target;
 					this->v = target;
 				}
 				}
+				// Copy constructor.
+				U32x8(const U32x8& other) {
+					v = other.v;
+				}
+				// Assignment operator.
+				U32x8& operator=(const U32x8& other) {
+					if (this != &other) {
+						v = other.v;
+					}
+					return *this;
+				}
+				// Move operator.
+				U32x8& operator=(U32x8&& other) noexcept {
+					v = other.v;
+					return *this;
+				}
 			#else
 			#else
 				#error "Missing constructors for the U32x8 type!\n"
 				#error "Missing constructors for the U32x8 type!\n"
 			#endif
 			#endif
@@ -1409,9 +1500,6 @@
 			SIMD_U16x16 v;
 			SIMD_U16x16 v;
 			// Construct a portable vector from a native SIMD vector
 			// Construct a portable vector from a native SIMD vector
 			explicit U16x16(const SIMD_U16x16& v) : v(v) {}
 			explicit U16x16(const SIMD_U16x16& v) : v(v) {}
-			// Construct a vector of 16 x 16-bit unsigned integers from a vector of 8 x 32-bit unsigned integers
-			//   Reinterpret casting is used
-			//explicit U16x16(const U32x8& vector) : v(REINTERPRET_U32_TO_U16_SIMD256(vector.v)) {}
 			#if defined(USE_AVX2)
 			#if defined(USE_AVX2)
 				// Construct a portable vector from a set of scalars.
 				// Construct a portable vector from a set of scalars.
 				U16x16(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4, uint16_t a5, uint16_t a6, uint16_t a7, uint16_t a8,
 				U16x16(uint16_t a1, uint16_t a2, uint16_t a3, uint16_t a4, uint16_t a5, uint16_t a6, uint16_t a7, uint16_t a8,
@@ -1424,6 +1512,22 @@
 					ALIGN32 __m256i target = _mm256_set1_epi16(scalar);
 					ALIGN32 __m256i target = _mm256_set1_epi16(scalar);
 					this->v = target;
 					this->v = target;
 				}
 				}
+				// Copy constructor.
+				U16x16(const U16x16& other) {
+					v = other.v;
+				}
+				// Assignment operator.
+				U16x16& operator=(const U16x16& other) {
+					if (this != &other) {
+						v = other.v;
+					}
+					return *this;
+				}
+				// Move operator.
+				U16x16& operator=(U16x16&& other) noexcept {
+					v = other.v;
+					return *this;
+				}
 			#else
 			#else
 				#error "Missing constructors for the U16x16 type!\n"
 				#error "Missing constructors for the U16x16 type!\n"
 			#endif
 			#endif
@@ -1594,6 +1698,22 @@
 					ALIGN32 __m256i target = _mm256_set1_epi8(scalar);
 					ALIGN32 __m256i target = _mm256_set1_epi8(scalar);
 					this->v = target;
 					this->v = target;
 				}
 				}
+				// Copy constructor.
+				U8x32(const U8x32& other) {
+					v = other.v;
+				}
+				// Assignment operator.
+				U8x32& operator=(const U8x32& other) {
+					if (this != &other) {
+						v = other.v;
+					}
+					return *this;
+				}
+				// Move operator.
+				U8x32& operator=(U8x32&& other) noexcept {
+					v = other.v;
+					return *this;
+				}
 			#else
 			#else
 				#error "Missing constructors for the U8x32 type!\n"
 				#error "Missing constructors for the U8x32 type!\n"
 			#endif
 			#endif