// zlib open source license
//
// Copyright (c) 2017 to 2023 David Forsgren Piuva
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
//    1. The origin of this software must not be misrepresented; you must not
//    claim that you wrote the original software. If you use this software
//    in a product, an acknowledgment in the product documentation would be
//    appreciated but is not required.
//
//    2. Altered source versions must be plainly marked as such, and must not be
//    misrepresented as being the original software.
//
//    3. This notice may not be removed or altered from any source
//    distribution.

#include "simd.h"
#include "../math/FVector.h"

// Linear algebra of up to three dimensions. For operating on four (F32x4) or eight (F32x8) unrelated vectors in parallel.
// Unlike simd.h, this is not a hardware abstraction layer using assembly intrinsics directly.
//    This module builds on top of simd.h for higher levels of abstraction.

#ifndef DFPSR_SIMD_3D
#define DFPSR_SIMD_3D

// These are the infix operations for 2D SIMD vectors F32x4x2, F32x8x2...
// Macro arguments:
//   VECTOR_TYPE  - the planar 2D vector struct, exposing the members v1 and v2 and a (v1, v2) constructor.
//   SIMD_TYPE    - the type of v1 and v2, providing +, -, *, unary -, squareRoot() and reciprocalSquareRoot().
//   ELEMENT_TYPE - a scalar type that SIMD_TYPE can be combined with using +, - and *.
// Only block comments are used inside the macro, because a line comment would swallow the line continuation.
#define SIMD_VECTOR_INFIX_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
	/* Addition with another 2D vector, a SIMD row, or a scalar. */ \
	inline VECTOR_TYPE operator+(const VECTOR_TYPE& lhs, const VECTOR_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 + rhs.v1, lhs.v2 + rhs.v2); \
	} \
	inline VECTOR_TYPE operator+(const VECTOR_TYPE& lhs, const SIMD_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 + rhs, lhs.v2 + rhs); \
	} \
	inline VECTOR_TYPE operator+(const VECTOR_TYPE& lhs, const ELEMENT_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 + rhs, lhs.v2 + rhs); \
	} \
	/* Subtraction of another 2D vector, a SIMD row, or a scalar. */ \
	inline VECTOR_TYPE operator-(const VECTOR_TYPE& lhs, const VECTOR_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 - rhs.v1, lhs.v2 - rhs.v2); \
	} \
	inline VECTOR_TYPE operator-(const VECTOR_TYPE& lhs, const SIMD_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 - rhs, lhs.v2 - rhs); \
	} \
	inline VECTOR_TYPE operator-(const VECTOR_TYPE& lhs, const ELEMENT_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 - rhs, lhs.v2 - rhs); \
	} \
	/* Negation of both dimensions. */ \
	inline VECTOR_TYPE operator-(const VECTOR_TYPE& operand) { \
		return VECTOR_TYPE(-operand.v1, -operand.v2); \
	} \
	/* Per-dimension multiplication with another 2D vector, a SIMD row, or a scalar. */ \
	inline VECTOR_TYPE operator*(const VECTOR_TYPE& lhs, const VECTOR_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 * rhs.v1, lhs.v2 * rhs.v2); \
	} \
	inline VECTOR_TYPE operator*(const VECTOR_TYPE& lhs, const SIMD_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 * rhs, lhs.v2 * rhs); \
	} \
	inline VECTOR_TYPE operator*(const VECTOR_TYPE& lhs, const ELEMENT_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 * rhs, lhs.v2 * rhs); \
	} \
	/* Sum of the per-dimension products of p and q. */ \
	inline SIMD_TYPE dotProduct(const VECTOR_TYPE& p, const VECTOR_TYPE& q) { \
		return (p.v1 * q.v1) + (p.v2 * q.v2); \
	} \
	/* Dot product of the vector with itself. */ \
	inline SIMD_TYPE squareLength(const VECTOR_TYPE& vec) { \
		return dotProduct(vec, vec); \
	} \
	/* Square root of the squared length. */ \
	inline SIMD_TYPE length(const VECTOR_TYPE& vec) { \
		return squareLength(vec).squareRoot(); \
	} \
	/* The vector multiplied by the reciprocal square root of its squared length. */ \
	inline VECTOR_TYPE normalize(const VECTOR_TYPE& vec) { \
		return vec * squareLength(vec).reciprocalSquareRoot(); \
	}

// These are the infix operations for 3D SIMD vectors F32x4x3, F32x8x3...
// Macro arguments:
//   VECTOR_TYPE  - the planar 3D vector struct, exposing the members v1, v2 and v3 and a (v1, v2, v3) constructor.
//   SIMD_TYPE    - the type of v1, v2 and v3, providing +, -, *, unary -, squareRoot() and reciprocalSquareRoot().
//   ELEMENT_TYPE - a scalar type that SIMD_TYPE can be combined with using +, - and *.
// Only block comments are used inside the macro, because a line comment would swallow the line continuation.
#define SIMD_VECTOR_INFIX_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
	/* Addition with another 3D vector, a SIMD row, or a scalar. */ \
	inline VECTOR_TYPE operator+(const VECTOR_TYPE& lhs, const VECTOR_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 + rhs.v1, lhs.v2 + rhs.v2, lhs.v3 + rhs.v3); \
	} \
	inline VECTOR_TYPE operator+(const VECTOR_TYPE& lhs, const SIMD_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 + rhs, lhs.v2 + rhs, lhs.v3 + rhs); \
	} \
	inline VECTOR_TYPE operator+(const VECTOR_TYPE& lhs, const ELEMENT_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 + rhs, lhs.v2 + rhs, lhs.v3 + rhs); \
	} \
	/* Subtraction of another 3D vector, a SIMD row, or a scalar. */ \
	inline VECTOR_TYPE operator-(const VECTOR_TYPE& lhs, const VECTOR_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 - rhs.v1, lhs.v2 - rhs.v2, lhs.v3 - rhs.v3); \
	} \
	inline VECTOR_TYPE operator-(const VECTOR_TYPE& lhs, const SIMD_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 - rhs, lhs.v2 - rhs, lhs.v3 - rhs); \
	} \
	inline VECTOR_TYPE operator-(const VECTOR_TYPE& lhs, const ELEMENT_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 - rhs, lhs.v2 - rhs, lhs.v3 - rhs); \
	} \
	/* Negation of all three dimensions. */ \
	inline VECTOR_TYPE operator-(const VECTOR_TYPE& operand) { \
		return VECTOR_TYPE(-operand.v1, -operand.v2, -operand.v3); \
	} \
	/* Per-dimension multiplication with another 3D vector, a SIMD row, or a scalar. */ \
	inline VECTOR_TYPE operator*(const VECTOR_TYPE& lhs, const VECTOR_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 * rhs.v1, lhs.v2 * rhs.v2, lhs.v3 * rhs.v3); \
	} \
	inline VECTOR_TYPE operator*(const VECTOR_TYPE& lhs, const SIMD_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 * rhs, lhs.v2 * rhs, lhs.v3 * rhs); \
	} \
	inline VECTOR_TYPE operator*(const VECTOR_TYPE& lhs, const ELEMENT_TYPE& rhs) { \
		return VECTOR_TYPE(lhs.v1 * rhs, lhs.v2 * rhs, lhs.v3 * rhs); \
	} \
	/* Sum of the per-dimension products of p and q. */ \
	inline SIMD_TYPE dotProduct(const VECTOR_TYPE& p, const VECTOR_TYPE& q) { \
		return (p.v1 * q.v1) + (p.v2 * q.v2) + (p.v3 * q.v3); \
	} \
	/* Dot product of the vector with itself. */ \
	inline SIMD_TYPE squareLength(const VECTOR_TYPE& vec) { \
		return dotProduct(vec, vec); \
	} \
	/* Square root of the squared length. */ \
	inline SIMD_TYPE length(const VECTOR_TYPE& vec) { \
		return squareLength(vec).squareRoot(); \
	} \
	/* The vector multiplied by the reciprocal square root of its squared length. */ \
	inline VECTOR_TYPE normalize(const VECTOR_TYPE& vec) { \
		return vec * squareLength(vec).reciprocalSquareRoot(); \
	}

// These are the available in-place operations for 2D SIMD vectors F32x4x2, F32x8x2...
// The macro is expanded inside the body of VECTOR_TYPE, which must have the members v1 and v2 of SIMD_TYPE.
// Each operator assigns the combined value back to the members and returns a reference to the modified vector.
#define SIMD_VECTOR_MEMBER_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
	/* Combine with another 2D vector, dimension by dimension. */ \
	inline VECTOR_TYPE& operator+=(const VECTOR_TYPE& other) { \
		this->v1 = this->v1 + other.v1; \
		this->v2 = this->v2 + other.v2; \
		return *this; \
	} \
	inline VECTOR_TYPE& operator-=(const VECTOR_TYPE& other) { \
		this->v1 = this->v1 - other.v1; \
		this->v2 = this->v2 - other.v2; \
		return *this; \
	} \
	inline VECTOR_TYPE& operator*=(const VECTOR_TYPE& other) { \
		this->v1 = this->v1 * other.v1; \
		this->v2 = this->v2 * other.v2; \
		return *this; \
	} \
	/* Combine both dimensions with the same SIMD row. */ \
	inline VECTOR_TYPE& operator+=(const SIMD_TYPE& row) { \
		this->v1 = this->v1 + row; \
		this->v2 = this->v2 + row; \
		return *this; \
	} \
	inline VECTOR_TYPE& operator-=(const SIMD_TYPE& row) { \
		this->v1 = this->v1 - row; \
		this->v2 = this->v2 - row; \
		return *this; \
	} \
	inline VECTOR_TYPE& operator*=(const SIMD_TYPE& row) { \
		this->v1 = this->v1 * row; \
		this->v2 = this->v2 * row; \
		return *this; \
	} \
	/* Combine both dimensions with the same scalar. */ \
	inline VECTOR_TYPE& operator+=(const ELEMENT_TYPE& scalar) { \
		this->v1 = this->v1 + scalar; \
		this->v2 = this->v2 + scalar; \
		return *this; \
	} \
	inline VECTOR_TYPE& operator-=(const ELEMENT_TYPE& scalar) { \
		this->v1 = this->v1 - scalar; \
		this->v2 = this->v2 - scalar; \
		return *this; \
	} \
	inline VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scalar) { \
		this->v1 = this->v1 * scalar; \
		this->v2 = this->v2 * scalar; \
		return *this; \
	}

// These are the available in-place operations for 3D SIMD vectors F32x4x3, F32x8x3...
#define SIMD_VECTOR_MEMBER_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \ inline VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { this->v1 = this->v1 + offset.v1; this->v2 = this->v2 + offset.v2; this->v3 = this->v3 + offset.v3; return *this; } \ inline VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { this->v1 = this->v1 - offset.v1; this->v2 = this->v2 - offset.v2; this->v3 = this->v3 - offset.v3; return *this; } \ inline VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { this->v1 = this->v1 * scale.v1; this->v2 = this->v2 * scale.v2; this->v3 = this->v3 * scale.v3; return *this; } \ inline VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; } \ inline VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; } \ inline VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; this->v3 = this->v3 * scale; return *this; } \ inline VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; } \ inline VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; } \ inline VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; this->v3 = this->v3 * scale; return *this; } // 128x2-bit SIMD vectorized 2D math vector stored in xxxxyyyy format (one planar SIMD vector per dimension). struct F32x4x2 { F32x4 v1, v2; // Direct constructor given 3 rows of length 4 F32x4x2(const F32x4& v1, const F32x4& v2) : v1(v1), v2(v2) {} // Gradient constructor from an initial vector and the increment for each element. 
static F32x4x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) { return F32x4x2( F32x4::createGradient(start.x, increment.x), F32x4::createGradient(start.y, increment.y) ); } // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths) F32x4x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d) : v1(a.x, b.x, c.x, d.x), v2(a.y, b.y, c.y, d.y) {} // Transposed constructor given a single repeated column F32x4x2(const dsr::FVector2D& v) : v1(F32x4(v.x)), v2(F32x4(v.y)) {} // In-place math operations SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x4x2, F32x4, float) }; SIMD_VECTOR_INFIX_OPERATORS_2D(F32x4x2, F32x4, float) // 256x2-bit SIMD vectorized 2D math vector stored in xxxxxxxxyyyyyyyy format (one planar SIMD vector per dimension). struct F32x8x2 { F32x8 v1, v2; // Direct constructor given 3 rows of length 4 F32x8x2(const F32x8& v1, const F32x8& v2) : v1(v1), v2(v2) {} // Gradient constructor from an initial vector and the increment for each element. 
static F32x8x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) { return F32x8x2( F32x8::createGradient(start.x, increment.x), F32x8::createGradient(start.y, increment.y) ); } // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths) F32x8x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d, const dsr::FVector2D& e, const dsr::FVector2D& f, const dsr::FVector2D& g, const dsr::FVector2D& h) : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x), v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y) {} // Transposed constructor given a single repeated column F32x8x2(const dsr::FVector2D& v) : v1(F32x8(v.x)), v2(F32x8(v.y)) {} // In-place math operations SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x8x2, F32x8, float) }; SIMD_VECTOR_INFIX_OPERATORS_2D(F32x8x2, F32x8, float) // 128x3-bit SIMD vectorized 3D math vector stored in xxxxyyyyzzzz format (one planar SIMD vector per dimension). struct F32x4x3 { F32x4 v1, v2, v3; // Direct constructor given 3 rows of length 4 F32x4x3(const F32x4& v1, const F32x4& v2, const F32x4& v3) : v1(v1), v2(v2), v3(v3) {} // Gradient constructor from an initial vector and the increment for each element. 
static F32x4x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) { return F32x4x3( F32x4::createGradient(start.x, increment.x), F32x4::createGradient(start.y, increment.y), F32x4::createGradient(start.z, increment.z) ); } // Transposed constructor given 4 columns of length 3 F32x4x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d) : v1(a.x, b.x, c.x, d.x), v2(a.y, b.y, c.y, d.y), v3(a.z, b.z, c.z, d.z) {} // Transposed constructor given a single repeated column F32x4x3(const dsr::FVector3D& v) : v1(F32x4(v.x)), v2(F32x4(v.y)), v3(F32x4(v.z)) {} // In-place math operations SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x4x3, F32x4, float) }; SIMD_VECTOR_INFIX_OPERATORS_3D(F32x4x3, F32x4, float) // 256x3-bit SIMD vectorized 3D math vector stored in xxxxxxxxyyyyyyyyzzzzzzzz format (one planar SIMD vector per dimension). struct F32x8x3 { F32x8 v1, v2, v3; // Direct constructor given 3 rows of length 4 F32x8x3(const F32x8& v1, const F32x8& v2, const F32x8& v3) : v1(v1), v2(v2), v3(v3) {} // Gradient constructor from an initial vector and the increment for each element. 
static F32x8x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) { return F32x8x3( F32x8::createGradient(start.x, increment.x), F32x8::createGradient(start.y, increment.y), F32x8::createGradient(start.z, increment.z) ); } // Transposed constructor given 4 columns of length 3 F32x8x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d, const dsr::FVector3D& e, const dsr::FVector3D& f, const dsr::FVector3D& g, const dsr::FVector3D& h) : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x), v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y), v3(a.z, b.z, c.z, d.z, e.z, f.z, g.z, h.z) {} // Transposed constructor given a single repeated column F32x8x3(const dsr::FVector3D& v) : v1(F32x8(v.x)), v2(F32x8(v.y)), v3(F32x8(v.z)) {} // In-place math operations SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x8x3, F32x8, float) }; SIMD_VECTOR_INFIX_OPERATORS_3D(F32x8x3, F32x8, float) // X vector aliases #if DSR_DEFAULT_VECTOR_SIZE == 16 using F32xXx3 = F32x4x3; using F32xXx2 = F32x4x2; #elif DSR_DEFAULT_VECTOR_SIZE == 32 using F32xXx3 = F32x8x3; using F32xXx2 = F32x8x2; #endif // F vector aliases #if DSR_FLOAT_VECTOR_SIZE == 16 using F32xFx3 = F32x4x3; using F32xFx2 = F32x4x2; #elif DSR_FLOAT_VECTOR_SIZE == 32 using F32xFx3 = F32x8x3; using F32xFx2 = F32x8x2; #endif #endif