| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284 |
- // zlib open source license
- //
- // Copyright (c) 2017 to 2023 David Forsgren Piuva
- //
- // This software is provided 'as-is', without any express or implied
- // warranty. In no event will the authors be held liable for any damages
- // arising from the use of this software.
- //
- // Permission is granted to anyone to use this software for any purpose,
- // including commercial applications, and to alter it and redistribute it
- // freely, subject to the following restrictions:
- //
- // 1. The origin of this software must not be misrepresented; you must not
- // claim that you wrote the original software. If you use this software
- // in a product, an acknowledgment in the product documentation would be
- // appreciated but is not required.
- //
- // 2. Altered source versions must be plainly marked as such, and must not be
- // misrepresented as being the original software.
- //
- // 3. This notice may not be removed or altered from any source
- // distribution.
- #include "simd.h"
- #include "../math/FVector.h"
- // Linear algebra of up to three dimensions. For operating on four unrelated vectors in parallel.
- // Unlike simd.h, this is not a hardware abstraction layer using assembly intrinsics directly.
- // This module builds on top of simd.h for higher levels of abstraction.
- #ifndef DFPSR_SIMD_3D
- #define DFPSR_SIMD_3D
- namespace dsr {
- // These are the infix operations for 2D SIMD vectors F32x4x2, F32x8x2...
- #define SIMD_VECTOR_INFIX_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
- inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
- return VECTOR_TYPE(left.v1 + right.v1, left.v2 + right.v2); \
- } \
- inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
- return VECTOR_TYPE(left.v1 + right, left.v2 + right); \
- } \
- inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
- return VECTOR_TYPE(left.v1 + right, left.v2 + right); \
- } \
- inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
- return VECTOR_TYPE(left.v1 - right.v1, left.v2 - right.v2); \
- } \
- inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
- return VECTOR_TYPE(left.v1 - right, left.v2 - right); \
- } \
- inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
- return VECTOR_TYPE(left.v1 - right, left.v2 - right); \
- } \
- inline VECTOR_TYPE operator-(const VECTOR_TYPE& value) { \
- return VECTOR_TYPE(-value.v1, -value.v2); \
- } \
- inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
- return VECTOR_TYPE(left.v1 * right.v1, left.v2 * right.v2); \
- } \
- inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
- return VECTOR_TYPE(left.v1 * right, left.v2 * right); \
- } \
- inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
- return VECTOR_TYPE(left.v1 * right, left.v2 * right); \
- } \
- inline SIMD_TYPE dotProduct(const VECTOR_TYPE &a, const VECTOR_TYPE &b) { \
- return (a.v1 * b.v1) + (a.v2 * b.v2); \
- } \
- inline SIMD_TYPE squareLength(const VECTOR_TYPE &v) { \
- return dotProduct(v, v); \
- } \
- inline SIMD_TYPE length(const VECTOR_TYPE &v) { \
- return squareLength(v).squareRoot(); \
- } \
- inline VECTOR_TYPE normalize(const VECTOR_TYPE &v) { \
- return v * squareLength(v).reciprocalSquareRoot(); \
- }
- // These are the infix operations for 3D SIMD vectors F32x4x3, F32x8x3...
- #define SIMD_VECTOR_INFIX_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
- inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
- return VECTOR_TYPE(left.v1 + right.v1, left.v2 + right.v2, left.v3 + right.v3); \
- } \
- inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
- return VECTOR_TYPE(left.v1 + right, left.v2 + right, left.v3 + right); \
- } \
- inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
- return VECTOR_TYPE(left.v1 + right, left.v2 + right, left.v3 + right); \
- } \
- inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
- return VECTOR_TYPE(left.v1 - right.v1, left.v2 - right.v2, left.v3 - right.v3); \
- } \
- inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
- return VECTOR_TYPE(left.v1 - right, left.v2 - right, left.v3 - right); \
- } \
- inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
- return VECTOR_TYPE(left.v1 - right, left.v2 - right, left.v3 - right); \
- } \
- inline VECTOR_TYPE operator-(const VECTOR_TYPE& value) { \
- return VECTOR_TYPE(-value.v1, -value.v2, -value.v3); \
- } \
- inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
- return VECTOR_TYPE(left.v1 * right.v1, left.v2 * right.v2, left.v3 * right.v3); \
- } \
- inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
- return VECTOR_TYPE(left.v1 * right, left.v2 * right, left.v3 * right); \
- } \
- inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
- return VECTOR_TYPE(left.v1 * right, left.v2 * right, left.v3 * right); \
- } \
- inline SIMD_TYPE dotProduct(const VECTOR_TYPE &a, const VECTOR_TYPE &b) { \
- return (a.v1 * b.v1) + (a.v2 * b.v2) + (a.v3 * b.v3); \
- } \
- inline SIMD_TYPE squareLength(const VECTOR_TYPE &v) { \
- return dotProduct(v, v); \
- } \
- inline SIMD_TYPE length(const VECTOR_TYPE &v) { \
- return squareLength(v).squareRoot(); \
- } \
- inline VECTOR_TYPE normalize(const VECTOR_TYPE &v) { \
- return v * squareLength(v).reciprocalSquareRoot(); \
- }
- // These are the available in-plaxe operations for 2D SIMD vectors F32x4x2, F32x8x2...
- #define SIMD_VECTOR_MEMBER_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
- inline VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { this->v1 = this->v1 + offset.v1; this->v2 = this->v2 + offset.v2; return *this; } \
- inline VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { this->v1 = this->v1 - offset.v1; this->v2 = this->v2 - offset.v2; return *this; } \
- inline VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { this->v1 = this->v1 * scale.v1; this->v2 = this->v2 * scale.v2; return *this; } \
- inline VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; return *this; } \
- inline VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; return *this; } \
- inline VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; return *this; } \
- inline VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; return *this; } \
- inline VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; return *this; } \
- inline VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; return *this; }
- // These are the available in-plaxe operations for 3D SIMD vectors F32x4x3, F32x8x3...
- #define SIMD_VECTOR_MEMBER_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
- inline VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { this->v1 = this->v1 + offset.v1; this->v2 = this->v2 + offset.v2; this->v3 = this->v3 + offset.v3; return *this; } \
- inline VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { this->v1 = this->v1 - offset.v1; this->v2 = this->v2 - offset.v2; this->v3 = this->v3 - offset.v3; return *this; } \
- inline VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { this->v1 = this->v1 * scale.v1; this->v2 = this->v2 * scale.v2; this->v3 = this->v3 * scale.v3; return *this; } \
- inline VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; } \
- inline VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; } \
- inline VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; this->v3 = this->v3 * scale; return *this; } \
- inline VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; } \
- inline VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; } \
- inline VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; this->v3 = this->v3 * scale; return *this; }
- // 128x2-bit SIMD vectorized 2D math vector stored in xxxxyyyy format (one planar SIMD vector per dimension).
- struct F32x4x2 {
- F32x4 v1, v2;
- // Direct constructor given 3 rows of length 4
- F32x4x2(const F32x4& v1, const F32x4& v2)
- : v1(v1), v2(v2) {}
- // Gradient constructor from an initial vector and the increment for each element.
- static F32x4x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
- return F32x4x2(
- F32x4::createGradient(start.x, increment.x),
- F32x4::createGradient(start.y, increment.y)
- );
- }
- // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths)
- F32x4x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d)
- : v1(a.x, b.x, c.x, d.x),
- v2(a.y, b.y, c.y, d.y) {}
- // Transposed constructor given a single repeated column
- F32x4x2(const dsr::FVector2D& v)
- : v1(F32x4(v.x)),
- v2(F32x4(v.y)) {}
- // In-place math operations
- SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x4x2, F32x4, float)
- };
- SIMD_VECTOR_INFIX_OPERATORS_2D(F32x4x2, F32x4, float)
- // 256x2-bit SIMD vectorized 2D math vector stored in xxxxxxxxyyyyyyyy format (one planar SIMD vector per dimension).
- struct F32x8x2 {
- F32x8 v1, v2;
- // Direct constructor given 3 rows of length 4
- F32x8x2(const F32x8& v1, const F32x8& v2)
- : v1(v1), v2(v2) {}
- // Gradient constructor from an initial vector and the increment for each element.
- static F32x8x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
- return F32x8x2(
- F32x8::createGradient(start.x, increment.x),
- F32x8::createGradient(start.y, increment.y)
- );
- }
- // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths)
- F32x8x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d, const dsr::FVector2D& e, const dsr::FVector2D& f, const dsr::FVector2D& g, const dsr::FVector2D& h)
- : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x),
- v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y) {}
- // Transposed constructor given a single repeated column
- F32x8x2(const dsr::FVector2D& v)
- : v1(F32x8(v.x)),
- v2(F32x8(v.y)) {}
- // In-place math operations
- SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x8x2, F32x8, float)
- };
- SIMD_VECTOR_INFIX_OPERATORS_2D(F32x8x2, F32x8, float)
- // 128x3-bit SIMD vectorized 3D math vector stored in xxxxyyyyzzzz format (one planar SIMD vector per dimension).
- struct F32x4x3 {
- F32x4 v1, v2, v3;
- // Direct constructor given 3 rows of length 4
- F32x4x3(const F32x4& v1, const F32x4& v2, const F32x4& v3)
- : v1(v1), v2(v2), v3(v3) {}
- // Gradient constructor from an initial vector and the increment for each element.
- static F32x4x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
- return F32x4x3(
- F32x4::createGradient(start.x, increment.x),
- F32x4::createGradient(start.y, increment.y),
- F32x4::createGradient(start.z, increment.z)
- );
- }
- // Transposed constructor given 4 columns of length 3
- F32x4x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d)
- : v1(a.x, b.x, c.x, d.x),
- v2(a.y, b.y, c.y, d.y),
- v3(a.z, b.z, c.z, d.z) {}
- // Transposed constructor given a single repeated column
- F32x4x3(const dsr::FVector3D& v)
- : v1(F32x4(v.x)),
- v2(F32x4(v.y)),
- v3(F32x4(v.z)) {}
- // In-place math operations
- SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x4x3, F32x4, float)
- };
- SIMD_VECTOR_INFIX_OPERATORS_3D(F32x4x3, F32x4, float)
- // 256x3-bit SIMD vectorized 3D math vector stored in xxxxxxxxyyyyyyyyzzzzzzzz format (one planar SIMD vector per dimension).
- struct F32x8x3 {
- F32x8 v1, v2, v3;
- // Direct constructor given 3 rows of length 4
- F32x8x3(const F32x8& v1, const F32x8& v2, const F32x8& v3)
- : v1(v1), v2(v2), v3(v3) {}
- // Gradient constructor from an initial vector and the increment for each element.
- static F32x8x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
- return F32x8x3(
- F32x8::createGradient(start.x, increment.x),
- F32x8::createGradient(start.y, increment.y),
- F32x8::createGradient(start.z, increment.z)
- );
- }
- // Transposed constructor given 4 columns of length 3
- F32x8x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d, const dsr::FVector3D& e, const dsr::FVector3D& f, const dsr::FVector3D& g, const dsr::FVector3D& h)
- : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x),
- v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y),
- v3(a.z, b.z, c.z, d.z, e.z, f.z, g.z, h.z) {}
- // Transposed constructor given a single repeated column
- F32x8x3(const dsr::FVector3D& v)
- : v1(F32x8(v.x)),
- v2(F32x8(v.y)),
- v3(F32x8(v.z)) {}
- // In-place math operations
- SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x8x3, F32x8, float)
- };
- SIMD_VECTOR_INFIX_OPERATORS_3D(F32x8x3, F32x8, float)
- // X vector aliases
- #if DSR_DEFAULT_VECTOR_SIZE == 16
- using F32xXx3 = F32x4x3;
- using F32xXx2 = F32x4x2;
- #elif DSR_DEFAULT_VECTOR_SIZE == 32
- using F32xXx3 = F32x8x3;
- using F32xXx2 = F32x8x2;
- #endif
- // F vector aliases
- #if DSR_FLOAT_VECTOR_SIZE == 16
- using F32xFx3 = F32x4x3;
- using F32xFx2 = F32x4x2;
- #elif DSR_FLOAT_VECTOR_SIZE == 32
- using F32xFx3 = F32x8x3;
- using F32xFx2 = F32x8x2;
- #endif
- #undef SIMD_VECTOR_MEMBER_OPERATORS_2D
- #undef SIMD_VECTOR_MEMBER_OPERATORS_3D
- }
- #endif
|