simd3D.h 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. // zlib open source license
  2. //
  3. // Copyright (c) 2017 to 2022 David Forsgren Piuva
  4. //
  5. // This software is provided 'as-is', without any express or implied
  6. // warranty. In no event will the authors be held liable for any damages
  7. // arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any purpose,
  10. // including commercial applications, and to alter it and redistribute it
  11. // freely, subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented; you must not
  14. // claim that you wrote the original software. If you use this software
  15. // in a product, an acknowledgment in the product documentation would be
  16. // appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such, and must not be
  19. // misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source
  22. // distribution.
  23. #include "simd.h"
  24. #include "../math/FVector.h"
  25. // Linear algebra of up to three dimensions. For operating on four unrelated vectors in parallel.
  26. // Unlike simd.h, this is not a hardware abstraction layer using assembly intrinsics directly.
  27. // This module builds on top of simd.h for higher levels of abstraction.
  28. #ifndef DFPSR_SIMD_3D
  29. #define DFPSR_SIMD_3D
  30. // 3D vector in xxxxyyyyzzzz format
  31. struct F32x4x3 {
  32. F32x4 v1, v2, v3;
  33. // Direct constructor given 3 rows of length 4
  34. F32x4x3(const F32x4& v1, const F32x4& v2, const F32x4& v3)
  35. : v1(v1), v2(v2), v3(v3) {}
  36. // Transposed constructor given 4 columns of length 3
  37. F32x4x3(const dsr::FVector3D& vx, const dsr::FVector3D& vy, const dsr::FVector3D& vz, const dsr::FVector3D& vw)
  38. : v1(F32x4(vx.x, vy.x, vz.x, vw.x)),
  39. v2(F32x4(vx.y, vy.y, vz.y, vw.y)),
  40. v3(F32x4(vx.z, vy.z, vz.z, vw.z)) {}
  41. // Transposed constructor given a single repeated column
  42. F32x4x3(const dsr::FVector3D& v)
  43. : v1(F32x4(v.x, v.x, v.x, v.x)),
  44. v2(F32x4(v.y, v.y, v.y, v.y)),
  45. v3(F32x4(v.z, v.z, v.z, v.z)) {}
  46. // In-place math operations
  47. inline F32x4x3& operator+=(const F32x4x3& offset) { this->v1 = this->v1 + offset.v1; this->v2 = this->v2 + offset.v2; this->v3 = this->v3 + offset.v3; return *this; }
  48. inline F32x4x3& operator-=(const F32x4x3& offset) { this->v1 = this->v1 - offset.v1; this->v2 = this->v2 - offset.v2; this->v3 = this->v3 - offset.v3; return *this; }
  49. inline F32x4x3& operator*=(const F32x4x3& scale) { this->v1 = this->v1 * scale.v1; this->v2 = this->v2 * scale.v2; this->v3 = this->v3 * scale.v3; return *this; }
  50. inline F32x4x3& operator+=(const F32x4& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; }
  51. inline F32x4x3& operator-=(const F32x4& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; }
  52. inline F32x4x3& operator*=(const F32x4& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; this->v3 = this->v3 * scale; return *this; }
  53. inline F32x4x3& operator+=(const float& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; }
  54. inline F32x4x3& operator-=(const float& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; }
  55. inline F32x4x3& operator*=(const float& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; this->v3 = this->v3 * scale; return *this; }
  56. };
  57. inline F32x4x3 operator+(const F32x4x3 &left, const F32x4x3 &right) {
  58. return F32x4x3(left.v1 + right.v1, left.v2 + right.v2, left.v3 + right.v3);
  59. }
  60. inline F32x4x3 operator+(const F32x4x3 &left, const F32x4 &right) {
  61. return F32x4x3(left.v1 + right, left.v2 + right, left.v3 + right);
  62. }
  63. inline F32x4x3 operator+(const F32x4x3 &left, const float &right) {
  64. return F32x4x3(left.v1 + right, left.v2 + right, left.v3 + right);
  65. }
  66. inline F32x4x3 operator-(const F32x4x3 &left, const F32x4x3 &right) {
  67. return F32x4x3(left.v1 - right.v1, left.v2 - right.v2, left.v3 - right.v3);
  68. }
  69. inline F32x4x3 operator-(const F32x4x3 &left, const F32x4 &right) {
  70. return F32x4x3(left.v1 - right, left.v2 - right, left.v3 - right);
  71. }
  72. inline F32x4x3 operator-(const F32x4x3 &left, const float &right) {
  73. return F32x4x3(left.v1 - right, left.v2 - right, left.v3 - right);
  74. }
  75. inline F32x4x3 operator-(const F32x4x3& value) {
  76. return F32x4x3(-value.v1, -value.v2, -value.v3);
  77. }
  78. inline F32x4x3 operator*(const F32x4x3 &left, const F32x4x3 &right) {
  79. return F32x4x3(left.v1 * right.v1, left.v2 * right.v2, left.v3 * right.v3);
  80. }
  81. inline F32x4x3 operator*(const F32x4x3 &left, const F32x4 &right) {
  82. return F32x4x3(left.v1 * right, left.v2 * right, left.v3 * right);
  83. }
  84. inline F32x4x3 operator*(const F32x4x3 &left, const float &right) {
  85. return F32x4x3(left.v1 * right, left.v2 * right, left.v3 * right);
  86. }
  87. inline F32x4 dotProduct(const F32x4x3 &a, const F32x4x3 &b) {
  88. return (a.v1 * b.v1) + (a.v2 * b.v2) + (a.v3 * b.v3);
  89. }
  90. inline F32x4 squareLength(const F32x4x3 &v) {
  91. return dotProduct(v, v);
  92. }
  93. inline F32x4 length(const F32x4x3 &v) {
  94. return squareLength(v).squareRoot();
  95. }
  96. inline F32x4x3 normalize(const F32x4x3 &v) {
  97. return v * squareLength(v).reciprocalSquareRoot();
  98. }
  99. // 2D vector in xxxxyyyy format
  100. struct F32x4x2 {
  101. F32x4 v1, v2;
  102. // Direct constructor given 3 rows of length 4
  103. F32x4x2(const F32x4& v1, const F32x4& v2)
  104. : v1(v1), v2(v2) {}
  105. // Transposed constructor given 4 columns of length 3
  106. F32x4x2(const dsr::FVector2D& vx, const dsr::FVector2D& vy, const dsr::FVector2D& vz, const dsr::FVector2D& vw)
  107. : v1(F32x4(vx.x, vy.x, vz.x, vw.x)),
  108. v2(F32x4(vx.y, vy.y, vz.y, vw.y)) {}
  109. // Transposed constructor given a single repeated column
  110. F32x4x2(const dsr::FVector2D& v)
  111. : v1(F32x4(v.x, v.x, v.x, v.x)),
  112. v2(F32x4(v.y, v.y, v.y, v.y)) {}
  113. // In-place math operations
  114. inline F32x4x2& operator+=(const F32x4x2& offset) { this->v1 = this->v1 + offset.v1; this->v2 = this->v2 + offset.v2; return *this; }
  115. inline F32x4x2& operator-=(const F32x4x2& offset) { this->v1 = this->v1 - offset.v1; this->v2 = this->v2 - offset.v2; return *this; }
  116. inline F32x4x2& operator*=(const F32x4x2& scale) { this->v1 = this->v1 * scale.v1; this->v2 = this->v2 * scale.v2; return *this; }
  117. inline F32x4x2& operator+=(const F32x4& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; return *this; }
  118. inline F32x4x2& operator-=(const F32x4& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; return *this; }
  119. inline F32x4x2& operator*=(const F32x4& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; return *this; }
  120. inline F32x4x2& operator+=(const float& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; return *this; }
  121. inline F32x4x2& operator-=(const float& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; return *this; }
  122. inline F32x4x2& operator*=(const float& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; return *this; }
  123. };
  124. inline F32x4x2 operator+(const F32x4x2 &left, const F32x4x2 &right) {
  125. return F32x4x2(left.v1 + right.v1, left.v2 + right.v2);
  126. }
  127. inline F32x4x2 operator+(const F32x4x2 &left, const F32x4 &right) {
  128. return F32x4x2(left.v1 + right, left.v2 + right);
  129. }
  130. inline F32x4x2 operator+(const F32x4x2 &left, const float &right) {
  131. return F32x4x2(left.v1 + right, left.v2 + right);
  132. }
  133. inline F32x4x2 operator-(const F32x4x2 &left, const F32x4x2 &right) {
  134. return F32x4x2(left.v1 - right.v1, left.v2 - right.v2);
  135. }
  136. inline F32x4x2 operator-(const F32x4x2 &left, const F32x4 &right) {
  137. return F32x4x2(left.v1 - right, left.v2 - right);
  138. }
  139. inline F32x4x2 operator-(const F32x4x2 &left, const float &right) {
  140. return F32x4x2(left.v1 - right, left.v2 - right);
  141. }
  142. inline F32x4x2 operator-(const F32x4x2& value) {
  143. return F32x4x2(-value.v1, -value.v2);
  144. }
  145. inline F32x4x2 operator*(const F32x4x2 &left, const F32x4x2 &right) {
  146. return F32x4x2(left.v1 * right.v1, left.v2 * right.v2);
  147. }
  148. inline F32x4x2 operator*(const F32x4x2 &left, const F32x4 &right) {
  149. return F32x4x2(left.v1 * right, left.v2 * right);
  150. }
  151. inline F32x4x2 operator*(const F32x4x2 &left, const float &right) {
  152. return F32x4x2(left.v1 * right, left.v2 * right);
  153. }
  154. inline F32x4 dotProduct(const F32x4x2 &a, const F32x4x2 &b) {
  155. return (a.v1 * b.v1) + (a.v2 * b.v2);
  156. }
  157. inline F32x4 squareLength(const F32x4x2 &v) {
  158. return dotProduct(v, v);
  159. }
  160. inline F32x4 length(const F32x4x2 &v) {
  161. return squareLength(v).squareRoot();
  162. }
  163. inline F32x4x2 normalize(const F32x4x2 &v) {
  164. return v * squareLength(v).reciprocalSquareRoot();
  165. }
  166. #endif