simd3D.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. // zlib open source license
  2. //
  3. // Copyright (c) 2017 to 2023 David Forsgren Piuva
  4. //
  5. // This software is provided 'as-is', without any express or implied
  6. // warranty. In no event will the authors be held liable for any damages
  7. // arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any purpose,
  10. // including commercial applications, and to alter it and redistribute it
  11. // freely, subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented; you must not
  14. // claim that you wrote the original software. If you use this software
  15. // in a product, an acknowledgment in the product documentation would be
  16. // appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such, and must not be
  19. // misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source
  22. // distribution.
  23. #include "simd.h"
  24. #include "../math/FVector.h"
  25. // Linear algebra of up to three dimensions. For operating on four unrelated vectors in parallel.
  26. // Unlike simd.h, this is not a hardware abstraction layer using assembly intrinsics directly.
  27. // This module builds on top of simd.h for higher levels of abstraction.
  28. #ifndef DFPSR_SIMD_3D
  29. #define DFPSR_SIMD_3D
  30. // These are the infix operations for 2D SIMD vectors F32x4x2, F32x8x2...
  31. #define SIMD_VECTOR_INFIX_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
  32. inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
  33. return VECTOR_TYPE(left.v1 + right.v1, left.v2 + right.v2); \
  34. } \
  35. inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
  36. return VECTOR_TYPE(left.v1 + right, left.v2 + right); \
  37. } \
  38. inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
  39. return VECTOR_TYPE(left.v1 + right, left.v2 + right); \
  40. } \
  41. inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
  42. return VECTOR_TYPE(left.v1 - right.v1, left.v2 - right.v2); \
  43. } \
  44. inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
  45. return VECTOR_TYPE(left.v1 - right, left.v2 - right); \
  46. } \
  47. inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
  48. return VECTOR_TYPE(left.v1 - right, left.v2 - right); \
  49. } \
  50. inline VECTOR_TYPE operator-(const VECTOR_TYPE& value) { \
  51. return VECTOR_TYPE(-value.v1, -value.v2); \
  52. } \
  53. inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
  54. return VECTOR_TYPE(left.v1 * right.v1, left.v2 * right.v2); \
  55. } \
  56. inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
  57. return VECTOR_TYPE(left.v1 * right, left.v2 * right); \
  58. } \
  59. inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
  60. return VECTOR_TYPE(left.v1 * right, left.v2 * right); \
  61. } \
  62. inline SIMD_TYPE dotProduct(const VECTOR_TYPE &a, const VECTOR_TYPE &b) { \
  63. return (a.v1 * b.v1) + (a.v2 * b.v2); \
  64. } \
  65. inline SIMD_TYPE squareLength(const VECTOR_TYPE &v) { \
  66. return dotProduct(v, v); \
  67. } \
  68. inline SIMD_TYPE length(const VECTOR_TYPE &v) { \
  69. return squareLength(v).squareRoot(); \
  70. } \
  71. inline VECTOR_TYPE normalize(const VECTOR_TYPE &v) { \
  72. return v * squareLength(v).reciprocalSquareRoot(); \
  73. }
  74. // These are the infix operations for 3D SIMD vectors F32x4x3, F32x8x3...
  75. #define SIMD_VECTOR_INFIX_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
  76. inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
  77. return VECTOR_TYPE(left.v1 + right.v1, left.v2 + right.v2, left.v3 + right.v3); \
  78. } \
  79. inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
  80. return VECTOR_TYPE(left.v1 + right, left.v2 + right, left.v3 + right); \
  81. } \
  82. inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
  83. return VECTOR_TYPE(left.v1 + right, left.v2 + right, left.v3 + right); \
  84. } \
  85. inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
  86. return VECTOR_TYPE(left.v1 - right.v1, left.v2 - right.v2, left.v3 - right.v3); \
  87. } \
  88. inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
  89. return VECTOR_TYPE(left.v1 - right, left.v2 - right, left.v3 - right); \
  90. } \
  91. inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
  92. return VECTOR_TYPE(left.v1 - right, left.v2 - right, left.v3 - right); \
  93. } \
  94. inline VECTOR_TYPE operator-(const VECTOR_TYPE& value) { \
  95. return VECTOR_TYPE(-value.v1, -value.v2, -value.v3); \
  96. } \
  97. inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
  98. return VECTOR_TYPE(left.v1 * right.v1, left.v2 * right.v2, left.v3 * right.v3); \
  99. } \
  100. inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
  101. return VECTOR_TYPE(left.v1 * right, left.v2 * right, left.v3 * right); \
  102. } \
  103. inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
  104. return VECTOR_TYPE(left.v1 * right, left.v2 * right, left.v3 * right); \
  105. } \
  106. inline SIMD_TYPE dotProduct(const VECTOR_TYPE &a, const VECTOR_TYPE &b) { \
  107. return (a.v1 * b.v1) + (a.v2 * b.v2) + (a.v3 * b.v3); \
  108. } \
  109. inline SIMD_TYPE squareLength(const VECTOR_TYPE &v) { \
  110. return dotProduct(v, v); \
  111. } \
  112. inline SIMD_TYPE length(const VECTOR_TYPE &v) { \
  113. return squareLength(v).squareRoot(); \
  114. } \
  115. inline VECTOR_TYPE normalize(const VECTOR_TYPE &v) { \
  116. return v * squareLength(v).reciprocalSquareRoot(); \
  117. }
  118. // These are the available in-plaxe operations for 2D SIMD vectors F32x4x2, F32x8x2...
  119. #define SIMD_VECTOR_MEMBER_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
  120. inline VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { this->v1 = this->v1 + offset.v1; this->v2 = this->v2 + offset.v2; return *this; } \
  121. inline VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { this->v1 = this->v1 - offset.v1; this->v2 = this->v2 - offset.v2; return *this; } \
  122. inline VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { this->v1 = this->v1 * scale.v1; this->v2 = this->v2 * scale.v2; return *this; } \
  123. inline VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; return *this; } \
  124. inline VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; return *this; } \
  125. inline VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; return *this; } \
  126. inline VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; return *this; } \
  127. inline VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; return *this; } \
  128. inline VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; return *this; }
  129. // These are the available in-plaxe operations for 3D SIMD vectors F32x4x3, F32x8x3...
  130. #define SIMD_VECTOR_MEMBER_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
  131. inline VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { this->v1 = this->v1 + offset.v1; this->v2 = this->v2 + offset.v2; this->v3 = this->v3 + offset.v3; return *this; } \
  132. inline VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { this->v1 = this->v1 - offset.v1; this->v2 = this->v2 - offset.v2; this->v3 = this->v3 - offset.v3; return *this; } \
  133. inline VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { this->v1 = this->v1 * scale.v1; this->v2 = this->v2 * scale.v2; this->v3 = this->v3 * scale.v3; return *this; } \
  134. inline VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; } \
  135. inline VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; } \
  136. inline VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; this->v3 = this->v3 * scale; return *this; } \
  137. inline VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 + offset; this->v2 = this->v2 + offset; this->v3 = this->v3 + offset; return *this; } \
  138. inline VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { this->v1 = this->v1 - offset; this->v2 = this->v2 - offset; this->v3 = this->v3 - offset; return *this; } \
  139. inline VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { this->v1 = this->v1 * scale; this->v2 = this->v2 * scale; this->v3 = this->v3 * scale; return *this; }
  140. // 128x2-bit SIMD vectorized 2D math vector stored in xxxxyyyy format (one planar SIMD vector per dimension).
  141. struct F32x4x2 {
  142. F32x4 v1, v2;
  143. // Direct constructor given 3 rows of length 4
  144. F32x4x2(const F32x4& v1, const F32x4& v2)
  145. : v1(v1), v2(v2) {}
  146. // Gradient constructor from an initial vector and the increment for each element.
  147. static F32x4x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  148. return F32x4x2(
  149. F32x4::createGradient(start.x, increment.x),
  150. F32x4::createGradient(start.y, increment.y)
  151. );
  152. }
  153. // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths)
  154. F32x4x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d)
  155. : v1(a.x, b.x, c.x, d.x),
  156. v2(a.y, b.y, c.y, d.y) {}
  157. // Transposed constructor given a single repeated column
  158. F32x4x2(const dsr::FVector2D& v)
  159. : v1(F32x4(v.x)),
  160. v2(F32x4(v.y)) {}
  161. // In-place math operations
  162. SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x4x2, F32x4, float)
  163. };
  164. SIMD_VECTOR_INFIX_OPERATORS_2D(F32x4x2, F32x4, float)
  165. // 256x2-bit SIMD vectorized 2D math vector stored in xxxxxxxxyyyyyyyy format (one planar SIMD vector per dimension).
  166. struct F32x8x2 {
  167. F32x8 v1, v2;
  168. // Direct constructor given 3 rows of length 4
  169. F32x8x2(const F32x8& v1, const F32x8& v2)
  170. : v1(v1), v2(v2) {}
  171. // Gradient constructor from an initial vector and the increment for each element.
  172. static F32x8x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  173. return F32x8x2(
  174. F32x8::createGradient(start.x, increment.x),
  175. F32x8::createGradient(start.y, increment.y)
  176. );
  177. }
  178. // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths)
  179. F32x8x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d, const dsr::FVector2D& e, const dsr::FVector2D& f, const dsr::FVector2D& g, const dsr::FVector2D& h)
  180. : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x),
  181. v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y) {}
  182. // Transposed constructor given a single repeated column
  183. F32x8x2(const dsr::FVector2D& v)
  184. : v1(F32x8(v.x)),
  185. v2(F32x8(v.y)) {}
  186. // In-place math operations
  187. SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x8x2, F32x8, float)
  188. };
  189. SIMD_VECTOR_INFIX_OPERATORS_2D(F32x8x2, F32x8, float)
  190. // 128x3-bit SIMD vectorized 3D math vector stored in xxxxyyyyzzzz format (one planar SIMD vector per dimension).
  191. struct F32x4x3 {
  192. F32x4 v1, v2, v3;
  193. // Direct constructor given 3 rows of length 4
  194. F32x4x3(const F32x4& v1, const F32x4& v2, const F32x4& v3)
  195. : v1(v1), v2(v2), v3(v3) {}
  196. // Gradient constructor from an initial vector and the increment for each element.
  197. static F32x4x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  198. return F32x4x3(
  199. F32x4::createGradient(start.x, increment.x),
  200. F32x4::createGradient(start.y, increment.y),
  201. F32x4::createGradient(start.z, increment.z)
  202. );
  203. }
  204. // Transposed constructor given 4 columns of length 3
  205. F32x4x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d)
  206. : v1(a.x, b.x, c.x, d.x),
  207. v2(a.y, b.y, c.y, d.y),
  208. v3(a.z, b.z, c.z, d.z) {}
  209. // Transposed constructor given a single repeated column
  210. F32x4x3(const dsr::FVector3D& v)
  211. : v1(F32x4(v.x)),
  212. v2(F32x4(v.y)),
  213. v3(F32x4(v.z)) {}
  214. // In-place math operations
  215. SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x4x3, F32x4, float)
  216. };
  217. SIMD_VECTOR_INFIX_OPERATORS_3D(F32x4x3, F32x4, float)
  218. // 256x3-bit SIMD vectorized 3D math vector stored in xxxxxxxxyyyyyyyyzzzzzzzz format (one planar SIMD vector per dimension).
  219. struct F32x8x3 {
  220. F32x8 v1, v2, v3;
  221. // Direct constructor given 3 rows of length 4
  222. F32x8x3(const F32x8& v1, const F32x8& v2, const F32x8& v3)
  223. : v1(v1), v2(v2), v3(v3) {}
  224. // Gradient constructor from an initial vector and the increment for each element.
  225. static F32x8x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  226. return F32x8x3(
  227. F32x8::createGradient(start.x, increment.x),
  228. F32x8::createGradient(start.y, increment.y),
  229. F32x8::createGradient(start.z, increment.z)
  230. );
  231. }
  232. // Transposed constructor given 4 columns of length 3
  233. F32x8x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d, const dsr::FVector3D& e, const dsr::FVector3D& f, const dsr::FVector3D& g, const dsr::FVector3D& h)
  234. : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x),
  235. v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y),
  236. v3(a.z, b.z, c.z, d.z, e.z, f.z, g.z, h.z) {}
  237. // Transposed constructor given a single repeated column
  238. F32x8x3(const dsr::FVector3D& v)
  239. : v1(F32x8(v.x)),
  240. v2(F32x8(v.y)),
  241. v3(F32x8(v.z)) {}
  242. // In-place math operations
  243. SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x8x3, F32x8, float)
  244. };
  245. SIMD_VECTOR_INFIX_OPERATORS_3D(F32x8x3, F32x8, float)
  246. // X vector aliases
  247. #if DSR_DEFAULT_VECTOR_SIZE == 16
  248. using F32xXx3 = F32x4x3;
  249. using F32xXx2 = F32x4x2;
  250. #elif DSR_DEFAULT_VECTOR_SIZE == 32
  251. using F32xXx3 = F32x8x3;
  252. using F32xXx2 = F32x8x2;
  253. #endif
  254. // F vector aliases
  255. #if DSR_FLOAT_VECTOR_SIZE == 16
  256. using F32xFx3 = F32x4x3;
  257. using F32xFx2 = F32x4x2;
  258. #elif DSR_FLOAT_VECTOR_SIZE == 32
  259. using F32xFx3 = F32x8x3;
  260. using F32xFx2 = F32x8x2;
  261. #endif
  262. #endif