simd3D.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. // zlib open source license
  2. //
  3. // Copyright (c) 2017 to 2023 David Forsgren Piuva
  4. //
  5. // This software is provided 'as-is', without any express or implied
  6. // warranty. In no event will the authors be held liable for any damages
  7. // arising from the use of this software.
  8. //
  9. // Permission is granted to anyone to use this software for any purpose,
  10. // including commercial applications, and to alter it and redistribute it
  11. // freely, subject to the following restrictions:
  12. //
  13. // 1. The origin of this software must not be misrepresented; you must not
  14. // claim that you wrote the original software. If you use this software
  15. // in a product, an acknowledgment in the product documentation would be
  16. // appreciated but is not required.
  17. //
  18. // 2. Altered source versions must be plainly marked as such, and must not be
  19. // misrepresented as being the original software.
  20. //
  21. // 3. This notice may not be removed or altered from any source
  22. // distribution.
  23. // TODO: Replace simd3D.h with templates in the math folder using noSimd.h.
  24. #include "simd.h"
  25. #include "../math/FVector.h"
  26. // Linear algebra of up to three dimensions. For operating on four unrelated vectors in parallel.
  27. // Unlike simd.h, this is not a hardware abstraction layer using assembly intrinsics directly.
  28. // This module builds on top of simd.h for higher levels of abstraction.
  29. #ifndef DFPSR_SIMD_3D
  30. #define DFPSR_SIMD_3D
  31. namespace dsr {
  // Free-function arithmetic for planar 2D SIMD vectors (F32x4x2, F32x8x2...).
  //   VECTOR_TYPE:  the 2D vector struct, constructible from two components and exposing v1 and v2.
  //   SIMD_TYPE:    the per-dimension SIMD type; as a right-hand operand it is applied to both dimensions.
  //   ELEMENT_TYPE: a scalar right-hand operand, also applied to both dimensions.
  // The expansion calls squareRoot and reciprocalSquareRoot, which must be declared for SIMD_TYPE.
  // normalize scales by the reciprocal square root of the squared length and performs no zero-length check.
  #define SIMD_VECTOR_INFIX_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &lhs, const VECTOR_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 + rhs.v1, lhs.v2 + rhs.v2); } \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &lhs, const SIMD_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 + rhs, lhs.v2 + rhs); } \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &lhs, const ELEMENT_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 + rhs, lhs.v2 + rhs); } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &lhs, const VECTOR_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 - rhs.v1, lhs.v2 - rhs.v2); } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &lhs, const SIMD_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 - rhs, lhs.v2 - rhs); } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &lhs, const ELEMENT_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 - rhs, lhs.v2 - rhs); } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &operand) { return VECTOR_TYPE(-operand.v1, -operand.v2); } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &lhs, const VECTOR_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 * rhs.v1, lhs.v2 * rhs.v2); } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &lhs, const SIMD_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 * rhs, lhs.v2 * rhs); } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &lhs, const ELEMENT_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 * rhs, lhs.v2 * rhs); } \
    inline SIMD_TYPE dotProduct(const VECTOR_TYPE &first, const VECTOR_TYPE &second) { return (first.v1 * second.v1) + (first.v2 * second.v2); } \
    inline SIMD_TYPE squareLength(const VECTOR_TYPE &vec) { return dotProduct(vec, vec); } \
    inline SIMD_TYPE length(const VECTOR_TYPE &vec) { return squareRoot(dotProduct(vec, vec)); } \
    inline VECTOR_TYPE normalize(const VECTOR_TYPE &vec) { return vec * reciprocalSquareRoot(dotProduct(vec, vec)); }
  // Free-function arithmetic for planar 3D SIMD vectors (F32x4x3, F32x8x3...).
  //   VECTOR_TYPE:  the 3D vector struct, constructible from three components and exposing v1, v2 and v3.
  //   SIMD_TYPE:    the per-dimension SIMD type; as a right-hand operand it is applied to all three dimensions.
  //   ELEMENT_TYPE: a scalar right-hand operand, also applied to all three dimensions.
  // The expansion calls squareRoot and reciprocalSquareRoot, which must be declared for SIMD_TYPE.
  // normalize scales by the reciprocal square root of the squared length and performs no zero-length check.
  #define SIMD_VECTOR_INFIX_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &lhs, const VECTOR_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 + rhs.v1, lhs.v2 + rhs.v2, lhs.v3 + rhs.v3); } \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &lhs, const SIMD_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 + rhs, lhs.v2 + rhs, lhs.v3 + rhs); } \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &lhs, const ELEMENT_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 + rhs, lhs.v2 + rhs, lhs.v3 + rhs); } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &lhs, const VECTOR_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 - rhs.v1, lhs.v2 - rhs.v2, lhs.v3 - rhs.v3); } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &lhs, const SIMD_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 - rhs, lhs.v2 - rhs, lhs.v3 - rhs); } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &lhs, const ELEMENT_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 - rhs, lhs.v2 - rhs, lhs.v3 - rhs); } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &operand) { return VECTOR_TYPE(-operand.v1, -operand.v2, -operand.v3); } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &lhs, const VECTOR_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 * rhs.v1, lhs.v2 * rhs.v2, lhs.v3 * rhs.v3); } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &lhs, const SIMD_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 * rhs, lhs.v2 * rhs, lhs.v3 * rhs); } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &lhs, const ELEMENT_TYPE &rhs) { return VECTOR_TYPE(lhs.v1 * rhs, lhs.v2 * rhs, lhs.v3 * rhs); } \
    inline SIMD_TYPE dotProduct(const VECTOR_TYPE &first, const VECTOR_TYPE &second) { return (first.v1 * second.v1) + (first.v2 * second.v2) + (first.v3 * second.v3); } \
    inline SIMD_TYPE squareLength(const VECTOR_TYPE &vec) { return dotProduct(vec, vec); } \
    inline SIMD_TYPE length(const VECTOR_TYPE &vec) { return squareRoot(dotProduct(vec, vec)); } \
    inline VECTOR_TYPE normalize(const VECTOR_TYPE &vec) { return vec * reciprocalSquareRoot(dotProduct(vec, vec)); }
  // In-place (compound assignment) operations for 2D SIMD vectors F32x4x2, F32x8x2...
  // Expand inside the struct body; the struct must have assignable members v1 and v2.
  // Each operator is provided for a whole VECTOR_TYPE (per dimension), for a SIMD_TYPE and
  // for an ELEMENT_TYPE (both applied to every dimension), and returns a reference to *this.
  #define SIMD_VECTOR_MEMBER_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
    VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { \
      v1 = v1 + offset.v1; \
      v2 = v2 + offset.v2; \
      return *this; \
    } \
    VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { \
      v1 = v1 - offset.v1; \
      v2 = v2 - offset.v2; \
      return *this; \
    } \
    VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { \
      v1 = v1 * scale.v1; \
      v2 = v2 * scale.v2; \
      return *this; \
    } \
    VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { \
      v1 = v1 + offset; \
      v2 = v2 + offset; \
      return *this; \
    } \
    VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { \
      v1 = v1 - offset; \
      v2 = v2 - offset; \
      return *this; \
    } \
    VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { \
      v1 = v1 * scale; \
      v2 = v2 * scale; \
      return *this; \
    } \
    VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { \
      v1 = v1 + offset; \
      v2 = v2 + offset; \
      return *this; \
    } \
    VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { \
      v1 = v1 - offset; \
      v2 = v2 - offset; \
      return *this; \
    } \
    VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { \
      v1 = v1 * scale; \
      v2 = v2 * scale; \
      return *this; \
    }
  // In-place (compound assignment) operations for 3D SIMD vectors F32x4x3, F32x8x3...
  // Expand inside the struct body; the struct must have assignable members v1, v2 and v3.
  // Each operator is provided for a whole VECTOR_TYPE (per dimension), for a SIMD_TYPE and
  // for an ELEMENT_TYPE (both applied to every dimension), and returns a reference to *this.
  #define SIMD_VECTOR_MEMBER_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
    VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { \
      v1 = v1 + offset.v1; \
      v2 = v2 + offset.v2; \
      v3 = v3 + offset.v3; \
      return *this; \
    } \
    VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { \
      v1 = v1 - offset.v1; \
      v2 = v2 - offset.v2; \
      v3 = v3 - offset.v3; \
      return *this; \
    } \
    VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { \
      v1 = v1 * scale.v1; \
      v2 = v2 * scale.v2; \
      v3 = v3 * scale.v3; \
      return *this; \
    } \
    VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { \
      v1 = v1 + offset; \
      v2 = v2 + offset; \
      v3 = v3 + offset; \
      return *this; \
    } \
    VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { \
      v1 = v1 - offset; \
      v2 = v2 - offset; \
      v3 = v3 - offset; \
      return *this; \
    } \
    VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { \
      v1 = v1 * scale; \
      v2 = v2 * scale; \
      v3 = v3 * scale; \
      return *this; \
    } \
    VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { \
      v1 = v1 + offset; \
      v2 = v2 + offset; \
      v3 = v3 + offset; \
      return *this; \
    } \
    VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { \
      v1 = v1 - offset; \
      v2 = v2 - offset; \
      v3 = v3 - offset; \
      return *this; \
    } \
    VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { \
      v1 = v1 * scale; \
      v2 = v2 * scale; \
      v3 = v3 * scale; \
      return *this; \
    }
  142. // 128x2-bit SIMD vectorized 2D math vector stored in xxxxyyyy format (one planar SIMD vector per dimension).
  143. struct F32x4x2 {
  144. F32x4 v1, v2;
  145. // Direct constructor given 3 rows of length 4
  146. F32x4x2(const F32x4& v1, const F32x4& v2)
  147. : v1(v1), v2(v2) {}
  148. // Gradient constructor from an initial vector and the increment for each element.
  149. static F32x4x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  150. return F32x4x2(
  151. F32x4::createGradient(start.x, increment.x),
  152. F32x4::createGradient(start.y, increment.y)
  153. );
  154. }
  155. // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths)
  156. F32x4x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d)
  157. : v1(a.x, b.x, c.x, d.x),
  158. v2(a.y, b.y, c.y, d.y) {}
  159. // Transposed constructor given a single repeated column
  160. F32x4x2(const dsr::FVector2D& v)
  161. : v1(F32x4(v.x)),
  162. v2(F32x4(v.y)) {}
  163. // In-place math operations
  164. SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x4x2, F32x4, float)
  165. };
  166. SIMD_VECTOR_INFIX_OPERATORS_2D(F32x4x2, F32x4, float)
  167. // 256x2-bit SIMD vectorized 2D math vector stored in xxxxxxxxyyyyyyyy format (one planar SIMD vector per dimension).
  168. struct F32x8x2 {
  169. F32x8 v1, v2;
  170. // Direct constructor given 3 rows of length 4
  171. F32x8x2(const F32x8& v1, const F32x8& v2)
  172. : v1(v1), v2(v2) {}
  173. // Gradient constructor from an initial vector and the increment for each element.
  174. static F32x8x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  175. return F32x8x2(
  176. F32x8::createGradient(start.x, increment.x),
  177. F32x8::createGradient(start.y, increment.y)
  178. );
  179. }
  180. // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths)
  181. F32x8x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d, const dsr::FVector2D& e, const dsr::FVector2D& f, const dsr::FVector2D& g, const dsr::FVector2D& h)
  182. : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x),
  183. v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y) {}
  184. // Transposed constructor given a single repeated column
  185. F32x8x2(const dsr::FVector2D& v)
  186. : v1(F32x8(v.x)),
  187. v2(F32x8(v.y)) {}
  188. // In-place math operations
  189. SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x8x2, F32x8, float)
  190. };
  191. SIMD_VECTOR_INFIX_OPERATORS_2D(F32x8x2, F32x8, float)
  192. // 128x3-bit SIMD vectorized 3D math vector stored in xxxxyyyyzzzz format (one planar SIMD vector per dimension).
  193. struct F32x4x3 {
  194. F32x4 v1, v2, v3;
  195. // Direct constructor given 3 rows of length 4
  196. F32x4x3(const F32x4& v1, const F32x4& v2, const F32x4& v3)
  197. : v1(v1), v2(v2), v3(v3) {}
  198. // Gradient constructor from an initial vector and the increment for each element.
  199. static F32x4x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  200. return F32x4x3(
  201. F32x4::createGradient(start.x, increment.x),
  202. F32x4::createGradient(start.y, increment.y),
  203. F32x4::createGradient(start.z, increment.z)
  204. );
  205. }
  206. // Transposed constructor given 4 columns of length 3
  207. F32x4x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d)
  208. : v1(a.x, b.x, c.x, d.x),
  209. v2(a.y, b.y, c.y, d.y),
  210. v3(a.z, b.z, c.z, d.z) {}
  211. // Transposed constructor given a single repeated column
  212. F32x4x3(const dsr::FVector3D& v)
  213. : v1(F32x4(v.x)),
  214. v2(F32x4(v.y)),
  215. v3(F32x4(v.z)) {}
  216. // In-place math operations
  217. SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x4x3, F32x4, float)
  218. };
  219. SIMD_VECTOR_INFIX_OPERATORS_3D(F32x4x3, F32x4, float)
  220. // 256x3-bit SIMD vectorized 3D math vector stored in xxxxxxxxyyyyyyyyzzzzzzzz format (one planar SIMD vector per dimension).
  221. struct F32x8x3 {
  222. F32x8 v1, v2, v3;
  223. // Direct constructor given 3 rows of length 4
  224. F32x8x3(const F32x8& v1, const F32x8& v2, const F32x8& v3)
  225. : v1(v1), v2(v2), v3(v3) {}
  226. // Gradient constructor from an initial vector and the increment for each element.
  227. static F32x8x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  228. return F32x8x3(
  229. F32x8::createGradient(start.x, increment.x),
  230. F32x8::createGradient(start.y, increment.y),
  231. F32x8::createGradient(start.z, increment.z)
  232. );
  233. }
  234. // Transposed constructor given 4 columns of length 3
  235. F32x8x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d, const dsr::FVector3D& e, const dsr::FVector3D& f, const dsr::FVector3D& g, const dsr::FVector3D& h)
  236. : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x),
  237. v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y),
  238. v3(a.z, b.z, c.z, d.z, e.z, f.z, g.z, h.z) {}
  239. // Transposed constructor given a single repeated column
  240. F32x8x3(const dsr::FVector3D& v)
  241. : v1(F32x8(v.x)),
  242. v2(F32x8(v.y)),
  243. v3(F32x8(v.z)) {}
  244. // In-place math operations
  245. SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x8x3, F32x8, float)
  246. };
  247. SIMD_VECTOR_INFIX_OPERATORS_3D(F32x8x3, F32x8, float)
  248. // TODO: Refactor into template types to allow using vector lengths that are not known in compile time, such as ARM SVE registers.
  // X vector aliases, selected by DSR_DEFAULT_VECTOR_SIZE:
  // 16 maps to the x4 types and 32 maps to the x8 types. No alias is declared for other values.
  #if DSR_DEFAULT_VECTOR_SIZE == 16
    using F32xXx2 = F32x4x2;
    using F32xXx3 = F32x4x3;
  #elif DSR_DEFAULT_VECTOR_SIZE == 32
    using F32xXx2 = F32x8x2;
    using F32xXx3 = F32x8x3;
  #endif
  // F vector aliases, selected the same way by DSR_FLOAT_VECTOR_SIZE.
  #if DSR_FLOAT_VECTOR_SIZE == 16
    using F32xFx2 = F32x4x2;
    using F32xFx3 = F32x4x3;
  #elif DSR_FLOAT_VECTOR_SIZE == 32
    using F32xFx2 = F32x8x2;
    using F32xFx3 = F32x8x3;
  #endif
  // The member operator macros are expanded inside the struct bodies above and are removed here.
  #undef SIMD_VECTOR_MEMBER_OPERATORS_3D
  #undef SIMD_VECTOR_MEMBER_OPERATORS_2D
  267. }
  268. #endif