// simd3D.h
  // zlib open source license
  //
  // Copyright (c) 2017 to 2023 David Forsgren Piuva
  //
  // This software is provided 'as-is', without any express or implied
  // warranty. In no event will the authors be held liable for any damages
  // arising from the use of this software.
  //
  // Permission is granted to anyone to use this software for any purpose,
  // including commercial applications, and to alter it and redistribute it
  // freely, subject to the following restrictions:
  //
  // 1. The origin of this software must not be misrepresented; you must not
  // claim that you wrote the original software. If you use this software
  // in a product, an acknowledgment in the product documentation would be
  // appreciated but is not required.
  //
  // 2. Altered source versions must be plainly marked as such, and must not be
  // misrepresented as being the original software.
  //
  // 3. This notice may not be removed or altered from any source
  // distribution.
  23. #include "simd.h"
  24. #include "../math/FVector.h"
  // Linear algebra of up to three dimensions. For operating on four or eight unrelated vectors in parallel, one per SIMD lane.
  // Unlike simd.h, this is not a hardware abstraction layer using assembly intrinsics directly.
  // This module builds on top of simd.h for higher levels of abstraction.
  28. #ifndef DFPSR_SIMD_3D
  29. #define DFPSR_SIMD_3D
  30. namespace dsr {
  // These are the infix operations for 2D SIMD vectors F32x4x2, F32x8x2...
  //   VECTOR_TYPE is the 2D vector type to generate the functions for. It must expose the members v1 and v2
  //     and be constructible from the two per-dimension results.
  //   SIMD_TYPE is the SIMD type holding one dimension, such as F32x4 for F32x4x2.
  //   ELEMENT_TYPE is the scalar type of a single element, such as float.
  // Generated functions:
  //   * Binary +, - and * with a VECTOR_TYPE, SIMD_TYPE or ELEMENT_TYPE on the right side, applied per dimension.
  //   * Unary - negating each dimension.
  //   * dotProduct, squareLength, length and normalize, where the scalar results are returned as one SIMD_TYPE.
  // The per-dimension type must support +, -, * and unary -, and SIMD_TYPE must have the squareRoot and
  //   reciprocalSquareRoot members that length and normalize call.
  // normalize multiplies by the reciprocal square root of the square length without treating zero length as a special case.
  #define SIMD_VECTOR_INFIX_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
      return VECTOR_TYPE(left.v1 + right.v1, left.v2 + right.v2); \
    } \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
      return VECTOR_TYPE(left.v1 + right, left.v2 + right); \
    } \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
      return VECTOR_TYPE(left.v1 + right, left.v2 + right); \
    } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
      return VECTOR_TYPE(left.v1 - right.v1, left.v2 - right.v2); \
    } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
      return VECTOR_TYPE(left.v1 - right, left.v2 - right); \
    } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
      return VECTOR_TYPE(left.v1 - right, left.v2 - right); \
    } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE& value) { \
      return VECTOR_TYPE(-value.v1, -value.v2); \
    } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
      return VECTOR_TYPE(left.v1 * right.v1, left.v2 * right.v2); \
    } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
      return VECTOR_TYPE(left.v1 * right, left.v2 * right); \
    } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
      return VECTOR_TYPE(left.v1 * right, left.v2 * right); \
    } \
    inline SIMD_TYPE dotProduct(const VECTOR_TYPE &a, const VECTOR_TYPE &b) { \
      return (a.v1 * b.v1) + (a.v2 * b.v2); \
    } \
    inline SIMD_TYPE squareLength(const VECTOR_TYPE &v) { \
      return dotProduct(v, v); \
    } \
    inline SIMD_TYPE length(const VECTOR_TYPE &v) { \
      return squareLength(v).squareRoot(); \
    } \
    inline VECTOR_TYPE normalize(const VECTOR_TYPE &v) { \
      return v * squareLength(v).reciprocalSquareRoot(); \
    }
  // These are the infix operations for 3D SIMD vectors F32x4x3, F32x8x3...
  //   VECTOR_TYPE is the 3D vector type to generate the functions for. It must expose the members v1, v2 and v3
  //     and be constructible from the three per-dimension results.
  //   SIMD_TYPE is the SIMD type holding one dimension, such as F32x4 for F32x4x3.
  //   ELEMENT_TYPE is the scalar type of a single element, such as float.
  // Generated functions:
  //   * Binary +, - and * with a VECTOR_TYPE, SIMD_TYPE or ELEMENT_TYPE on the right side, applied per dimension.
  //   * Unary - negating each dimension.
  //   * dotProduct, squareLength, length and normalize, where the scalar results are returned as one SIMD_TYPE.
  // The per-dimension type must support +, -, * and unary -, and SIMD_TYPE must have the squareRoot and
  //   reciprocalSquareRoot members that length and normalize call.
  // normalize multiplies by the reciprocal square root of the square length without treating zero length as a special case.
  #define SIMD_VECTOR_INFIX_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
      return VECTOR_TYPE(left.v1 + right.v1, left.v2 + right.v2, left.v3 + right.v3); \
    } \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
      return VECTOR_TYPE(left.v1 + right, left.v2 + right, left.v3 + right); \
    } \
    inline VECTOR_TYPE operator+(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
      return VECTOR_TYPE(left.v1 + right, left.v2 + right, left.v3 + right); \
    } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
      return VECTOR_TYPE(left.v1 - right.v1, left.v2 - right.v2, left.v3 - right.v3); \
    } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
      return VECTOR_TYPE(left.v1 - right, left.v2 - right, left.v3 - right); \
    } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
      return VECTOR_TYPE(left.v1 - right, left.v2 - right, left.v3 - right); \
    } \
    inline VECTOR_TYPE operator-(const VECTOR_TYPE& value) { \
      return VECTOR_TYPE(-value.v1, -value.v2, -value.v3); \
    } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const VECTOR_TYPE &right) { \
      return VECTOR_TYPE(left.v1 * right.v1, left.v2 * right.v2, left.v3 * right.v3); \
    } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const SIMD_TYPE &right) { \
      return VECTOR_TYPE(left.v1 * right, left.v2 * right, left.v3 * right); \
    } \
    inline VECTOR_TYPE operator*(const VECTOR_TYPE &left, const ELEMENT_TYPE &right) { \
      return VECTOR_TYPE(left.v1 * right, left.v2 * right, left.v3 * right); \
    } \
    inline SIMD_TYPE dotProduct(const VECTOR_TYPE &a, const VECTOR_TYPE &b) { \
      return (a.v1 * b.v1) + (a.v2 * b.v2) + (a.v3 * b.v3); \
    } \
    inline SIMD_TYPE squareLength(const VECTOR_TYPE &v) { \
      return dotProduct(v, v); \
    } \
    inline SIMD_TYPE length(const VECTOR_TYPE &v) { \
      return squareLength(v).squareRoot(); \
    } \
    inline VECTOR_TYPE normalize(const VECTOR_TYPE &v) { \
      return v * squareLength(v).reciprocalSquareRoot(); \
    }
  // These are the available in-place operations for 2D SIMD vectors F32x4x2, F32x8x2...
  //   To be expanded inside the body of VECTOR_TYPE, which must have the assignable members v1 and v2.
  //   VECTOR_TYPE is the type of the enclosing 2D vector.
  //   SIMD_TYPE is the SIMD type holding one dimension, such as F32x4 for F32x4x2.
  //   ELEMENT_TYPE is the scalar type of a single element, such as float.
  // Generates +=, -= and *= taking a VECTOR_TYPE (dimension by dimension), a SIMD_TYPE or an ELEMENT_TYPE
  //   (the same operand applied to both dimensions). Each operator returns a reference to the modified vector.
  // The results are assigned using the binary +, - and * of the members, so the members do not need compound assignment.
  #define SIMD_VECTOR_MEMBER_OPERATORS_2D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
    inline VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { \
      this->v1 = this->v1 + offset.v1; \
      this->v2 = this->v2 + offset.v2; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { \
      this->v1 = this->v1 - offset.v1; \
      this->v2 = this->v2 - offset.v2; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { \
      this->v1 = this->v1 * scale.v1; \
      this->v2 = this->v2 * scale.v2; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { \
      this->v1 = this->v1 + offset; \
      this->v2 = this->v2 + offset; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { \
      this->v1 = this->v1 - offset; \
      this->v2 = this->v2 - offset; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { \
      this->v1 = this->v1 * scale; \
      this->v2 = this->v2 * scale; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { \
      this->v1 = this->v1 + offset; \
      this->v2 = this->v2 + offset; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { \
      this->v1 = this->v1 - offset; \
      this->v2 = this->v2 - offset; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { \
      this->v1 = this->v1 * scale; \
      this->v2 = this->v2 * scale; \
      return *this; \
    }
  // These are the available in-place operations for 3D SIMD vectors F32x4x3, F32x8x3...
  //   To be expanded inside the body of VECTOR_TYPE, which must have the assignable members v1, v2 and v3.
  //   VECTOR_TYPE is the type of the enclosing 3D vector.
  //   SIMD_TYPE is the SIMD type holding one dimension, such as F32x4 for F32x4x3.
  //   ELEMENT_TYPE is the scalar type of a single element, such as float.
  // Generates +=, -= and *= taking a VECTOR_TYPE (dimension by dimension), a SIMD_TYPE or an ELEMENT_TYPE
  //   (the same operand applied to all three dimensions). Each operator returns a reference to the modified vector.
  // The results are assigned using the binary +, - and * of the members, so the members do not need compound assignment.
  #define SIMD_VECTOR_MEMBER_OPERATORS_3D(VECTOR_TYPE, SIMD_TYPE, ELEMENT_TYPE) \
    inline VECTOR_TYPE& operator+=(const VECTOR_TYPE& offset) { \
      this->v1 = this->v1 + offset.v1; \
      this->v2 = this->v2 + offset.v2; \
      this->v3 = this->v3 + offset.v3; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator-=(const VECTOR_TYPE& offset) { \
      this->v1 = this->v1 - offset.v1; \
      this->v2 = this->v2 - offset.v2; \
      this->v3 = this->v3 - offset.v3; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator*=(const VECTOR_TYPE& scale) { \
      this->v1 = this->v1 * scale.v1; \
      this->v2 = this->v2 * scale.v2; \
      this->v3 = this->v3 * scale.v3; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator+=(const SIMD_TYPE& offset) { \
      this->v1 = this->v1 + offset; \
      this->v2 = this->v2 + offset; \
      this->v3 = this->v3 + offset; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator-=(const SIMD_TYPE& offset) { \
      this->v1 = this->v1 - offset; \
      this->v2 = this->v2 - offset; \
      this->v3 = this->v3 - offset; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator*=(const SIMD_TYPE& scale) { \
      this->v1 = this->v1 * scale; \
      this->v2 = this->v2 * scale; \
      this->v3 = this->v3 * scale; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator+=(const ELEMENT_TYPE& offset) { \
      this->v1 = this->v1 + offset; \
      this->v2 = this->v2 + offset; \
      this->v3 = this->v3 + offset; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator-=(const ELEMENT_TYPE& offset) { \
      this->v1 = this->v1 - offset; \
      this->v2 = this->v2 - offset; \
      this->v3 = this->v3 - offset; \
      return *this; \
    } \
    inline VECTOR_TYPE& operator*=(const ELEMENT_TYPE& scale) { \
      this->v1 = this->v1 * scale; \
      this->v2 = this->v2 * scale; \
      this->v3 = this->v3 * scale; \
      return *this; \
    }
  141. // 128x2-bit SIMD vectorized 2D math vector stored in xxxxyyyy format (one planar SIMD vector per dimension).
  142. struct F32x4x2 {
  143. F32x4 v1, v2;
  144. // Direct constructor given 3 rows of length 4
  145. F32x4x2(const F32x4& v1, const F32x4& v2)
  146. : v1(v1), v2(v2) {}
  147. // Gradient constructor from an initial vector and the increment for each element.
  148. static F32x4x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  149. return F32x4x2(
  150. F32x4::createGradient(start.x, increment.x),
  151. F32x4::createGradient(start.y, increment.y)
  152. );
  153. }
  154. // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths)
  155. F32x4x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d)
  156. : v1(a.x, b.x, c.x, d.x),
  157. v2(a.y, b.y, c.y, d.y) {}
  158. // Transposed constructor given a single repeated column
  159. F32x4x2(const dsr::FVector2D& v)
  160. : v1(F32x4(v.x)),
  161. v2(F32x4(v.y)) {}
  162. // In-place math operations
  163. SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x4x2, F32x4, float)
  164. };
  165. SIMD_VECTOR_INFIX_OPERATORS_2D(F32x4x2, F32x4, float)
  166. // 256x2-bit SIMD vectorized 2D math vector stored in xxxxxxxxyyyyyyyy format (one planar SIMD vector per dimension).
  167. struct F32x8x2 {
  168. F32x8 v1, v2;
  169. // Direct constructor given 3 rows of length 4
  170. F32x8x2(const F32x8& v1, const F32x8& v2)
  171. : v1(v1), v2(v2) {}
  172. // Gradient constructor from an initial vector and the increment for each element.
  173. static F32x8x2 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  174. return F32x8x2(
  175. F32x8::createGradient(start.x, increment.x),
  176. F32x8::createGradient(start.y, increment.y)
  177. );
  178. }
  179. // Transposed constructor given 4 columns of length 2 (Only allowed for fixed size SIMD, not X or F vector lengths)
  180. F32x8x2(const dsr::FVector2D& a, const dsr::FVector2D& b, const dsr::FVector2D& c, const dsr::FVector2D& d, const dsr::FVector2D& e, const dsr::FVector2D& f, const dsr::FVector2D& g, const dsr::FVector2D& h)
  181. : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x),
  182. v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y) {}
  183. // Transposed constructor given a single repeated column
  184. F32x8x2(const dsr::FVector2D& v)
  185. : v1(F32x8(v.x)),
  186. v2(F32x8(v.y)) {}
  187. // In-place math operations
  188. SIMD_VECTOR_MEMBER_OPERATORS_2D(F32x8x2, F32x8, float)
  189. };
  190. SIMD_VECTOR_INFIX_OPERATORS_2D(F32x8x2, F32x8, float)
  191. // 128x3-bit SIMD vectorized 3D math vector stored in xxxxyyyyzzzz format (one planar SIMD vector per dimension).
  192. struct F32x4x3 {
  193. F32x4 v1, v2, v3;
  194. // Direct constructor given 3 rows of length 4
  195. F32x4x3(const F32x4& v1, const F32x4& v2, const F32x4& v3)
  196. : v1(v1), v2(v2), v3(v3) {}
  197. // Gradient constructor from an initial vector and the increment for each element.
  198. static F32x4x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  199. return F32x4x3(
  200. F32x4::createGradient(start.x, increment.x),
  201. F32x4::createGradient(start.y, increment.y),
  202. F32x4::createGradient(start.z, increment.z)
  203. );
  204. }
  205. // Transposed constructor given 4 columns of length 3
  206. F32x4x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d)
  207. : v1(a.x, b.x, c.x, d.x),
  208. v2(a.y, b.y, c.y, d.y),
  209. v3(a.z, b.z, c.z, d.z) {}
  210. // Transposed constructor given a single repeated column
  211. F32x4x3(const dsr::FVector3D& v)
  212. : v1(F32x4(v.x)),
  213. v2(F32x4(v.y)),
  214. v3(F32x4(v.z)) {}
  215. // In-place math operations
  216. SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x4x3, F32x4, float)
  217. };
  218. SIMD_VECTOR_INFIX_OPERATORS_3D(F32x4x3, F32x4, float)
  219. // 256x3-bit SIMD vectorized 3D math vector stored in xxxxxxxxyyyyyyyyzzzzzzzz format (one planar SIMD vector per dimension).
  220. struct F32x8x3 {
  221. F32x8 v1, v2, v3;
  222. // Direct constructor given 3 rows of length 4
  223. F32x8x3(const F32x8& v1, const F32x8& v2, const F32x8& v3)
  224. : v1(v1), v2(v2), v3(v3) {}
  225. // Gradient constructor from an initial vector and the increment for each element.
  226. static F32x8x3 createGradient(const dsr::FVector3D& start, const dsr::FVector3D& increment) {
  227. return F32x8x3(
  228. F32x8::createGradient(start.x, increment.x),
  229. F32x8::createGradient(start.y, increment.y),
  230. F32x8::createGradient(start.z, increment.z)
  231. );
  232. }
  233. // Transposed constructor given 4 columns of length 3
  234. F32x8x3(const dsr::FVector3D& a, const dsr::FVector3D& b, const dsr::FVector3D& c, const dsr::FVector3D& d, const dsr::FVector3D& e, const dsr::FVector3D& f, const dsr::FVector3D& g, const dsr::FVector3D& h)
  235. : v1(a.x, b.x, c.x, d.x, e.x, f.x, g.x, h.x),
  236. v2(a.y, b.y, c.y, d.y, e.y, f.y, g.y, h.y),
  237. v3(a.z, b.z, c.z, d.z, e.z, f.z, g.z, h.z) {}
  238. // Transposed constructor given a single repeated column
  239. F32x8x3(const dsr::FVector3D& v)
  240. : v1(F32x8(v.x)),
  241. v2(F32x8(v.y)),
  242. v3(F32x8(v.z)) {}
  243. // In-place math operations
  244. SIMD_VECTOR_MEMBER_OPERATORS_3D(F32x8x3, F32x8, float)
  245. };
  246. SIMD_VECTOR_INFIX_OPERATORS_3D(F32x8x3, F32x8, float)
  // X vector aliases
  //   Selected from DSR_DEFAULT_VECTOR_SIZE, where 16 maps to the F32x4 based types and 32 maps to the F32x8 based types.
  //   No alias is declared for any other value.
  #if DSR_DEFAULT_VECTOR_SIZE == 16
    using F32xXx3 = F32x4x3;
    using F32xXx2 = F32x4x2;
  #elif DSR_DEFAULT_VECTOR_SIZE == 32
    using F32xXx3 = F32x8x3;
    using F32xXx2 = F32x8x2;
  #endif
  // F vector aliases
  //   Selected from DSR_FLOAT_VECTOR_SIZE, where 16 maps to the F32x4 based types and 32 maps to the F32x8 based types.
  //   No alias is declared for any other value.
  #if DSR_FLOAT_VECTOR_SIZE == 16
    using F32xFx3 = F32x4x3;
    using F32xFx2 = F32x4x2;
  #elif DSR_FLOAT_VECTOR_SIZE == 32
    using F32xFx3 = F32x8x3;
    using F32xFx2 = F32x8x2;
  #endif
  // The member operator macros are only meant for the vector types declared in this header.
  // NOTE(review): SIMD_VECTOR_INFIX_OPERATORS_2D and SIMD_VECTOR_INFIX_OPERATORS_3D are left defined. Confirm if that is intended.
  #undef SIMD_VECTOR_MEMBER_OPERATORS_2D
  #undef SIMD_VECTOR_MEMBER_OPERATORS_3D
  265. }
  266. #endif