// Copyright (c) 2008-2023 the Urho3D project
// License: MIT

#pragma once

#include "../Math/Matrix4.h"

#ifdef URHO3D_SSE
#include <emmintrin.h>
#endif

namespace Urho3D
{

/// 3x4 matrix for scene node transform calculations.
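/// Stores the top three rows of an affine transform; the implicit fourth row is (0, 0, 0, 1).
/// Usage sketch (illustrative, assuming the usual Urho3D constants):
/// \code
/// Matrix3x4 transform(Vector3(1.0f, 2.0f, 3.0f), Quaternion::IDENTITY, 1.0f);
/// Vector3 worldPos = transform * Vector3::ZERO; // yields the translation (1, 2, 3)
/// \endcode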
class URHO3D_API Matrix3x4
{
public:
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:26495)
#endif
    /// Construct an identity matrix.
    Matrix3x4() noexcept
#ifndef URHO3D_SSE
       :m00_(1.0f),
        m01_(0.0f),
        m02_(0.0f),
        m03_(0.0f),
        m10_(0.0f),
        m11_(1.0f),
        m12_(0.0f),
        m13_(0.0f),
        m20_(0.0f),
        m21_(0.0f),
        m22_(1.0f),
        m23_(0.0f)
#endif
    {
#ifdef URHO3D_SSE
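        // Each unaligned store writes one full row. Note that _mm_set_ps lists lanes from
        // highest to lowest, so (0, 0, 0, 1) stores the row (1, 0, 0, 0).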
        _mm_storeu_ps(&m00_, _mm_set_ps(0.f, 0.f, 0.f, 1.f));
        _mm_storeu_ps(&m10_, _mm_set_ps(0.f, 0.f, 1.f, 0.f));
        _mm_storeu_ps(&m20_, _mm_set_ps(0.f, 1.f, 0.f, 0.f));
#endif
    }

    /// Copy-construct from another matrix.
    Matrix3x4(const Matrix3x4& matrix) noexcept = default;

    /// Copy-construct from a 3x3 matrix and set the extra elements to identity.
    explicit Matrix3x4(const Matrix3& matrix) noexcept :
        m00_(matrix.m00_),
        m01_(matrix.m01_),
        m02_(matrix.m02_),
        m03_(0.0f),
        m10_(matrix.m10_),
        m11_(matrix.m11_),
        m12_(matrix.m12_),
        m13_(0.0f),
        m20_(matrix.m20_),
        m21_(matrix.m21_),
        m22_(matrix.m22_),
        m23_(0.0f)
    {
    }

    /// Copy-construct from a 4x4 matrix which is assumed to contain no projection.
    explicit Matrix3x4(const Matrix4& matrix) noexcept
#ifndef URHO3D_SSE
       :m00_(matrix.m00_),
        m01_(matrix.m01_),
        m02_(matrix.m02_),
        m03_(matrix.m03_),
        m10_(matrix.m10_),
        m11_(matrix.m11_),
        m12_(matrix.m12_),
        m13_(matrix.m13_),
        m20_(matrix.m20_),
        m21_(matrix.m21_),
        m22_(matrix.m22_),
        m23_(matrix.m23_)
#endif
    {
#ifdef URHO3D_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&matrix.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&matrix.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&matrix.m20_));
#endif
    }

    /// Construct from values.
    Matrix3x4(float v00, float v01, float v02, float v03,
              float v10, float v11, float v12, float v13,
              float v20, float v21, float v22, float v23) noexcept :
        m00_(v00),
        m01_(v01),
        m02_(v02),
        m03_(v03),
        m10_(v10),
        m11_(v11),
        m12_(v12),
        m13_(v13),
        m20_(v20),
        m21_(v21),
        m22_(v22),
        m23_(v23)
    {
    }

    /// Construct from a float array.
    explicit Matrix3x4(const float* data) noexcept
#ifndef URHO3D_SSE
       :m00_(data[0]),
        m01_(data[1]),
        m02_(data[2]),
        m03_(data[3]),
        m10_(data[4]),
        m11_(data[5]),
        m12_(data[6]),
        m13_(data[7]),
        m20_(data[8]),
        m21_(data[9]),
        m22_(data[10]),
        m23_(data[11])
#endif
    {
#ifdef URHO3D_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(data));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(data + 4));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(data + 8));
#endif
    }

    /// Construct from translation, rotation and uniform scale.
    Matrix3x4(const Vector3& translation, const Quaternion& rotation, float scale) noexcept
    {
#ifdef URHO3D_SSE
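        // Pack the inputs into SSE registers: the quaternion is loaded directly from memory
        // as (w, x, y, z), while the translation and the scale get 1.0 in the highest lane,
        // as SetFromTRS expects.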
        __m128 t = _mm_set_ps(1.f, translation.z_, translation.y_, translation.x_);
        __m128 q = _mm_loadu_ps(&rotation.w_);
        __m128 s = _mm_set_ps(1.f, scale, scale, scale);
        SetFromTRS(t, q, s);
#else
        SetRotation(rotation.RotationMatrix() * scale);
        SetTranslation(translation);
#endif
    }

    /// Construct from translation, rotation and nonuniform scale.
    Matrix3x4(const Vector3& translation, const Quaternion& rotation, const Vector3& scale) noexcept
    {
#ifdef URHO3D_SSE
        __m128 t = _mm_set_ps(1.f, translation.z_, translation.y_, translation.x_);
        __m128 q = _mm_loadu_ps(&rotation.w_);
        __m128 s = _mm_set_ps(1.f, scale.z_, scale.y_, scale.x_);
        SetFromTRS(t, q, s);
#else
        SetRotation(rotation.RotationMatrix().Scaled(scale));
        SetTranslation(translation);
#endif
    }
#ifdef _MSC_VER
#pragma warning(pop)
#endif

    /// Assign from another matrix.
    Matrix3x4& operator =(const Matrix3x4& rhs) noexcept = default;

    /// Assign from a 3x3 matrix and set the extra elements to identity.
    Matrix3x4& operator =(const Matrix3& rhs) noexcept
    {
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = 0.0f;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = 0.0f;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = 0.0f;
        return *this;
    }

    /// Assign from a 4x4 matrix which is assumed to contain no projection.
    Matrix3x4& operator =(const Matrix4& rhs) noexcept
    {
#ifdef URHO3D_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&rhs.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&rhs.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&rhs.m20_));
#else
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = rhs.m03_;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = rhs.m13_;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = rhs.m23_;
#endif
        return *this;
    }

    /// Test for equality with another matrix without epsilon.
    bool operator ==(const Matrix3x4& rhs) const
    {
#ifdef URHO3D_SSE
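        // Compare all three rows lane-by-lane and AND the masks together, then fold the
        // four lanes horizontally; the matrices are equal only if every lane stayed all ones.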
        __m128 c0 = _mm_cmpeq_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_));
        __m128 c1 = _mm_cmpeq_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_));
        c0 = _mm_and_ps(c0, c1);
        __m128 c2 = _mm_cmpeq_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_));
        c0 = _mm_and_ps(c0, c2);
        __m128 hi = _mm_movehl_ps(c0, c0);
        c0 = _mm_and_ps(c0, hi);
        hi = _mm_shuffle_ps(c0, c0, _MM_SHUFFLE(1, 1, 1, 1));
        c0 = _mm_and_ps(c0, hi);
        return _mm_cvtsi128_si32(_mm_castps_si128(c0)) == -1;
#else
        const float* leftData = Data();
        const float* rightData = rhs.Data();

        for (unsigned i = 0; i < 12; ++i)
        {
            if (leftData[i] != rightData[i])
                return false;
        }

        return true;
#endif
    }

    /// Test for inequality with another matrix without epsilon.
    bool operator !=(const Matrix3x4& rhs) const { return !(*this == rhs); }

    /// Multiply a Vector3 which is assumed to represent position.
    Vector3 operator *(const Vector3& rhs) const
    {
#ifdef URHO3D_SSE
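        // Treat rhs as the homogeneous point (x, y, z, 1), multiply it with each row, and
        // sum the lanes via unpack/add: effectively three 4-wide dot products.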
        __m128 vec = _mm_set_ps(1.f, rhs.z_, rhs.y_, rhs.x_);
        __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
        __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
        __m128 t0 = _mm_unpacklo_ps(r0, r1);
        __m128 t1 = _mm_unpackhi_ps(r0, r1);
        t0 = _mm_add_ps(t0, t1);
        __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
        __m128 r3 = _mm_setzero_ps();
        __m128 t2 = _mm_unpacklo_ps(r2, r3);
        __m128 t3 = _mm_unpackhi_ps(r2, r3);
        t2 = _mm_add_ps(t2, t3);
        vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));

        return Vector3(
            _mm_cvtss_f32(vec),
            _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1))),
            _mm_cvtss_f32(_mm_movehl_ps(vec, vec)));
#else
        return Vector3(
            (m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_),
            (m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_),
            (m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_)
        );
#endif
    }

    /// Multiply a Vector4.
    Vector3 operator *(const Vector4& rhs) const
    {
#ifdef URHO3D_SSE
        __m128 vec = _mm_loadu_ps(&rhs.x_);
        __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
        __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
        __m128 t0 = _mm_unpacklo_ps(r0, r1);
        __m128 t1 = _mm_unpackhi_ps(r0, r1);
        t0 = _mm_add_ps(t0, t1);
        __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
        __m128 r3 = _mm_setzero_ps();
        __m128 t2 = _mm_unpacklo_ps(r2, r3);
        __m128 t3 = _mm_unpackhi_ps(r2, r3);
        t2 = _mm_add_ps(t2, t3);
        vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));

        return Vector3(
            _mm_cvtss_f32(vec),
            _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1))),
            _mm_cvtss_f32(_mm_movehl_ps(vec, vec)));
#else
        return Vector3(
            (m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_ * rhs.w_),
            (m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_ * rhs.w_),
            (m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_ * rhs.w_)
        );
#endif
    }

    /// Add a matrix.
    Matrix3x4 operator +(const Matrix3x4& rhs) const
    {
#ifdef URHO3D_SSE
        Matrix3x4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_add_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
        _mm_storeu_ps(&ret.m10_, _mm_add_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
        _mm_storeu_ps(&ret.m20_, _mm_add_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
        return ret;
#else
        return Matrix3x4(
            m00_ + rhs.m00_,
            m01_ + rhs.m01_,
            m02_ + rhs.m02_,
            m03_ + rhs.m03_,
            m10_ + rhs.m10_,
            m11_ + rhs.m11_,
            m12_ + rhs.m12_,
            m13_ + rhs.m13_,
            m20_ + rhs.m20_,
            m21_ + rhs.m21_,
            m22_ + rhs.m22_,
            m23_ + rhs.m23_
        );
#endif
    }

    /// Subtract a matrix.
    Matrix3x4 operator -(const Matrix3x4& rhs) const
    {
#ifdef URHO3D_SSE
        Matrix3x4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_sub_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
        _mm_storeu_ps(&ret.m10_, _mm_sub_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
        _mm_storeu_ps(&ret.m20_, _mm_sub_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
        return ret;
#else
        return Matrix3x4(
            m00_ - rhs.m00_,
            m01_ - rhs.m01_,
            m02_ - rhs.m02_,
            m03_ - rhs.m03_,
            m10_ - rhs.m10_,
            m11_ - rhs.m11_,
            m12_ - rhs.m12_,
            m13_ - rhs.m13_,
            m20_ - rhs.m20_,
            m21_ - rhs.m21_,
            m22_ - rhs.m22_,
            m23_ - rhs.m23_
        );
#endif
    }

    /// Multiply with a scalar.
    Matrix3x4 operator *(float rhs) const
    {
#ifdef URHO3D_SSE
        Matrix3x4 ret;
        const __m128 mul = _mm_set1_ps(rhs);
        _mm_storeu_ps(&ret.m00_, _mm_mul_ps(_mm_loadu_ps(&m00_), mul));
        _mm_storeu_ps(&ret.m10_, _mm_mul_ps(_mm_loadu_ps(&m10_), mul));
        _mm_storeu_ps(&ret.m20_, _mm_mul_ps(_mm_loadu_ps(&m20_), mul));
        return ret;
#else
        return Matrix3x4(
            m00_ * rhs,
            m01_ * rhs,
            m02_ * rhs,
            m03_ * rhs,
            m10_ * rhs,
            m11_ * rhs,
            m12_ * rhs,
            m13_ * rhs,
            m20_ * rhs,
            m21_ * rhs,
            m22_ * rhs,
            m23_ * rhs
        );
#endif
    }

    /// Multiply a matrix.
    Matrix3x4 operator *(const Matrix3x4& rhs) const
    {
#ifdef URHO3D_SSE
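        // Multiply row-by-row: broadcast each element of a left-hand row and multiply it by
        // the matching row of rhs. r3 stands in for rhs's implicit fourth row (0, 0, 0, 1),
        // which carries this matrix's translation element into the result's fourth column.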
        Matrix3x4 out;

        __m128 r0 = _mm_loadu_ps(&rhs.m00_);
        __m128 r1 = _mm_loadu_ps(&rhs.m10_);
        __m128 r2 = _mm_loadu_ps(&rhs.m20_);
        __m128 r3 = _mm_set_ps(1.f, 0.f, 0.f, 0.f);

        __m128 l = _mm_loadu_ps(&m00_);
        __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        __m128 t3 = _mm_mul_ps(l, r3);
        _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m10_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(l, r3);
        _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m20_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(l, r3);
        _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        return out;
#else
        return Matrix3x4(
            m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_,
            m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_,
            m00_ * rhs.m02_ + m01_ * rhs.m12_ + m02_ * rhs.m22_,
            m00_ * rhs.m03_ + m01_ * rhs.m13_ + m02_ * rhs.m23_ + m03_,
            m10_ * rhs.m00_ + m11_ * rhs.m10_ + m12_ * rhs.m20_,
            m10_ * rhs.m01_ + m11_ * rhs.m11_ + m12_ * rhs.m21_,
            m10_ * rhs.m02_ + m11_ * rhs.m12_ + m12_ * rhs.m22_,
            m10_ * rhs.m03_ + m11_ * rhs.m13_ + m12_ * rhs.m23_ + m13_,
            m20_ * rhs.m00_ + m21_ * rhs.m10_ + m22_ * rhs.m20_,
            m20_ * rhs.m01_ + m21_ * rhs.m11_ + m22_ * rhs.m21_,
            m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_,
            m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_
        );
#endif
    }

    /// Multiply a 4x4 matrix.
    Matrix4 operator *(const Matrix4& rhs) const
    {
#ifdef URHO3D_SSE
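        // Same row-by-row scheme as above, except rhs has a real fourth row; it is also
        // copied unchanged into the result because this matrix's implicit last row is (0, 0, 0, 1).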
        Matrix4 out;

        __m128 r0 = _mm_loadu_ps(&rhs.m00_);
        __m128 r1 = _mm_loadu_ps(&rhs.m10_);
        __m128 r2 = _mm_loadu_ps(&rhs.m20_);
        __m128 r3 = _mm_loadu_ps(&rhs.m30_);

        __m128 l = _mm_loadu_ps(&m00_);
        __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        __m128 t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m10_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m20_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        _mm_storeu_ps(&out.m30_, r3);

        return out;
#else
        return Matrix4(
            m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_ + m03_ * rhs.m30_,
            m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_ + m03_ * rhs.m31_,
            m00_ * rhs.m02_ + m01_ * rhs.m12_ + m02_ * rhs.m22_ + m03_ * rhs.m32_,
            m00_ * rhs.m03_ + m01_ * rhs.m13_ + m02_ * rhs.m23_ + m03_ * rhs.m33_,
            m10_ * rhs.m00_ + m11_ * rhs.m10_ + m12_ * rhs.m20_ + m13_ * rhs.m30_,
            m10_ * rhs.m01_ + m11_ * rhs.m11_ + m12_ * rhs.m21_ + m13_ * rhs.m31_,
            m10_ * rhs.m02_ + m11_ * rhs.m12_ + m12_ * rhs.m22_ + m13_ * rhs.m32_,
            m10_ * rhs.m03_ + m11_ * rhs.m13_ + m12_ * rhs.m23_ + m13_ * rhs.m33_,
            m20_ * rhs.m00_ + m21_ * rhs.m10_ + m22_ * rhs.m20_ + m23_ * rhs.m30_,
            m20_ * rhs.m01_ + m21_ * rhs.m11_ + m22_ * rhs.m21_ + m23_ * rhs.m31_,
            m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_ + m23_ * rhs.m32_,
            m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_ * rhs.m33_,
            rhs.m30_,
            rhs.m31_,
            rhs.m32_,
            rhs.m33_
        );
#endif
    }

    /// Set translation elements.
    void SetTranslation(const Vector3& translation)
    {
        m03_ = translation.x_;
        m13_ = translation.y_;
        m23_ = translation.z_;
    }

    /// Set rotation elements from a 3x3 matrix.
    void SetRotation(const Matrix3& rotation)
    {
        m00_ = rotation.m00_;
        m01_ = rotation.m01_;
        m02_ = rotation.m02_;
        m10_ = rotation.m10_;
        m11_ = rotation.m11_;
        m12_ = rotation.m12_;
        m20_ = rotation.m20_;
        m21_ = rotation.m21_;
        m22_ = rotation.m22_;
    }

    /// Set scaling elements.
    void SetScale(const Vector3& scale)
    {
        m00_ = scale.x_;
        m11_ = scale.y_;
        m22_ = scale.z_;
    }

    /// Set uniform scaling elements.
    void SetScale(float scale)
    {
        m00_ = scale;
        m11_ = scale;
        m22_ = scale;
    }

    /// Return the combined rotation and scaling matrix.
    Matrix3 ToMatrix3() const
    {
        return Matrix3(
            m00_,
            m01_,
            m02_,
            m10_,
            m11_,
            m12_,
            m20_,
            m21_,
            m22_
        );
    }

    /// Convert to a 4x4 matrix by filling in an identity last row.
    Matrix4 ToMatrix4() const
    {
#ifdef URHO3D_SSE
        Matrix4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_loadu_ps(&m00_));
        _mm_storeu_ps(&ret.m10_, _mm_loadu_ps(&m10_));
        _mm_storeu_ps(&ret.m20_, _mm_loadu_ps(&m20_));
        _mm_storeu_ps(&ret.m30_, _mm_set_ps(1.f, 0.f, 0.f, 0.f));
        return ret;
#else
        return Matrix4(
            m00_,
            m01_,
            m02_,
            m03_,
            m10_,
            m11_,
            m12_,
            m13_,
            m20_,
            m21_,
            m22_,
            m23_,
            0.0f,
            0.0f,
            0.0f,
            1.0f
        );
#endif
    }

    /// Return the rotation matrix with scaling removed.
    Matrix3 RotationMatrix() const
    {
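        // Each basis column's length is the scale along that axis; dividing the columns by
        // their lengths leaves a pure rotation matrix.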
        Vector3 invScale(
            1.0f / sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
            1.0f / sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
            1.0f / sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
        );

        return ToMatrix3().Scaled(invScale);
    }

    /// Return the translation part.
    Vector3 Translation() const
    {
        return Vector3(
            m03_,
            m13_,
            m23_
        );
    }

    /// Return the rotation part.
    Quaternion Rotation() const { return Quaternion(RotationMatrix()); }

    /// Return the scaling part.
    Vector3 Scale() const
    {
        return Vector3(
            sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
            sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
            sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
        );
    }

    /// Return the scaling part with the sign. Reference rotation matrix is required to avoid ambiguity.
    Vector3 SignedScale(const Matrix3& rotation) const
    {
        return Vector3(
            rotation.m00_ * m00_ + rotation.m10_ * m10_ + rotation.m20_ * m20_,
            rotation.m01_ * m01_ + rotation.m11_ * m11_ + rotation.m21_ * m21_,
            rotation.m02_ * m02_ + rotation.m12_ * m12_ + rotation.m22_ * m22_
        );
    }

    /// Test for equality with another matrix with epsilon.
    bool Equals(const Matrix3x4& rhs) const
    {
        const float* leftData = Data();
        const float* rightData = rhs.Data();

        for (unsigned i = 0; i < 12; ++i)
        {
            if (!Urho3D::Equals(leftData[i], rightData[i]))
                return false;
        }

        return true;
    }

    /// Return decomposition to translation, rotation and scale.
    void Decompose(Vector3& translation, Quaternion& rotation, Vector3& scale) const;
    /// Return inverse.
    Matrix3x4 Inverse() const;

    /// Return float data.
    const float* Data() const { return &m00_; }
    /// Return matrix element.
    float Element(unsigned i, unsigned j) const { return Data()[i * 4 + j]; }
    /// Return matrix row.
    Vector4 Row(unsigned i) const { return Vector4(Element(i, 0), Element(i, 1), Element(i, 2), Element(i, 3)); }
    /// Return matrix column.
    Vector3 Column(unsigned j) const { return Vector3(Element(0, j), Element(1, j), Element(2, j)); }

    /// Return whether any element is NaN.
    bool IsNaN() const
    {
        const float* data = Data();
        for (unsigned i = 0; i < 12; ++i)
        {
            if (Urho3D::IsNaN(data[i]))
                return true;
        }
        return false;
    }

    /// Return whether any element is Inf.
    bool IsInf() const
    {
        const float* data = Data();
        for (unsigned i = 0; i < 12; ++i)
        {
            if (Urho3D::IsInf(data[i]))
                return true;
        }
        return false;
    }

    /// Return as string.
    String ToString() const;

    float m00_;
    float m01_;
    float m02_;
    float m03_;
    float m10_;
    float m11_;
    float m12_;
    float m13_;
    float m20_;
    float m21_;
    float m22_;
    float m23_;

    /// Zero matrix.
    static const Matrix3x4 ZERO;
    /// Identity matrix.
    static const Matrix3x4 IDENTITY;

#ifdef URHO3D_SSE
private:
    /// \brief Sets this matrix from the given translation, rotation (as quaternion (w,x,y,z)), and nonuniform scale (x,y,z) parameters.
    /// Note: the w component of the scale parameter passed to this function must be 1.
    void inline SetFromTRS(__m128 t, __m128 q, __m128 s)
    {
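        // Expand the quaternion (w, x, y, z) into the standard 3x3 rotation matrix using
        // shuffles and sign-flip masks, append the translation t as the fourth column, and
        // multiply each row lane-wise by s, so column j of the rotation is scaled by s[j]
        // (the highest lane of s is 1 and leaves the translation untouched).
        // Scalar equivalent of the rotation part (what the non-SSE path gets from
        // Quaternion::RotationMatrix()):
        //   | 1 - 2(yy + zz)      2(xy - wz)      2(xz + wy) |
        //   |     2(xy + wz)  1 - 2(xx + zz)      2(yz - wx) |
        //   |     2(xz - wy)      2(yz + wx)  1 - 2(xx + yy) |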
        q = _mm_shuffle_ps(q, q, _MM_SHUFFLE(0, 3, 2, 1));
        __m128 one = _mm_set_ps(0, 0, 0, 1);
        const __m128 sseX1 = _mm_castsi128_ps(_mm_set_epi32((int)0x80000000UL, (int)0x80000000UL, 0, (int)0x80000000UL));
        __m128 q2 = _mm_add_ps(q, q);
        __m128 t2 = _mm_add_ss(_mm_xor_ps(_mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 2)), _mm_shuffle_ps(q2, q2, _MM_SHUFFLE(0, 1, 2, 2))), sseX1), one);
        const __m128 sseX0 = _mm_shuffle_ps(sseX1, sseX1, _MM_SHUFFLE(0, 3, 2, 1));
        __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(1, 0, 0, 1)), _mm_shuffle_ps(q2, q2, _MM_SHUFFLE(2, 2, 1, 1)));
        __m128 t1 = _mm_xor_ps(t0, sseX0);
        __m128 r0 = _mm_sub_ps(t2, t1);
        __m128 xx2 = _mm_mul_ss(q, q2);
        __m128 r1 = _mm_sub_ps(_mm_xor_ps(t2, sseX0), _mm_move_ss(t1, xx2));
        r1 = _mm_shuffle_ps(r1, r1, _MM_SHUFFLE(2, 3, 0, 1));
        __m128 r2 = _mm_shuffle_ps(_mm_movehl_ps(r0, r1), _mm_sub_ss(_mm_sub_ss(one, xx2), t0), _MM_SHUFFLE(2, 0, 3, 1));
        __m128 tmp0 = _mm_unpacklo_ps(r0, r1);
        __m128 tmp2 = _mm_unpacklo_ps(r2, t);
        __m128 tmp1 = _mm_unpackhi_ps(r0, r1);
        __m128 tmp3 = _mm_unpackhi_ps(r2, t);
        _mm_storeu_ps(&m00_, _mm_mul_ps(_mm_movelh_ps(tmp0, tmp2), s));
        _mm_storeu_ps(&m10_, _mm_mul_ps(_mm_movehl_ps(tmp2, tmp0), s));
        _mm_storeu_ps(&m20_, _mm_mul_ps(_mm_movelh_ps(tmp1, tmp3), s));
    }
#endif
};

/// Multiply a 3x4 matrix with a scalar.
inline Matrix3x4 operator *(float lhs, const Matrix3x4& rhs) { return rhs * lhs; }

}