// Matrix3x4.h
//
// Copyright (c) 2008-2020 the Urho3D project.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
#pragma once

#include "../Math/Matrix4.h"

#ifdef URHO3D_SSE
#include <emmintrin.h>
#endif

namespace Urho3D
{
  29. /// 3x4 matrix for scene node transform calculations.
  30. class URHO3D_API Matrix3x4
  31. {
  32. public:
  33. /// Construct an identity matrix.
  34. Matrix3x4() noexcept
  35. #ifndef URHO3D_SSE
  36. :m00_(1.0f),
  37. m01_(0.0f),
  38. m02_(0.0f),
  39. m03_(0.0f),
  40. m10_(0.0f),
  41. m11_(1.0f),
  42. m12_(0.0f),
  43. m13_(0.0f),
  44. m20_(0.0f),
  45. m21_(0.0f),
  46. m22_(1.0f),
  47. m23_(0.0f)
  48. #endif
  49. {
  50. #ifdef URHO3D_SSE
  51. _mm_storeu_ps(&m00_, _mm_set_ps(0.f, 0.f, 0.f, 1.f));
  52. _mm_storeu_ps(&m10_, _mm_set_ps(0.f, 0.f, 1.f, 0.f));
  53. _mm_storeu_ps(&m20_, _mm_set_ps(0.f, 1.f, 0.f, 0.f));
  54. #endif
  55. }
  56. /// Copy-construct from another matrix.
  57. Matrix3x4(const Matrix3x4& matrix) noexcept = default;
  58. /// Copy-construct from a 3x3 matrix and set the extra elements to identity.
  59. explicit Matrix3x4(const Matrix3& matrix) noexcept :
  60. m00_(matrix.m00_),
  61. m01_(matrix.m01_),
  62. m02_(matrix.m02_),
  63. m03_(0.0f),
  64. m10_(matrix.m10_),
  65. m11_(matrix.m11_),
  66. m12_(matrix.m12_),
  67. m13_(0.0f),
  68. m20_(matrix.m20_),
  69. m21_(matrix.m21_),
  70. m22_(matrix.m22_),
  71. m23_(0.0f)
  72. {
  73. }
  74. /// Copy-construct from a 4x4 matrix which is assumed to contain no projection.
  75. explicit Matrix3x4(const Matrix4& matrix) noexcept
  76. #ifndef URHO3D_SSE
  77. :m00_(matrix.m00_),
  78. m01_(matrix.m01_),
  79. m02_(matrix.m02_),
  80. m03_(matrix.m03_),
  81. m10_(matrix.m10_),
  82. m11_(matrix.m11_),
  83. m12_(matrix.m12_),
  84. m13_(matrix.m13_),
  85. m20_(matrix.m20_),
  86. m21_(matrix.m21_),
  87. m22_(matrix.m22_),
  88. m23_(matrix.m23_)
  89. #endif
  90. {
  91. #ifdef URHO3D_SSE
  92. _mm_storeu_ps(&m00_, _mm_loadu_ps(&matrix.m00_));
  93. _mm_storeu_ps(&m10_, _mm_loadu_ps(&matrix.m10_));
  94. _mm_storeu_ps(&m20_, _mm_loadu_ps(&matrix.m20_));
  95. #endif
  96. }
  97. /// Construct from values.
  98. Matrix3x4(float v00, float v01, float v02, float v03,
  99. float v10, float v11, float v12, float v13,
  100. float v20, float v21, float v22, float v23) noexcept :
  101. m00_(v00),
  102. m01_(v01),
  103. m02_(v02),
  104. m03_(v03),
  105. m10_(v10),
  106. m11_(v11),
  107. m12_(v12),
  108. m13_(v13),
  109. m20_(v20),
  110. m21_(v21),
  111. m22_(v22),
  112. m23_(v23)
  113. {
  114. }
  115. /// Construct from a float array.
  116. explicit Matrix3x4(const float* data) noexcept
  117. #ifndef URHO3D_SSE
  118. :m00_(data[0]),
  119. m01_(data[1]),
  120. m02_(data[2]),
  121. m03_(data[3]),
  122. m10_(data[4]),
  123. m11_(data[5]),
  124. m12_(data[6]),
  125. m13_(data[7]),
  126. m20_(data[8]),
  127. m21_(data[9]),
  128. m22_(data[10]),
  129. m23_(data[11])
  130. #endif
  131. {
  132. #ifdef URHO3D_SSE
  133. _mm_storeu_ps(&m00_, _mm_loadu_ps(data));
  134. _mm_storeu_ps(&m10_, _mm_loadu_ps(data + 4));
  135. _mm_storeu_ps(&m20_, _mm_loadu_ps(data + 8));
  136. #endif
  137. }
  138. /// Construct from translation, rotation and uniform scale.
  139. Matrix3x4(const Vector3& translation, const Quaternion& rotation, float scale) noexcept
  140. {
  141. #ifdef URHO3D_SSE
  142. __m128 t = _mm_set_ps(1.f, translation.z_, translation.y_, translation.x_);
  143. __m128 q = _mm_loadu_ps(&rotation.w_);
  144. __m128 s = _mm_set_ps(1.f, scale, scale, scale);
  145. SetFromTRS(t, q, s);
  146. #else
  147. SetRotation(rotation.RotationMatrix() * scale);
  148. SetTranslation(translation);
  149. #endif
  150. }
  151. /// Construct from translation, rotation and nonuniform scale.
  152. Matrix3x4(const Vector3& translation, const Quaternion& rotation, const Vector3& scale) noexcept
  153. {
  154. #ifdef URHO3D_SSE
  155. __m128 t = _mm_set_ps(1.f, translation.z_, translation.y_, translation.x_);
  156. __m128 q = _mm_loadu_ps(&rotation.w_);
  157. __m128 s = _mm_set_ps(1.f, scale.z_, scale.y_, scale.x_);
  158. SetFromTRS(t, q, s);
  159. #else
  160. SetRotation(rotation.RotationMatrix().Scaled(scale));
  161. SetTranslation(translation);
  162. #endif
  163. }
  164. /// Assign from another matrix.
  165. Matrix3x4& operator =(const Matrix3x4& rhs) noexcept = default;
  166. /// Assign from a 3x3 matrix and set the extra elements to identity.
  167. Matrix3x4& operator =(const Matrix3& rhs) noexcept
  168. {
  169. m00_ = rhs.m00_;
  170. m01_ = rhs.m01_;
  171. m02_ = rhs.m02_;
  172. m03_ = 0.0f;
  173. m10_ = rhs.m10_;
  174. m11_ = rhs.m11_;
  175. m12_ = rhs.m12_;
  176. m13_ = 0.0f;
  177. m20_ = rhs.m20_;
  178. m21_ = rhs.m21_;
  179. m22_ = rhs.m22_;
  180. m23_ = 0.0f;
  181. return *this;
  182. }
  183. /// Assign from a 4x4 matrix which is assumed to contain no projection.
  184. Matrix3x4& operator =(const Matrix4& rhs) noexcept
  185. {
  186. #ifdef URHO3D_SSE
  187. _mm_storeu_ps(&m00_, _mm_loadu_ps(&rhs.m00_));
  188. _mm_storeu_ps(&m10_, _mm_loadu_ps(&rhs.m10_));
  189. _mm_storeu_ps(&m20_, _mm_loadu_ps(&rhs.m20_));
  190. #else
  191. m00_ = rhs.m00_;
  192. m01_ = rhs.m01_;
  193. m02_ = rhs.m02_;
  194. m03_ = rhs.m03_;
  195. m10_ = rhs.m10_;
  196. m11_ = rhs.m11_;
  197. m12_ = rhs.m12_;
  198. m13_ = rhs.m13_;
  199. m20_ = rhs.m20_;
  200. m21_ = rhs.m21_;
  201. m22_ = rhs.m22_;
  202. m23_ = rhs.m23_;
  203. #endif
  204. return *this;
  205. }
  206. /// Test for equality with another matrix without epsilon.
  207. bool operator ==(const Matrix3x4& rhs) const
  208. {
  209. #ifdef URHO3D_SSE
  210. __m128 c0 = _mm_cmpeq_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_));
  211. __m128 c1 = _mm_cmpeq_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_));
  212. c0 = _mm_and_ps(c0, c1);
  213. __m128 c2 = _mm_cmpeq_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_));
  214. c0 = _mm_and_ps(c0, c2);
  215. __m128 hi = _mm_movehl_ps(c0, c0);
  216. c0 = _mm_and_ps(c0, hi);
  217. hi = _mm_shuffle_ps(c0, c0, _MM_SHUFFLE(1, 1, 1, 1));
  218. c0 = _mm_and_ps(c0, hi);
  219. return _mm_cvtsi128_si32(_mm_castps_si128(c0)) == -1;
  220. #else
  221. const float* leftData = Data();
  222. const float* rightData = rhs.Data();
  223. for (unsigned i = 0; i < 12; ++i)
  224. {
  225. if (leftData[i] != rightData[i])
  226. return false;
  227. }
  228. return true;
  229. #endif
  230. }
  231. /// Test for inequality with another matrix without epsilon.
  232. bool operator !=(const Matrix3x4& rhs) const { return !(*this == rhs); }
  233. /// Multiply a Vector3 which is assumed to represent position.
  234. Vector3 operator *(const Vector3& rhs) const
  235. {
  236. #ifdef URHO3D_SSE
  237. __m128 vec = _mm_set_ps(1.f, rhs.z_, rhs.y_, rhs.x_);
  238. __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
  239. __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
  240. __m128 t0 = _mm_unpacklo_ps(r0, r1);
  241. __m128 t1 = _mm_unpackhi_ps(r0, r1);
  242. t0 = _mm_add_ps(t0, t1);
  243. __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
  244. __m128 r3 = _mm_setzero_ps();
  245. __m128 t2 = _mm_unpacklo_ps(r2, r3);
  246. __m128 t3 = _mm_unpackhi_ps(r2, r3);
  247. t2 = _mm_add_ps(t2, t3);
  248. vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));
  249. return Vector3(
  250. _mm_cvtss_f32(vec),
  251. _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1))),
  252. _mm_cvtss_f32(_mm_movehl_ps(vec, vec)));
  253. #else
  254. return Vector3(
  255. (m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_),
  256. (m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_),
  257. (m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_)
  258. );
  259. #endif
  260. }
  261. /// Multiply a Vector4.
  262. Vector3 operator *(const Vector4& rhs) const
  263. {
  264. #ifdef URHO3D_SSE
  265. __m128 vec = _mm_loadu_ps(&rhs.x_);
  266. __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
  267. __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
  268. __m128 t0 = _mm_unpacklo_ps(r0, r1);
  269. __m128 t1 = _mm_unpackhi_ps(r0, r1);
  270. t0 = _mm_add_ps(t0, t1);
  271. __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
  272. __m128 r3 = _mm_setzero_ps();
  273. __m128 t2 = _mm_unpacklo_ps(r2, r3);
  274. __m128 t3 = _mm_unpackhi_ps(r2, r3);
  275. t2 = _mm_add_ps(t2, t3);
  276. vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));
  277. return Vector3(
  278. _mm_cvtss_f32(vec),
  279. _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1))),
  280. _mm_cvtss_f32(_mm_movehl_ps(vec, vec)));
  281. #else
  282. return Vector3(
  283. (m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_ * rhs.w_),
  284. (m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_ * rhs.w_),
  285. (m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_ * rhs.w_)
  286. );
  287. #endif
  288. }
  289. /// Add a matrix.
  290. Matrix3x4 operator +(const Matrix3x4& rhs) const
  291. {
  292. #ifdef URHO3D_SSE
  293. Matrix3x4 ret;
  294. _mm_storeu_ps(&ret.m00_, _mm_add_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
  295. _mm_storeu_ps(&ret.m10_, _mm_add_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
  296. _mm_storeu_ps(&ret.m20_, _mm_add_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
  297. return ret;
  298. #else
  299. return Matrix3x4(
  300. m00_ + rhs.m00_,
  301. m01_ + rhs.m01_,
  302. m02_ + rhs.m02_,
  303. m03_ + rhs.m03_,
  304. m10_ + rhs.m10_,
  305. m11_ + rhs.m11_,
  306. m12_ + rhs.m12_,
  307. m13_ + rhs.m13_,
  308. m20_ + rhs.m20_,
  309. m21_ + rhs.m21_,
  310. m22_ + rhs.m22_,
  311. m23_ + rhs.m23_
  312. );
  313. #endif
  314. }
  315. /// Subtract a matrix.
  316. Matrix3x4 operator -(const Matrix3x4& rhs) const
  317. {
  318. #ifdef URHO3D_SSE
  319. Matrix3x4 ret;
  320. _mm_storeu_ps(&ret.m00_, _mm_sub_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
  321. _mm_storeu_ps(&ret.m10_, _mm_sub_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
  322. _mm_storeu_ps(&ret.m20_, _mm_sub_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
  323. return ret;
  324. #else
  325. return Matrix3x4(
  326. m00_ - rhs.m00_,
  327. m01_ - rhs.m01_,
  328. m02_ - rhs.m02_,
  329. m03_ - rhs.m03_,
  330. m10_ - rhs.m10_,
  331. m11_ - rhs.m11_,
  332. m12_ - rhs.m12_,
  333. m13_ - rhs.m13_,
  334. m20_ - rhs.m20_,
  335. m21_ - rhs.m21_,
  336. m22_ - rhs.m22_,
  337. m23_ - rhs.m23_
  338. );
  339. #endif
  340. }
  341. /// Multiply with a scalar.
  342. Matrix3x4 operator *(float rhs) const
  343. {
  344. #ifdef URHO3D_SSE
  345. Matrix3x4 ret;
  346. const __m128 mul = _mm_set1_ps(rhs);
  347. _mm_storeu_ps(&ret.m00_, _mm_mul_ps(_mm_loadu_ps(&m00_), mul));
  348. _mm_storeu_ps(&ret.m10_, _mm_mul_ps(_mm_loadu_ps(&m10_), mul));
  349. _mm_storeu_ps(&ret.m20_, _mm_mul_ps(_mm_loadu_ps(&m20_), mul));
  350. return ret;
  351. #else
  352. return Matrix3x4(
  353. m00_ * rhs,
  354. m01_ * rhs,
  355. m02_ * rhs,
  356. m03_ * rhs,
  357. m10_ * rhs,
  358. m11_ * rhs,
  359. m12_ * rhs,
  360. m13_ * rhs,
  361. m20_ * rhs,
  362. m21_ * rhs,
  363. m22_ * rhs,
  364. m23_ * rhs
  365. );
  366. #endif
  367. }
  368. /// Multiply a matrix.
  369. Matrix3x4 operator *(const Matrix3x4& rhs) const
  370. {
  371. #ifdef URHO3D_SSE
  372. Matrix3x4 out;
  373. __m128 r0 = _mm_loadu_ps(&rhs.m00_);
  374. __m128 r1 = _mm_loadu_ps(&rhs.m10_);
  375. __m128 r2 = _mm_loadu_ps(&rhs.m20_);
  376. __m128 r3 = _mm_set_ps(1.f, 0.f, 0.f, 0.f);
  377. __m128 l = _mm_loadu_ps(&m00_);
  378. __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
  379. __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
  380. __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
  381. __m128 t3 = _mm_mul_ps(l, r3);
  382. _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));
  383. l = _mm_loadu_ps(&m10_);
  384. t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
  385. t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
  386. t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
  387. t3 = _mm_mul_ps(l, r3);
  388. _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));
  389. l = _mm_loadu_ps(&m20_);
  390. t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
  391. t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
  392. t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
  393. t3 = _mm_mul_ps(l, r3);
  394. _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));
  395. return out;
  396. #else
  397. return Matrix3x4(
  398. m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_,
  399. m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_,
  400. m00_ * rhs.m02_ + m01_ * rhs.m12_ + m02_ * rhs.m22_,
  401. m00_ * rhs.m03_ + m01_ * rhs.m13_ + m02_ * rhs.m23_ + m03_,
  402. m10_ * rhs.m00_ + m11_ * rhs.m10_ + m12_ * rhs.m20_,
  403. m10_ * rhs.m01_ + m11_ * rhs.m11_ + m12_ * rhs.m21_,
  404. m10_ * rhs.m02_ + m11_ * rhs.m12_ + m12_ * rhs.m22_,
  405. m10_ * rhs.m03_ + m11_ * rhs.m13_ + m12_ * rhs.m23_ + m13_,
  406. m20_ * rhs.m00_ + m21_ * rhs.m10_ + m22_ * rhs.m20_,
  407. m20_ * rhs.m01_ + m21_ * rhs.m11_ + m22_ * rhs.m21_,
  408. m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_,
  409. m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_
  410. );
  411. #endif
  412. }
  413. /// Multiply a 4x4 matrix.
  414. Matrix4 operator *(const Matrix4& rhs) const
  415. {
  416. #ifdef URHO3D_SSE
  417. Matrix4 out;
  418. __m128 r0 = _mm_loadu_ps(&rhs.m00_);
  419. __m128 r1 = _mm_loadu_ps(&rhs.m10_);
  420. __m128 r2 = _mm_loadu_ps(&rhs.m20_);
  421. __m128 r3 = _mm_loadu_ps(&rhs.m30_);
  422. __m128 l = _mm_loadu_ps(&m00_);
  423. __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
  424. __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
  425. __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
  426. __m128 t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
  427. _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));
  428. l = _mm_loadu_ps(&m10_);
  429. t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
  430. t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
  431. t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
  432. t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
  433. _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));
  434. l = _mm_loadu_ps(&m20_);
  435. t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
  436. t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
  437. t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
  438. t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
  439. _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));
  440. _mm_storeu_ps(&out.m30_, r3);
  441. return out;
  442. #else
  443. return Matrix4(
  444. m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_ + m03_ * rhs.m30_,
  445. m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_ + m03_ * rhs.m31_,
  446. m00_ * rhs.m02_ + m01_ * rhs.m12_ + m02_ * rhs.m22_ + m03_ * rhs.m32_,
  447. m00_ * rhs.m03_ + m01_ * rhs.m13_ + m02_ * rhs.m23_ + m03_ * rhs.m33_,
  448. m10_ * rhs.m00_ + m11_ * rhs.m10_ + m12_ * rhs.m20_ + m13_ * rhs.m30_,
  449. m10_ * rhs.m01_ + m11_ * rhs.m11_ + m12_ * rhs.m21_ + m13_ * rhs.m31_,
  450. m10_ * rhs.m02_ + m11_ * rhs.m12_ + m12_ * rhs.m22_ + m13_ * rhs.m32_,
  451. m10_ * rhs.m03_ + m11_ * rhs.m13_ + m12_ * rhs.m23_ + m13_ * rhs.m33_,
  452. m20_ * rhs.m00_ + m21_ * rhs.m10_ + m22_ * rhs.m20_ + m23_ * rhs.m30_,
  453. m20_ * rhs.m01_ + m21_ * rhs.m11_ + m22_ * rhs.m21_ + m23_ * rhs.m31_,
  454. m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_ + m23_ * rhs.m32_,
  455. m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_ * rhs.m33_,
  456. rhs.m30_,
  457. rhs.m31_,
  458. rhs.m32_,
  459. rhs.m33_
  460. );
  461. #endif
  462. }
  463. /// Set translation elements.
  464. void SetTranslation(const Vector3& translation)
  465. {
  466. m03_ = translation.x_;
  467. m13_ = translation.y_;
  468. m23_ = translation.z_;
  469. }
  470. /// Set rotation elements from a 3x3 matrix.
  471. void SetRotation(const Matrix3& rotation)
  472. {
  473. m00_ = rotation.m00_;
  474. m01_ = rotation.m01_;
  475. m02_ = rotation.m02_;
  476. m10_ = rotation.m10_;
  477. m11_ = rotation.m11_;
  478. m12_ = rotation.m12_;
  479. m20_ = rotation.m20_;
  480. m21_ = rotation.m21_;
  481. m22_ = rotation.m22_;
  482. }
  483. /// Set scaling elements.
  484. void SetScale(const Vector3& scale)
  485. {
  486. m00_ = scale.x_;
  487. m11_ = scale.y_;
  488. m22_ = scale.z_;
  489. }
  490. /// Set uniform scaling elements.
  491. void SetScale(float scale)
  492. {
  493. m00_ = scale;
  494. m11_ = scale;
  495. m22_ = scale;
  496. }
  497. /// Return the combined rotation and scaling matrix.
  498. Matrix3 ToMatrix3() const
  499. {
  500. return Matrix3(
  501. m00_,
  502. m01_,
  503. m02_,
  504. m10_,
  505. m11_,
  506. m12_,
  507. m20_,
  508. m21_,
  509. m22_
  510. );
  511. }
  512. /// Convert to a 4x4 matrix by filling in an identity last row.
  513. Matrix4 ToMatrix4() const
  514. {
  515. #ifdef URHO3D_SSE
  516. Matrix4 ret;
  517. _mm_storeu_ps(&ret.m00_, _mm_loadu_ps(&m00_));
  518. _mm_storeu_ps(&ret.m10_, _mm_loadu_ps(&m10_));
  519. _mm_storeu_ps(&ret.m20_, _mm_loadu_ps(&m20_));
  520. _mm_storeu_ps(&ret.m30_, _mm_set_ps(1.f, 0.f, 0.f, 0.f));
  521. return ret;
  522. #else
  523. return Matrix4(
  524. m00_,
  525. m01_,
  526. m02_,
  527. m03_,
  528. m10_,
  529. m11_,
  530. m12_,
  531. m13_,
  532. m20_,
  533. m21_,
  534. m22_,
  535. m23_,
  536. 0.0f,
  537. 0.0f,
  538. 0.0f,
  539. 1.0f
  540. );
  541. #endif
  542. }
  543. /// Return the rotation matrix with scaling removed.
  544. Matrix3 RotationMatrix() const
  545. {
  546. Vector3 invScale(
  547. 1.0f / sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
  548. 1.0f / sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
  549. 1.0f / sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
  550. );
  551. return ToMatrix3().Scaled(invScale);
  552. }
  553. /// Return the translation part.
  554. Vector3 Translation() const
  555. {
  556. return Vector3(
  557. m03_,
  558. m13_,
  559. m23_
  560. );
  561. }
  562. /// Return the rotation part.
  563. Quaternion Rotation() const { return Quaternion(RotationMatrix()); }
  564. /// Return the scaling part.
  565. Vector3 Scale() const
  566. {
  567. return Vector3(
  568. sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
  569. sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
  570. sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
  571. );
  572. }
  573. /// Return the scaling part with the sign. Reference rotation matrix is required to avoid ambiguity.
  574. Vector3 SignedScale(const Matrix3& rotation) const
  575. {
  576. return Vector3(
  577. rotation.m00_ * m00_ + rotation.m10_ * m10_ + rotation.m20_ * m20_,
  578. rotation.m01_ * m01_ + rotation.m11_ * m11_ + rotation.m21_ * m21_,
  579. rotation.m02_ * m02_ + rotation.m12_ * m12_ + rotation.m22_ * m22_
  580. );
  581. }
  582. /// Test for equality with another matrix with epsilon.
  583. bool Equals(const Matrix3x4& rhs) const
  584. {
  585. const float* leftData = Data();
  586. const float* rightData = rhs.Data();
  587. for (unsigned i = 0; i < 12; ++i)
  588. {
  589. if (!Urho3D::Equals(leftData[i], rightData[i]))
  590. return false;
  591. }
  592. return true;
  593. }
  594. /// Return decomposition to translation, rotation and scale.
  595. void Decompose(Vector3& translation, Quaternion& rotation, Vector3& scale) const;
  596. /// Return inverse.
  597. Matrix3x4 Inverse() const;
  598. /// Return float data.
  599. const float* Data() const { return &m00_; }
  600. /// Return matrix element.
  601. float Element(unsigned i, unsigned j) const { return Data()[i * 4 + j]; }
  602. /// Return matrix row.
  603. Vector4 Row(unsigned i) const { return Vector4(Element(i, 0), Element(i, 1), Element(i, 2), Element(i, 3)); }
  604. /// Return matrix column.
  605. Vector3 Column(unsigned j) const { return Vector3(Element(0, j), Element(1, j), Element(2, j)); }
  606. /// Return whether any element is NaN.
  607. bool IsNaN() const
  608. {
  609. const float* data = Data();
  610. for (unsigned i = 0; i < 12; ++i)
  611. {
  612. if (Urho3D::IsNaN(data[i]))
  613. return true;
  614. }
  615. return false;
  616. }
  617. /// Return whether any element is Inf.
  618. bool IsInf() const
  619. {
  620. const float* data = Data();
  621. for (unsigned i = 0; i < 12; ++i)
  622. {
  623. if (Urho3D::IsInf(data[i]))
  624. return true;
  625. }
  626. return false;
  627. }
  628. /// Return as string.
  629. String ToString() const;
  630. float m00_;
  631. float m01_;
  632. float m02_;
  633. float m03_;
  634. float m10_;
  635. float m11_;
  636. float m12_;
  637. float m13_;
  638. float m20_;
  639. float m21_;
  640. float m22_;
  641. float m23_;
  642. /// Zero matrix.
  643. static const Matrix3x4 ZERO;
  644. /// Identity matrix.
  645. static const Matrix3x4 IDENTITY;
  646. #ifdef URHO3D_SSE
  647. private:
  648. /// \brief Sets this matrix from the given translation, rotation (as quaternion (w,x,y,z)), and nonuniform scale (x,y,z) parameters. Note: the w component of the scale parameter passed to this function must be 1.
  649. void inline SetFromTRS(__m128 t, __m128 q, __m128 s)
  650. {
  651. q = _mm_shuffle_ps(q, q, _MM_SHUFFLE(0, 3, 2, 1));
  652. __m128 one = _mm_set_ps(0, 0, 0, 1);
  653. const __m128 sseX1 = _mm_castsi128_ps(_mm_set_epi32((int)0x80000000UL, (int)0x80000000UL, 0, (int)0x80000000UL));
  654. __m128 q2 = _mm_add_ps(q, q);
  655. __m128 t2 = _mm_add_ss(_mm_xor_ps(_mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 2)), _mm_shuffle_ps(q2, q2, _MM_SHUFFLE(0, 1, 2, 2))), sseX1), one);
  656. const __m128 sseX0 = _mm_shuffle_ps(sseX1, sseX1, _MM_SHUFFLE(0, 3, 2, 1));
  657. __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(1, 0, 0, 1)), _mm_shuffle_ps(q2, q2, _MM_SHUFFLE(2, 2, 1, 1)));
  658. __m128 t1 = _mm_xor_ps(t0, sseX0);
  659. __m128 r0 = _mm_sub_ps(t2, t1);
  660. __m128 xx2 = _mm_mul_ss(q, q2);
  661. __m128 r1 = _mm_sub_ps(_mm_xor_ps(t2, sseX0), _mm_move_ss(t1, xx2));
  662. r1 = _mm_shuffle_ps(r1, r1, _MM_SHUFFLE(2, 3, 0, 1));
  663. __m128 r2 = _mm_shuffle_ps(_mm_movehl_ps(r0, r1), _mm_sub_ss(_mm_sub_ss(one, xx2), t0), _MM_SHUFFLE(2, 0, 3, 1));
  664. __m128 tmp0 = _mm_unpacklo_ps(r0, r1);
  665. __m128 tmp2 = _mm_unpacklo_ps(r2, t);
  666. __m128 tmp1 = _mm_unpackhi_ps(r0, r1);
  667. __m128 tmp3 = _mm_unpackhi_ps(r2, t);
  668. _mm_storeu_ps(&m00_, _mm_mul_ps(_mm_movelh_ps(tmp0, tmp2), s));
  669. _mm_storeu_ps(&m10_, _mm_mul_ps(_mm_movehl_ps(tmp2, tmp0), s));
  670. _mm_storeu_ps(&m20_, _mm_mul_ps(_mm_movelh_ps(tmp1, tmp3), s));
  671. }
  672. #endif
  673. };
/// Multiply a 3x4 matrix with a scalar (commutative form; delegates to the member operator).
inline Matrix3x4 operator *(float lhs, const Matrix3x4& rhs) { return rhs * lhs; }
}