Matrix3x4.h

//
// Copyright (c) 2008-2017 the Urho3D project.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//

#pragma once

#include "../Math/Matrix4.h"

#ifdef ATOMIC_SSE
#include <emmintrin.h>
#endif

namespace Atomic
{

/// 3x4 matrix for scene node transform calculations.
class ATOMIC_API Matrix3x4
{
public:
    /// Construct an identity matrix.
    Matrix3x4()
#ifndef ATOMIC_SSE
        :m00_(1.0f),
        m01_(0.0f),
        m02_(0.0f),
        m03_(0.0f),
        m10_(0.0f),
        m11_(1.0f),
        m12_(0.0f),
        m13_(0.0f),
        m20_(0.0f),
        m21_(0.0f),
        m22_(1.0f),
        m23_(0.0f)
#endif
    {
#ifdef ATOMIC_SSE
        _mm_storeu_ps(&m00_, _mm_set_ps(0.f, 0.f, 0.f, 1.f));
        _mm_storeu_ps(&m10_, _mm_set_ps(0.f, 0.f, 1.f, 0.f));
        _mm_storeu_ps(&m20_, _mm_set_ps(0.f, 1.f, 0.f, 0.f));
#endif
    }

    /// Copy-construct from another matrix.
    Matrix3x4(const Matrix3x4& matrix)
#if defined(ATOMIC_SSE) && (!defined(_MSC_VER) || _MSC_VER >= 1700) /* Visual Studio 2012 and newer. VS2010 has a bug with these, see https://github.com/urho3d/Urho3D/issues/1044 */
    {
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&matrix.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&matrix.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&matrix.m20_));
    }
#else
        :m00_(matrix.m00_),
        m01_(matrix.m01_),
        m02_(matrix.m02_),
        m03_(matrix.m03_),
        m10_(matrix.m10_),
        m11_(matrix.m11_),
        m12_(matrix.m12_),
        m13_(matrix.m13_),
        m20_(matrix.m20_),
        m21_(matrix.m21_),
        m22_(matrix.m22_),
        m23_(matrix.m23_)
    {
    }
#endif

    /// Copy-construct from a 3x3 matrix and set the extra elements to identity.
    Matrix3x4(const Matrix3& matrix) :
        m00_(matrix.m00_),
        m01_(matrix.m01_),
        m02_(matrix.m02_),
        m03_(0.0f),
        m10_(matrix.m10_),
        m11_(matrix.m11_),
        m12_(matrix.m12_),
        m13_(0.0f),
        m20_(matrix.m20_),
        m21_(matrix.m21_),
        m22_(matrix.m22_),
        m23_(0.0f)
    {
    }

    /// Copy-construct from a 4x4 matrix which is assumed to contain no projection.
    Matrix3x4(const Matrix4& matrix)
#ifndef ATOMIC_SSE
        :m00_(matrix.m00_),
        m01_(matrix.m01_),
        m02_(matrix.m02_),
        m03_(matrix.m03_),
        m10_(matrix.m10_),
        m11_(matrix.m11_),
        m12_(matrix.m12_),
        m13_(matrix.m13_),
        m20_(matrix.m20_),
        m21_(matrix.m21_),
        m22_(matrix.m22_),
        m23_(matrix.m23_)
#endif
    {
#ifdef ATOMIC_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&matrix.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&matrix.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&matrix.m20_));
#endif
    }

    /// Construct from values.
    Matrix3x4(float v00, float v01, float v02, float v03,
              float v10, float v11, float v12, float v13,
              float v20, float v21, float v22, float v23) :
        m00_(v00),
        m01_(v01),
        m02_(v02),
        m03_(v03),
        m10_(v10),
        m11_(v11),
        m12_(v12),
        m13_(v13),
        m20_(v20),
        m21_(v21),
        m22_(v22),
        m23_(v23)
    {
    }

    /// Construct from a float array.
    explicit Matrix3x4(const float* data)
#ifndef ATOMIC_SSE
        :m00_(data[0]),
        m01_(data[1]),
        m02_(data[2]),
        m03_(data[3]),
        m10_(data[4]),
        m11_(data[5]),
        m12_(data[6]),
        m13_(data[7]),
        m20_(data[8]),
        m21_(data[9]),
        m22_(data[10]),
        m23_(data[11])
#endif
    {
#ifdef ATOMIC_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(data));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(data + 4));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(data + 8));
#endif
    }

    /// Construct from translation, rotation and uniform scale.
    Matrix3x4(const Vector3& translation, const Quaternion& rotation, float scale)
    {
#ifdef ATOMIC_SSE
        __m128 t = _mm_set_ps(1.f, translation.z_, translation.y_, translation.x_);
        __m128 q = _mm_loadu_ps(&rotation.w_);
        __m128 s = _mm_set_ps(1.f, scale, scale, scale);
        SetFromTRS(t, q, s);
#else
        SetRotation(rotation.RotationMatrix() * scale);
        SetTranslation(translation);
#endif
    }

    /// Construct from translation, rotation and nonuniform scale.
    Matrix3x4(const Vector3& translation, const Quaternion& rotation, const Vector3& scale)
    {
#ifdef ATOMIC_SSE
        __m128 t = _mm_set_ps(1.f, translation.z_, translation.y_, translation.x_);
        __m128 q = _mm_loadu_ps(&rotation.w_);
        __m128 s = _mm_set_ps(1.f, scale.z_, scale.y_, scale.x_);
        SetFromTRS(t, q, s);
#else
        SetRotation(rotation.RotationMatrix().Scaled(scale));
        SetTranslation(translation);
#endif
    }
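
    // A minimal usage sketch for the TRS constructors (illustrative only, not part of the
    // original header; assumes the usual Quaternion(angle, axis) constructor and the
    // Vector3::UP constant):
    //
    //   Matrix3x4 transform(Vector3(0.0f, 2.0f, 0.0f),       // translation
    //                       Quaternion(90.0f, Vector3::UP),   // rotation (degrees about an axis)
    //                       1.5f);                            // uniform scale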

    /// Assign from another matrix.
    Matrix3x4& operator =(const Matrix3x4& rhs)
    {
#if defined(ATOMIC_SSE) && (!defined(_MSC_VER) || _MSC_VER >= 1700) /* Visual Studio 2012 and newer. VS2010 has a bug with these, see https://github.com/urho3d/Urho3D/issues/1044 */
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&rhs.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&rhs.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&rhs.m20_));
#else
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = rhs.m03_;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = rhs.m13_;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = rhs.m23_;
#endif
        return *this;
    }

    /// Assign from a 3x3 matrix and set the extra elements to identity.
    Matrix3x4& operator =(const Matrix3& rhs)
    {
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = 0.0f;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = 0.0f;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = 0.0f;
        return *this;
    }

    /// Assign from a 4x4 matrix which is assumed to contain no projection.
    Matrix3x4& operator =(const Matrix4& rhs)
    {
#ifdef ATOMIC_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&rhs.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&rhs.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&rhs.m20_));
#else
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = rhs.m03_;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = rhs.m13_;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = rhs.m23_;
#endif
        return *this;
    }

    /// Test for equality with another matrix without epsilon.
    bool operator ==(const Matrix3x4& rhs) const
    {
#ifdef ATOMIC_SSE
        __m128 c0 = _mm_cmpeq_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_));
        __m128 c1 = _mm_cmpeq_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_));
        c0 = _mm_and_ps(c0, c1);
        __m128 c2 = _mm_cmpeq_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_));
        c0 = _mm_and_ps(c0, c2);
        __m128 hi = _mm_movehl_ps(c0, c0);
        c0 = _mm_and_ps(c0, hi);
        hi = _mm_shuffle_ps(c0, c0, _MM_SHUFFLE(1, 1, 1, 1));
        c0 = _mm_and_ps(c0, hi);
        return _mm_cvtsi128_si32(_mm_castps_si128(c0)) == -1;
#else
        const float* leftData = Data();
        const float* rightData = rhs.Data();

        for (unsigned i = 0; i < 12; ++i)
        {
            if (leftData[i] != rightData[i])
                return false;
        }

        return true;
#endif
    }

    /// Test for inequality with another matrix without epsilon.
    bool operator !=(const Matrix3x4& rhs) const { return !(*this == rhs); }

    /// Multiply a Vector3 which is assumed to represent position.
    Vector3 operator *(const Vector3& rhs) const
    {
#ifdef ATOMIC_SSE
        __m128 vec = _mm_set_ps(1.f, rhs.z_, rhs.y_, rhs.x_);
        __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
        __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
        __m128 t0 = _mm_unpacklo_ps(r0, r1);
        __m128 t1 = _mm_unpackhi_ps(r0, r1);
        t0 = _mm_add_ps(t0, t1);
        __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
        __m128 r3 = _mm_setzero_ps();
        __m128 t2 = _mm_unpacklo_ps(r2, r3);
        __m128 t3 = _mm_unpackhi_ps(r2, r3);
        t2 = _mm_add_ps(t2, t3);
        vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));

        return Vector3(
            _mm_cvtss_f32(vec),
            _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1))),
            _mm_cvtss_f32(_mm_movehl_ps(vec, vec)));
#else
        return Vector3(
            (m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_),
            (m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_),
            (m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_)
        );
#endif
    }
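
    // Note (illustrative comment, not from the original file): the Vector3 overload treats rhs
    // as a point with an implicit w = 1, so rotation, scale and translation are all applied.
    // For example:
    //
    //   Matrix3x4 m(Vector3(5.0f, 0.0f, 0.0f), Quaternion::IDENTITY, 1.0f);
    //   Vector3 p = m * Vector3::ZERO;   // p == (5, 0, 0), i.e. the translation part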

    /// Multiply a Vector4.
    Vector3 operator *(const Vector4& rhs) const
    {
#ifdef ATOMIC_SSE
        __m128 vec = _mm_loadu_ps(&rhs.x_);
        __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
        __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
        __m128 t0 = _mm_unpacklo_ps(r0, r1);
        __m128 t1 = _mm_unpackhi_ps(r0, r1);
        t0 = _mm_add_ps(t0, t1);
        __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
        __m128 r3 = _mm_setzero_ps();
        __m128 t2 = _mm_unpacklo_ps(r2, r3);
        __m128 t3 = _mm_unpackhi_ps(r2, r3);
        t2 = _mm_add_ps(t2, t3);
        vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));

        return Vector3(
            _mm_cvtss_f32(vec),
            _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1))),
            _mm_cvtss_f32(_mm_movehl_ps(vec, vec)));
#else
        return Vector3(
            (m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_ * rhs.w_),
            (m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_ * rhs.w_),
            (m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_ * rhs.w_)
        );
#endif
    }

    /// Add a matrix.
    Matrix3x4 operator +(const Matrix3x4& rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix3x4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_add_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
        _mm_storeu_ps(&ret.m10_, _mm_add_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
        _mm_storeu_ps(&ret.m20_, _mm_add_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
        return ret;
#else
        return Matrix3x4(
            m00_ + rhs.m00_,
            m01_ + rhs.m01_,
            m02_ + rhs.m02_,
            m03_ + rhs.m03_,
            m10_ + rhs.m10_,
            m11_ + rhs.m11_,
            m12_ + rhs.m12_,
            m13_ + rhs.m13_,
            m20_ + rhs.m20_,
            m21_ + rhs.m21_,
            m22_ + rhs.m22_,
            m23_ + rhs.m23_
        );
#endif
    }

    /// Subtract a matrix.
    Matrix3x4 operator -(const Matrix3x4& rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix3x4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_sub_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
        _mm_storeu_ps(&ret.m10_, _mm_sub_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
        _mm_storeu_ps(&ret.m20_, _mm_sub_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
        return ret;
#else
        return Matrix3x4(
            m00_ - rhs.m00_,
            m01_ - rhs.m01_,
            m02_ - rhs.m02_,
            m03_ - rhs.m03_,
            m10_ - rhs.m10_,
            m11_ - rhs.m11_,
            m12_ - rhs.m12_,
            m13_ - rhs.m13_,
            m20_ - rhs.m20_,
            m21_ - rhs.m21_,
            m22_ - rhs.m22_,
            m23_ - rhs.m23_
        );
#endif
    }

    /// Multiply with a scalar.
    Matrix3x4 operator *(float rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix3x4 ret;
        const __m128 mul = _mm_set1_ps(rhs);
        _mm_storeu_ps(&ret.m00_, _mm_mul_ps(_mm_loadu_ps(&m00_), mul));
        _mm_storeu_ps(&ret.m10_, _mm_mul_ps(_mm_loadu_ps(&m10_), mul));
        _mm_storeu_ps(&ret.m20_, _mm_mul_ps(_mm_loadu_ps(&m20_), mul));
        return ret;
#else
        return Matrix3x4(
            m00_ * rhs,
            m01_ * rhs,
            m02_ * rhs,
            m03_ * rhs,
            m10_ * rhs,
            m11_ * rhs,
            m12_ * rhs,
            m13_ * rhs,
            m20_ * rhs,
            m21_ * rhs,
            m22_ * rhs,
            m23_ * rhs
        );
#endif
    }

    /// Multiply a matrix.
    Matrix3x4 operator *(const Matrix3x4& rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix3x4 out;

        __m128 r0 = _mm_loadu_ps(&rhs.m00_);
        __m128 r1 = _mm_loadu_ps(&rhs.m10_);
        __m128 r2 = _mm_loadu_ps(&rhs.m20_);
        __m128 r3 = _mm_set_ps(1.f, 0.f, 0.f, 0.f);

        __m128 l = _mm_loadu_ps(&m00_);
        __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        __m128 t3 = _mm_mul_ps(l, r3);
        _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m10_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(l, r3);
        _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m20_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(l, r3);
        _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        return out;
#else
        return Matrix3x4(
            m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_,
            m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_,
            m00_ * rhs.m02_ + m01_ * rhs.m12_ + m02_ * rhs.m22_,
            m00_ * rhs.m03_ + m01_ * rhs.m13_ + m02_ * rhs.m23_ + m03_,
            m10_ * rhs.m00_ + m11_ * rhs.m10_ + m12_ * rhs.m20_,
            m10_ * rhs.m01_ + m11_ * rhs.m11_ + m12_ * rhs.m21_,
            m10_ * rhs.m02_ + m11_ * rhs.m12_ + m12_ * rhs.m22_,
            m10_ * rhs.m03_ + m11_ * rhs.m13_ + m12_ * rhs.m23_ + m13_,
            m20_ * rhs.m00_ + m21_ * rhs.m10_ + m22_ * rhs.m20_,
            m20_ * rhs.m01_ + m21_ * rhs.m11_ + m22_ * rhs.m21_,
            m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_,
            m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_
        );
#endif
    }
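
    // Note (illustrative comment, not from the original file): matrix multiplication concatenates
    // affine transforms so that rhs is applied first, i.e. (lhs * rhs) * v == lhs * (rhs * v) for a
    // point v. A typical scene-graph use is therefore (parentWorld and local are hypothetical
    // variables):
    //
    //   Matrix3x4 world = parentWorld * local;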

    /// Multiply a 4x4 matrix.
    Matrix4 operator *(const Matrix4& rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix4 out;

        __m128 r0 = _mm_loadu_ps(&rhs.m00_);
        __m128 r1 = _mm_loadu_ps(&rhs.m10_);
        __m128 r2 = _mm_loadu_ps(&rhs.m20_);
        __m128 r3 = _mm_loadu_ps(&rhs.m30_);

        __m128 l = _mm_loadu_ps(&m00_);
        __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        __m128 t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m10_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m20_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        _mm_storeu_ps(&out.m30_, r3);

        return out;
#else
        return Matrix4(
            m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_ + m03_ * rhs.m30_,
            m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_ + m03_ * rhs.m31_,
            m00_ * rhs.m02_ + m01_ * rhs.m12_ + m02_ * rhs.m22_ + m03_ * rhs.m32_,
            m00_ * rhs.m03_ + m01_ * rhs.m13_ + m02_ * rhs.m23_ + m03_ * rhs.m33_,
            m10_ * rhs.m00_ + m11_ * rhs.m10_ + m12_ * rhs.m20_ + m13_ * rhs.m30_,
            m10_ * rhs.m01_ + m11_ * rhs.m11_ + m12_ * rhs.m21_ + m13_ * rhs.m31_,
            m10_ * rhs.m02_ + m11_ * rhs.m12_ + m12_ * rhs.m22_ + m13_ * rhs.m32_,
            m10_ * rhs.m03_ + m11_ * rhs.m13_ + m12_ * rhs.m23_ + m13_ * rhs.m33_,
            m20_ * rhs.m00_ + m21_ * rhs.m10_ + m22_ * rhs.m20_ + m23_ * rhs.m30_,
            m20_ * rhs.m01_ + m21_ * rhs.m11_ + m22_ * rhs.m21_ + m23_ * rhs.m31_,
            m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_ + m23_ * rhs.m32_,
            m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_ * rhs.m33_,
            rhs.m30_,
            rhs.m31_,
            rhs.m32_,
            rhs.m33_
        );
#endif
    }

    /// Set translation elements.
    void SetTranslation(const Vector3& translation)
    {
        m03_ = translation.x_;
        m13_ = translation.y_;
        m23_ = translation.z_;
    }

    /// Set rotation elements from a 3x3 matrix.
    void SetRotation(const Matrix3& rotation)
    {
        m00_ = rotation.m00_;
        m01_ = rotation.m01_;
        m02_ = rotation.m02_;
        m10_ = rotation.m10_;
        m11_ = rotation.m11_;
        m12_ = rotation.m12_;
        m20_ = rotation.m20_;
        m21_ = rotation.m21_;
        m22_ = rotation.m22_;
    }

    /// Set scaling elements.
    void SetScale(const Vector3& scale)
    {
        m00_ = scale.x_;
        m11_ = scale.y_;
        m22_ = scale.z_;
    }

    /// Set uniform scaling elements.
    void SetScale(float scale)
    {
        m00_ = scale;
        m11_ = scale;
        m22_ = scale;
    }

    /// Return the combined rotation and scaling matrix.
    Matrix3 ToMatrix3() const
    {
        return Matrix3(
            m00_,
            m01_,
            m02_,
            m10_,
            m11_,
            m12_,
            m20_,
            m21_,
            m22_
        );
    }

    /// Convert to a 4x4 matrix by filling in an identity last row.
    Matrix4 ToMatrix4() const
    {
#ifdef ATOMIC_SSE
        Matrix4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_loadu_ps(&m00_));
        _mm_storeu_ps(&ret.m10_, _mm_loadu_ps(&m10_));
        _mm_storeu_ps(&ret.m20_, _mm_loadu_ps(&m20_));
        _mm_storeu_ps(&ret.m30_, _mm_set_ps(1.f, 0.f, 0.f, 0.f));
        return ret;
#else
        return Matrix4(
            m00_,
            m01_,
            m02_,
            m03_,
            m10_,
            m11_,
            m12_,
            m13_,
            m20_,
            m21_,
            m22_,
            m23_,
            0.0f,
            0.0f,
            0.0f,
            1.0f
        );
#endif
    }

    /// Return the rotation matrix with scaling removed.
    Matrix3 RotationMatrix() const
    {
        Vector3 invScale(
            1.0f / sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
            1.0f / sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
            1.0f / sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
        );

        return ToMatrix3().Scaled(invScale);
    }
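
    // Note (illustrative comment, not from the original file): the scale factors live in the
    // Euclidean norms of the three rotation/scale columns, so dividing each column by its norm,
    // as done above, leaves a pure rotation (up to reflection when a scale component is negative).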

    /// Return the translation part.
    Vector3 Translation() const
    {
        return Vector3(
            m03_,
            m13_,
            m23_
        );
    }

    /// Return the rotation part.
    Quaternion Rotation() const { return Quaternion(RotationMatrix()); }

    /// Return the scaling part.
    Vector3 Scale() const
    {
        return Vector3(
            sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
            sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
            sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
        );
    }

    /// Return the scaling part with the sign. Reference rotation matrix is required to avoid ambiguity.
    Vector3 SignedScale(const Matrix3& rotation) const
    {
        return Vector3(
            rotation.m00_ * m00_ + rotation.m10_ * m10_ + rotation.m20_ * m20_,
            rotation.m01_ * m01_ + rotation.m11_ * m11_ + rotation.m21_ * m21_,
            rotation.m02_ * m02_ + rotation.m12_ * m12_ + rotation.m22_ * m22_
        );
    }

    /// Test for equality with another matrix with epsilon.
    bool Equals(const Matrix3x4& rhs) const
    {
        const float* leftData = Data();
        const float* rightData = rhs.Data();

        for (unsigned i = 0; i < 12; ++i)
        {
            if (!Atomic::Equals(leftData[i], rightData[i]))
                return false;
        }

        return true;
    }

    /// Return decomposition to translation, rotation and scale.
    void Decompose(Vector3& translation, Quaternion& rotation, Vector3& scale) const;
    /// Return inverse.
    Matrix3x4 Inverse() const;

    /// Return float data.
    const float* Data() const { return &m00_; }

    /// Return matrix element.
    float Element(unsigned i, unsigned j) const { return Data()[i * 4 + j]; }

    /// Return matrix row.
    Vector4 Row(unsigned i) const { return Vector4(Element(i, 0), Element(i, 1), Element(i, 2), Element(i, 3)); }

    /// Return matrix column.
    Vector3 Column(unsigned j) const { return Vector3(Element(0, j), Element(1, j), Element(2, j)); }

    /// Return as string.
    String ToString() const;

    float m00_;
    float m01_;
    float m02_;
    float m03_;
    float m10_;
    float m11_;
    float m12_;
    float m13_;
    float m20_;
    float m21_;
    float m22_;
    float m23_;

    /// Zero matrix.
    static const Matrix3x4 ZERO;
    /// Identity matrix.
    static const Matrix3x4 IDENTITY;

#ifdef ATOMIC_SSE
private:
    /// \brief Sets this matrix from the given translation, rotation (as quaternion (w,x,y,z)), and nonuniform scale (x,y,z) parameters. Note: the w component of the scale parameter passed to this function must be 1.
    void inline SetFromTRS(__m128 t, __m128 q, __m128 s)
    {
        q = _mm_shuffle_ps(q, q, _MM_SHUFFLE(0, 3, 2, 1));
        __m128 one = _mm_set_ps(0, 0, 0, 1);
        const __m128 sseX1 = _mm_castsi128_ps(_mm_set_epi32((int)0x80000000UL, (int)0x80000000UL, 0, (int)0x80000000UL));
        __m128 q2 = _mm_add_ps(q, q);
        __m128 t2 = _mm_add_ss(_mm_xor_ps(_mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(3, 3, 3, 2)), _mm_shuffle_ps(q2, q2, _MM_SHUFFLE(0, 1, 2, 2))), sseX1), one);
        const __m128 sseX0 = _mm_shuffle_ps(sseX1, sseX1, _MM_SHUFFLE(0, 3, 2, 1));
        __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(q, q, _MM_SHUFFLE(1, 0, 0, 1)), _mm_shuffle_ps(q2, q2, _MM_SHUFFLE(2, 2, 1, 1)));
        __m128 t1 = _mm_xor_ps(t0, sseX0);
        __m128 r0 = _mm_sub_ps(t2, t1);
        __m128 xx2 = _mm_mul_ss(q, q2);
        __m128 r1 = _mm_sub_ps(_mm_xor_ps(t2, sseX0), _mm_move_ss(t1, xx2));
        r1 = _mm_shuffle_ps(r1, r1, _MM_SHUFFLE(2, 3, 0, 1));
        __m128 r2 = _mm_shuffle_ps(_mm_movehl_ps(r0, r1), _mm_sub_ss(_mm_sub_ss(one, xx2), t0), _MM_SHUFFLE(2, 0, 3, 1));

        __m128 tmp0 = _mm_unpacklo_ps(r0, r1);
        __m128 tmp2 = _mm_unpacklo_ps(r2, t);
        __m128 tmp1 = _mm_unpackhi_ps(r0, r1);
        __m128 tmp3 = _mm_unpackhi_ps(r2, t);
        _mm_storeu_ps(&m00_, _mm_mul_ps(_mm_movelh_ps(tmp0, tmp2), s));
        _mm_storeu_ps(&m10_, _mm_mul_ps(_mm_movehl_ps(tmp2, tmp0), s));
        _mm_storeu_ps(&m20_, _mm_mul_ps(_mm_movelh_ps(tmp1, tmp3), s));
    }
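
    // For reference (illustrative comment added here, not from the original file): the packed code
    // above is assumed to correspond to the standard expansion of a unit quaternion q = (w, x, y, z)
    // into a rotation matrix, with each column then scaled and the translation written to the
    // fourth column:
    //
    //   R = | 1 - 2(y*y + z*z)   2(x*y - w*z)       2(x*z + w*y)     |
    //       | 2(x*y + w*z)       1 - 2(x*x + z*z)   2(y*z - w*x)     |
    //       | 2(x*z - w*y)       2(y*z + w*x)       1 - 2(x*x + y*y) |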
#endif
};

/// Multiply a 3x4 matrix with a scalar.
inline Matrix3x4 operator *(float lhs, const Matrix3x4& rhs) { return rhs * lhs; }

}
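
// Example usage (an illustrative sketch only, not part of the original header; parentWorld is a
// hypothetical Matrix3x4 variable):
//
//   using namespace Atomic;
//
//   Matrix3x4 local(Vector3(1.0f, 0.0f, 0.0f), Quaternion::IDENTITY, 2.0f);
//   Matrix3x4 world = parentWorld * local;     // concatenate parent and local transforms
//   Vector3 worldPos = world * Vector3::ZERO;  // transform a point (implicit w = 1)
//
//   Vector3 t, s;
//   Quaternion r;
//   world.Decompose(t, r, s);                  // recover translation, rotation and scale
//   Matrix3x4 invWorld = world.Inverse();      // invWorld * world is approximately IDENTITY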