Matrix4.h

//
// Copyright (c) 2008-2017 the Urho3D project.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//

#pragma once

#include "../Math/Quaternion.h"
#include "../Math/Vector4.h"

#ifdef ATOMIC_SSE
#include <emmintrin.h>
#endif

namespace Atomic
{

class Matrix3x4;

/// 4x4 matrix for arbitrary linear transforms including projection.
class ATOMIC_API Matrix4
{
public:
    /// Construct an identity matrix.
    Matrix4()
#ifndef ATOMIC_SSE
        : m00_(1.0f), m01_(0.0f), m02_(0.0f), m03_(0.0f),
          m10_(0.0f), m11_(1.0f), m12_(0.0f), m13_(0.0f),
          m20_(0.0f), m21_(0.0f), m22_(1.0f), m23_(0.0f),
          m30_(0.0f), m31_(0.0f), m32_(0.0f), m33_(1.0f)
#endif
    {
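        // Note: _mm_set_ps takes its arguments from the highest lane to the
        // lowest, so e.g. _mm_set_ps(0.f, 0.f, 0.f, 1.f) writes {1, 0, 0, 0}
        // to memory in ascending element order, producing the identity rows.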
#ifdef ATOMIC_SSE
        _mm_storeu_ps(&m00_, _mm_set_ps(0.f, 0.f, 0.f, 1.f));
        _mm_storeu_ps(&m10_, _mm_set_ps(0.f, 0.f, 1.f, 0.f));
        _mm_storeu_ps(&m20_, _mm_set_ps(0.f, 1.f, 0.f, 0.f));
        _mm_storeu_ps(&m30_, _mm_set_ps(1.f, 0.f, 0.f, 0.f));
#endif
    }

    /// Copy-construct from another matrix.
    Matrix4(const Matrix4& matrix)
#ifndef ATOMIC_SSE
        : m00_(matrix.m00_), m01_(matrix.m01_), m02_(matrix.m02_), m03_(matrix.m03_),
          m10_(matrix.m10_), m11_(matrix.m11_), m12_(matrix.m12_), m13_(matrix.m13_),
          m20_(matrix.m20_), m21_(matrix.m21_), m22_(matrix.m22_), m23_(matrix.m23_),
          m30_(matrix.m30_), m31_(matrix.m31_), m32_(matrix.m32_), m33_(matrix.m33_)
#endif
    {
#ifdef ATOMIC_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&matrix.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&matrix.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&matrix.m20_));
        _mm_storeu_ps(&m30_, _mm_loadu_ps(&matrix.m30_));
#endif
    }

    /// Copy-construct from a 3x3 matrix and set the extra elements to identity.
    Matrix4(const Matrix3& matrix) :
        m00_(matrix.m00_), m01_(matrix.m01_), m02_(matrix.m02_), m03_(0.0f),
        m10_(matrix.m10_), m11_(matrix.m11_), m12_(matrix.m12_), m13_(0.0f),
        m20_(matrix.m20_), m21_(matrix.m21_), m22_(matrix.m22_), m23_(0.0f),
        m30_(0.0f), m31_(0.0f), m32_(0.0f), m33_(1.0f)
    {
    }

    /// Construct from values.
    Matrix4(float v00, float v01, float v02, float v03,
            float v10, float v11, float v12, float v13,
            float v20, float v21, float v22, float v23,
            float v30, float v31, float v32, float v33) :
        m00_(v00), m01_(v01), m02_(v02), m03_(v03),
        m10_(v10), m11_(v11), m12_(v12), m13_(v13),
        m20_(v20), m21_(v21), m22_(v22), m23_(v23),
        m30_(v30), m31_(v31), m32_(v32), m33_(v33)
    {
    }

    /// Construct from a float array.
    explicit Matrix4(const float* data)
#ifndef ATOMIC_SSE
        : m00_(data[0]), m01_(data[1]), m02_(data[2]), m03_(data[3]),
          m10_(data[4]), m11_(data[5]), m12_(data[6]), m13_(data[7]),
          m20_(data[8]), m21_(data[9]), m22_(data[10]), m23_(data[11]),
          m30_(data[12]), m31_(data[13]), m32_(data[14]), m33_(data[15])
#endif
    {
#ifdef ATOMIC_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(data));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(data + 4));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(data + 8));
        _mm_storeu_ps(&m30_, _mm_loadu_ps(data + 12));
#endif
    }

    /// Assign from another matrix.
    Matrix4& operator =(const Matrix4& rhs)
    {
#ifdef ATOMIC_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&rhs.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&rhs.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&rhs.m20_));
        _mm_storeu_ps(&m30_, _mm_loadu_ps(&rhs.m30_));
#else
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = rhs.m03_;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = rhs.m13_;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = rhs.m23_;
        m30_ = rhs.m30_;
        m31_ = rhs.m31_;
        m32_ = rhs.m32_;
        m33_ = rhs.m33_;
#endif
        return *this;
    }

    /// Assign from a 3x3 matrix. Set the extra elements to identity.
    Matrix4& operator =(const Matrix3& rhs)
    {
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = 0.0f;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = 0.0f;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = 0.0f;
        m30_ = 0.0f;
        m31_ = 0.0f;
        m32_ = 0.0f;
        m33_ = 1.0f;
        return *this;
    }

    /// Test for equality with another matrix without epsilon.
    bool operator ==(const Matrix4& rhs) const
    {
#ifdef ATOMIC_SSE
        __m128 c0 = _mm_cmpeq_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_));
        __m128 c1 = _mm_cmpeq_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_));
        c0 = _mm_and_ps(c0, c1);
        __m128 c2 = _mm_cmpeq_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_));
        __m128 c3 = _mm_cmpeq_ps(_mm_loadu_ps(&m30_), _mm_loadu_ps(&rhs.m30_));
        c2 = _mm_and_ps(c2, c3);
        c0 = _mm_and_ps(c0, c2);
        __m128 hi = _mm_movehl_ps(c0, c0);
        c0 = _mm_and_ps(c0, hi);
        hi = _mm_shuffle_ps(c0, c0, _MM_SHUFFLE(1, 1, 1, 1));
        c0 = _mm_and_ps(c0, hi);
        return _mm_cvtsi128_si32(_mm_castps_si128(c0)) == -1;
#else
        const float* leftData = Data();
        const float* rightData = rhs.Data();

        for (unsigned i = 0; i < 16; ++i)
        {
            if (leftData[i] != rightData[i])
                return false;
        }

        return true;
#endif
    }

    /// Test for inequality with another matrix without epsilon.
    bool operator !=(const Matrix4& rhs) const { return !(*this == rhs); }
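
    // Note: operator== / operator!= compare all 16 elements with exact
    // floating-point equality; for a tolerance-based comparison use the
    // Equals() member function declared further below.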

    /// Multiply a Vector3 which is assumed to represent position.
    Vector3 operator *(const Vector3& rhs) const
    {
#ifdef ATOMIC_SSE
        __m128 vec = _mm_set_ps(1.f, rhs.z_, rhs.y_, rhs.x_);
        __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
        __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
        __m128 t0 = _mm_unpacklo_ps(r0, r1);
        __m128 t1 = _mm_unpackhi_ps(r0, r1);
        t0 = _mm_add_ps(t0, t1);
        __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
        __m128 r3 = _mm_mul_ps(_mm_loadu_ps(&m30_), vec);
        __m128 t2 = _mm_unpacklo_ps(r2, r3);
        __m128 t3 = _mm_unpackhi_ps(r2, r3);
        t2 = _mm_add_ps(t2, t3);
        vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));
        vec = _mm_div_ps(vec, _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 3, 3, 3)));

        return Vector3(
            _mm_cvtss_f32(vec),
            _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1))),
            _mm_cvtss_f32(_mm_movehl_ps(vec, vec)));
#else
        float invW = 1.0f / (m30_ * rhs.x_ + m31_ * rhs.y_ + m32_ * rhs.z_ + m33_);

        return Vector3(
            (m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_) * invW,
            (m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_) * invW,
            (m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_) * invW
        );
#endif
    }
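
    // The position multiply treats rhs as (x, y, z, 1) and divides the result
    // by the computed w, so projection matrices apply the perspective divide
    // automatically. A minimal sketch, assuming `viewProj` is a hypothetical
    // combined view-projection matrix:
    //
    //   Vector3 ndc = viewProj * Vector3(1.0f, 2.0f, 5.0f);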

    /// Multiply a Vector4.
    Vector4 operator *(const Vector4& rhs) const
    {
#ifdef ATOMIC_SSE
        __m128 vec = _mm_loadu_ps(&rhs.x_);
        __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
        __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
        __m128 t0 = _mm_unpacklo_ps(r0, r1);
        __m128 t1 = _mm_unpackhi_ps(r0, r1);
        t0 = _mm_add_ps(t0, t1);
        __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
        __m128 r3 = _mm_mul_ps(_mm_loadu_ps(&m30_), vec);
        __m128 t2 = _mm_unpacklo_ps(r2, r3);
        __m128 t3 = _mm_unpackhi_ps(r2, r3);
        t2 = _mm_add_ps(t2, t3);
        vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));

        Vector4 ret;
        _mm_storeu_ps(&ret.x_, vec);
        return ret;
#else
        return Vector4(
            m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_ * rhs.w_,
            m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_ * rhs.w_,
            m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_ * rhs.w_,
            m30_ * rhs.x_ + m31_ * rhs.y_ + m32_ * rhs.z_ + m33_ * rhs.w_
        );
#endif
    }

    /// Add a matrix.
    Matrix4 operator +(const Matrix4& rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_add_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
        _mm_storeu_ps(&ret.m10_, _mm_add_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
        _mm_storeu_ps(&ret.m20_, _mm_add_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
        _mm_storeu_ps(&ret.m30_, _mm_add_ps(_mm_loadu_ps(&m30_), _mm_loadu_ps(&rhs.m30_)));
        return ret;
#else
        return Matrix4(
            m00_ + rhs.m00_, m01_ + rhs.m01_, m02_ + rhs.m02_, m03_ + rhs.m03_,
            m10_ + rhs.m10_, m11_ + rhs.m11_, m12_ + rhs.m12_, m13_ + rhs.m13_,
            m20_ + rhs.m20_, m21_ + rhs.m21_, m22_ + rhs.m22_, m23_ + rhs.m23_,
            m30_ + rhs.m30_, m31_ + rhs.m31_, m32_ + rhs.m32_, m33_ + rhs.m33_
        );
#endif
    }

    /// Subtract a matrix.
    Matrix4 operator -(const Matrix4& rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_sub_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
        _mm_storeu_ps(&ret.m10_, _mm_sub_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
        _mm_storeu_ps(&ret.m20_, _mm_sub_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
        _mm_storeu_ps(&ret.m30_, _mm_sub_ps(_mm_loadu_ps(&m30_), _mm_loadu_ps(&rhs.m30_)));
        return ret;
#else
        return Matrix4(
            m00_ - rhs.m00_, m01_ - rhs.m01_, m02_ - rhs.m02_, m03_ - rhs.m03_,
            m10_ - rhs.m10_, m11_ - rhs.m11_, m12_ - rhs.m12_, m13_ - rhs.m13_,
            m20_ - rhs.m20_, m21_ - rhs.m21_, m22_ - rhs.m22_, m23_ - rhs.m23_,
            m30_ - rhs.m30_, m31_ - rhs.m31_, m32_ - rhs.m32_, m33_ - rhs.m33_
        );
#endif
    }

    /// Multiply with a scalar.
    Matrix4 operator *(float rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix4 ret;
        const __m128 mul = _mm_set1_ps(rhs);
        _mm_storeu_ps(&ret.m00_, _mm_mul_ps(_mm_loadu_ps(&m00_), mul));
        _mm_storeu_ps(&ret.m10_, _mm_mul_ps(_mm_loadu_ps(&m10_), mul));
        _mm_storeu_ps(&ret.m20_, _mm_mul_ps(_mm_loadu_ps(&m20_), mul));
        _mm_storeu_ps(&ret.m30_, _mm_mul_ps(_mm_loadu_ps(&m30_), mul));
        return ret;
#else
        return Matrix4(
            m00_ * rhs, m01_ * rhs, m02_ * rhs, m03_ * rhs,
            m10_ * rhs, m11_ * rhs, m12_ * rhs, m13_ * rhs,
            m20_ * rhs, m21_ * rhs, m22_ * rhs, m23_ * rhs,
            m30_ * rhs, m31_ * rhs, m32_ * rhs, m33_ * rhs
        );
#endif
    }

    /// Multiply a matrix.
    Matrix4 operator *(const Matrix4& rhs) const
    {
#ifdef ATOMIC_SSE
        Matrix4 out;

        __m128 r0 = _mm_loadu_ps(&rhs.m00_);
        __m128 r1 = _mm_loadu_ps(&rhs.m10_);
        __m128 r2 = _mm_loadu_ps(&rhs.m20_);
        __m128 r3 = _mm_loadu_ps(&rhs.m30_);

        __m128 l = _mm_loadu_ps(&m00_);
        __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        __m128 t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m10_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m20_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m30_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m30_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        return out;
#else
        return Matrix4(
            m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_ + m03_ * rhs.m30_,
            m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_ + m03_ * rhs.m31_,
            m00_ * rhs.m02_ + m01_ * rhs.m12_ + m02_ * rhs.m22_ + m03_ * rhs.m32_,
            m00_ * rhs.m03_ + m01_ * rhs.m13_ + m02_ * rhs.m23_ + m03_ * rhs.m33_,
            m10_ * rhs.m00_ + m11_ * rhs.m10_ + m12_ * rhs.m20_ + m13_ * rhs.m30_,
            m10_ * rhs.m01_ + m11_ * rhs.m11_ + m12_ * rhs.m21_ + m13_ * rhs.m31_,
            m10_ * rhs.m02_ + m11_ * rhs.m12_ + m12_ * rhs.m22_ + m13_ * rhs.m32_,
            m10_ * rhs.m03_ + m11_ * rhs.m13_ + m12_ * rhs.m23_ + m13_ * rhs.m33_,
            m20_ * rhs.m00_ + m21_ * rhs.m10_ + m22_ * rhs.m20_ + m23_ * rhs.m30_,
            m20_ * rhs.m01_ + m21_ * rhs.m11_ + m22_ * rhs.m21_ + m23_ * rhs.m31_,
            m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_ + m23_ * rhs.m32_,
            m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_ * rhs.m33_,
            m30_ * rhs.m00_ + m31_ * rhs.m10_ + m32_ * rhs.m20_ + m33_ * rhs.m30_,
            m30_ * rhs.m01_ + m31_ * rhs.m11_ + m32_ * rhs.m21_ + m33_ * rhs.m31_,
            m30_ * rhs.m02_ + m31_ * rhs.m12_ + m32_ * rhs.m22_ + m33_ * rhs.m32_,
            m30_ * rhs.m03_ + m31_ * rhs.m13_ + m32_ * rhs.m23_ + m33_ * rhs.m33_
        );
#endif
    }
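
    // Composition note: vectors multiply on the right (M * v above), so the
    // rightmost factor in a product is applied to the vector first. A sketch
    // with hypothetical `proj`, `view` and `world` matrices:
    //
    //   Matrix4 worldViewProj = proj * view * world;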

    /// Multiply with a 3x4 matrix.
    Matrix4 operator *(const Matrix3x4& rhs) const;

    /// Set translation elements.
    void SetTranslation(const Vector3& translation)
    {
        m03_ = translation.x_;
        m13_ = translation.y_;
        m23_ = translation.z_;
    }

    /// Set rotation elements from a 3x3 matrix.
    void SetRotation(const Matrix3& rotation)
    {
        m00_ = rotation.m00_;
        m01_ = rotation.m01_;
        m02_ = rotation.m02_;
        m10_ = rotation.m10_;
        m11_ = rotation.m11_;
        m12_ = rotation.m12_;
        m20_ = rotation.m20_;
        m21_ = rotation.m21_;
        m22_ = rotation.m22_;
    }

    /// Set scaling elements.
    void SetScale(const Vector3& scale)
    {
        m00_ = scale.x_;
        m11_ = scale.y_;
        m22_ = scale.z_;
    }

    /// Set uniform scaling elements.
    void SetScale(float scale)
    {
        m00_ = scale;
        m11_ = scale;
        m22_ = scale;
    }

    /// Return the combined rotation and scaling matrix.
    Matrix3 ToMatrix3() const
    {
        return Matrix3(
            m00_, m01_, m02_,
            m10_, m11_, m12_,
            m20_, m21_, m22_
        );
    }

    /// Return the rotation matrix with scaling removed.
    Matrix3 RotationMatrix() const
    {
        Vector3 invScale(
            1.0f / sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
            1.0f / sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
            1.0f / sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
        );

        return ToMatrix3().Scaled(invScale);
    }

    /// Return the translation part.
    Vector3 Translation() const
    {
        return Vector3(m03_, m13_, m23_);
    }

    /// Return the rotation part.
    Quaternion Rotation() const { return Quaternion(RotationMatrix()); }

    /// Return the scaling part.
    Vector3 Scale() const
    {
        return Vector3(
            sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
            sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
            sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
        );
    }

    /// Return the scaling part with the sign. Reference rotation matrix is required to avoid ambiguity.
    Vector3 SignedScale(const Matrix3& rotation) const
    {
        return Vector3(
            rotation.m00_ * m00_ + rotation.m10_ * m10_ + rotation.m20_ * m20_,
            rotation.m01_ * m01_ + rotation.m11_ * m11_ + rotation.m21_ * m21_,
            rotation.m02_ * m02_ + rotation.m12_ * m12_ + rotation.m22_ * m22_
        );
    }
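
    // Extraction sketch: Scale() measures the length of each basis column of
    // the upper 3x3, and RotationMatrix() divides those lengths back out, so
    // for a shear-free translation-rotation-scale matrix `m` (hypothetical):
    //
    //   Vector3 t = m.Translation();
    //   Quaternion r = m.Rotation();
    //   Vector3 s = m.Scale();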

    /// Return transposed.
    Matrix4 Transpose() const
    {
#ifdef ATOMIC_SSE
        __m128 m0 = _mm_loadu_ps(&m00_);
        __m128 m1 = _mm_loadu_ps(&m10_);
        __m128 m2 = _mm_loadu_ps(&m20_);
        __m128 m3 = _mm_loadu_ps(&m30_);
        _MM_TRANSPOSE4_PS(m0, m1, m2, m3);

        Matrix4 out;
        _mm_storeu_ps(&out.m00_, m0);
        _mm_storeu_ps(&out.m10_, m1);
        _mm_storeu_ps(&out.m20_, m2);
        _mm_storeu_ps(&out.m30_, m3);
        return out;
#else
        return Matrix4(
            m00_, m10_, m20_, m30_,
            m01_, m11_, m21_, m31_,
            m02_, m12_, m22_, m32_,
            m03_, m13_, m23_, m33_
        );
#endif
    }

    /// Test for equality with another matrix with epsilon.
    bool Equals(const Matrix4& rhs) const
    {
        const float* leftData = Data();
        const float* rightData = rhs.Data();

        for (unsigned i = 0; i < 16; ++i)
        {
            if (!Atomic::Equals(leftData[i], rightData[i]))
                return false;
        }

        return true;
    }

    /// Return decomposition to translation, rotation and scale.
    void Decompose(Vector3& translation, Quaternion& rotation, Vector3& scale) const;
    /// Return inverse.
    Matrix4 Inverse() const;

    /// Return float data.
    const float* Data() const { return &m00_; }

    /// Return matrix element.
    float Element(unsigned i, unsigned j) const { return Data()[i * 4 + j]; }

    /// Return matrix row.
    Vector4 Row(unsigned i) const { return Vector4(Element(i, 0), Element(i, 1), Element(i, 2), Element(i, 3)); }

    /// Return matrix column.
    Vector4 Column(unsigned j) const { return Vector4(Element(0, j), Element(1, j), Element(2, j), Element(3, j)); }

    /// Return as string.
    String ToString() const;
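
    // Note: storage is row-major (Element(i, j) == Data()[i * 4 + j]), so
    // Data() yields the 16 floats row by row; use Transpose() or
    // BulkTranspose() below when a consumer expects column-major layout.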

    float m00_;
    float m01_;
    float m02_;
    float m03_;
    float m10_;
    float m11_;
    float m12_;
    float m13_;
    float m20_;
    float m21_;
    float m22_;
    float m23_;
    float m30_;
    float m31_;
    float m32_;
    float m33_;

    /// Bulk transpose matrices.
    static void BulkTranspose(float* dest, const float* src, unsigned count)
    {
        for (unsigned i = 0; i < count; ++i)
        {
#ifdef ATOMIC_SSE
            __m128 m0 = _mm_loadu_ps(src);
            __m128 m1 = _mm_loadu_ps(src + 4);
            __m128 m2 = _mm_loadu_ps(src + 8);
            __m128 m3 = _mm_loadu_ps(src + 12);
            _MM_TRANSPOSE4_PS(m0, m1, m2, m3);
            _mm_storeu_ps(dest, m0);
            _mm_storeu_ps(dest + 4, m1);
            _mm_storeu_ps(dest + 8, m2);
            _mm_storeu_ps(dest + 12, m3);
#else
            dest[0] = src[0];
            dest[1] = src[4];
            dest[2] = src[8];
            dest[3] = src[12];
            dest[4] = src[1];
            dest[5] = src[5];
            dest[6] = src[9];
            dest[7] = src[13];
            dest[8] = src[2];
            dest[9] = src[6];
            dest[10] = src[10];
            dest[11] = src[14];
            dest[12] = src[3];
            dest[13] = src[7];
            dest[14] = src[11];
            dest[15] = src[15];
#endif
            dest += 16;
            src += 16;
        }
    }
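
    // Usage sketch (hypothetical buffers): transpose `count` tightly packed
    // 4x4 matrices in one pass, e.g. when a graphics API expects column-major
    // data:
    //
    //   Matrix4::BulkTranspose(destFloats, srcFloats, count);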

    /// Zero matrix.
    static const Matrix4 ZERO;
    /// Identity matrix.
    static const Matrix4 IDENTITY;
};

/// Multiply a 4x4 matrix with a scalar.
inline Matrix4 operator *(float lhs, const Matrix4& rhs) { return rhs * lhs; }

}
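
// Usage sketch (illustrative only; assumes the Quaternion and Vector3 types
// from the companion Math headers behave as in Urho3D, including the
// Quaternion(angle, axis) constructor and Quaternion::RotationMatrix()):
//
//   using namespace Atomic;
//
//   Matrix4 world = Matrix4::IDENTITY;
//   world.SetRotation(Quaternion(45.0f, Vector3(0.0f, 1.0f, 0.0f)).RotationMatrix());
//   world.SetTranslation(Vector3(2.0f, 0.0f, 0.0f));
//   Vector3 p = world * Vector3(1.0f, 0.0f, 0.0f);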