// Copyright (c) 2008-2023 the Urho3D project
// License: MIT

#pragma once

#include "../Math/Quaternion.h"
#include "../Math/Vector4.h"

#ifdef URHO3D_SSE
#include <emmintrin.h>
#endif

namespace Urho3D
{

class Matrix3x4;

/// 4x4 matrix for arbitrary linear transforms including projection.
class URHO3D_API Matrix4
{
public:
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:26495)
#endif

    /// Construct an identity matrix.
    Matrix4() noexcept
#ifndef URHO3D_SSE
       :m00_(1.0f),
        m01_(0.0f),
        m02_(0.0f),
        m03_(0.0f),
        m10_(0.0f),
        m11_(1.0f),
        m12_(0.0f),
        m13_(0.0f),
        m20_(0.0f),
        m21_(0.0f),
        m22_(1.0f),
        m23_(0.0f),
        m30_(0.0f),
        m31_(0.0f),
        m32_(0.0f),
        m33_(1.0f)
#endif
    {
#ifdef URHO3D_SSE
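        // Note: _mm_set_ps lists lanes from high (w) down to low (x), so each
        // store below writes one identity row with 1.0 on the diagonal lane.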
        _mm_storeu_ps(&m00_, _mm_set_ps(0.f, 0.f, 0.f, 1.f));
        _mm_storeu_ps(&m10_, _mm_set_ps(0.f, 0.f, 1.f, 0.f));
        _mm_storeu_ps(&m20_, _mm_set_ps(0.f, 1.f, 0.f, 0.f));
        _mm_storeu_ps(&m30_, _mm_set_ps(1.f, 0.f, 0.f, 0.f));
#endif
    }

    /// Copy-construct from another matrix.
    Matrix4(const Matrix4& matrix) noexcept
#ifndef URHO3D_SSE
       :m00_(matrix.m00_),
        m01_(matrix.m01_),
        m02_(matrix.m02_),
        m03_(matrix.m03_),
        m10_(matrix.m10_),
        m11_(matrix.m11_),
        m12_(matrix.m12_),
        m13_(matrix.m13_),
        m20_(matrix.m20_),
        m21_(matrix.m21_),
        m22_(matrix.m22_),
        m23_(matrix.m23_),
        m30_(matrix.m30_),
        m31_(matrix.m31_),
        m32_(matrix.m32_),
        m33_(matrix.m33_)
#endif
    {
#ifdef URHO3D_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&matrix.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&matrix.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&matrix.m20_));
        _mm_storeu_ps(&m30_, _mm_loadu_ps(&matrix.m30_));
#endif
    }

    /// Copy-construct from a 3x3 matrix and set the extra elements to identity.
    explicit Matrix4(const Matrix3& matrix) noexcept :
        m00_(matrix.m00_),
        m01_(matrix.m01_),
        m02_(matrix.m02_),
        m03_(0.0f),
        m10_(matrix.m10_),
        m11_(matrix.m11_),
        m12_(matrix.m12_),
        m13_(0.0f),
        m20_(matrix.m20_),
        m21_(matrix.m21_),
        m22_(matrix.m22_),
        m23_(0.0f),
        m30_(0.0f),
        m31_(0.0f),
        m32_(0.0f),
        m33_(1.0f)
    {
    }

    /// Construct from values.
    Matrix4(float v00, float v01, float v02, float v03,
            float v10, float v11, float v12, float v13,
            float v20, float v21, float v22, float v23,
            float v30, float v31, float v32, float v33) noexcept :
        m00_(v00),
        m01_(v01),
        m02_(v02),
        m03_(v03),
        m10_(v10),
        m11_(v11),
        m12_(v12),
        m13_(v13),
        m20_(v20),
        m21_(v21),
        m22_(v22),
        m23_(v23),
        m30_(v30),
        m31_(v31),
        m32_(v32),
        m33_(v33)
    {
    }

    /// Construct from a float array.
    explicit Matrix4(const float* data) noexcept
#ifndef URHO3D_SSE
       :m00_(data[0]),
        m01_(data[1]),
        m02_(data[2]),
        m03_(data[3]),
        m10_(data[4]),
        m11_(data[5]),
        m12_(data[6]),
        m13_(data[7]),
        m20_(data[8]),
        m21_(data[9]),
        m22_(data[10]),
        m23_(data[11]),
        m30_(data[12]),
        m31_(data[13]),
        m32_(data[14]),
        m33_(data[15])
#endif
    {
#ifdef URHO3D_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(data));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(data + 4));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(data + 8));
        _mm_storeu_ps(&m30_, _mm_loadu_ps(data + 12));
#endif
    }

#ifdef _MSC_VER
#pragma warning(pop)
#endif

    /// Assign from another matrix.
    Matrix4& operator =(const Matrix4& rhs) noexcept
    {
#ifdef URHO3D_SSE
        _mm_storeu_ps(&m00_, _mm_loadu_ps(&rhs.m00_));
        _mm_storeu_ps(&m10_, _mm_loadu_ps(&rhs.m10_));
        _mm_storeu_ps(&m20_, _mm_loadu_ps(&rhs.m20_));
        _mm_storeu_ps(&m30_, _mm_loadu_ps(&rhs.m30_));
#else
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = rhs.m03_;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = rhs.m13_;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = rhs.m23_;
        m30_ = rhs.m30_;
        m31_ = rhs.m31_;
        m32_ = rhs.m32_;
        m33_ = rhs.m33_;
#endif
        return *this;
    }

    /// Assign from a 3x3 matrix. Set the extra elements to identity.
    Matrix4& operator =(const Matrix3& rhs) noexcept
    {
        m00_ = rhs.m00_;
        m01_ = rhs.m01_;
        m02_ = rhs.m02_;
        m03_ = 0.0f;
        m10_ = rhs.m10_;
        m11_ = rhs.m11_;
        m12_ = rhs.m12_;
        m13_ = 0.0f;
        m20_ = rhs.m20_;
        m21_ = rhs.m21_;
        m22_ = rhs.m22_;
        m23_ = 0.0f;
        m30_ = 0.0f;
        m31_ = 0.0f;
        m32_ = 0.0f;
        m33_ = 1.0f;
        return *this;
    }

    /// Test for equality with another matrix without epsilon.
    bool operator ==(const Matrix4& rhs) const
    {
#ifdef URHO3D_SSE
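        // Compare all four rows lane-wise, AND the masks together, then fold
        // the high lanes into lane 0; the final 32-bit mask stays all-ones
        // (-1) only if every one of the 16 elements compared equal.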
        __m128 c0 = _mm_cmpeq_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_));
        __m128 c1 = _mm_cmpeq_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_));
        c0 = _mm_and_ps(c0, c1);
        __m128 c2 = _mm_cmpeq_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_));
        __m128 c3 = _mm_cmpeq_ps(_mm_loadu_ps(&m30_), _mm_loadu_ps(&rhs.m30_));
        c2 = _mm_and_ps(c2, c3);
        c0 = _mm_and_ps(c0, c2);
        __m128 hi = _mm_movehl_ps(c0, c0);
        c0 = _mm_and_ps(c0, hi);
        hi = _mm_shuffle_ps(c0, c0, _MM_SHUFFLE(1, 1, 1, 1));
        c0 = _mm_and_ps(c0, hi);
        return _mm_cvtsi128_si32(_mm_castps_si128(c0)) == -1;
#else
        const float* leftData = Data();
        const float* rightData = rhs.Data();

        for (unsigned i = 0; i < 16; ++i)
        {
            if (leftData[i] != rightData[i])
                return false;
        }

        return true;
#endif
    }

    /// Test for inequality with another matrix without epsilon.
    bool operator !=(const Matrix4& rhs) const { return !(*this == rhs); }

    /// Multiply a Vector3 which is assumed to represent position.
    Vector3 operator *(const Vector3& rhs) const
    {
#ifdef URHO3D_SSE
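        // Treat rhs as (x, y, z, 1), compute all four row dot products with an
        // unpack/add reduction, then divide by the resulting w component
        // (the perspective divide, matching the scalar branch below).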
        __m128 vec = _mm_set_ps(1.f, rhs.z_, rhs.y_, rhs.x_);
        __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
        __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
        __m128 t0 = _mm_unpacklo_ps(r0, r1);
        __m128 t1 = _mm_unpackhi_ps(r0, r1);
        t0 = _mm_add_ps(t0, t1);
        __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
        __m128 r3 = _mm_mul_ps(_mm_loadu_ps(&m30_), vec);
        __m128 t2 = _mm_unpacklo_ps(r2, r3);
        __m128 t3 = _mm_unpackhi_ps(r2, r3);
        t2 = _mm_add_ps(t2, t3);
        vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));
        vec = _mm_div_ps(vec, _mm_shuffle_ps(vec, vec, _MM_SHUFFLE(3, 3, 3, 3)));

        return Vector3(
            _mm_cvtss_f32(vec),
            _mm_cvtss_f32(_mm_shuffle_ps(vec, vec, _MM_SHUFFLE(1, 1, 1, 1))),
            _mm_cvtss_f32(_mm_movehl_ps(vec, vec)));
#else
        float invW = 1.0f / (m30_ * rhs.x_ + m31_ * rhs.y_ + m32_ * rhs.z_ + m33_);

        return Vector3(
            (m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_) * invW,
            (m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_) * invW,
            (m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_) * invW
        );
#endif
    }

    /// Multiply a Vector4.
    Vector4 operator *(const Vector4& rhs) const
    {
#ifdef URHO3D_SSE
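        // Same unpack/add reduction as above: after the two passes, lane i of
        // the sum holds row i dotted with rhs. No divide is applied here.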
        __m128 vec = _mm_loadu_ps(&rhs.x_);
        __m128 r0 = _mm_mul_ps(_mm_loadu_ps(&m00_), vec);
        __m128 r1 = _mm_mul_ps(_mm_loadu_ps(&m10_), vec);
        __m128 t0 = _mm_unpacklo_ps(r0, r1);
        __m128 t1 = _mm_unpackhi_ps(r0, r1);
        t0 = _mm_add_ps(t0, t1);
        __m128 r2 = _mm_mul_ps(_mm_loadu_ps(&m20_), vec);
        __m128 r3 = _mm_mul_ps(_mm_loadu_ps(&m30_), vec);
        __m128 t2 = _mm_unpacklo_ps(r2, r3);
        __m128 t3 = _mm_unpackhi_ps(r2, r3);
        t2 = _mm_add_ps(t2, t3);
        vec = _mm_add_ps(_mm_movelh_ps(t0, t2), _mm_movehl_ps(t2, t0));

        Vector4 ret;
        _mm_storeu_ps(&ret.x_, vec);
        return ret;
#else
        return Vector4(
            m00_ * rhs.x_ + m01_ * rhs.y_ + m02_ * rhs.z_ + m03_ * rhs.w_,
            m10_ * rhs.x_ + m11_ * rhs.y_ + m12_ * rhs.z_ + m13_ * rhs.w_,
            m20_ * rhs.x_ + m21_ * rhs.y_ + m22_ * rhs.z_ + m23_ * rhs.w_,
            m30_ * rhs.x_ + m31_ * rhs.y_ + m32_ * rhs.z_ + m33_ * rhs.w_
        );
#endif
    }

    /// Add a matrix.
    Matrix4 operator +(const Matrix4& rhs) const
    {
#ifdef URHO3D_SSE
        Matrix4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_add_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
        _mm_storeu_ps(&ret.m10_, _mm_add_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
        _mm_storeu_ps(&ret.m20_, _mm_add_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
        _mm_storeu_ps(&ret.m30_, _mm_add_ps(_mm_loadu_ps(&m30_), _mm_loadu_ps(&rhs.m30_)));
        return ret;
#else
        return Matrix4(
            m00_ + rhs.m00_,
            m01_ + rhs.m01_,
            m02_ + rhs.m02_,
            m03_ + rhs.m03_,
            m10_ + rhs.m10_,
            m11_ + rhs.m11_,
            m12_ + rhs.m12_,
            m13_ + rhs.m13_,
            m20_ + rhs.m20_,
            m21_ + rhs.m21_,
            m22_ + rhs.m22_,
            m23_ + rhs.m23_,
            m30_ + rhs.m30_,
            m31_ + rhs.m31_,
            m32_ + rhs.m32_,
            m33_ + rhs.m33_
        );
#endif
    }

    /// Subtract a matrix.
    Matrix4 operator -(const Matrix4& rhs) const
    {
#ifdef URHO3D_SSE
        Matrix4 ret;
        _mm_storeu_ps(&ret.m00_, _mm_sub_ps(_mm_loadu_ps(&m00_), _mm_loadu_ps(&rhs.m00_)));
        _mm_storeu_ps(&ret.m10_, _mm_sub_ps(_mm_loadu_ps(&m10_), _mm_loadu_ps(&rhs.m10_)));
        _mm_storeu_ps(&ret.m20_, _mm_sub_ps(_mm_loadu_ps(&m20_), _mm_loadu_ps(&rhs.m20_)));
        _mm_storeu_ps(&ret.m30_, _mm_sub_ps(_mm_loadu_ps(&m30_), _mm_loadu_ps(&rhs.m30_)));
        return ret;
#else
        return Matrix4(
            m00_ - rhs.m00_,
            m01_ - rhs.m01_,
            m02_ - rhs.m02_,
            m03_ - rhs.m03_,
            m10_ - rhs.m10_,
            m11_ - rhs.m11_,
            m12_ - rhs.m12_,
            m13_ - rhs.m13_,
            m20_ - rhs.m20_,
            m21_ - rhs.m21_,
            m22_ - rhs.m22_,
            m23_ - rhs.m23_,
            m30_ - rhs.m30_,
            m31_ - rhs.m31_,
            m32_ - rhs.m32_,
            m33_ - rhs.m33_
        );
#endif
    }

    /// Multiply with a scalar.
    Matrix4 operator *(float rhs) const
    {
#ifdef URHO3D_SSE
        Matrix4 ret;
        const __m128 mul = _mm_set1_ps(rhs);
        _mm_storeu_ps(&ret.m00_, _mm_mul_ps(_mm_loadu_ps(&m00_), mul));
        _mm_storeu_ps(&ret.m10_, _mm_mul_ps(_mm_loadu_ps(&m10_), mul));
        _mm_storeu_ps(&ret.m20_, _mm_mul_ps(_mm_loadu_ps(&m20_), mul));
        _mm_storeu_ps(&ret.m30_, _mm_mul_ps(_mm_loadu_ps(&m30_), mul));
        return ret;
#else
        return Matrix4(
            m00_ * rhs,
            m01_ * rhs,
            m02_ * rhs,
            m03_ * rhs,
            m10_ * rhs,
            m11_ * rhs,
            m12_ * rhs,
            m13_ * rhs,
            m20_ * rhs,
            m21_ * rhs,
            m22_ * rhs,
            m23_ * rhs,
            m30_ * rhs,
            m31_ * rhs,
            m32_ * rhs,
            m33_ * rhs
        );
#endif
    }

    /// Multiply a matrix.
    Matrix4 operator *(const Matrix4& rhs) const
    {
#ifdef URHO3D_SSE
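        // Classic SSE 4x4 multiply: broadcast each element of a left-hand row
        // and multiply it with the matching right-hand row, so every result
        // row is built as a linear combination of the rows of rhs.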
        Matrix4 out;

        __m128 r0 = _mm_loadu_ps(&rhs.m00_);
        __m128 r1 = _mm_loadu_ps(&rhs.m10_);
        __m128 r2 = _mm_loadu_ps(&rhs.m20_);
        __m128 r3 = _mm_loadu_ps(&rhs.m30_);

        __m128 l = _mm_loadu_ps(&m00_);
        __m128 t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        __m128 t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        __m128 t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        __m128 t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m00_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m10_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m10_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m20_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m20_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        l = _mm_loadu_ps(&m30_);
        t0 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(0, 0, 0, 0)), r0);
        t1 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(1, 1, 1, 1)), r1);
        t2 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(2, 2, 2, 2)), r2);
        t3 = _mm_mul_ps(_mm_shuffle_ps(l, l, _MM_SHUFFLE(3, 3, 3, 3)), r3);
        _mm_storeu_ps(&out.m30_, _mm_add_ps(_mm_add_ps(t0, t1), _mm_add_ps(t2, t3)));

        return out;
#else
        return Matrix4(
            m00_ * rhs.m00_ + m01_ * rhs.m10_ + m02_ * rhs.m20_ + m03_ * rhs.m30_,
            m00_ * rhs.m01_ + m01_ * rhs.m11_ + m02_ * rhs.m21_ + m03_ * rhs.m31_,
            m00_ * rhs.m02_ + m01_ * rhs.m12_ + m02_ * rhs.m22_ + m03_ * rhs.m32_,
            m00_ * rhs.m03_ + m01_ * rhs.m13_ + m02_ * rhs.m23_ + m03_ * rhs.m33_,
            m10_ * rhs.m00_ + m11_ * rhs.m10_ + m12_ * rhs.m20_ + m13_ * rhs.m30_,
            m10_ * rhs.m01_ + m11_ * rhs.m11_ + m12_ * rhs.m21_ + m13_ * rhs.m31_,
            m10_ * rhs.m02_ + m11_ * rhs.m12_ + m12_ * rhs.m22_ + m13_ * rhs.m32_,
            m10_ * rhs.m03_ + m11_ * rhs.m13_ + m12_ * rhs.m23_ + m13_ * rhs.m33_,
            m20_ * rhs.m00_ + m21_ * rhs.m10_ + m22_ * rhs.m20_ + m23_ * rhs.m30_,
            m20_ * rhs.m01_ + m21_ * rhs.m11_ + m22_ * rhs.m21_ + m23_ * rhs.m31_,
            m20_ * rhs.m02_ + m21_ * rhs.m12_ + m22_ * rhs.m22_ + m23_ * rhs.m32_,
            m20_ * rhs.m03_ + m21_ * rhs.m13_ + m22_ * rhs.m23_ + m23_ * rhs.m33_,
            m30_ * rhs.m00_ + m31_ * rhs.m10_ + m32_ * rhs.m20_ + m33_ * rhs.m30_,
            m30_ * rhs.m01_ + m31_ * rhs.m11_ + m32_ * rhs.m21_ + m33_ * rhs.m31_,
            m30_ * rhs.m02_ + m31_ * rhs.m12_ + m32_ * rhs.m22_ + m33_ * rhs.m32_,
            m30_ * rhs.m03_ + m31_ * rhs.m13_ + m32_ * rhs.m23_ + m33_ * rhs.m33_
        );
#endif
    }

    /// Multiply with a 3x4 matrix.
    Matrix4 operator *(const Matrix3x4& rhs) const;

    /// Set translation elements.
    void SetTranslation(const Vector3& translation)
    {
        m03_ = translation.x_;
        m13_ = translation.y_;
        m23_ = translation.z_;
    }

    /// Set rotation elements from a 3x3 matrix.
    void SetRotation(const Matrix3& rotation)
    {
        m00_ = rotation.m00_;
        m01_ = rotation.m01_;
        m02_ = rotation.m02_;
        m10_ = rotation.m10_;
        m11_ = rotation.m11_;
        m12_ = rotation.m12_;
        m20_ = rotation.m20_;
        m21_ = rotation.m21_;
        m22_ = rotation.m22_;
    }

    /// Set scaling elements.
    void SetScale(const Vector3& scale)
    {
        m00_ = scale.x_;
        m11_ = scale.y_;
        m22_ = scale.z_;
    }

    /// Set uniform scaling elements.
    void SetScale(float scale)
    {
        m00_ = scale;
        m11_ = scale;
        m22_ = scale;
    }

    /// Return the combined rotation and scaling matrix.
    Matrix3 ToMatrix3() const
    {
        return Matrix3(
            m00_,
            m01_,
            m02_,
            m10_,
            m11_,
            m12_,
            m20_,
            m21_,
            m22_
        );
    }

    /// Return the rotation matrix with scaling removed.
    Matrix3 RotationMatrix() const
    {
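        // The scale factors are the lengths of the basis (column) vectors;
        // dividing them out leaves a pure rotation.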
        Vector3 invScale(
            1.0f / sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
            1.0f / sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
            1.0f / sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
        );

        return ToMatrix3().Scaled(invScale);
    }

    /// Return the translation part.
    Vector3 Translation() const
    {
        return Vector3(
            m03_,
            m13_,
            m23_
        );
    }

    /// Return the rotation part.
    Quaternion Rotation() const { return Quaternion(RotationMatrix()); }

    /// Return the scaling part.
    Vector3 Scale() const
    {
        return Vector3(
            sqrtf(m00_ * m00_ + m10_ * m10_ + m20_ * m20_),
            sqrtf(m01_ * m01_ + m11_ * m11_ + m21_ * m21_),
            sqrtf(m02_ * m02_ + m12_ * m12_ + m22_ * m22_)
        );
    }

    /// Return the scaling part with the sign. Reference rotation matrix is required to avoid ambiguity.
    Vector3 SignedScale(const Matrix3& rotation) const
    {
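        // Dot each basis column against the corresponding column of the
        // reference rotation: the magnitude is the scale, and the sign goes
        // negative when that axis is mirrored relative to the reference.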
        return Vector3(
            rotation.m00_ * m00_ + rotation.m10_ * m10_ + rotation.m20_ * m20_,
            rotation.m01_ * m01_ + rotation.m11_ * m11_ + rotation.m21_ * m21_,
            rotation.m02_ * m02_ + rotation.m12_ * m12_ + rotation.m22_ * m22_
        );
    }

    /// Return transposed.
    Matrix4 Transpose() const
    {
#ifdef URHO3D_SSE
        __m128 m0 = _mm_loadu_ps(&m00_);
        __m128 m1 = _mm_loadu_ps(&m10_);
        __m128 m2 = _mm_loadu_ps(&m20_);
        __m128 m3 = _mm_loadu_ps(&m30_);
        _MM_TRANSPOSE4_PS(m0, m1, m2, m3); // NOLINT(modernize-use-bool-literals)
        Matrix4 out;
        _mm_storeu_ps(&out.m00_, m0);
        _mm_storeu_ps(&out.m10_, m1);
        _mm_storeu_ps(&out.m20_, m2);
        _mm_storeu_ps(&out.m30_, m3);
        return out;
#else
        return Matrix4(
            m00_,
            m10_,
            m20_,
            m30_,
            m01_,
            m11_,
            m21_,
            m31_,
            m02_,
            m12_,
            m22_,
            m32_,
            m03_,
            m13_,
            m23_,
            m33_
        );
#endif
    }

    /// Test for equality with another matrix with epsilon.
    bool Equals(const Matrix4& rhs) const
    {
        const float* leftData = Data();
        const float* rightData = rhs.Data();

        for (unsigned i = 0; i < 16; ++i)
        {
            if (!Urho3D::Equals(leftData[i], rightData[i]))
                return false;
        }

        return true;
    }

    /// Return decomposition to translation, rotation and scale.
    void Decompose(Vector3& translation, Quaternion& rotation, Vector3& scale) const;

    /// Return inverse.
    Matrix4 Inverse() const;

    /// Return float data.
    const float* Data() const { return &m00_; }

    /// Return matrix element.
    float Element(unsigned i, unsigned j) const { return Data()[i * 4 + j]; }

    /// Return matrix row.
    Vector4 Row(unsigned i) const { return Vector4(Element(i, 0), Element(i, 1), Element(i, 2), Element(i, 3)); }

    /// Return matrix column.
    Vector4 Column(unsigned j) const { return Vector4(Element(0, j), Element(1, j), Element(2, j), Element(3, j)); }

    /// Return whether any element is NaN.
    bool IsNaN() const
    {
        const float* data = Data();
        for (unsigned i = 0; i < 16; ++i)
        {
            if (Urho3D::IsNaN(data[i]))
                return true;
        }
        return false;
    }

    /// Return whether any element is Inf.
    bool IsInf() const
    {
        const float* data = Data();
        for (unsigned i = 0; i < 16; ++i)
        {
            if (Urho3D::IsInf(data[i]))
                return true;
        }
        return false;
    }

    /// Return as string.
    String ToString() const;

    float m00_;
    float m01_;
    float m02_;
    float m03_;
    float m10_;
    float m11_;
    float m12_;
    float m13_;
    float m20_;
    float m21_;
    float m22_;
    float m23_;
    float m30_;
    float m31_;
    float m32_;
    float m33_;

    /// Bulk transpose matrices.
    static void BulkTranspose(float* dest, const float* src, unsigned count)
    {
        for (unsigned i = 0; i < count; ++i)
        {
#ifdef URHO3D_SSE
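            // Transpose one 4x4 block entirely in registers; _MM_TRANSPOSE4_PS
            // rewrites m0..m3 in place as the transposed rows.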
            __m128 m0 = _mm_loadu_ps(src);
            __m128 m1 = _mm_loadu_ps(src + 4);
            __m128 m2 = _mm_loadu_ps(src + 8);
            __m128 m3 = _mm_loadu_ps(src + 12);
            _MM_TRANSPOSE4_PS(m0, m1, m2, m3); // NOLINT(modernize-use-bool-literals)
            _mm_storeu_ps(dest, m0);
            _mm_storeu_ps(dest + 4, m1);
            _mm_storeu_ps(dest + 8, m2);
            _mm_storeu_ps(dest + 12, m3);
#else
            dest[0] = src[0];
            dest[1] = src[4];
            dest[2] = src[8];
            dest[3] = src[12];
            dest[4] = src[1];
            dest[5] = src[5];
            dest[6] = src[9];
            dest[7] = src[13];
            dest[8] = src[2];
            dest[9] = src[6];
            dest[10] = src[10];
            dest[11] = src[14];
            dest[12] = src[3];
            dest[13] = src[7];
            dest[14] = src[11];
            dest[15] = src[15];
#endif
            dest += 16;
            src += 16;
        }
    }

    /// Zero matrix.
    static const Matrix4 ZERO;
    /// Identity matrix.
    static const Matrix4 IDENTITY;
};

/// Multiply a 4x4 matrix with a scalar.
inline Matrix4 operator *(float lhs, const Matrix4& rhs) { return rhs * lhs; }
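// Usage sketch (illustrative only, not part of the header): matrices are
// row-major and compose onto column vectors (v' = M * v), and operator* on a
// Vector3 performs the perspective divide. The variable names below are
// hypothetical.
//
//     Matrix4 viewProj = proj * view;            // combined transform
//     Vector3 clipPos = viewProj * worldPos;     // projects and divides by w
//     Vector4 homogeneous = viewProj * Vector4(worldPos, 1.0f); // no divide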

}