Mat.h 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654
  1. // Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma once
  6. #include <AnKi/Math/Common.h>
  7. #include <AnKi/Math/Vec.h>
  8. namespace anki {
  9. /// @addtogroup math
  10. /// @{
  11. /// Matrix type.
  12. /// @tparam T The scalar type. Eg float.
  13. /// @tparam J The number of rows.
  14. /// @tparam I The number of columns.
  15. template<typename T, U J, U I>
  16. class alignas(MathSimd<T, I>::ALIGNMENT) TMat
  17. {
  18. public:
  19. using Scalar = T;
  20. using Simd = typename MathSimd<T, I>::Type;
  21. #if ANKI_COMPILER_GCC_COMPATIBLE
  22. # pragma GCC diagnostic push
  23. # pragma GCC diagnostic ignored "-Wignored-attributes"
  24. #endif
  25. using SimdArray = Array<Simd, J>;
  26. #if ANKI_COMPILER_GCC_COMPATIBLE
  27. # pragma GCC diagnostic pop
  28. #endif
  29. using RowVec = TVec<T, I>;
  30. using ColumnVec = TVec<T, J>;
  31. static constexpr U ROW_SIZE = J; ///< Number of rows
  32. static constexpr U COLUMN_SIZE = I; ///< Number of columns
  33. static constexpr U SIZE = J * I; ///< Number of total elements
  34. static constexpr Bool HAS_SIMD = I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
  35. static constexpr Bool HAS_MAT4_SIMD = J == 4 && I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
  36. static constexpr Bool HAS_MAT3X4_SIMD = J == 3 && I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
  37. /// @name Constructors
  38. /// @{
  39. TMat()
  40. {
  41. }
  42. /// Copy.
  43. TMat(ANKI_ENABLE_ARG(const TMat&, !HAS_SIMD) b)
  44. {
  45. for(U i = 0; i < N; i++)
  46. {
  47. m_arr1[i] = b.m_arr1[i];
  48. }
  49. }
  50. /// Copy.
  51. TMat(ANKI_ENABLE_ARG(const TMat&, HAS_SIMD) b)
  52. {
  53. for(U i = 0; i < J; i++)
  54. {
  55. m_simd[i] = b.m_simd[i];
  56. }
  57. }
  58. ANKI_ENABLE_METHOD(!HAS_SIMD)
  59. explicit TMat(const T f)
  60. {
  61. for(T& x : m_arr1)
  62. {
  63. x = f;
  64. }
  65. }
  66. #if ANKI_ENABLE_SIMD
  67. ANKI_ENABLE_METHOD(HAS_SIMD)
  68. explicit TMat(const T f)
  69. {
  70. for(U i = 0; i < J; i++)
  71. {
  72. # if ANKI_SIMD_SSE
  73. m_simd[i] = _mm_set1_ps(f);
  74. # else
  75. m_simd[i] = {f, f, f, f};
  76. # endif
  77. }
  78. }
  79. #endif
  80. explicit TMat(const T arr[])
  81. {
  82. for(U i = 0; i < N; i++)
  83. {
  84. m_arr1[i] = arr[i];
  85. }
  86. }
  87. // 3x3 specific constructors
  88. ANKI_ENABLE_METHOD(J == 3 && I == 3)
  89. TMat(T m00, T m01, T m02, T m10, T m11, T m12, T m20, T m21, T m22)
  90. {
  91. auto& m = *this;
  92. m(0, 0) = m00;
  93. m(0, 1) = m01;
  94. m(0, 2) = m02;
  95. m(1, 0) = m10;
  96. m(1, 1) = m11;
  97. m(1, 2) = m12;
  98. m(2, 0) = m20;
  99. m(2, 1) = m21;
  100. m(2, 2) = m22;
  101. }
  102. ANKI_ENABLE_METHOD(J == 3 && I == 3)
  103. explicit TMat(const TQuat<T>& q)
  104. {
  105. setRotationPart(q);
  106. }
  107. ANKI_ENABLE_METHOD(J == 3 && I == 3)
  108. explicit TMat(const TEuler<T>& e)
  109. {
  110. setRotationPart(e);
  111. }
  112. ANKI_ENABLE_METHOD(J == 3 && I == 3)
  113. explicit TMat(const TAxisang<T>& axisang)
  114. {
  115. setRotationPart(axisang);
  116. }
  117. // 4x4 specific constructors
  118. ANKI_ENABLE_METHOD(J == 4 && I == 4)
  119. TMat(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23, T m30, T m31, T m32, T m33)
  120. {
  121. auto& m = *this;
  122. m(0, 0) = m00;
  123. m(0, 1) = m01;
  124. m(0, 2) = m02;
  125. m(0, 3) = m03;
  126. m(1, 0) = m10;
  127. m(1, 1) = m11;
  128. m(1, 2) = m12;
  129. m(1, 3) = m13;
  130. m(2, 0) = m20;
  131. m(2, 1) = m21;
  132. m(2, 2) = m22;
  133. m(2, 3) = m23;
  134. m(3, 0) = m30;
  135. m(3, 1) = m31;
  136. m(3, 2) = m32;
  137. m(3, 3) = m33;
  138. }
  139. ANKI_ENABLE_METHOD(J == 4 && I == 4)
  140. TMat(const TVec<T, 4>& translation, const TMat<T, 3, 3>& rotation, const T scale = T(1))
  141. {
  142. if(isZero<T>(scale - T(1)))
  143. {
  144. setRotationPart(rotation);
  145. }
  146. else
  147. {
  148. setRotationPart(rotation * scale);
  149. }
  150. setTranslationPart(translation);
  151. auto& m = *this;
  152. m(3, 0) = m(3, 1) = m(3, 2) = T(0);
  153. }
  154. ANKI_ENABLE_METHOD(J == 4 && I == 4)
  155. explicit TMat(const TTransform<T>& t)
  156. : TMat(t.getOrigin().xyz1(), t.getRotation().getRotationPart(), t.getScale())
  157. {
  158. }
  159. // 3x4 specific constructors
  160. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  161. TMat(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23)
  162. {
  163. auto& m = *this;
  164. m(0, 0) = m00;
  165. m(0, 1) = m01;
  166. m(0, 2) = m02;
  167. m(0, 3) = m03;
  168. m(1, 0) = m10;
  169. m(1, 1) = m11;
  170. m(1, 2) = m12;
  171. m(1, 3) = m13;
  172. m(2, 0) = m20;
  173. m(2, 1) = m21;
  174. m(2, 2) = m22;
  175. m(2, 3) = m23;
  176. }
  177. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  178. explicit TMat(const TMat<T, 4, 4>& m4)
  179. {
  180. auto& m = *this;
  181. m(0, 0) = m4(0, 0);
  182. m(0, 1) = m4(0, 1);
  183. m(0, 2) = m4(0, 2);
  184. m(0, 3) = m4(0, 3);
  185. m(1, 0) = m4(1, 0);
  186. m(1, 1) = m4(1, 1);
  187. m(1, 2) = m4(1, 2);
  188. m(1, 3) = m4(1, 3);
  189. m(2, 0) = m4(2, 0);
  190. m(2, 1) = m4(2, 1);
  191. m(2, 2) = m4(2, 2);
  192. m(2, 3) = m4(2, 3);
  193. }
  194. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  195. explicit TMat(const TVec<T, 3>& translation, const TMat<T, 3, 3>& rotation, const T scale = T(1))
  196. {
  197. if(isZero<T>(scale - T(1)))
  198. {
  199. setRotationPart(rotation);
  200. }
  201. else
  202. {
  203. setRotationPart(rotation * scale);
  204. }
  205. setTranslationPart(translation);
  206. }
  207. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  208. explicit TMat(const TVec<T, 3>& translation, const TQuat<T>& q, const T scale = T(1))
  209. : TMat(translation, TMat<T, 3, 3>(q), scale)
  210. {
  211. }
  212. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  213. explicit TMat(const TVec<T, 3>& translation, const TEuler<T>& b, const T scale = T(1))
  214. : TMat(translation, TMat<T, 3, 3>(b), scale)
  215. {
  216. }
  217. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  218. explicit TMat(const TVec<T, 3>& translation, const TAxisang<T>& b, const T scale = T(1))
  219. : TMat(translation, TMat<T, 3, 3>(b), scale)
  220. {
  221. }
  222. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  223. explicit TMat(const TTransform<T>& t)
  224. : TMat(t.getOrigin().xyz(), t.getRotation().getRotationPart(), t.getScale())
  225. {
  226. }
  227. /// @}
  228. /// @name Accessors
  229. /// @{
  230. T& operator()(const U j, const U i)
  231. {
  232. return m_arr2[j][i];
  233. }
  234. T operator()(const U j, const U i) const
  235. {
  236. return m_arr2[j][i];
  237. }
  238. T& operator[](const U n)
  239. {
  240. return m_arr1[n];
  241. }
  242. T operator[](const U n) const
  243. {
  244. return m_arr1[n];
  245. }
  246. /// @}
  247. /// @name Operators with same type
  248. /// @{
  249. /// Copy.
  250. TMat& operator=(ANKI_ENABLE_ARG(const TMat&, !HAS_SIMD) b)
  251. {
  252. for(U n = 0; n < N; n++)
  253. {
  254. m_arr1[n] = b.m_arr1[n];
  255. }
  256. return *this;
  257. }
  258. /// Copy.
  259. TMat& operator=(ANKI_ENABLE_ARG(const TMat&, HAS_SIMD) b)
  260. {
  261. for(U i = 0; i < J; i++)
  262. {
  263. m_simd[i] = b.m_simd[i];
  264. }
  265. return *this;
  266. }
  267. ANKI_ENABLE_METHOD(!HAS_SIMD)
  268. TMat operator+(const TMat& b) const
  269. {
  270. TMat c;
  271. for(U n = 0; n < N; n++)
  272. {
  273. c.m_arr1[n] = m_arr1[n] + b.m_arr1[n];
  274. }
  275. return c;
  276. }
  277. #if ANKI_ENABLE_SIMD
  278. ANKI_ENABLE_METHOD(HAS_SIMD)
  279. TMat operator+(const TMat& b) const
  280. {
  281. TMat c;
  282. for(U i = 0; i < J; i++)
  283. {
  284. # if ANKI_SIMD_SSE
  285. c.m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
  286. # else
  287. c.m_simd[i] = m_simd[i] + b.m_simd[i];
  288. # endif
  289. }
  290. return c;
  291. }
  292. #endif
  293. ANKI_ENABLE_METHOD(!HAS_SIMD)
  294. TMat& operator+=(const TMat& b)
  295. {
  296. for(U n = 0; n < N; n++)
  297. {
  298. m_arr1[n] += b.m_arr1[n];
  299. }
  300. return *this;
  301. }
  302. #if ANKI_ENABLE_SIMD
  303. ANKI_ENABLE_METHOD(HAS_SIMD)
  304. TMat& operator+=(const TMat& b)
  305. {
  306. for(U i = 0; i < J; i++)
  307. {
  308. # if ANKI_SIMD_SSE
  309. m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
  310. # else
  311. m_simd[i] += b.m_simd[i];
  312. # endif
  313. }
  314. return *this;
  315. }
  316. #endif
  317. ANKI_ENABLE_METHOD(!HAS_SIMD)
  318. TMat operator-(const TMat& b) const
  319. {
  320. TMat c;
  321. for(U n = 0; n < N; n++)
  322. {
  323. c.m_arr1[n] = m_arr1[n] - b.m_arr1[n];
  324. }
  325. return c;
  326. }
  327. #if ANKI_ENABLE_SIMD
  328. ANKI_ENABLE_METHOD(HAS_SIMD)
  329. TMat operator-(const TMat& b) const
  330. {
  331. TMat c;
  332. for(U i = 0; i < J; i++)
  333. {
  334. # if ANKI_SIMD_SSE
  335. c.m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
  336. # else
  337. c.m_simd[i] = m_simd[i] - b.m_simd[i];
  338. # endif
  339. }
  340. return c;
  341. }
  342. #endif
  343. ANKI_ENABLE_METHOD(!HAS_SIMD)
  344. TMat& operator-=(const TMat& b)
  345. {
  346. for(U n = 0; n < N; n++)
  347. {
  348. m_arr1[n] -= b.m_arr1[n];
  349. }
  350. return *this;
  351. }
  352. #if ANKI_ENABLE_SIMD
  353. ANKI_ENABLE_METHOD(HAS_SIMD)
  354. TMat& operator-=(const TMat& b)
  355. {
  356. for(U i = 0; i < J; i++)
  357. {
  358. # if ANKI_SIMD_SSE
  359. m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
  360. # else
  361. m_simd[i] -= b.m_simd[i];
  362. # endif
  363. }
  364. return *this;
  365. }
  366. #endif
  367. ANKI_ENABLE_METHOD(J == I && !HAS_MAT4_SIMD)
  368. TMat operator*(const TMat& b) const
  369. {
  370. TMat out;
  371. const TMat& a = *this;
  372. for(U j = 0; j < J; j++)
  373. {
  374. for(U i = 0; i < I; i++)
  375. {
  376. out(j, i) = T(0);
  377. for(U k = 0; k < I; k++)
  378. {
  379. out(j, i) += a(j, k) * b(k, i);
  380. }
  381. }
  382. }
  383. return out;
  384. }
  385. #if ANKI_ENABLE_SIMD
  386. ANKI_ENABLE_METHOD(HAS_MAT4_SIMD)
  387. TMat operator*(const TMat& b) const
  388. {
  389. TMat out;
  390. const auto& m = *this;
  391. for(U i = 0; i < 4; i++)
  392. {
  393. # if ANKI_SIMD_SSE
  394. __m128 t1, t2;
  395. t1 = _mm_set1_ps(m(i, 0));
  396. t2 = _mm_mul_ps(b.m_simd[0], t1);
  397. t1 = _mm_set1_ps(m(i, 1));
  398. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[1], t1), t2);
  399. t1 = _mm_set1_ps(m(i, 2));
  400. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[2], t1), t2);
  401. t1 = _mm_set1_ps(m(i, 3));
  402. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[3], t1), t2);
  403. out.m_simd[i] = t2;
  404. # else
  405. float32x4_t t1, t2;
  406. t1 = vmovq_n_f32(m(i, 0));
  407. t2 = b.m_simd[0] * t1;
  408. t1 = vmovq_n_f32(m(i, 1));
  409. t2 = b.m_simd[1] * t1 + t2;
  410. t1 = vmovq_n_f32(m(i, 2));
  411. t2 = b.m_simd[2] * t1 + t2;
  412. t1 = vmovq_n_f32(m(i, 3));
  413. t2 = b.m_simd[3] * t1 + t2;
  414. out.m_simd[i] = t2;
  415. # endif
  416. }
  417. return out;
  418. }
  419. #endif
  420. TMat& operator*=(const TMat& b)
  421. {
  422. (*this) = (*this) * b;
  423. return *this;
  424. }
  425. Bool operator==(const TMat& b) const
  426. {
  427. for(U i = 0; i < N; i++)
  428. {
  429. if(!isZero<T>(m_arr1[i] - b.m_arr1[i]))
  430. {
  431. return false;
  432. }
  433. }
  434. return true;
  435. }
  436. Bool operator!=(const TMat& b) const
  437. {
  438. for(U i = 0; i < N; i++)
  439. {
  440. if(!isZero<T>(m_arr1[i] - b.m_arr1[i]))
  441. {
  442. return true;
  443. }
  444. }
  445. return false;
  446. }
  447. /// @}
  448. /// @name Operators with T
  449. /// @{
  450. TMat operator+(const T f) const
  451. {
  452. TMat out;
  453. for(U i = 0; i < N; i++)
  454. {
  455. out.m_arr1[i] = m_arr1[i] + f;
  456. }
  457. return out;
  458. }
  459. TMat& operator+=(const T f)
  460. {
  461. for(U i = 0; i < N; i++)
  462. {
  463. m_arr1[i] += f;
  464. }
  465. return *this;
  466. }
  467. TMat operator-(const T f) const
  468. {
  469. TMat out;
  470. for(U i = 0; i < N; i++)
  471. {
  472. out.m_arr1[i] = m_arr1[i] - f;
  473. }
  474. return out;
  475. }
  476. TMat& operator-=(const T f)
  477. {
  478. for(U i = 0; i < N; i++)
  479. {
  480. m_arr1[i] -= f;
  481. }
  482. return *this;
  483. }
  484. TMat operator*(const T f) const
  485. {
  486. TMat out;
  487. for(U i = 0; i < N; i++)
  488. {
  489. out.m_arr1[i] = m_arr1[i] * f;
  490. }
  491. return out;
  492. }
  493. TMat& operator*=(const T f)
  494. {
  495. for(U i = 0; i < N; i++)
  496. {
  497. m_arr1[i] *= f;
  498. }
  499. return *this;
  500. }
  501. TMat operator/(const T f) const
  502. {
  503. ANKI_ASSERT(f != T(0));
  504. TMat out;
  505. for(U i = 0; i < N; i++)
  506. {
  507. out.m_arr1[i] = m_arr1[i] / f;
  508. }
  509. return out;
  510. }
  511. TMat& operator/=(const T f)
  512. {
  513. ANKI_ASSERT(f != T(0));
  514. for(U i = 0; i < N; i++)
  515. {
  516. m_arr1[i] /= f;
  517. }
  518. return *this;
  519. }
  520. /// @}
  521. /// @name Operators with other types
  522. /// @{
  523. ANKI_ENABLE_METHOD(!HAS_SIMD)
  524. ColumnVec operator*(const RowVec& v) const
  525. {
  526. const TMat& m = *this;
  527. ColumnVec out;
  528. for(U j = 0; j < J; j++)
  529. {
  530. T sum = T(0);
  531. for(U i = 0; i < I; i++)
  532. {
  533. sum += m(j, i) * v[i];
  534. }
  535. out[j] = sum;
  536. }
  537. return out;
  538. }
  539. #if ANKI_ENABLE_SIMD
  540. ANKI_ENABLE_METHOD(HAS_SIMD)
  541. ColumnVec operator*(const RowVec& v) const
  542. {
  543. ColumnVec out;
  544. # if ANKI_SIMD_SSE
  545. for(U i = 0; i < J; i++)
  546. {
  547. _mm_store_ss(&out[i], _mm_dp_ps(m_simd[i], v.getSimd(), 0xF1));
  548. }
  549. # else
  550. for(U i = 0; i < J; i++)
  551. {
  552. out[i] = RowVec(m_simd[i]).dot(v);
  553. }
  554. # endif
  555. return out;
  556. }
  557. #endif
  558. /// @}
  559. /// @name Other
  560. /// @{
  561. ANKI_ENABLE_METHOD(!HAS_SIMD)
  562. void setRow(const U j, const RowVec& v)
  563. {
  564. for(U i = 0; i < I; i++)
  565. {
  566. m_arr2[j][i] = v[i];
  567. }
  568. }
  569. ANKI_ENABLE_METHOD(HAS_SIMD)
  570. void setRow(const U j, const RowVec& v)
  571. {
  572. m_simd[j] = v.getSimd();
  573. }
  574. void setRows(const RowVec& a, const RowVec& b, const RowVec& c)
  575. {
  576. setRow(0, a);
  577. setRow(1, b);
  578. setRow(2, c);
  579. }
  580. ANKI_ENABLE_METHOD(J > 3)
  581. void setRows(const RowVec& a, const RowVec& b, const RowVec& c, const RowVec& d)
  582. {
  583. setRows(a, b, c);
  584. setRow(3, d);
  585. }
  586. RowVec getRow(const U j) const
  587. {
  588. RowVec out;
  589. for(U i = 0; i < I; i++)
  590. {
  591. out[i] = m_arr2[j][i];
  592. }
  593. return out;
  594. }
  595. void getRows(RowVec& a, RowVec& b, RowVec& c) const
  596. {
  597. a = getRow(0);
  598. b = getRow(1);
  599. c = getRow(2);
  600. }
  601. void getRows(RowVec& a, RowVec& b, RowVec& c, RowVec& d) const
  602. {
  603. static_assert(J > 3, "Wrong matrix");
  604. getRows(a, b, c);
  605. d = getRow(3);
  606. }
  607. void setColumn(const U i, const ColumnVec& v)
  608. {
  609. for(U j = 0; j < J; j++)
  610. {
  611. m_arr2[j][i] = v[j];
  612. }
  613. }
  614. void setColumns(const ColumnVec& a, const ColumnVec& b, const ColumnVec& c)
  615. {
  616. setColumn(0, a);
  617. setColumn(1, b);
  618. setColumn(2, c);
  619. }
  620. void setColumns(const ColumnVec& a, const ColumnVec& b, const ColumnVec& c, const ColumnVec& d)
  621. {
  622. static_assert(I > 3, "Check column number");
  623. setColumns(a, b, c);
  624. setColumn(3, d);
  625. }
  626. ColumnVec getColumn(const U i) const
  627. {
  628. ColumnVec out;
  629. for(U j = 0; j < J; j++)
  630. {
  631. out[j] = m_arr2[j][i];
  632. }
  633. return out;
  634. }
  635. void getColumns(ColumnVec& a, ColumnVec& b, ColumnVec& c) const
  636. {
  637. a = getColumn(0);
  638. b = getColumn(1);
  639. c = getColumn(2);
  640. }
  641. void getColumns(ColumnVec& a, ColumnVec& b, ColumnVec& c, ColumnVec& d) const
  642. {
  643. static_assert(I > 3, "Check column number");
  644. getColumns(a, b, c);
  645. d = getColumn(3);
  646. }
  647. /// Get 1st column
  648. ColumnVec getXAxis() const
  649. {
  650. return getColumn(0);
  651. }
  652. /// Get 2nd column
  653. ColumnVec getYAxis() const
  654. {
  655. return getColumn(1);
  656. }
  657. /// Get 3rd column
  658. ColumnVec getZAxis() const
  659. {
  660. return getColumn(2);
  661. }
  662. /// Set 1st column
  663. void setXAxis(const ColumnVec& v)
  664. {
  665. setColumn(0, v);
  666. }
  667. /// Set 2nd column
  668. void setYAxis(const ColumnVec& v)
  669. {
  670. setColumn(1, v);
  671. }
  672. /// Set 3rd column
  673. void setZAxis(const ColumnVec& v)
  674. {
  675. setColumn(2, v);
  676. }
  677. void setRotationX(const T rad)
  678. {
  679. TMat& m = *this;
  680. T sintheta, costheta;
  681. sinCos(rad, sintheta, costheta);
  682. m(0, 0) = T(1);
  683. m(0, 1) = T(0);
  684. m(0, 2) = T(0);
  685. m(1, 0) = T(0);
  686. m(1, 1) = costheta;
  687. m(1, 2) = -sintheta;
  688. m(2, 0) = T(0);
  689. m(2, 1) = sintheta;
  690. m(2, 2) = costheta;
  691. }
  692. void setRotationY(const T rad)
  693. {
  694. TMat& m = *this;
  695. T sintheta, costheta;
  696. sinCos(rad, sintheta, costheta);
  697. m(0, 0) = costheta;
  698. m(0, 1) = T(0);
  699. m(0, 2) = sintheta;
  700. m(1, 0) = T(0);
  701. m(1, 1) = T(1);
  702. m(1, 2) = T(0);
  703. m(2, 0) = -sintheta;
  704. m(2, 1) = T(0);
  705. m(2, 2) = costheta;
  706. }
  707. void setRotationZ(const T rad)
  708. {
  709. TMat& m = *this;
  710. T sintheta, costheta;
  711. sinCos(rad, sintheta, costheta);
  712. m(0, 0) = costheta;
  713. m(0, 1) = -sintheta;
  714. m(0, 2) = T(0);
  715. m(1, 0) = sintheta;
  716. m(1, 1) = costheta;
  717. m(1, 2) = T(0);
  718. m(2, 0) = T(0);
  719. m(2, 1) = T(0);
  720. m(2, 2) = T(1);
  721. }
  722. /// It rotates "this" in the axis defined by the rotation AND not the
  723. /// world axis
  724. void rotateXAxis(const T rad)
  725. {
  726. TMat& m = *this;
  727. // If we analize the mat3 we can extract the 3 unit vectors rotated by the mat3. The 3 rotated vectors are in
  728. // mat's columns. This means that: mat3.colomn[0] == i * mat3. rotateXAxis() rotates rad angle not from i
  729. // vector (aka x axis) but from the vector from colomn 0
  730. // NOTE: See the clean code from < r664
  731. T sina, cosa;
  732. sinCos(rad, sina, cosa);
  733. // zAxis = zAxis*cosa - yAxis*sina;
  734. m(0, 2) = m(0, 2) * cosa - m(0, 1) * sina;
  735. m(1, 2) = m(1, 2) * cosa - m(1, 1) * sina;
  736. m(2, 2) = m(2, 2) * cosa - m(2, 1) * sina;
  737. // zAxis.normalize();
  738. T len = sqrt(m(0, 2) * m(0, 2) + m(1, 2) * m(1, 2) + m(2, 2) * m(2, 2));
  739. m(0, 2) /= len;
  740. m(1, 2) /= len;
  741. m(2, 2) /= len;
  742. // yAxis = zAxis * xAxis;
  743. m(0, 1) = m(1, 2) * m(2, 0) - m(2, 2) * m(1, 0);
  744. m(1, 1) = m(2, 2) * m(0, 0) - m(0, 2) * m(2, 0);
  745. m(2, 1) = m(0, 2) * m(1, 0) - m(1, 2) * m(0, 0);
  746. // yAxis.normalize();
  747. }
  748. /// @copybrief rotateXAxis
  749. void rotateYAxis(const T rad)
  750. {
  751. TMat& m = *this;
  752. // NOTE: See the clean code from < r664
  753. T sina, cosa;
  754. sinCos(rad, sina, cosa);
  755. // zAxis = zAxis*cosa + xAxis*sina;
  756. m(0, 2) = m(0, 2) * cosa + m(0, 0) * sina;
  757. m(1, 2) = m(1, 2) * cosa + m(1, 0) * sina;
  758. m(2, 2) = m(2, 2) * cosa + m(2, 0) * sina;
  759. // zAxis.normalize();
  760. T len = sqrt(m(0, 2) * m(0, 2) + m(1, 2) * m(1, 2) + m(2, 2) * m(2, 2));
  761. m(0, 2) /= len;
  762. m(1, 2) /= len;
  763. m(2, 2) /= len;
  764. // xAxis = (zAxis*yAxis) * -1.0f;
  765. m(0, 0) = m(2, 2) * m(1, 1) - m(1, 2) * m(2, 1);
  766. m(1, 0) = m(0, 2) * m(2, 1) - m(2, 2) * m(0, 1);
  767. m(2, 0) = m(1, 2) * m(0, 1) - m(0, 2) * m(1, 1);
  768. }
  769. /// @copybrief rotateXAxis
  770. void rotateZAxis(const T rad)
  771. {
  772. TMat& m = *this;
  773. // NOTE: See the clean code from < r664
  774. T sina, cosa;
  775. sinCos(rad, sina, cosa);
  776. // xAxis = xAxis*cosa + yAxis*sina;
  777. m(0, 0) = m(0, 0) * cosa + m(0, 1) * sina;
  778. m(1, 0) = m(1, 0) * cosa + m(1, 1) * sina;
  779. m(2, 0) = m(2, 0) * cosa + m(2, 1) * sina;
  780. // xAxis.normalize();
  781. T len = sqrt(m(0, 0) * m(0, 0) + m(1, 0) * m(1, 0) + m(2, 0) * m(2, 0));
  782. m(0, 0) /= len;
  783. m(1, 0) /= len;
  784. m(2, 0) /= len;
  785. // yAxis = zAxis*xAxis;
  786. m(0, 1) = m(1, 2) * m(2, 0) - m(2, 2) * m(1, 0);
  787. m(1, 1) = m(2, 2) * m(0, 0) - m(0, 2) * m(2, 0);
  788. m(2, 1) = m(0, 2) * m(1, 0) - m(1, 2) * m(0, 0);
  789. }
  790. void setRotationPart(const TMat<T, 3, 3>& m3)
  791. {
  792. TMat& m = *this;
  793. for(U j = 0; j < 3; j++)
  794. {
  795. for(U i = 0; i < 3; i++)
  796. {
  797. m(j, i) = m3(j, i);
  798. }
  799. }
  800. }
  801. void setRotationPart(const TQuat<T>& q)
  802. {
  803. TMat& m = *this;
  804. // If length is > 1 + 0.002 or < 1 - 0.002 then not normalized quat
  805. ANKI_ASSERT(absolute(T(1) - q.getLength()) <= 0.002);
  806. T xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz;
  807. xs = q.x() + q.x();
  808. ys = q.y() + q.y();
  809. zs = q.z() + q.z();
  810. wx = q.w() * xs;
  811. wy = q.w() * ys;
  812. wz = q.w() * zs;
  813. xx = q.x() * xs;
  814. xy = q.x() * ys;
  815. xz = q.x() * zs;
  816. yy = q.y() * ys;
  817. yz = q.y() * zs;
  818. zz = q.z() * zs;
  819. m(0, 0) = T(1) - (yy + zz);
  820. m(0, 1) = xy - wz;
  821. m(0, 2) = xz + wy;
  822. m(1, 0) = xy + wz;
  823. m(1, 1) = T(1) - (xx + zz);
  824. m(1, 2) = yz - wx;
  825. m(2, 0) = xz - wy;
  826. m(2, 1) = yz + wx;
  827. m(2, 2) = T(1) - (xx + yy);
  828. }
  829. void setRotationPart(const TEuler<T>& e)
  830. {
  831. TMat& m = *this;
  832. T ch, sh, ca, sa, cb, sb;
  833. sinCos(e.y(), sh, ch);
  834. sinCos(e.z(), sa, ca);
  835. sinCos(e.x(), sb, cb);
  836. m(0, 0) = ch * ca;
  837. m(0, 1) = sh * sb - ch * sa * cb;
  838. m(0, 2) = ch * sa * sb + sh * cb;
  839. m(1, 0) = sa;
  840. m(1, 1) = ca * cb;
  841. m(1, 2) = -ca * sb;
  842. m(2, 0) = -sh * ca;
  843. m(2, 1) = sh * sa * cb + ch * sb;
  844. m(2, 2) = -sh * sa * sb + ch * cb;
  845. }
  846. void setRotationPart(const TAxisang<T>& axisang)
  847. {
  848. TMat& m = *this;
  849. // Not normalized axis
  850. ANKI_ASSERT(isZero<T>(T(1) - axisang.getAxis().getLength()));
  851. T c, s;
  852. sinCos(axisang.getAngle(), s, c);
  853. T t = T(1) - c;
  854. const TVec<T, 3>& axis = axisang.getAxis();
  855. m(0, 0) = c + axis.x() * axis.x() * t;
  856. m(1, 1) = c + axis.y() * axis.y() * t;
  857. m(2, 2) = c + axis.z() * axis.z() * t;
  858. T tmp1 = axis.x() * axis.y() * t;
  859. T tmp2 = axis.z() * s;
  860. m(1, 0) = tmp1 + tmp2;
  861. m(0, 1) = tmp1 - tmp2;
  862. tmp1 = axis.x() * axis.z() * t;
  863. tmp2 = axis.y() * s;
  864. m(2, 0) = tmp1 - tmp2;
  865. m(0, 2) = tmp1 + tmp2;
  866. tmp1 = axis.y() * axis.z() * t;
  867. tmp2 = axis.x() * s;
  868. m(2, 1) = tmp1 + tmp2;
  869. m(1, 2) = tmp1 - tmp2;
  870. }
  871. TMat<T, 3, 3> getRotationPart() const
  872. {
  873. const TMat& m = *this;
  874. TMat<T, 3, 3> m3;
  875. m3(0, 0) = m(0, 0);
  876. m3(0, 1) = m(0, 1);
  877. m3(0, 2) = m(0, 2);
  878. m3(1, 0) = m(1, 0);
  879. m3(1, 1) = m(1, 1);
  880. m3(1, 2) = m(1, 2);
  881. m3(2, 0) = m(2, 0);
  882. m3(2, 1) = m(2, 1);
  883. m3(2, 2) = m(2, 2);
  884. return m3;
  885. }
  886. void setTranslationPart(const ColumnVec& v)
  887. {
  888. if(ROW_SIZE == 4)
  889. {
  890. ANKI_ASSERT(isZero<T>(v[3] - T(1)) && "w should be 1");
  891. }
  892. setColumn(3, v);
  893. }
  894. ColumnVec getTranslationPart() const
  895. {
  896. return getColumn(3);
  897. }
  898. void reorthogonalize()
  899. {
  900. // There are 2 methods, the standard and the Gram-Schmidt method with a twist for zAxis. This uses the 2nd. For
  901. // the first see < r664
  902. ColumnVec xAxis, yAxis, zAxis;
  903. getColumns(xAxis, yAxis, zAxis);
  904. xAxis.normalize();
  905. yAxis = yAxis - (xAxis * xAxis.dot(yAxis));
  906. yAxis.normalize();
  907. zAxis = xAxis.cross(yAxis);
  908. setColumns(xAxis, yAxis, zAxis);
  909. }
  910. ANKI_ENABLE_METHOD(J == I && !HAS_SIMD)
  911. void transpose()
  912. {
  913. for(U j = 0; j < J; j++)
  914. {
  915. for(U i = j + 1; i < I; i++)
  916. {
  917. T tmp = m_arr2[j][i];
  918. m_arr2[j][i] = m_arr2[i][j];
  919. m_arr2[i][j] = tmp;
  920. }
  921. }
  922. }
  923. #if ANKI_ENABLE_SIMD
  924. ANKI_ENABLE_METHOD(J == I && HAS_SIMD)
  925. void transpose()
  926. {
  927. # if ANKI_SIMD_SSE
  928. _MM_TRANSPOSE4_PS(m_simd[0], m_simd[1], m_simd[2], m_simd[3]);
  929. # else
  930. const float32x4x2_t row01 = vtrnq_f32(m_simd[0], m_simd[1]);
  931. const float32x4x2_t row23 = vtrnq_f32(m_simd[2], m_simd[3]);
  932. m_simd[0] = vcombine_f32(vget_low_f32(row01.val[0]), vget_low_f32(row23.val[0]));
  933. m_simd[1] = vcombine_f32(vget_low_f32(row01.val[1]), vget_low_f32(row23.val[1]));
  934. m_simd[2] = vcombine_f32(vget_high_f32(row01.val[0]), vget_high_f32(row23.val[0]));
  935. m_simd[3] = vcombine_f32(vget_high_f32(row01.val[1]), vget_high_f32(row23.val[1]));
  936. # endif
  937. }
  938. #endif
  939. void transposeRotationPart()
  940. {
  941. for(U j = 0; j < 3; j++)
  942. {
  943. for(U i = j + 1; i < 3; i++)
  944. {
  945. const T tmp = m_arr2[j][i];
  946. m_arr2[j][i] = m_arr2[i][j];
  947. m_arr2[i][j] = tmp;
  948. }
  949. }
  950. }
  951. ANKI_ENABLE_METHOD(I == J)
  952. TMat getTransposed() const
  953. {
  954. TMat out;
  955. for(U j = 0; j < J; j++)
  956. {
  957. for(U i = 0; i < I; i++)
  958. {
  959. out.m_arr2[i][j] = m_arr2[j][i];
  960. }
  961. }
  962. return out;
  963. }
  964. ANKI_ENABLE_METHOD(I == 3 && J == 3)
  965. T getDet() const
  966. {
  967. const auto& m = *this;
  968. // For the accurate method see < r664
  969. return m(0, 0) * (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)) - m(0, 1) * (m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0))
  970. + m(0, 2) * (m(0, 1) * m(2, 1) - m(1, 1) * m(2, 0));
  971. }
  972. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  973. T getDet() const
  974. {
  975. const auto& t = *this;
  976. return t(0, 3) * t(1, 2) * t(2, 1) * t(3, 0) - t(0, 2) * t(1, 3) * t(2, 1) * t(3, 0)
  977. - t(0, 3) * t(1, 1) * t(2, 2) * t(3, 0) + t(0, 1) * t(1, 3) * t(2, 2) * t(3, 0)
  978. + t(0, 2) * t(1, 1) * t(2, 3) * t(3, 0) - t(0, 1) * t(1, 2) * t(2, 3) * t(3, 0)
  979. - t(0, 3) * t(1, 2) * t(2, 0) * t(3, 1) + t(0, 2) * t(1, 3) * t(2, 0) * t(3, 1)
  980. + t(0, 3) * t(1, 0) * t(2, 2) * t(3, 1) - t(0, 0) * t(1, 3) * t(2, 2) * t(3, 1)
  981. - t(0, 2) * t(1, 0) * t(2, 3) * t(3, 1) + t(0, 0) * t(1, 2) * t(2, 3) * t(3, 1)
  982. + t(0, 3) * t(1, 1) * t(2, 0) * t(3, 2) - t(0, 1) * t(1, 3) * t(2, 0) * t(3, 2)
  983. - t(0, 3) * t(1, 0) * t(2, 1) * t(3, 2) + t(0, 0) * t(1, 3) * t(2, 1) * t(3, 2)
  984. + t(0, 1) * t(1, 0) * t(2, 3) * t(3, 2) - t(0, 0) * t(1, 1) * t(2, 3) * t(3, 2)
  985. - t(0, 2) * t(1, 1) * t(2, 0) * t(3, 3) + t(0, 1) * t(1, 2) * t(2, 0) * t(3, 3)
  986. + t(0, 2) * t(1, 0) * t(2, 1) * t(3, 3) - t(0, 0) * t(1, 2) * t(2, 1) * t(3, 3)
  987. - t(0, 1) * t(1, 0) * t(2, 2) * t(3, 3) + t(0, 0) * t(1, 1) * t(2, 2) * t(3, 3);
  988. }
  989. ANKI_ENABLE_METHOD(I == 3 && J == 3)
  990. TMat getInverse() const
  991. {
  992. // Using Gramer's method Inv(A) = (1 / getDet(A)) * Adj(A)
  993. const TMat& m = *this;
  994. TMat r;
  995. // compute determinant
  996. const T cofactor0 = m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1);
  997. const T cofactor3 = m(0, 2) * m(2, 1) - m(0, 1) * m(2, 2);
  998. const T cofactor6 = m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1);
  999. const T det = m(0, 0) * cofactor0 + m(1, 0) * cofactor3 + m(2, 0) * cofactor6;
  1000. ANKI_ASSERT(!isZero<T>(det)); // Cannot invert det == 0
  1001. // create adjoint matrix and multiply by 1/det to get inverse
  1002. const T invDet = T(1) / det;
  1003. r(0, 0) = invDet * cofactor0;
  1004. r(0, 1) = invDet * cofactor3;
  1005. r(0, 2) = invDet * cofactor6;
  1006. r(1, 0) = invDet * (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2));
  1007. r(1, 1) = invDet * (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0));
  1008. r(1, 2) = invDet * (m(0, 2) * m(1, 0) - m(0, 0) * m(1, 2));
  1009. r(2, 0) = invDet * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0));
  1010. r(2, 1) = invDet * (m(0, 1) * m(2, 0) - m(0, 0) * m(2, 1));
  1011. r(2, 2) = invDet * (m(0, 0) * m(1, 1) - m(0, 1) * m(1, 0));
  1012. return r;
  1013. }
  1014. /// Invert using Cramer's rule
  1015. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1016. TMat getInverse() const
  1017. {
  1018. Array<T, 12> tmp;
  1019. const auto& in = (*this);
  1020. TMat m4;
  1021. tmp[0] = in(2, 2) * in(3, 3);
  1022. tmp[1] = in(3, 2) * in(2, 3);
  1023. tmp[2] = in(1, 2) * in(3, 3);
  1024. tmp[3] = in(3, 2) * in(1, 3);
  1025. tmp[4] = in(1, 2) * in(2, 3);
  1026. tmp[5] = in(2, 2) * in(1, 3);
  1027. tmp[6] = in(0, 2) * in(3, 3);
  1028. tmp[7] = in(3, 2) * in(0, 3);
  1029. tmp[8] = in(0, 2) * in(2, 3);
  1030. tmp[9] = in(2, 2) * in(0, 3);
  1031. tmp[10] = in(0, 2) * in(1, 3);
  1032. tmp[11] = in(1, 2) * in(0, 3);
  1033. m4(0, 0) = tmp[0] * in(1, 1) + tmp[3] * in(2, 1) + tmp[4] * in(3, 1);
  1034. m4(0, 0) -= tmp[1] * in(1, 1) + tmp[2] * in(2, 1) + tmp[5] * in(3, 1);
  1035. m4(0, 1) = tmp[1] * in(0, 1) + tmp[6] * in(2, 1) + tmp[9] * in(3, 1);
  1036. m4(0, 1) -= tmp[0] * in(0, 1) + tmp[7] * in(2, 1) + tmp[8] * in(3, 1);
  1037. m4(0, 2) = tmp[2] * in(0, 1) + tmp[7] * in(1, 1) + tmp[10] * in(3, 1);
  1038. m4(0, 2) -= tmp[3] * in(0, 1) + tmp[6] * in(1, 1) + tmp[11] * in(3, 1);
  1039. m4(0, 3) = tmp[5] * in(0, 1) + tmp[8] * in(1, 1) + tmp[11] * in(2, 1);
  1040. m4(0, 3) -= tmp[4] * in(0, 1) + tmp[9] * in(1, 1) + tmp[10] * in(2, 1);
  1041. m4(1, 0) = tmp[1] * in(1, 0) + tmp[2] * in(2, 0) + tmp[5] * in(3, 0);
  1042. m4(1, 0) -= tmp[0] * in(1, 0) + tmp[3] * in(2, 0) + tmp[4] * in(3, 0);
  1043. m4(1, 1) = tmp[0] * in(0, 0) + tmp[7] * in(2, 0) + tmp[8] * in(3, 0);
  1044. m4(1, 1) -= tmp[1] * in(0, 0) + tmp[6] * in(2, 0) + tmp[9] * in(3, 0);
  1045. m4(1, 2) = tmp[3] * in(0, 0) + tmp[6] * in(1, 0) + tmp[11] * in(3, 0);
  1046. m4(1, 2) -= tmp[2] * in(0, 0) + tmp[7] * in(1, 0) + tmp[10] * in(3, 0);
  1047. m4(1, 3) = tmp[4] * in(0, 0) + tmp[9] * in(1, 0) + tmp[10] * in(2, 0);
  1048. m4(1, 3) -= tmp[5] * in(0, 0) + tmp[8] * in(1, 0) + tmp[11] * in(2, 0);
  1049. tmp[0] = in(2, 0) * in(3, 1);
  1050. tmp[1] = in(3, 0) * in(2, 1);
  1051. tmp[2] = in(1, 0) * in(3, 1);
  1052. tmp[3] = in(3, 0) * in(1, 1);
  1053. tmp[4] = in(1, 0) * in(2, 1);
  1054. tmp[5] = in(2, 0) * in(1, 1);
  1055. tmp[6] = in(0, 0) * in(3, 1);
  1056. tmp[7] = in(3, 0) * in(0, 1);
  1057. tmp[8] = in(0, 0) * in(2, 1);
  1058. tmp[9] = in(2, 0) * in(0, 1);
  1059. tmp[10] = in(0, 0) * in(1, 1);
  1060. tmp[11] = in(1, 0) * in(0, 1);
  1061. m4(2, 0) = tmp[0] * in(1, 3) + tmp[3] * in(2, 3) + tmp[4] * in(3, 3);
  1062. m4(2, 0) -= tmp[1] * in(1, 3) + tmp[2] * in(2, 3) + tmp[5] * in(3, 3);
  1063. m4(2, 1) = tmp[1] * in(0, 3) + tmp[6] * in(2, 3) + tmp[9] * in(3, 3);
  1064. m4(2, 1) -= tmp[0] * in(0, 3) + tmp[7] * in(2, 3) + tmp[8] * in(3, 3);
  1065. m4(2, 2) = tmp[2] * in(0, 3) + tmp[7] * in(1, 3) + tmp[10] * in(3, 3);
  1066. m4(2, 2) -= tmp[3] * in(0, 3) + tmp[6] * in(1, 3) + tmp[11] * in(3, 3);
  1067. m4(2, 3) = tmp[5] * in(0, 3) + tmp[8] * in(1, 3) + tmp[11] * in(2, 3);
  1068. m4(2, 3) -= tmp[4] * in(0, 3) + tmp[9] * in(1, 3) + tmp[10] * in(2, 3);
  1069. m4(3, 0) = tmp[2] * in(2, 2) + tmp[5] * in(3, 2) + tmp[1] * in(1, 2);
  1070. m4(3, 0) -= tmp[4] * in(3, 2) + tmp[0] * in(1, 2) + tmp[3] * in(2, 2);
  1071. m4(3, 1) = tmp[8] * in(3, 2) + tmp[0] * in(0, 2) + tmp[7] * in(2, 2);
  1072. m4(3, 1) -= tmp[6] * in(2, 2) + tmp[9] * in(3, 2) + tmp[1] * in(0, 2);
  1073. m4(3, 2) = tmp[6] * in(1, 2) + tmp[11] * in(3, 2) + tmp[3] * in(0, 2);
  1074. m4(3, 2) -= tmp[10] * in(3, 2) + tmp[2] * in(0, 2) + tmp[7] * in(1, 2);
  1075. m4(3, 3) = tmp[10] * in(2, 2) + tmp[4] * in(0, 2) + tmp[9] * in(1, 2);
  1076. m4(3, 3) -= tmp[8] * in(1, 2) + tmp[11] * in(2, 2) + tmp[5] * in(0, 2);
  1077. T det = in(0, 0) * m4(0, 0) + in(1, 0) * m4(0, 1) + in(2, 0) * m4(0, 2) + in(3, 0) * m4(0, 3);
  1078. ANKI_ASSERT(!isZero<T>(det)); // Cannot invert, det == 0
  1079. det = T(1) / det;
  1080. m4 *= det;
  1081. return m4;
  1082. }
  1083. /// See getInverse
  1084. ANKI_ENABLE_METHOD((I == 4 && J == 4) || (I == 3 && J == 3))
  1085. void invert()
  1086. {
  1087. (*this) = getInverse();
  1088. }
  1089. /// 12 muls, 27 adds. Something like m4 = m0 * m1 but without touching the 4rth row and allot faster
  1090. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1091. static TMat combineTransformations(const TMat& m0, const TMat& m1)
  1092. {
  1093. // See the clean code in < r664
  1094. // one of the 2 mat4 doesnt represent transformation
  1095. ANKI_ASSERT(isZero<T>(m0(3, 0) + m0(3, 1) + m0(3, 2) + m0(3, 3) - T(1))
  1096. && isZero<T>(m1(3, 0) + m1(3, 1) + m1(3, 2) + m1(3, 3) - T(1)));
  1097. TMat m4;
  1098. m4(0, 0) = m0(0, 0) * m1(0, 0) + m0(0, 1) * m1(1, 0) + m0(0, 2) * m1(2, 0);
  1099. m4(0, 1) = m0(0, 0) * m1(0, 1) + m0(0, 1) * m1(1, 1) + m0(0, 2) * m1(2, 1);
  1100. m4(0, 2) = m0(0, 0) * m1(0, 2) + m0(0, 1) * m1(1, 2) + m0(0, 2) * m1(2, 2);
  1101. m4(1, 0) = m0(1, 0) * m1(0, 0) + m0(1, 1) * m1(1, 0) + m0(1, 2) * m1(2, 0);
  1102. m4(1, 1) = m0(1, 0) * m1(0, 1) + m0(1, 1) * m1(1, 1) + m0(1, 2) * m1(2, 1);
  1103. m4(1, 2) = m0(1, 0) * m1(0, 2) + m0(1, 1) * m1(1, 2) + m0(1, 2) * m1(2, 2);
  1104. m4(2, 0) = m0(2, 0) * m1(0, 0) + m0(2, 1) * m1(1, 0) + m0(2, 2) * m1(2, 0);
  1105. m4(2, 1) = m0(2, 0) * m1(0, 1) + m0(2, 1) * m1(1, 1) + m0(2, 2) * m1(2, 1);
  1106. m4(2, 2) = m0(2, 0) * m1(0, 2) + m0(2, 1) * m1(1, 2) + m0(2, 2) * m1(2, 2);
  1107. m4(0, 3) = m0(0, 0) * m1(0, 3) + m0(0, 1) * m1(1, 3) + m0(0, 2) * m1(2, 3) + m0(0, 3);
  1108. m4(1, 3) = m0(1, 0) * m1(0, 3) + m0(1, 1) * m1(1, 3) + m0(1, 2) * m1(2, 3) + m0(1, 3);
  1109. m4(2, 3) = m0(2, 0) * m1(0, 3) + m0(2, 1) * m1(1, 3) + m0(2, 2) * m1(2, 3) + m0(2, 3);
  1110. m4(3, 0) = m4(3, 1) = m4(3, 2) = T(0);
  1111. m4(3, 3) = T(1);
  1112. return m4;
  1113. }
  1114. /// Create a new matrix that is equivalent to Mat4(this)*Mat4(b)
  1115. ANKI_ENABLE_METHOD(J == 3 && I == 4 && !HAS_SIMD)
  1116. TMat combineTransformations(const TMat& b) const
  1117. {
  1118. const auto& a = *this;
  1119. TMat c;
  1120. c(0, 0) = a(0, 0) * b(0, 0) + a(0, 1) * b(1, 0) + a(0, 2) * b(2, 0);
  1121. c(0, 1) = a(0, 0) * b(0, 1) + a(0, 1) * b(1, 1) + a(0, 2) * b(2, 1);
  1122. c(0, 2) = a(0, 0) * b(0, 2) + a(0, 1) * b(1, 2) + a(0, 2) * b(2, 2);
  1123. c(1, 0) = a(1, 0) * b(0, 0) + a(1, 1) * b(1, 0) + a(1, 2) * b(2, 0);
  1124. c(1, 1) = a(1, 0) * b(0, 1) + a(1, 1) * b(1, 1) + a(1, 2) * b(2, 1);
  1125. c(1, 2) = a(1, 0) * b(0, 2) + a(1, 1) * b(1, 2) + a(1, 2) * b(2, 2);
  1126. c(2, 0) = a(2, 0) * b(0, 0) + a(2, 1) * b(1, 0) + a(2, 2) * b(2, 0);
  1127. c(2, 1) = a(2, 0) * b(0, 1) + a(2, 1) * b(1, 1) + a(2, 2) * b(2, 1);
  1128. c(2, 2) = a(2, 0) * b(0, 2) + a(2, 1) * b(1, 2) + a(2, 2) * b(2, 2);
  1129. c(0, 3) = a(0, 0) * b(0, 3) + a(0, 1) * b(1, 3) + a(0, 2) * b(2, 3) + a(0, 3);
  1130. c(1, 3) = a(1, 0) * b(0, 3) + a(1, 1) * b(1, 3) + a(1, 2) * b(2, 3) + a(1, 3);
  1131. c(2, 3) = a(2, 0) * b(0, 3) + a(2, 1) * b(1, 3) + a(2, 2) * b(2, 3) + a(2, 3);
  1132. return c;
  1133. }
  1134. #if ANKI_ENABLE_SIMD
  1135. ANKI_ENABLE_METHOD(J == 3 && I == 4 && HAS_SIMD)
  1136. TMat combineTransformations(const TMat& b) const
  1137. {
  1138. TMat c;
  1139. const auto& a = *this;
  1140. # if ANKI_SIMD_SSE
  1141. for(U i = 0; i < 3; i++)
  1142. {
  1143. __m128 t1, t2;
  1144. t1 = _mm_set1_ps(a(i, 0));
  1145. t2 = _mm_mul_ps(b.m_simd[0], t1);
  1146. t1 = _mm_set1_ps(a(i, 1));
  1147. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[1], t1), t2);
  1148. t1 = _mm_set1_ps(a(i, 2));
  1149. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[2], t1), t2);
  1150. TVec<T, 4> v4(T(0), T(0), T(0), a(i, 3));
  1151. t2 = _mm_add_ps(v4.getSimd(), t2);
  1152. c.m_simd[i] = t2;
  1153. }
  1154. # else
  1155. for(U i = 0; i < 3; i++)
  1156. {
  1157. float32x4_t t1, t2;
  1158. t1 = vdupq_n_f32(a(i, 0));
  1159. t2 = b.m_simd[0] * t1;
  1160. t1 = vdupq_n_f32(a(i, 1));
  1161. t2 = b.m_simd[1] * t1 + t2;
  1162. t1 = vdupq_n_f32(a(i, 2));
  1163. t2 = b.m_simd[2] * t1 + t2;
  1164. TVec<T, 4> v4(T(0), T(0), T(0), a(i, 3));
  1165. t2 += v4.getSimd();
  1166. c.m_simd[i] = t2;
  1167. }
  1168. # endif
  1169. return c;
  1170. }
  1171. #endif
  1172. /// Calculate a perspective projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
  1173. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1174. static ANKI_USE_RESULT TMat calculatePerspectiveProjectionMatrix(T fovX, T fovY, T near, T far)
  1175. {
  1176. ANKI_ASSERT(fovX > T(0) && fovY > T(0) && near > T(0) && far > T(0));
  1177. const T g = near - far;
  1178. const T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
  1179. TMat proj;
  1180. proj(0, 0) = f * (fovY / fovX); // = f/aspectRatio;
  1181. proj(0, 1) = T(0);
  1182. proj(0, 2) = T(0);
  1183. proj(0, 3) = T(0);
  1184. proj(1, 0) = T(0);
  1185. proj(1, 1) = f;
  1186. proj(1, 2) = T(0);
  1187. proj(1, 3) = T(0);
  1188. proj(2, 0) = T(0);
  1189. proj(2, 1) = T(0);
  1190. proj(2, 2) = far / g;
  1191. proj(2, 3) = (far * near) / g;
  1192. proj(3, 0) = T(0);
  1193. proj(3, 1) = T(0);
  1194. proj(3, 2) = T(-1);
  1195. proj(3, 3) = T(0);
  1196. return proj;
  1197. }
  1198. /// Calculate an orthographic projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
  1199. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1200. static ANKI_USE_RESULT TMat calculateOrthographicProjectionMatrix(T right, T left, T top, T bottom, T near, T far)
  1201. {
  1202. ANKI_ASSERT(right != T(0) && left != T(0) && top != T(0) && bottom != T(0) && near != T(0) && far != T(0));
  1203. const T difx = right - left;
  1204. const T dify = top - bottom;
  1205. const T difz = far - near;
  1206. const T tx = -(right + left) / difx;
  1207. const T ty = -(top + bottom) / dify;
  1208. const T tz = -near / difz;
  1209. TMat m;
  1210. m(0, 0) = T(2) / difx;
  1211. m(0, 1) = T(0);
  1212. m(0, 2) = T(0);
  1213. m(0, 3) = tx;
  1214. m(1, 0) = T(0);
  1215. m(1, 1) = T(2) / dify;
  1216. m(1, 2) = T(0);
  1217. m(1, 3) = ty;
  1218. m(2, 0) = T(0);
  1219. m(2, 1) = T(0);
  1220. m(2, 2) = T(-1) / difz;
  1221. m(2, 3) = tz;
  1222. m(3, 0) = T(0);
  1223. m(3, 1) = T(0);
  1224. m(3, 2) = T(0);
  1225. m(3, 3) = T(1);
  1226. return m;
  1227. }
  1228. /// Given the parameters that construct a projection matrix extract 4 values that can be used to unproject a point
  1229. /// from NDC to view space.
  1230. /// @code
  1231. /// Vec4 unprojParams = calculatePerspectiveUnprojectionParams(...);
  1232. /// F32 z = unprojParams.z() / (unprojParams.w() + depth);
  1233. /// Vec2 xy = ndc.xy() * unprojParams.xy() * z;
  1234. /// Vec3 posViewSpace(xy, z);
  1235. /// @endcode
  1236. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1237. static TVec<T, 4> calculatePerspectiveUnprojectionParams(T fovX, T fovY, T near, T far)
  1238. {
  1239. TVec<T, 4> out;
  1240. const T g = near - far;
  1241. const T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
  1242. const T m00 = f * (fovY / fovX);
  1243. const T m11 = f;
  1244. const T m22 = far / g;
  1245. const T m23 = (far * near) / g;
  1246. // First, clip = (m * Pv) where Pv is the view space position.
  1247. // ndc.z = clip.z / clip.w = (m22 * Pv.z + m23) / -Pv.z. Note that ndc.z == depth in zero_to_one projection.
  1248. // Solving that for Pv.z we get
  1249. // Pv.z = A / (depth + B)
  1250. // where A = -m23 and B = m22
  1251. // so we save the A and B in the projection params vector
  1252. out.z() = -m23;
  1253. out.w() = m22;
  1254. // Using the same logic the Pv.x = x' * w / m00
  1255. // so Pv.x = x' * Pv.z * (-1 / m00)
  1256. out.x() = -T(T(1)) / m00;
  1257. // Same for y
  1258. out.y() = -T(T(1)) / m11;
  1259. return out;
  1260. }
  1261. /// Assuming this is a projection matrix extract the unprojection parameters. See
  1262. /// calculatePerspectiveUnprojectionParams for more info.
  1263. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1264. TVec<T, 4> extractPerspectiveUnprojectionParams() const
  1265. {
  1266. TVec<T, 4> out;
  1267. const auto& m = *this;
  1268. out.z() = -m(2, 3);
  1269. out.w() = m(2, 2);
  1270. out.x() = -T(T(1)) / m(0, 0);
  1271. out.y() = -T(T(1)) / m(1, 1);
  1272. return out;
  1273. }
  1274. /// If we suppose this matrix represents a transformation, return the inverted transformation
  1275. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1276. TMat getInverseTransformation() const
  1277. {
  1278. const TMat<T, 3, 3> invertedRot = getRotationPart().getTransposed();
  1279. TVec<T, 3> invertedTsl = getTranslationPart().xyz();
  1280. invertedTsl = -(invertedRot * invertedTsl);
  1281. return TMat(invertedTsl.xyz0(), invertedRot);
  1282. }
  1283. /// @note 9 muls, 9 adds
  1284. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1285. TVec<T, 3> transform(const TVec<T, 3>& v) const
  1286. {
  1287. const auto& m = *this;
  1288. return TVec<T, 3>(m(0, 0) * v.x() + m(0, 1) * v.y() + m(0, 2) * v.z() + m(0, 3),
  1289. m(1, 0) * v.x() + m(1, 1) * v.y() + m(1, 2) * v.z() + m(1, 3),
  1290. m(2, 0) * v.x() + m(2, 1) * v.y() + m(2, 2) * v.z() + m(2, 3));
  1291. }
  1292. /// Create a new transform matrix position at eye and looking at refPoint.
  1293. template<U VEC_DIMS, ANKI_ENABLE(J == 3 && I == 4 && VEC_DIMS >= 3)>
  1294. static TMat lookAt(const TVec<T, VEC_DIMS>& eye, const TVec<T, VEC_DIMS>& refPoint, const TVec<T, VEC_DIMS>& up)
  1295. {
  1296. const TVec<T, 3> vdir = (refPoint.xyz() - eye.xyz()).getNormalized();
  1297. const TVec<T, 3> vup = (up.xyz() - vdir * up.xyz().dot(vdir)).getNormalized();
  1298. const TVec<T, 3> vside = vdir.cross(vup);
  1299. TMat out;
  1300. out.setColumns(vside, vup, -vdir, eye.xyz());
  1301. return out;
  1302. }
  1303. /// Create a new transform matrix position at eye and looking at refPoint.
  1304. template<U VEC_DIMS, ANKI_ENABLE(J == 4 && I == 4 && VEC_DIMS >= 3)>
  1305. static TMat lookAt(const TVec<T, VEC_DIMS>& eye, const TVec<T, VEC_DIMS>& refPoint, const TVec<T, VEC_DIMS>& up)
  1306. {
  1307. const TVec<T, 4> vdir = (refPoint.xyz0() - eye.xyz0()).getNormalized();
  1308. const TVec<T, 4> vup = (up.xyz0() - vdir * up.xyz0().dot(vdir)).getNormalized();
  1309. const TVec<T, 4> vside = vdir.cross(vup);
  1310. TMat out;
  1311. out.setColumns(vside, vup, -vdir, eye.xyz1());
  1312. return out;
  1313. }
  1314. TMat lerp(const TMat& b, T t) const
  1315. {
  1316. return ((*this) * (T(1) - t)) + (b * t);
  1317. }
  1318. static TMat getZero()
  1319. {
  1320. return TMat(T(0));
  1321. }
  1322. void setZero()
  1323. {
  1324. *this = getZero();
  1325. }
  1326. ANKI_ENABLE_METHOD(I == 3 && J == 3)
  1327. static TMat getIdentity()
  1328. {
  1329. return TMat(T(1), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(1));
  1330. }
  1331. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1332. static TMat getIdentity()
  1333. {
  1334. return TMat(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1));
  1335. }
  1336. ANKI_ENABLE_METHOD(I == 4 && J == 3)
  1337. static TMat getIdentity()
  1338. {
  1339. return TMat(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1), T(0));
  1340. }
  1341. void setIdentity()
  1342. {
  1343. (*this) = getIdentity();
  1344. }
  1345. static constexpr U8 getSize()
  1346. {
  1347. return U8(I * J);
  1348. }
  1349. ANKI_ENABLE_METHOD(std::is_floating_point<T>::value)
  1350. void toString(StringAuto& str) const
  1351. {
  1352. for(U j = 0; j < J; ++j)
  1353. {
  1354. for(U i = 0; i < I; ++i)
  1355. {
  1356. CString fmt;
  1357. if(i == I - 1 && j == J - 1)
  1358. {
  1359. fmt = "%f";
  1360. }
  1361. else if(i == I - 1)
  1362. {
  1363. fmt = "%f\n";
  1364. }
  1365. else
  1366. {
  1367. fmt = "%f ";
  1368. }
  1369. str.append(StringAuto(str.getAllocator()).sprintf(fmt, m_arr2[j][i]));
  1370. }
  1371. }
  1372. }
  1373. /// @}
  1374. protected:
  1375. static constexpr U N = I * J;
  1376. /// @name Data members
  1377. /// @{
  1378. union
  1379. {
  1380. Array<T, N> m_arr1;
  1381. Array2d<T, J, I> m_arr2;
  1382. T m_carr1[N]; ///< For easier debugging with gdb
  1383. T m_carr2[J][I]; ///< For easier debugging with gdb
  1384. SimdArray m_simd;
  1385. };
  1386. /// @}
  1387. };
  1388. /// @memberof TMat
  1389. template<typename T, U J, U I>
  1390. TMat<T, J, I> operator+(const T f, const TMat<T, J, I>& m)
  1391. {
  1392. return m + f;
  1393. }
  1394. /// @memberof TMat
  1395. template<typename T, U J, U I>
  1396. TMat<T, J, I> operator-(const T f, const TMat<T, J, I>& m)
  1397. {
  1398. TMat<T, J, I> out;
  1399. for(U i = 0; i < J * I; i++)
  1400. {
  1401. out[i] = f - m[i];
  1402. }
  1403. return out;
  1404. }
  1405. /// @memberof TMat
  1406. template<typename T, U J, U I>
  1407. TMat<T, J, I> operator*(const T f, const TMat<T, J, I>& m)
  1408. {
  1409. return m * f;
  1410. }
  1411. /// @memberof TMat
  1412. template<typename T, U J, U I>
  1413. TMat<T, J, I> operator/(const T f, const TMat<T, 3, 3>& m3)
  1414. {
  1415. TMat<T, J, I> out;
  1416. for(U i = 0; i < J * I; i++)
  1417. {
  1418. ANKI_ASSERT(m3[i] != T(0));
  1419. out[i] = f / m3[i];
  1420. }
  1421. return out;
  1422. }
  1423. /// F32 3x3 matrix
  1424. using Mat3 = TMat<F32, 3, 3>;
  1425. static_assert(sizeof(Mat3) == sizeof(F32) * 3 * 3, "Incorrect size");
  1426. /// F64 3x3 matrix
  1427. using DMat3 = TMat<F64, 3, 3>;
  1428. static_assert(sizeof(DMat3) == sizeof(F64) * 3 * 3, "Incorrect size");
  1429. /// F32 4x4 matrix
  1430. using Mat4 = TMat<F32, 4, 4>;
  1431. static_assert(sizeof(Mat4) == sizeof(F32) * 4 * 4, "Incorrect size");
  1432. /// F64 4x4 matrix
  1433. using DMat4 = TMat<F64, 4, 4>;
  1434. static_assert(sizeof(DMat4) == sizeof(F64) * 4 * 4, "Incorrect size");
  1435. /// F32 3x4 matrix
  1436. using Mat3x4 = TMat<F32, 3, 4>;
  1437. static_assert(sizeof(Mat3x4) == sizeof(F32) * 3 * 4, "Incorrect size");
  1438. /// F64 3x4 matrix
  1439. using DMat3x4 = TMat<F64, 3, 4>;
  1440. static_assert(sizeof(DMat3x4) == sizeof(F64) * 3 * 4, "Incorrect size");
  1441. /// @}
  1442. } // end namespace anki