Mat.h 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581
  1. // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma once
  6. #include <AnKi/Math/Common.h>
  7. #include <AnKi/Math/Vec.h>
  8. namespace anki {
  9. /// @addtogroup math
  10. /// @{
  11. /// Matrix type.
  12. /// @tparam T The scalar type. Eg float.
  13. /// @tparam kTRowCount The number of rows.
  14. /// @tparam kTColumnCount The number of columns.
  15. template<typename T, U kTRowCount, U kTColumnCount>
  16. class alignas(MathSimd<T, kTColumnCount>::kAlignment) TMat
  17. {
  18. public:
/// The scalar element type (the T template argument).
  19. using Scalar = T;
/// SIMD type selected by MathSimd for a row of kTColumnCount scalars.
  20. using Simd = typename MathSimd<T, kTColumnCount>::Type;
// The alias below is wrapped in a diagnostic push/pop that silences -Wignored-attributes on GCC compatible compilers.
  21. #if ANKI_COMPILER_GCC_COMPATIBLE
  22. # pragma GCC diagnostic push
  23. # pragma GCC diagnostic ignored "-Wignored-attributes"
  24. #endif
/// One Simd value per matrix row.
  25. using SimdArray = Array<Simd, kTRowCount>;
  26. #if ANKI_COMPILER_GCC_COMPATIBLE
  27. # pragma GCC diagnostic pop
  28. #endif
/// Vector type that holds a single row (kTColumnCount elements).
  29. using RowVec = TVec<T, kTColumnCount>;
/// Vector type that holds a single column (kTRowCount elements).
  30. using ColumnVec = TVec<T, kTRowCount>;
  31. static constexpr U kRowCount = kTRowCount; ///< Number of rows
  32. static constexpr U kColumnCount = kTColumnCount; ///< Number of columns
  33. static constexpr U kSize = kTRowCount * kTColumnCount; ///< Number of total elements
/// True when the matrix has as many rows as columns.
  34. static constexpr Bool kIsSquare = kTColumnCount == kTRowCount;
/// True when rows are 4 F32 wide and ANKI_ENABLE_SIMD is on (any row count).
  35. static constexpr Bool kHasSimd = kTColumnCount == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
/// True for a 4x4 F32 matrix when ANKI_ENABLE_SIMD is on.
  36. static constexpr Bool kIs4x4Simd = kTRowCount == 4 && kTColumnCount == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
/// True for a 3x4 F32 matrix when ANKI_ENABLE_SIMD is on.
  37. static constexpr Bool kIs3x4Simd = kTRowCount == 3 && kTColumnCount == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
  38. /// @name Constructors
  39. /// @{
  40. constexpr TMat()
  41. : TMat(T(0))
  42. {
  43. }
  44. /// Copy.
  45. constexpr TMat(const TMat& b)
  46. {
  47. for(U i = 0; i < kRowCount; i++)
  48. {
  49. m_rows[i] = b.m_rows[i];
  50. }
  51. }
  52. explicit constexpr TMat(const T f)
  53. {
  54. for(U i = 0; i < kRowCount; i++)
  55. {
  56. m_rows[i] = RowVec(f);
  57. }
  58. }
  59. explicit constexpr TMat(const T arr[])
  60. {
  61. for(U i = 0; i < N; i++)
  62. {
  63. m_arr1[i] = arr[i];
  64. }
  65. }
  66. // 3x3 specific constructors
  67. constexpr TMat(T m00, T m01, T m02, T m10, T m11, T m12, T m20, T m21, T m22) requires(kSize == 9)
  68. {
  69. auto& m = *this;
  70. m(0, 0) = m00;
  71. m(0, 1) = m01;
  72. m(0, 2) = m02;
  73. m(1, 0) = m10;
  74. m(1, 1) = m11;
  75. m(1, 2) = m12;
  76. m(2, 0) = m20;
  77. m(2, 1) = m21;
  78. m(2, 2) = m22;
  79. }
  80. explicit constexpr TMat(const TQuat<T>& q) requires(kSize == 9)
  81. {
  82. TMat& m = *this;
  83. // If length is > 1 + 0.002 or < 1 - 0.002 then not normalized quat
  84. ANKI_ASSERT(absolute(T(1) - q.length()) <= 0.002);
  85. T xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz;
  86. xs = q.x() + q.x();
  87. ys = q.y() + q.y();
  88. zs = q.z() + q.z();
  89. wx = q.w() * xs;
  90. wy = q.w() * ys;
  91. wz = q.w() * zs;
  92. xx = q.x() * xs;
  93. xy = q.x() * ys;
  94. xz = q.x() * zs;
  95. yy = q.y() * ys;
  96. yz = q.y() * zs;
  97. zz = q.z() * zs;
  98. m(0, 0) = T(1) - (yy + zz);
  99. m(0, 1) = xy - wz;
  100. m(0, 2) = xz + wy;
  101. m(1, 0) = xy + wz;
  102. m(1, 1) = T(1) - (xx + zz);
  103. m(1, 2) = yz - wx;
  104. m(2, 0) = xz - wy;
  105. m(2, 1) = yz + wx;
  106. m(2, 2) = T(1) - (xx + yy);
  107. }
  108. explicit constexpr TMat(const TEuler<T>& e) requires(kSize == 9)
  109. {
  110. TMat& m = *this;
  111. T ch, sh, ca, sa, cb, sb;
  112. sinCos(e.y(), sh, ch);
  113. sinCos(e.z(), sa, ca);
  114. sinCos(e.x(), sb, cb);
  115. m(0, 0) = ch * ca;
  116. m(0, 1) = sh * sb - ch * sa * cb;
  117. m(0, 2) = ch * sa * sb + sh * cb;
  118. m(1, 0) = sa;
  119. m(1, 1) = ca * cb;
  120. m(1, 2) = -ca * sb;
  121. m(2, 0) = -sh * ca;
  122. m(2, 1) = sh * sa * cb + ch * sb;
  123. m(2, 2) = -sh * sa * sb + ch * cb;
  124. }
  125. explicit constexpr TMat(const TAxisang<T>& axisang) requires(kSize == 9)
  126. {
  127. TMat& m = *this;
  128. // Not normalized axis
  129. ANKI_ASSERT(isZero<T>(T(1) - axisang.getAxis().length()));
  130. T c, s;
  131. sinCos(axisang.getAngle(), s, c);
  132. T t = T(1) - c;
  133. const TVec<T, 3>& axis = axisang.getAxis();
  134. m(0, 0) = c + axis.x() * axis.x() * t;
  135. m(1, 1) = c + axis.y() * axis.y() * t;
  136. m(2, 2) = c + axis.z() * axis.z() * t;
  137. T tmp1 = axis.x() * axis.y() * t;
  138. T tmp2 = axis.z() * s;
  139. m(1, 0) = tmp1 + tmp2;
  140. m(0, 1) = tmp1 - tmp2;
  141. tmp1 = axis.x() * axis.z() * t;
  142. tmp2 = axis.y() * s;
  143. m(2, 0) = tmp1 - tmp2;
  144. m(0, 2) = tmp1 + tmp2;
  145. tmp1 = axis.y() * axis.z() * t;
  146. tmp2 = axis.x() * s;
  147. m(2, 1) = tmp1 + tmp2;
  148. m(1, 2) = tmp1 - tmp2;
  149. }
  150. // 4x4 specific constructors
  151. constexpr TMat(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23, T m30, T m31, T m32,
  152. T m33) requires(kSize == 16)
  153. {
  154. auto& m = *this;
  155. m(0, 0) = m00;
  156. m(0, 1) = m01;
  157. m(0, 2) = m02;
  158. m(0, 3) = m03;
  159. m(1, 0) = m10;
  160. m(1, 1) = m11;
  161. m(1, 2) = m12;
  162. m(1, 3) = m13;
  163. m(2, 0) = m20;
  164. m(2, 1) = m21;
  165. m(2, 2) = m22;
  166. m(2, 3) = m23;
  167. m(3, 0) = m30;
  168. m(3, 1) = m31;
  169. m(3, 2) = m32;
  170. m(3, 3) = m33;
  171. }
  172. constexpr TMat(const TVec<T, 3>& translation, const TMat<T, 3, 3>& rotation, const TVec<T, 3>& scale = TVec<T, 3>(T(1))) requires(kSize == 16)
  173. {
  174. if(scale == TVec<T, 3>(T(1)))
  175. {
  176. setRotationPart(rotation);
  177. }
  178. else
  179. {
  180. const auto a = rotation.getColumn(0) * scale.x();
  181. const auto b = rotation.getColumn(1) * scale.y();
  182. const auto c = rotation.getColumn(2) * scale.z();
  183. TMat<T, 3, 3> rot;
  184. rot.setColumns(a, b, c);
  185. setRotationPart(rot);
  186. }
  187. setTranslationPart(translation);
  188. auto& m = *this;
  189. m(3, 0) = m(3, 1) = m(3, 2) = T(0);
  190. m(3, 3) = T(1);
  191. }
  192. explicit constexpr TMat(const TTransform<T>& t) requires(kSize == 16)
  193. : TMat(t.getOrigin().xyz(), t.getRotation().getRotationPart(), t.getScale().xyz())
  194. {
  195. }
  196. /// Set a 4x4 matrix using a 3x4 for the first 3 rows and a vec4 for the 4rth row.
  197. explicit constexpr TMat(const TMat<T, 3, 4>& m3, const TVec<T, 4>& row3) requires(kSize == 16)
  198. {
  199. setRow(0, m3.getRow(0));
  200. setRow(1, m3.getRow(1));
  201. setRow(2, m3.getRow(2));
  202. setRow(3, row3);
  203. }
  204. // 3x4 specific constructors
  205. constexpr TMat(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23) requires(kSize == 12)
  206. {
  207. auto& m = *this;
  208. m(0, 0) = m00;
  209. m(0, 1) = m01;
  210. m(0, 2) = m02;
  211. m(0, 3) = m03;
  212. m(1, 0) = m10;
  213. m(1, 1) = m11;
  214. m(1, 2) = m12;
  215. m(1, 3) = m13;
  216. m(2, 0) = m20;
  217. m(2, 1) = m21;
  218. m(2, 2) = m22;
  219. m(2, 3) = m23;
  220. }
  221. explicit constexpr TMat(const TMat<T, 4, 4>& m4) requires(kSize == 12)
  222. {
  223. ANKI_ASSERT(m4(3, 0) == T(0) && m4(3, 1) == T(0) && m4(3, 2) == T(0) && m4(3, 3) == T(1));
  224. m_rows[0] = m4.getRow(0);
  225. m_rows[1] = m4.getRow(1);
  226. m_rows[2] = m4.getRow(2);
  227. }
  228. explicit constexpr TMat(const TVec<T, 3>& translation, const TMat<T, 3, 3>& rotation,
  229. const TVec<T, 3>& scale = TVec<T, 3>(T(1))) requires(kSize == 12)
  230. {
  231. if(scale == TVec<T, 3>(T(1)))
  232. {
  233. setRotationPart(rotation);
  234. }
  235. else
  236. {
  237. const auto a = rotation.getColumn(0) * scale.x();
  238. const auto b = rotation.getColumn(1) * scale.y();
  239. const auto c = rotation.getColumn(2) * scale.z();
  240. setColumns(a, b, c);
  241. }
  242. setTranslationPart(translation);
  243. }
  244. explicit constexpr TMat(const TVec<T, 3>& translation, const TQuat<T>& q, const TVec<T, 3>& scale = TVec<T, 3>(T(1))) requires(kSize == 12)
  245. : TMat(translation, TMat<T, 3, 3>(q), scale)
  246. {
  247. }
  248. explicit constexpr TMat(const TVec<T, 3>& translation, const TEuler<T>& b, const TVec<T, 3>& scale = TVec<T, 3>(T(1))) requires(kSize == 12)
  249. : TMat(translation, TMat<T, 3, 3>(b), scale)
  250. {
  251. }
  252. explicit constexpr TMat(const TVec<T, 3>& translation, const TAxisang<T>& b, const TVec<T, 3>& scale = TVec<T, 3>(T(1))) requires(kSize == 12)
  253. : TMat(translation, TMat<T, 3, 3>(b), scale)
  254. {
  255. }
  256. explicit constexpr TMat(const TTransform<T>& t) requires(kSize == 12)
  257. : TMat(t.getOrigin().xyz(), t.getRotation().getRotationPart(), t.getScale().xyz())
  258. {
  259. }
  260. /// @}
  261. /// @name Accessors
  262. /// @{
  263. T& operator()(const U j, const U i)
  264. {
  265. return m_arr2[j][i];
  266. }
  267. [[nodiscard]] T operator()(const U j, const U i) const
  268. {
  269. return m_arr2[j][i];
  270. }
  271. T& operator[](const U n)
  272. {
  273. return m_arr1[n];
  274. }
  275. [[nodiscard]] T operator[](const U n) const
  276. {
  277. return m_arr1[n];
  278. }
  279. /// @}
  280. /// @name Operators with same type
  281. /// @{
  282. /// Copy.
  283. TMat& operator=(const TMat& b)
  284. {
  285. for(U i = 0; i < kRowCount; ++i)
  286. {
  287. m_rows[i] = b.m_rows[i];
  288. }
  289. return *this;
  290. }
  291. [[nodiscard]] TMat operator+(const TMat& b) const
  292. {
  293. TMat c;
  294. for(U i = 0; i < kRowCount; ++i)
  295. {
  296. c.m_rows[i] = m_rows[i] + b.m_rows[i];
  297. }
  298. return c;
  299. }
  300. TMat& operator+=(const TMat& b)
  301. {
  302. for(U i = 0; i < kRowCount; ++i)
  303. {
  304. m_rows[i] += b.m_rows[i];
  305. }
  306. return *this;
  307. }
  308. [[nodiscard]] TMat operator-(const TMat& b) const
  309. {
  310. TMat c;
  311. for(U i = 0; i < kRowCount; ++i)
  312. {
  313. c.m_rows[i] = m_rows[i] - b.m_rows[i];
  314. }
  315. return c;
  316. }
  317. TMat& operator-=(const TMat& b)
  318. {
  319. for(U i = 0; i < kRowCount; ++i)
  320. {
  321. m_rows[i] -= b.m_rows[i];
  322. }
  323. return *this;
  324. }
  325. [[nodiscard]] TMat operator*(const TMat& b) const requires(kIsSquare && !kIs4x4Simd)
  326. {
  327. TMat out;
  328. const TMat& a = *this;
  329. for(U j = 0; j < kTRowCount; j++)
  330. {
  331. for(U i = 0; i < kTColumnCount; i++)
  332. {
  333. out(j, i) = T(0);
  334. for(U k = 0; k < kTColumnCount; k++)
  335. {
  336. out(j, i) += a(j, k) * b(k, i);
  337. }
  338. }
  339. }
  340. return out;
  341. }
  342. #if ANKI_ENABLE_SIMD
  343. [[nodiscard]] TMat operator*(const TMat& b) const requires(kIs4x4Simd)
  344. {
  345. TMat out;
  346. const auto& m = *this;
  347. for(U i = 0; i < 4; i++)
  348. {
  349. # if ANKI_SIMD_SSE
  350. __m128 t1, t2;
  351. t1 = _mm_set1_ps(m(i, 0));
  352. t2 = _mm_mul_ps(b.m_simd[0], t1);
  353. t1 = _mm_set1_ps(m(i, 1));
  354. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[1], t1), t2);
  355. t1 = _mm_set1_ps(m(i, 2));
  356. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[2], t1), t2);
  357. t1 = _mm_set1_ps(m(i, 3));
  358. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[3], t1), t2);
  359. out.m_simd[i] = t2;
  360. # else
  361. float32x4_t t1, t2;
  362. t1 = vmovq_n_f32(m(i, 0));
  363. t2 = vmulq_f32(b.m_simd[0], t1);
  364. t1 = vmovq_n_f32(m(i, 1));
  365. t2 = vaddq_f32(vmulq_f32(b.m_simd[1], t1), t2);
  366. t1 = vmovq_n_f32(m(i, 2));
  367. t2 = vaddq_f32(vmulq_f32(b.m_simd[2], t1), t2);
  368. t1 = vmovq_n_f32(m(i, 3));
  369. t2 = vaddq_f32(vmulq_f32(b.m_simd[3], t1), t2);
  370. out.m_simd[i] = t2;
  371. # endif
  372. }
  373. return out;
  374. }
  375. #endif
  376. TMat& operator*=(const TMat& b)
  377. {
  378. (*this) = (*this) * b;
  379. return *this;
  380. }
  381. [[nodiscard]] Bool operator==(const TMat& b) const
  382. {
  383. for(U i = 0; i < N; i++)
  384. {
  385. if(!isZero<T>(m_arr1[i] - b.m_arr1[i]))
  386. {
  387. return false;
  388. }
  389. }
  390. return true;
  391. }
  392. [[nodiscard]] Bool operator!=(const TMat& b) const
  393. {
  394. for(U i = 0; i < N; i++)
  395. {
  396. if(!isZero<T>(m_arr1[i] - b.m_arr1[i]))
  397. {
  398. return true;
  399. }
  400. }
  401. return false;
  402. }
  403. /// @}
  404. /// @name Operators with T
  405. /// @{
  406. [[nodiscard]] TMat operator+(const T f) const
  407. {
  408. TMat out;
  409. for(U i = 0; i < kRowCount; ++i)
  410. {
  411. out.m_rows[i] = m_rows[i] + f;
  412. }
  413. return out;
  414. }
  415. TMat& operator+=(const T f)
  416. {
  417. for(U i = 0; i < kRowCount; ++i)
  418. {
  419. m_rows[i] += f;
  420. }
  421. return *this;
  422. }
  423. [[nodiscard]] TMat operator-(const T f) const
  424. {
  425. TMat out;
  426. for(U i = 0; i < kRowCount; ++i)
  427. {
  428. out.m_rows[i] = m_rows[i] - f;
  429. }
  430. return out;
  431. }
  432. TMat& operator-=(const T f)
  433. {
  434. for(U i = 0; i < kRowCount; ++i)
  435. {
  436. m_rows[i] -= f;
  437. }
  438. return *this;
  439. }
  440. [[nodiscard]] TMat operator*(const T f) const
  441. {
  442. TMat out;
  443. for(U i = 0; i < kRowCount; ++i)
  444. {
  445. out.m_rows[i] = m_rows[i] * f;
  446. }
  447. return out;
  448. }
  449. TMat& operator*=(const T f)
  450. {
  451. for(U i = 0; i < kRowCount; ++i)
  452. {
  453. m_rows[i] *= f;
  454. }
  455. return *this;
  456. }
  457. [[nodiscard]] TMat operator/(const T f) const
  458. {
  459. ANKI_ASSERT(f != T(0));
  460. TMat out;
  461. for(U i = 0; i < kRowCount; ++i)
  462. {
  463. out.m_rows[i] = m_rows[i] / f;
  464. }
  465. return out;
  466. }
  467. TMat& operator/=(const T f)
  468. {
  469. ANKI_ASSERT(f != T(0));
  470. for(U i = 0; i < kRowCount; ++i)
  471. {
  472. m_rows[i] /= f;
  473. }
  474. return *this;
  475. }
  476. /// @}
  477. /// @name Operators with other types
  478. /// @{
  479. [[nodiscard]] ColumnVec operator*(const RowVec& v) const requires(!kHasSimd)
  480. {
  481. const TMat& m = *this;
  482. ColumnVec out;
  483. for(U j = 0; j < kTRowCount; j++)
  484. {
  485. T sum = T(0);
  486. for(U i = 0; i < kTColumnCount; i++)
  487. {
  488. sum += m(j, i) * v[i];
  489. }
  490. out[j] = sum;
  491. }
  492. return out;
  493. }
  494. #if ANKI_SIMD_SSE
  495. [[nodiscard]] ColumnVec operator*(const RowVec& v) const requires(kIs4x4Simd)
  496. {
  497. __m128 a = _mm_mul_ps(m_simd[0], v.getSimd());
  498. __m128 b = _mm_mul_ps(m_simd[1], v.getSimd());
  499. __m128 c = _mm_mul_ps(m_simd[2], v.getSimd());
  500. __m128 d = _mm_mul_ps(m_simd[3], v.getSimd());
  501. a = _mm_hadd_ps(a, b);
  502. c = _mm_hadd_ps(c, d);
  503. return RowVec(_mm_hadd_ps(a, c));
  504. }
  505. [[nodiscard]] ColumnVec operator*(const RowVec& v) const requires(kIs3x4Simd)
  506. {
  507. __m128 a = _mm_mul_ps(m_simd[0], v.getSimd());
  508. __m128 b = _mm_mul_ps(m_simd[1], v.getSimd());
  509. __m128 c = _mm_mul_ps(m_simd[2], v.getSimd());
  510. a = _mm_hadd_ps(a, b);
  511. const RowVec d(_mm_hadd_ps(a, c));
  512. return ColumnVec(d[0], d[1], d[2] + d[3]);
  513. }
  514. #else
  515. [[nodiscard]] ColumnVec operator*(const RowVec& v) const requires(kHasSimd)
  516. {
  517. ColumnVec out;
  518. for(U i = 0; i < kTRowCount; i++)
  519. {
  520. out[i] = RowVec(m_simd[i]).dot(v);
  521. }
  522. return out;
  523. }
  524. #endif
  525. /// @}
  526. /// @name Other
  527. /// @{
  528. void setRow(const U j, const RowVec& v)
  529. {
  530. m_rows[j] = v;
  531. }
  532. void setRows(const RowVec& a, const RowVec& b, const RowVec& c)
  533. {
  534. setRow(0, a);
  535. setRow(1, b);
  536. setRow(2, c);
  537. }
  538. void setRows(const RowVec& a, const RowVec& b, const RowVec& c, const RowVec& d) requires(kTRowCount > 3)
  539. {
  540. setRows(a, b, c);
  541. setRow(3, d);
  542. }
  543. const RowVec& getRow(const U j) const
  544. {
  545. return m_rows[j];
  546. }
  547. void getRows(RowVec& a, RowVec& b, RowVec& c) const
  548. {
  549. a = getRow(0);
  550. b = getRow(1);
  551. c = getRow(2);
  552. }
  553. void getRows(RowVec& a, RowVec& b, RowVec& c, RowVec& d) const requires(kTRowCount > 3)
  554. {
  555. getRows(a, b, c);
  556. d = getRow(3);
  557. }
  558. void setColumn(const U i, const ColumnVec& v)
  559. {
  560. for(U j = 0; j < kTRowCount; j++)
  561. {
  562. m_arr2[j][i] = v[j];
  563. }
  564. }
  565. void setColumns(const ColumnVec& a, const ColumnVec& b, const ColumnVec& c)
  566. {
  567. setColumn(0, a);
  568. setColumn(1, b);
  569. setColumn(2, c);
  570. }
  571. void setColumns(const ColumnVec& a, const ColumnVec& b, const ColumnVec& c, const ColumnVec& d) requires(kTColumnCount > 3)
  572. {
  573. setColumns(a, b, c);
  574. setColumn(3, d);
  575. }
  576. [[nodiscard]] ColumnVec getColumn(const U i) const
  577. {
  578. ColumnVec out;
  579. for(U j = 0; j < kTRowCount; j++)
  580. {
  581. out[j] = m_arr2[j][i];
  582. }
  583. return out;
  584. }
  585. void getColumns(ColumnVec& a, ColumnVec& b, ColumnVec& c) const
  586. {
  587. a = getColumn(0);
  588. b = getColumn(1);
  589. c = getColumn(2);
  590. }
  591. void getColumns(ColumnVec& a, ColumnVec& b, ColumnVec& c, ColumnVec& d) const requires(kTColumnCount > 3)
  592. {
  593. getColumns(a, b, c);
  594. d = getColumn(3);
  595. }
  596. /// Get 1st column
  597. [[nodiscard]] ColumnVec getXAxis() const
  598. {
  599. return getColumn(0);
  600. }
  601. /// Get 2nd column
  602. [[nodiscard]] ColumnVec getYAxis() const
  603. {
  604. return getColumn(1);
  605. }
  606. /// Get 3rd column
  607. [[nodiscard]] ColumnVec getZAxis() const
  608. {
  609. return getColumn(2);
  610. }
  611. /// Set 1st column
  612. void setXAxis(const ColumnVec& v)
  613. {
  614. setColumn(0, v);
  615. }
  616. /// Set 2nd column
  617. void setYAxis(const ColumnVec& v)
  618. {
  619. setColumn(1, v);
  620. }
  621. /// Set 3rd column
  622. void setZAxis(const ColumnVec& v)
  623. {
  624. setColumn(2, v);
  625. }
  626. void setRotationX(const T rad)
  627. {
  628. TMat& m = *this;
  629. T sintheta, costheta;
  630. sinCos(rad, sintheta, costheta);
  631. m(0, 0) = T(1);
  632. m(0, 1) = T(0);
  633. m(0, 2) = T(0);
  634. m(1, 0) = T(0);
  635. m(1, 1) = costheta;
  636. m(1, 2) = -sintheta;
  637. m(2, 0) = T(0);
  638. m(2, 1) = sintheta;
  639. m(2, 2) = costheta;
  640. }
  641. void setRotationY(const T rad)
  642. {
  643. TMat& m = *this;
  644. T sintheta, costheta;
  645. sinCos(rad, sintheta, costheta);
  646. m(0, 0) = costheta;
  647. m(0, 1) = T(0);
  648. m(0, 2) = sintheta;
  649. m(1, 0) = T(0);
  650. m(1, 1) = T(1);
  651. m(1, 2) = T(0);
  652. m(2, 0) = -sintheta;
  653. m(2, 1) = T(0);
  654. m(2, 2) = costheta;
  655. }
  656. void setRotationZ(const T rad)
  657. {
  658. TMat& m = *this;
  659. T sintheta, costheta;
  660. sinCos(rad, sintheta, costheta);
  661. m(0, 0) = costheta;
  662. m(0, 1) = -sintheta;
  663. m(0, 2) = T(0);
  664. m(1, 0) = sintheta;
  665. m(1, 1) = costheta;
  666. m(1, 2) = T(0);
  667. m(2, 0) = T(0);
  668. m(2, 1) = T(0);
  669. m(2, 2) = T(1);
  670. }
  671. /// It rotates "this" in the axis defined by the rotation AND not the world axis.
  672. void rotateXAxis(const T rad)
  673. {
  674. TMat& m = *this;
  675. // If we analize the mat3 we can extract the 3 unit vectors rotated by the mat3. The 3 rotated vectors are in mat's columns. This means that:
  676. // mat3.colomn[0] == i * mat3. rotateXAxis() rotates rad angle not from i vector (aka x axis) but from the vector from colomn 0
  677. // NOTE: See the clean code from < r664
  678. T sina, cosa;
  679. sinCos(rad, sina, cosa);
  680. // zAxis = zAxis*cosa - yAxis*sina;
  681. m(0, 2) = m(0, 2) * cosa - m(0, 1) * sina;
  682. m(1, 2) = m(1, 2) * cosa - m(1, 1) * sina;
  683. m(2, 2) = m(2, 2) * cosa - m(2, 1) * sina;
  684. // zAxis.normalize();
  685. T len = sqrt(m(0, 2) * m(0, 2) + m(1, 2) * m(1, 2) + m(2, 2) * m(2, 2));
  686. m(0, 2) /= len;
  687. m(1, 2) /= len;
  688. m(2, 2) /= len;
  689. // yAxis = zAxis * xAxis;
  690. m(0, 1) = m(1, 2) * m(2, 0) - m(2, 2) * m(1, 0);
  691. m(1, 1) = m(2, 2) * m(0, 0) - m(0, 2) * m(2, 0);
  692. m(2, 1) = m(0, 2) * m(1, 0) - m(1, 2) * m(0, 0);
  693. // yAxis.normalize();
  694. }
  695. /// @copybrief rotateXAxis
  696. void rotateYAxis(const T rad)
  697. {
  698. TMat& m = *this;
  699. // NOTE: See the clean code from < r664
  700. T sina, cosa;
  701. sinCos(rad, sina, cosa);
  702. // zAxis = zAxis*cosa + xAxis*sina;
  703. m(0, 2) = m(0, 2) * cosa + m(0, 0) * sina;
  704. m(1, 2) = m(1, 2) * cosa + m(1, 0) * sina;
  705. m(2, 2) = m(2, 2) * cosa + m(2, 0) * sina;
  706. // zAxis.normalize();
  707. T len = sqrt(m(0, 2) * m(0, 2) + m(1, 2) * m(1, 2) + m(2, 2) * m(2, 2));
  708. m(0, 2) /= len;
  709. m(1, 2) /= len;
  710. m(2, 2) /= len;
  711. // xAxis = (zAxis*yAxis) * -1.0f;
  712. m(0, 0) = m(2, 2) * m(1, 1) - m(1, 2) * m(2, 1);
  713. m(1, 0) = m(0, 2) * m(2, 1) - m(2, 2) * m(0, 1);
  714. m(2, 0) = m(1, 2) * m(0, 1) - m(0, 2) * m(1, 1);
  715. }
  716. /// @copybrief rotateXAxis
  717. void rotateZAxis(const T rad)
  718. {
  719. TMat& m = *this;
  720. // NOTE: See the clean code from < r664
  721. T sina, cosa;
  722. sinCos(rad, sina, cosa);
  723. // xAxis = xAxis*cosa + yAxis*sina;
  724. m(0, 0) = m(0, 0) * cosa + m(0, 1) * sina;
  725. m(1, 0) = m(1, 0) * cosa + m(1, 1) * sina;
  726. m(2, 0) = m(2, 0) * cosa + m(2, 1) * sina;
  727. // xAxis.normalize();
  728. T len = sqrt(m(0, 0) * m(0, 0) + m(1, 0) * m(1, 0) + m(2, 0) * m(2, 0));
  729. m(0, 0) /= len;
  730. m(1, 0) /= len;
  731. m(2, 0) /= len;
  732. // yAxis = zAxis*xAxis;
  733. m(0, 1) = m(1, 2) * m(2, 0) - m(2, 2) * m(1, 0);
  734. m(1, 1) = m(2, 2) * m(0, 0) - m(0, 2) * m(2, 0);
  735. m(2, 1) = m(0, 2) * m(1, 0) - m(1, 2) * m(0, 0);
  736. }
  737. void setRotationPart(const TMat<T, 3, 3>& m3)
  738. {
  739. TMat& m = *this;
  740. for(U j = 0; j < 3; j++)
  741. {
  742. for(U i = 0; i < 3; i++)
  743. {
  744. m(j, i) = m3(j, i);
  745. }
  746. }
  747. }
  748. [[nodiscard]] TMat<T, 3, 3> getRotationPart() const
  749. {
  750. const TMat& m = *this;
  751. TMat<T, 3, 3> m3;
  752. m3(0, 0) = m(0, 0);
  753. m3(0, 1) = m(0, 1);
  754. m3(0, 2) = m(0, 2);
  755. m3(1, 0) = m(1, 0);
  756. m3(1, 1) = m(1, 1);
  757. m3(1, 2) = m(1, 2);
  758. m3(2, 0) = m(2, 0);
  759. m3(2, 1) = m(2, 1);
  760. m3(2, 2) = m(2, 2);
  761. return m3;
  762. }
  763. void setTranslationPart(const TVec<T, 3>& v)
  764. {
  765. auto c = getColumn(3);
  766. c.x() = v.x();
  767. c.y() = v.y();
  768. c.z() = v.z();
  769. setColumn(3, c);
  770. }
  771. [[nodiscard]] ColumnVec getTranslationPart() const requires(kTColumnCount == 4)
  772. {
  773. return getColumn(3);
  774. }
  775. TMat& setDiagonal(ColumnVec diag)
  776. {
  777. TMat& m = *this;
  778. for(U32 r = 0; r < kRowCount; ++r)
  779. {
  780. m(r, r) = diag[r];
  781. }
  782. return *this;
  783. }
  784. ColumnVec getDiagonal() const
  785. {
  786. TMat& m = *this;
  787. ColumnVec diag;
  788. for(U32 r = 0; r < kRowCount; ++r)
  789. {
  790. diag[r] = m(r, r);
  791. }
  792. return diag;
  793. }
  794. [[nodiscard]] TMat reorthogonalize() const requires(kTRowCount == 3)
  795. {
  796. // There are 2 methods, the standard and the Gram-Schmidt method with a twist for zAxis. This uses the 2nd. For the first see < r664
  797. ColumnVec xAxis, yAxis, zAxis;
  798. getColumns(xAxis, yAxis, zAxis);
  799. xAxis = xAxis.normalize();
  800. yAxis = yAxis - (xAxis * xAxis.dot(yAxis));
  801. yAxis = yAxis.normalize();
  802. zAxis = xAxis.cross(yAxis);
  803. TMat out = *this;
  804. out.setColumns(xAxis, yAxis, zAxis);
  805. return out;
  806. }
  807. [[nodiscard]] TMat transpose() const requires(kIsSquare && !kHasSimd)
  808. {
  809. TMat out;
  810. for(U j = 0; j < kTRowCount; j++)
  811. {
  812. for(U i = 0; i < kTColumnCount; i++)
  813. {
  814. out.m_arr2[i][j] = m_arr2[j][i];
  815. }
  816. }
  817. return out;
  818. }
  819. #if ANKI_ENABLE_SIMD
  820. [[nodiscard]] TMat transpose() const requires(kIsSquare&& kHasSimd)
  821. {
  822. TMat out;
  823. # if ANKI_SIMD_SSE
  824. const __m128 tmp0 = _mm_shuffle_ps(m_simd[0], m_simd[1], 0x44);
  825. const __m128 tmp2 = _mm_shuffle_ps(m_simd[0], m_simd[1], 0xEE);
  826. const __m128 tmp1 = _mm_shuffle_ps(m_simd[2], m_simd[3], 0x44);
  827. const __m128 tmp3 = _mm_shuffle_ps(m_simd[2], m_simd[3], 0xEE);
  828. out.m_simd[0] = _mm_shuffle_ps(tmp0, tmp1, 0x88);
  829. out.m_simd[1] = _mm_shuffle_ps(tmp0, tmp1, 0xDD);
  830. out.m_simd[2] = _mm_shuffle_ps(tmp2, tmp3, 0x88);
  831. out.m_simd[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
  832. # else
  833. const float32x4x2_t row01 = vtrnq_f32(m_simd[0], m_simd[1]);
  834. const float32x4x2_t row23 = vtrnq_f32(m_simd[2], m_simd[3]);
  835. out.m_simd[0] = vcombine_f32(vget_low_f32(row01.val[0]), vget_low_f32(row23.val[0]));
  836. out.m_simd[1] = vcombine_f32(vget_low_f32(row01.val[1]), vget_low_f32(row23.val[1]));
  837. out.m_simd[2] = vcombine_f32(vget_high_f32(row01.val[0]), vget_high_f32(row23.val[0]));
  838. out.m_simd[3] = vcombine_f32(vget_high_f32(row01.val[1]), vget_high_f32(row23.val[1]));
  839. # endif
  840. return out;
  841. }
  842. #endif
  843. void transposeRotationPart()
  844. {
  845. for(U j = 0; j < 3; j++)
  846. {
  847. for(U i = j + 1; i < 3; i++)
  848. {
  849. const T tmp = m_arr2[j][i];
  850. m_arr2[j][i] = m_arr2[i][j];
  851. m_arr2[i][j] = tmp;
  852. }
  853. }
  854. }
  855. [[nodiscard]] T getDet() const requires(kSize == 9)
  856. {
  857. const auto& m = *this;
  858. // For the accurate method see < r664
  859. return m(0, 0) * (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)) - m(0, 1) * (m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0))
  860. + m(0, 2) * (m(0, 1) * m(2, 1) - m(1, 1) * m(2, 0));
  861. }
/// Get the determinant of a 4x4 matrix.
/// The result is a fully expanded sum of 24 signed products. Every product takes exactly one element from each of the rows 0 to 3.
  862. [[nodiscard]] T getDet() const requires(kSize == 16)
  863. {
  864. const auto& t = *this;
// Terms are grouped by the row 3 element they use: the first 2 lines use t(3, 0), then t(3, 1), t(3, 2) and finally t(3, 3)
  865. return t(0, 3) * t(1, 2) * t(2, 1) * t(3, 0) - t(0, 2) * t(1, 3) * t(2, 1) * t(3, 0) - t(0, 3) * t(1, 1) * t(2, 2) * t(3, 0)
  866. + t(0, 1) * t(1, 3) * t(2, 2) * t(3, 0) + t(0, 2) * t(1, 1) * t(2, 3) * t(3, 0) - t(0, 1) * t(1, 2) * t(2, 3) * t(3, 0)
  867. - t(0, 3) * t(1, 2) * t(2, 0) * t(3, 1) + t(0, 2) * t(1, 3) * t(2, 0) * t(3, 1) + t(0, 3) * t(1, 0) * t(2, 2) * t(3, 1)
  868. - t(0, 0) * t(1, 3) * t(2, 2) * t(3, 1) - t(0, 2) * t(1, 0) * t(2, 3) * t(3, 1) + t(0, 0) * t(1, 2) * t(2, 3) * t(3, 1)
  869. + t(0, 3) * t(1, 1) * t(2, 0) * t(3, 2) - t(0, 1) * t(1, 3) * t(2, 0) * t(3, 2) - t(0, 3) * t(1, 0) * t(2, 1) * t(3, 2)
  870. + t(0, 0) * t(1, 3) * t(2, 1) * t(3, 2) + t(0, 1) * t(1, 0) * t(2, 3) * t(3, 2) - t(0, 0) * t(1, 1) * t(2, 3) * t(3, 2)
  871. - t(0, 2) * t(1, 1) * t(2, 0) * t(3, 3) + t(0, 1) * t(1, 2) * t(2, 0) * t(3, 3) + t(0, 2) * t(1, 0) * t(2, 1) * t(3, 3)
  872. - t(0, 0) * t(1, 2) * t(2, 1) * t(3, 3) - t(0, 1) * t(1, 0) * t(2, 2) * t(3, 3) + t(0, 0) * t(1, 1) * t(2, 2) * t(3, 3);
  873. }
  874. [[nodiscard]] TMat invert() const requires(kSize == 9)
  875. {
  876. // Using Gramer's method Inv(A) = (1 / getDet(A)) * Adj(A)
  877. const TMat& m = *this;
  878. TMat r;
  879. // compute determinant
  880. const T cofactor0 = m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1);
  881. const T cofactor3 = m(0, 2) * m(2, 1) - m(0, 1) * m(2, 2);
  882. const T cofactor6 = m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1);
  883. const T det = m(0, 0) * cofactor0 + m(1, 0) * cofactor3 + m(2, 0) * cofactor6;
  884. ANKI_ASSERT(!isZero<T>(det)); // Cannot invert det == 0
  885. // create adjoint matrix and multiply by 1/det to get inverse
  886. const T invDet = T(1) / det;
  887. r(0, 0) = invDet * cofactor0;
  888. r(0, 1) = invDet * cofactor3;
  889. r(0, 2) = invDet * cofactor6;
  890. r(1, 0) = invDet * (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2));
  891. r(1, 1) = invDet * (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0));
  892. r(1, 2) = invDet * (m(0, 2) * m(1, 0) - m(0, 0) * m(1, 2));
  893. r(2, 0) = invDet * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0));
  894. r(2, 1) = invDet * (m(0, 1) * m(2, 0) - m(0, 0) * m(2, 1));
  895. r(2, 2) = invDet * (m(0, 0) * m(1, 1) - m(0, 1) * m(1, 0));
  896. return r;
  897. }
  898. /// Invert using Cramer's rule
  899. [[nodiscard]] TMat invert() const requires(kSize == 16)
  900. {
  901. Array<T, 12> tmp;
  902. const auto& in = (*this);
  903. TMat m4;
  904. tmp[0] = in(2, 2) * in(3, 3);
  905. tmp[1] = in(3, 2) * in(2, 3);
  906. tmp[2] = in(1, 2) * in(3, 3);
  907. tmp[3] = in(3, 2) * in(1, 3);
  908. tmp[4] = in(1, 2) * in(2, 3);
  909. tmp[5] = in(2, 2) * in(1, 3);
  910. tmp[6] = in(0, 2) * in(3, 3);
  911. tmp[7] = in(3, 2) * in(0, 3);
  912. tmp[8] = in(0, 2) * in(2, 3);
  913. tmp[9] = in(2, 2) * in(0, 3);
  914. tmp[10] = in(0, 2) * in(1, 3);
  915. tmp[11] = in(1, 2) * in(0, 3);
  916. m4(0, 0) = tmp[0] * in(1, 1) + tmp[3] * in(2, 1) + tmp[4] * in(3, 1);
  917. m4(0, 0) -= tmp[1] * in(1, 1) + tmp[2] * in(2, 1) + tmp[5] * in(3, 1);
  918. m4(0, 1) = tmp[1] * in(0, 1) + tmp[6] * in(2, 1) + tmp[9] * in(3, 1);
  919. m4(0, 1) -= tmp[0] * in(0, 1) + tmp[7] * in(2, 1) + tmp[8] * in(3, 1);
  920. m4(0, 2) = tmp[2] * in(0, 1) + tmp[7] * in(1, 1) + tmp[10] * in(3, 1);
  921. m4(0, 2) -= tmp[3] * in(0, 1) + tmp[6] * in(1, 1) + tmp[11] * in(3, 1);
  922. m4(0, 3) = tmp[5] * in(0, 1) + tmp[8] * in(1, 1) + tmp[11] * in(2, 1);
  923. m4(0, 3) -= tmp[4] * in(0, 1) + tmp[9] * in(1, 1) + tmp[10] * in(2, 1);
  924. m4(1, 0) = tmp[1] * in(1, 0) + tmp[2] * in(2, 0) + tmp[5] * in(3, 0);
  925. m4(1, 0) -= tmp[0] * in(1, 0) + tmp[3] * in(2, 0) + tmp[4] * in(3, 0);
  926. m4(1, 1) = tmp[0] * in(0, 0) + tmp[7] * in(2, 0) + tmp[8] * in(3, 0);
  927. m4(1, 1) -= tmp[1] * in(0, 0) + tmp[6] * in(2, 0) + tmp[9] * in(3, 0);
  928. m4(1, 2) = tmp[3] * in(0, 0) + tmp[6] * in(1, 0) + tmp[11] * in(3, 0);
  929. m4(1, 2) -= tmp[2] * in(0, 0) + tmp[7] * in(1, 0) + tmp[10] * in(3, 0);
  930. m4(1, 3) = tmp[4] * in(0, 0) + tmp[9] * in(1, 0) + tmp[10] * in(2, 0);
  931. m4(1, 3) -= tmp[5] * in(0, 0) + tmp[8] * in(1, 0) + tmp[11] * in(2, 0);
  932. tmp[0] = in(2, 0) * in(3, 1);
  933. tmp[1] = in(3, 0) * in(2, 1);
  934. tmp[2] = in(1, 0) * in(3, 1);
  935. tmp[3] = in(3, 0) * in(1, 1);
  936. tmp[4] = in(1, 0) * in(2, 1);
  937. tmp[5] = in(2, 0) * in(1, 1);
  938. tmp[6] = in(0, 0) * in(3, 1);
  939. tmp[7] = in(3, 0) * in(0, 1);
  940. tmp[8] = in(0, 0) * in(2, 1);
  941. tmp[9] = in(2, 0) * in(0, 1);
  942. tmp[10] = in(0, 0) * in(1, 1);
  943. tmp[11] = in(1, 0) * in(0, 1);
  944. m4(2, 0) = tmp[0] * in(1, 3) + tmp[3] * in(2, 3) + tmp[4] * in(3, 3);
  945. m4(2, 0) -= tmp[1] * in(1, 3) + tmp[2] * in(2, 3) + tmp[5] * in(3, 3);
  946. m4(2, 1) = tmp[1] * in(0, 3) + tmp[6] * in(2, 3) + tmp[9] * in(3, 3);
  947. m4(2, 1) -= tmp[0] * in(0, 3) + tmp[7] * in(2, 3) + tmp[8] * in(3, 3);
  948. m4(2, 2) = tmp[2] * in(0, 3) + tmp[7] * in(1, 3) + tmp[10] * in(3, 3);
  949. m4(2, 2) -= tmp[3] * in(0, 3) + tmp[6] * in(1, 3) + tmp[11] * in(3, 3);
  950. m4(2, 3) = tmp[5] * in(0, 3) + tmp[8] * in(1, 3) + tmp[11] * in(2, 3);
  951. m4(2, 3) -= tmp[4] * in(0, 3) + tmp[9] * in(1, 3) + tmp[10] * in(2, 3);
  952. m4(3, 0) = tmp[2] * in(2, 2) + tmp[5] * in(3, 2) + tmp[1] * in(1, 2);
  953. m4(3, 0) -= tmp[4] * in(3, 2) + tmp[0] * in(1, 2) + tmp[3] * in(2, 2);
  954. m4(3, 1) = tmp[8] * in(3, 2) + tmp[0] * in(0, 2) + tmp[7] * in(2, 2);
  955. m4(3, 1) -= tmp[6] * in(2, 2) + tmp[9] * in(3, 2) + tmp[1] * in(0, 2);
  956. m4(3, 2) = tmp[6] * in(1, 2) + tmp[11] * in(3, 2) + tmp[3] * in(0, 2);
  957. m4(3, 2) -= tmp[10] * in(3, 2) + tmp[2] * in(0, 2) + tmp[7] * in(1, 2);
  958. m4(3, 3) = tmp[10] * in(2, 2) + tmp[4] * in(0, 2) + tmp[9] * in(1, 2);
  959. m4(3, 3) -= tmp[8] * in(1, 2) + tmp[11] * in(2, 2) + tmp[5] * in(0, 2);
  960. T det = in(0, 0) * m4(0, 0) + in(1, 0) * m4(0, 1) + in(2, 0) * m4(0, 2) + in(3, 0) * m4(0, 3);
  961. ANKI_ASSERT(!isZero<T>(det)); // Cannot invert, det == 0
  962. det = T(1) / det;
  963. m4 *= det;
  964. return m4;
  965. }
  966. /// 12 muls, 27 adds. Something like m4 = m0 * m1 but without touching the 4rth row and allot faster
  967. [[nodiscard]] static TMat combineTransformations(const TMat& m0, const TMat& m1) requires(kSize == 16)
  968. {
  969. // See the clean code in < r664
  970. // one of the 2 mat4 doesnt represent transformation
  971. ANKI_ASSERT(isZero<T>(m0(3, 0) + m0(3, 1) + m0(3, 2) + m0(3, 3) - T(1)) && isZero<T>(m1(3, 0) + m1(3, 1) + m1(3, 2) + m1(3, 3) - T(1)));
  972. TMat m4;
  973. m4(0, 0) = m0(0, 0) * m1(0, 0) + m0(0, 1) * m1(1, 0) + m0(0, 2) * m1(2, 0);
  974. m4(0, 1) = m0(0, 0) * m1(0, 1) + m0(0, 1) * m1(1, 1) + m0(0, 2) * m1(2, 1);
  975. m4(0, 2) = m0(0, 0) * m1(0, 2) + m0(0, 1) * m1(1, 2) + m0(0, 2) * m1(2, 2);
  976. m4(1, 0) = m0(1, 0) * m1(0, 0) + m0(1, 1) * m1(1, 0) + m0(1, 2) * m1(2, 0);
  977. m4(1, 1) = m0(1, 0) * m1(0, 1) + m0(1, 1) * m1(1, 1) + m0(1, 2) * m1(2, 1);
  978. m4(1, 2) = m0(1, 0) * m1(0, 2) + m0(1, 1) * m1(1, 2) + m0(1, 2) * m1(2, 2);
  979. m4(2, 0) = m0(2, 0) * m1(0, 0) + m0(2, 1) * m1(1, 0) + m0(2, 2) * m1(2, 0);
  980. m4(2, 1) = m0(2, 0) * m1(0, 1) + m0(2, 1) * m1(1, 1) + m0(2, 2) * m1(2, 1);
  981. m4(2, 2) = m0(2, 0) * m1(0, 2) + m0(2, 1) * m1(1, 2) + m0(2, 2) * m1(2, 2);
  982. m4(0, 3) = m0(0, 0) * m1(0, 3) + m0(0, 1) * m1(1, 3) + m0(0, 2) * m1(2, 3) + m0(0, 3);
  983. m4(1, 3) = m0(1, 0) * m1(0, 3) + m0(1, 1) * m1(1, 3) + m0(1, 2) * m1(2, 3) + m0(1, 3);
  984. m4(2, 3) = m0(2, 0) * m1(0, 3) + m0(2, 1) * m1(1, 3) + m0(2, 2) * m1(2, 3) + m0(2, 3);
  985. m4(3, 0) = m4(3, 1) = m4(3, 2) = T(0);
  986. m4(3, 3) = T(1);
  987. return m4;
  988. }
  989. /// Create a new matrix that is equivalent to Mat4(this)*Mat4(b)
  990. [[nodiscard]] TMat combineTransformations(const TMat& b) const requires(kSize == 12 && !kHasSimd)
  991. {
  992. const auto& a = *this;
  993. TMat c;
  994. c(0, 0) = a(0, 0) * b(0, 0) + a(0, 1) * b(1, 0) + a(0, 2) * b(2, 0);
  995. c(0, 1) = a(0, 0) * b(0, 1) + a(0, 1) * b(1, 1) + a(0, 2) * b(2, 1);
  996. c(0, 2) = a(0, 0) * b(0, 2) + a(0, 1) * b(1, 2) + a(0, 2) * b(2, 2);
  997. c(1, 0) = a(1, 0) * b(0, 0) + a(1, 1) * b(1, 0) + a(1, 2) * b(2, 0);
  998. c(1, 1) = a(1, 0) * b(0, 1) + a(1, 1) * b(1, 1) + a(1, 2) * b(2, 1);
  999. c(1, 2) = a(1, 0) * b(0, 2) + a(1, 1) * b(1, 2) + a(1, 2) * b(2, 2);
  1000. c(2, 0) = a(2, 0) * b(0, 0) + a(2, 1) * b(1, 0) + a(2, 2) * b(2, 0);
  1001. c(2, 1) = a(2, 0) * b(0, 1) + a(2, 1) * b(1, 1) + a(2, 2) * b(2, 1);
  1002. c(2, 2) = a(2, 0) * b(0, 2) + a(2, 1) * b(1, 2) + a(2, 2) * b(2, 2);
  1003. c(0, 3) = a(0, 0) * b(0, 3) + a(0, 1) * b(1, 3) + a(0, 2) * b(2, 3) + a(0, 3);
  1004. c(1, 3) = a(1, 0) * b(0, 3) + a(1, 1) * b(1, 3) + a(1, 2) * b(2, 3) + a(1, 3);
  1005. c(2, 3) = a(2, 0) * b(0, 3) + a(2, 1) * b(1, 3) + a(2, 2) * b(2, 3) + a(2, 3);
  1006. return c;
  1007. }
  #if ANKI_ENABLE_SIMD
  /// Create a new matrix that is equivalent to Mat4(this)*Mat4(b). SIMD implementation (SSE, otherwise NEON).
  [[nodiscard]] TMat combineTransformations(const TMat& b) const requires(kSize == 12 && kHasSimd)
  {
      const auto& a = *this;
      TMat c;
      for(U row = 0; row < 3; ++row)
      {
          // Only the last element of this vector is non-zero: it adds a(row, 3) to the last column of the result
          TVec<T, 4> lastColumn(T(0), T(0), T(0), a(row, 3));
  #	if ANKI_SIMD_SSE
          // Accumulate the rows of b, each one weighted by the matching element of the current row of a
          __m128 acc = _mm_mul_ps(b.m_simd[0], _mm_set1_ps(a(row, 0)));
          acc = _mm_add_ps(_mm_mul_ps(b.m_simd[1], _mm_set1_ps(a(row, 1))), acc);
          acc = _mm_add_ps(_mm_mul_ps(b.m_simd[2], _mm_set1_ps(a(row, 2))), acc);
          c.m_simd[row] = _mm_add_ps(lastColumn.getSimd(), acc);
  #	else
          // Accumulate the rows of b, each one weighted by the matching element of the current row of a
          float32x4_t acc = vmulq_f32(b.m_simd[0], vdupq_n_f32(a(row, 0)));
          acc = vaddq_f32(vmulq_f32(b.m_simd[1], vdupq_n_f32(a(row, 1))), acc);
          acc = vaddq_f32(vmulq_f32(b.m_simd[2], vdupq_n_f32(a(row, 2))), acc);
          c.m_simd[row] = vaddq_f32(lastColumn.getSimd(), acc);
  #	endif
      }
      return c;
  }
  #endif
  1045. /// Calculate a perspective projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
  1046. /// Same as D3DXMatrixPerspectiveFovRH
  1047. [[nodiscard]] static TMat calculatePerspectiveProjectionMatrix(T fovX, T fovY, T near, T far) requires(kSize == 16)
  1048. {
  1049. ANKI_ASSERT(fovX > T(0) && fovY > T(0) && near > T(0) && far > T(0));
  1050. const T g = near - far;
  1051. const T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
  1052. TMat proj;
  1053. proj(0, 0) = f * (fovY / fovX); // = f/aspectRatio;
  1054. proj(0, 1) = T(0);
  1055. proj(0, 2) = T(0);
  1056. proj(0, 3) = T(0);
  1057. proj(1, 0) = T(0);
  1058. proj(1, 1) = f;
  1059. proj(1, 2) = T(0);
  1060. proj(1, 3) = T(0);
  1061. proj(2, 0) = T(0);
  1062. proj(2, 1) = T(0);
  1063. proj(2, 2) = far / g;
  1064. proj(2, 3) = (far * near) / g;
  1065. proj(3, 0) = T(0);
  1066. proj(3, 1) = T(0);
  1067. proj(3, 2) = T(-1);
  1068. proj(3, 3) = T(0);
  1069. return proj;
  1070. }
  1071. /// Calculate an orthographic projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
  1072. /// Same as D3DXMatrixOrthoOffCenterRH.
  1073. [[nodiscard]] static TMat calculateOrthographicProjectionMatrix(T right, T left, T top, T bottom, T near, T far) requires(kSize == 16)
  1074. {
  1075. ANKI_ASSERT(right != T(0) && left != T(0) && top != T(0) && bottom != T(0) && near != T(0) && far != T(0));
  1076. const T difx = right - left;
  1077. const T dify = top - bottom;
  1078. const T difz = far - near;
  1079. const T tx = -(right + left) / difx;
  1080. const T ty = -(top + bottom) / dify;
  1081. const T tz = -near / difz;
  1082. TMat m;
  1083. m(0, 0) = T(2) / difx;
  1084. m(0, 1) = T(0);
  1085. m(0, 2) = T(0);
  1086. m(0, 3) = tx;
  1087. m(1, 0) = T(0);
  1088. m(1, 1) = T(2) / dify;
  1089. m(1, 2) = T(0);
  1090. m(1, 3) = ty;
  1091. m(2, 0) = T(0);
  1092. m(2, 1) = T(0);
  1093. m(2, 2) = T(-1) / difz;
  1094. m(2, 3) = tz;
  1095. m(3, 0) = T(0);
  1096. m(3, 1) = T(0);
  1097. m(3, 2) = T(0);
  1098. m(3, 3) = T(1);
  1099. return m;
  1100. }
  1101. /// Calculate a perspective projection matrix. The z is reversed and mapped in [1, 0] range just like DX and Vulkan.
  1102. /// Same as D3DXMatrixPerspectiveFovRH but z reversed
  1103. [[nodiscard]] static TMat calculatePerspectiveProjectionMatrixReverseZ(T fovX, T fovY, T near, T far) requires(kSize == 16)
  1104. {
  1105. ANKI_ASSERT(fovX > T(0) && fovY > T(0) && near > T(0) && far > T(0));
  1106. const T g = near - far;
  1107. const T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
  1108. TMat proj;
  1109. proj(0, 0) = f * (fovY / fovX); // = f/aspectRatio;
  1110. proj(0, 1) = T(0);
  1111. proj(0, 2) = T(0);
  1112. proj(0, 3) = T(0);
  1113. proj(1, 0) = T(0);
  1114. proj(1, 1) = f;
  1115. proj(1, 2) = T(0);
  1116. proj(1, 3) = T(0);
  1117. proj(2, 0) = T(0);
  1118. proj(2, 1) = T(0);
  1119. proj(2, 2) = far / g;
  1120. proj(2, 3) = (far * near) / g;
  1121. proj(3, 0) = T(0);
  1122. proj(3, 1) = T(0);
  1123. proj(3, 2) = T(-1);
  1124. proj(3, 3) = T(0);
  1125. const TMat rev(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(-1), T(1), T(0), T(0), T(0), T(1));
  1126. return rev * proj;
  1127. }
  1128. /// Given the parameters that construct a projection matrix extract 4 values that can be used to unproject a point from NDC to view space.
  1129. /// @code
  1130. /// Vec4 unprojParams = calculatePerspectiveUnprojectionParams(...);
  1131. /// F32 z = unprojParams.z() / (unprojParams.w() + depth);
  1132. /// Vec2 xy = ndc.xy() * unprojParams.xy() * z;
  1133. /// Vec3 posViewSpace(xy, z);
  1134. /// @endcode
  1135. [[nodiscard]] static TVec<T, 4> calculatePerspectiveUnprojectionParams(T fovX, T fovY, T near, T far) requires(kSize == 16)
  1136. {
  1137. TVec<T, 4> out;
  1138. const T g = near - far;
  1139. const T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
  1140. const T m00 = f * (fovY / fovX);
  1141. const T m11 = f;
  1142. const T m22 = far / g;
  1143. const T m23 = (far * near) / g;
  1144. // First, clip = (m * Pv) where Pv is the view space position.
  1145. // ndc.z = clip.z / clip.w = (m22 * Pv.z + m23) / -Pv.z. Note that ndc.z == depth in zero_to_one projection.
  1146. // Solving that for Pv.z we get
  1147. // Pv.z = A / (depth + B)
  1148. // where A = -m23 and B = m22
  1149. // so we save the A and B in the projection params vector
  1150. out.z() = -m23;
  1151. out.w() = m22;
  1152. // Using the same logic the Pv.x = x' * w / m00
  1153. // so Pv.x = x' * Pv.z * (-1 / m00)
  1154. out.x() = -T(T(1)) / m00;
  1155. // Same for y
  1156. out.y() = -T(T(1)) / m11;
  1157. return out;
  1158. }
  1159. /// Assuming this is a projection matrix extract the unprojection parameters. See calculatePerspectiveUnprojectionParams for more info.
  1160. [[nodiscard]] TVec<T, 4> extractPerspectiveUnprojectionParams() const requires(kSize == 16)
  1161. {
  1162. TVec<T, 4> out;
  1163. const auto& m = *this;
  1164. out.z() = -m(2, 3);
  1165. out.w() = m(2, 2);
  1166. out.x() = -T(T(1)) / m(0, 0);
  1167. out.y() = -T(T(1)) / m(1, 1);
  1168. return out;
  1169. }
  1170. /// If we suppose this matrix represents a transformation, return the inverted transformation
  1171. [[nodiscard]] TMat invertTransformation() const requires(kSize == 16 || kSize == 12)
  1172. {
  1173. const TVec<T, 3> scale = extractScale();
  1174. const TVec<T, 3> invScale = T(1) / scale;
  1175. TMat<T, 3, 3> rot;
  1176. rot.setRows(getRow(0).xyz() * invScale, getRow(1).xyz() * invScale, getRow(2).xyz() * invScale);
  1177. const TMat<T, 3, 3> invRot = rot.transpose();
  1178. const TVec<T, 3> invTsl = -(invRot * (getTranslationPart().xyz() * invScale));
  1179. return TMat(invTsl, invRot, invScale);
  1180. }
  1181. /// @note 9 muls, 9 adds
  1182. [[nodiscard]] TVec<T, 3> transform(const TVec<T, 3>& v) const requires(kSize == 16)
  1183. {
  1184. const auto& m = *this;
  1185. return TVec<T, 3>(m(0, 0) * v.x() + m(0, 1) * v.y() + m(0, 2) * v.z() + m(0, 3),
  1186. m(1, 0) * v.x() + m(1, 1) * v.y() + m(1, 2) * v.z() + m(1, 3),
  1187. m(2, 0) * v.x() + m(2, 1) * v.y() + m(2, 2) * v.z() + m(2, 3));
  1188. }
  1189. /// Create a new transform matrix position at eye and looking at refPoint.
  1190. template<U kVecDimensions>
  1191. [[nodiscard]] static TMat lookAt(const TVec<T, kVecDimensions>& eye, const TVec<T, kVecDimensions>& refPoint,
  1192. const TVec<T, kVecDimensions>& up) requires(kTRowCount == 3 && kTColumnCount == 4 && kVecDimensions >= 3)
  1193. {
  1194. const TVec<T, 3> vdir = (refPoint.xyz() - eye.xyz()).normalize();
  1195. const TVec<T, 3> vup = (up.xyz() - vdir * up.xyz().dot(vdir)).normalize();
  1196. const TVec<T, 3> vside = vdir.cross(vup);
  1197. TMat out;
  1198. out.setColumns(vside, vup, -vdir, eye.xyz());
  1199. return out;
  1200. }
  1201. /// Create a new transform matrix position at eye and looking at refPoint.
  1202. template<U kVecDimensions>
  1203. [[nodiscard]] static TMat lookAt(const TVec<T, kVecDimensions>& eye, const TVec<T, kVecDimensions>& refPoint,
  1204. const TVec<T, kVecDimensions>& up) requires(kTRowCount == 4 && kTColumnCount == 4 && kVecDimensions >= 3)
  1205. {
  1206. const TVec<T, 4> vdir = (refPoint.xyz0() - eye.xyz0()).normalize();
  1207. const TVec<T, 4> vup = (up.xyz0() - vdir * up.xyz0().dot(vdir)).normalize();
  1208. const TVec<T, 4> vside = vdir.cross(vup);
  1209. TMat out;
  1210. out.setColumns(vside, vup, -vdir, eye.xyz1());
  1211. return out;
  1212. }
  1213. /// Create a rotation matrix from some direction. http://jcgt.org/published/0006/01/01/
  1214. [[nodiscard]] static TMat rotationFromDirection(const TVec<T, 3>& zAxis) requires(kSize == 9)
  1215. {
  1216. const TVec<T, 3> z = zAxis;
  1217. const T sign = (z.z() >= T(0)) ? T(1) : -T(1);
  1218. const T a = -T(1) / (sign + z.z());
  1219. const T b = z.x() * z.y() * a;
  1220. const TVec<T, 3> x = TVec<T, 3>(T(1) + sign * a * pow(z.x(), T(2)), sign * b, -sign * z.x());
  1221. const TVec<T, 3> y = TVec<T, 3>(b, sign + a * pow(z.y(), T(2)), -z.y());
  1222. TMat out;
  1223. out.setColumns(x, y, z);
  1224. return out;
  1225. }
  1226. [[nodiscard]] TMat lerp(const TMat& b, T t) const
  1227. {
  1228. return ((*this) * (T(1) - t)) + (b * t);
  1229. }
  1230. // If we assume this is a transformation matrix then extract the scale
  1231. [[nodiscard]] TVec<T, 3> extractScale() const
  1232. {
  1233. return TVec<T, 3>(getColumn(0).xyz().length(), getColumn(1).xyz().length(), getColumn(2).xyz().length());
  1234. }
  1235. static TMat getZero()
  1236. {
  1237. return TMat(T(0));
  1238. }
  1239. void setZero()
  1240. {
  1241. *this = getZero();
  1242. }
  1243. static TMat getIdentity() requires(kSize == 9)
  1244. {
  1245. return TMat(T(1), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(1));
  1246. }
  1247. static TMat getIdentity() requires(kSize == 16)
  1248. {
  1249. return TMat(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1));
  1250. }
  1251. static TMat getIdentity() requires(kSize == 12)
  1252. {
  1253. return TMat(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1), T(0));
  1254. }
  1255. void setIdentity()
  1256. {
  1257. (*this) = getIdentity();
  1258. }
  1259. static constexpr U8 getSize()
  1260. {
  1261. return U8(kTColumnCount * kTRowCount);
  1262. }
  1263. [[nodiscard]] String toString() const requires(std::is_floating_point<T>::value)
  1264. {
  1265. String str;
  1266. for(U j = 0; j < kTRowCount; ++j)
  1267. {
  1268. for(U i = 0; i < kTColumnCount; ++i)
  1269. {
  1270. CString fmt;
  1271. if(i == kTColumnCount - 1 && j == kTRowCount - 1)
  1272. {
  1273. fmt = "%f";
  1274. }
  1275. else if(i == kTColumnCount - 1)
  1276. {
  1277. fmt = "%f\n";
  1278. }
  1279. else
  1280. {
  1281. fmt = "%f ";
  1282. }
  1283. str += String().sprintf(fmt.cstr(), m_arr2[j][i]);
  1284. }
  1285. }
  1286. return str;
  1287. }
  1288. /// @}
  1289. protected:
  1290. static constexpr U N = kTColumnCount * kTRowCount; ///< Total number of elements
  1291. /// @name Data members
  1292. /// @{
  1293. union // All members below are views over the same storage
  1294. {
  1295. T m_carr1[N]; ///< Flat C array view. For easier debugging with gdb
  1296. T m_carr2[kTRowCount][kTColumnCount]; ///< [row][column] C array view. For easier debugging with gdb
  1297. Array<T, N> m_arr1; ///< Flat view of all the elements
  1298. Array2d<T, kTRowCount, kTColumnCount> m_arr2; ///< 2D view, indexed as m_arr2[row][column]
  1299. SimdArray m_simd; ///< SIMD view. The SIMD code paths index it per row
  1300. Array<RowVec, kTRowCount> m_rows; ///< One RowVec per row
  1301. };
  1302. /// @}
  1303. };
  1304. /// @memberof TMat
  1305. template<typename T, U kTRowCount, U kTColumnCount>
  1306. TMat<T, kTRowCount, kTColumnCount> operator+(const T f, const TMat<T, kTRowCount, kTColumnCount>& m)
  1307. {
  1308. return m + f;
  1309. }
  1310. /// @memberof TMat
  1311. template<typename T, U kTRowCount, U kTColumnCount>
  1312. TMat<T, kTRowCount, kTColumnCount> operator-(const T f, const TMat<T, kTRowCount, kTColumnCount>& m)
  1313. {
  1314. TMat<T, kTRowCount, kTColumnCount> out;
  1315. for(U i = 0; i < kTRowCount * kTColumnCount; i++)
  1316. {
  1317. out[i] = f - m[i];
  1318. }
  1319. return out;
  1320. }
  1321. /// @memberof TMat
  1322. template<typename T, U kTRowCount, U kTColumnCount>
  1323. TMat<T, kTRowCount, kTColumnCount> operator*(const T f, const TMat<T, kTRowCount, kTColumnCount>& m)
  1324. {
  1325. return m * f;
  1326. }
  1327. /// @memberof TMat
  1328. template<typename T, U kTRowCount, U kTColumnCount>
  1329. TMat<T, kTRowCount, kTColumnCount> operator/(const T f, const TMat<T, 3, 3>& m3)
  1330. {
  1331. TMat<T, kTRowCount, kTColumnCount> out;
  1332. for(U i = 0; i < kTRowCount * kTColumnCount; i++)
  1333. {
  1334. ANKI_ASSERT(m3[i] != T(0));
  1335. out[i] = f / m3[i];
  1336. }
  1337. return out;
  1338. }
  1339. /// F32 3x3 matrix
  1340. using Mat3 = TMat<F32, 3, 3>;
  1341. static_assert(sizeof(Mat3) == sizeof(F32) * 3 * 3, "Incorrect size");
  1342. /// F64 3x3 matrix
  1343. using DMat3 = TMat<F64, 3, 3>;
  1344. static_assert(sizeof(DMat3) == sizeof(F64) * 3 * 3, "Incorrect size");
  1345. /// F32 4x4 matrix
  1346. using Mat4 = TMat<F32, 4, 4>;
  1347. static_assert(sizeof(Mat4) == sizeof(F32) * 4 * 4, "Incorrect size");
  1348. /// F64 4x4 matrix
  1349. using DMat4 = TMat<F64, 4, 4>;
  1350. static_assert(sizeof(DMat4) == sizeof(F64) * 4 * 4, "Incorrect size");
  1351. /// F32 3x4 matrix
  1352. using Mat3x4 = TMat<F32, 3, 4>;
  1353. static_assert(sizeof(Mat3x4) == sizeof(F32) * 3 * 4, "Incorrect size");
  1354. /// F64 3x4 matrix
  1355. using DMat3x4 = TMat<F64, 3, 4>;
  1356. static_assert(sizeof(DMat3x4) == sizeof(F64) * 3 * 4, "Incorrect size");
  1357. /// @}
  1358. } // end namespace anki