Mat.h 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655
  1. // Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
  2. // All rights reserved.
  3. // Code licensed under the BSD License.
  4. // http://www.anki3d.org/LICENSE
  5. #pragma once
  6. #include <AnKi/Math/Common.h>
  7. #include <AnKi/Math/Vec.h>
  8. namespace anki
  9. {
  10. /// @addtogroup math
  11. /// @{
  12. /// Matrix type.
  13. /// @tparam T The scalar type. Eg float.
  14. /// @tparam J The number of rows.
  15. /// @tparam I The number of columns.
  16. template<typename T, U J, U I>
  17. class alignas(MathSimd<T, I>::ALIGNMENT) TMat
  18. {
  19. public:
  20. using Scalar = T;
  21. using Simd = typename MathSimd<T, I>::Type;
  22. #if ANKI_COMPILER_GCC_COMPATIBLE
  23. # pragma GCC diagnostic push
  24. # pragma GCC diagnostic ignored "-Wignored-attributes"
  25. #endif
  26. using SimdArray = Array<Simd, J>;
  27. #if ANKI_COMPILER_GCC_COMPATIBLE
  28. # pragma GCC diagnostic pop
  29. #endif
  30. using RowVec = TVec<T, I>;
  31. using ColumnVec = TVec<T, J>;
  32. static constexpr U ROW_SIZE = J; ///< Number of rows
  33. static constexpr U COLUMN_SIZE = I; ///< Number of columns
  34. static constexpr U SIZE = J * I; ///< Number of total elements
  35. static constexpr Bool HAS_SIMD = I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
  36. static constexpr Bool HAS_MAT4_SIMD = J == 4 && I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
  37. static constexpr Bool HAS_MAT3X4_SIMD = J == 3 && I == 4 && std::is_same<T, F32>::value && ANKI_ENABLE_SIMD;
  38. /// @name Constructors
  39. /// @{
  40. TMat()
  41. {
  42. }
  43. /// Copy.
  44. TMat(ANKI_ENABLE_ARG(const TMat&, !HAS_SIMD) b)
  45. {
  46. for(U i = 0; i < N; i++)
  47. {
  48. m_arr1[i] = b.m_arr1[i];
  49. }
  50. }
  51. /// Copy.
  52. TMat(ANKI_ENABLE_ARG(const TMat&, HAS_SIMD) b)
  53. {
  54. for(U i = 0; i < J; i++)
  55. {
  56. m_simd[i] = b.m_simd[i];
  57. }
  58. }
  59. ANKI_ENABLE_METHOD(!HAS_SIMD)
  60. explicit TMat(const T f)
  61. {
  62. for(T& x : m_arr1)
  63. {
  64. x = f;
  65. }
  66. }
  67. #if ANKI_ENABLE_SIMD
  68. ANKI_ENABLE_METHOD(HAS_SIMD)
  69. explicit TMat(const T f)
  70. {
  71. for(U i = 0; i < J; i++)
  72. {
  73. # if ANKI_SIMD_SSE
  74. m_simd[i] = _mm_set1_ps(f);
  75. # else
  76. m_simd[i] = {f, f, f, f};
  77. # endif
  78. }
  79. }
  80. #endif
  81. explicit TMat(const T arr[])
  82. {
  83. for(U i = 0; i < N; i++)
  84. {
  85. m_arr1[i] = arr[i];
  86. }
  87. }
  88. // 3x3 specific constructors
  89. ANKI_ENABLE_METHOD(J == 3 && I == 3)
  90. TMat(T m00, T m01, T m02, T m10, T m11, T m12, T m20, T m21, T m22)
  91. {
  92. auto& m = *this;
  93. m(0, 0) = m00;
  94. m(0, 1) = m01;
  95. m(0, 2) = m02;
  96. m(1, 0) = m10;
  97. m(1, 1) = m11;
  98. m(1, 2) = m12;
  99. m(2, 0) = m20;
  100. m(2, 1) = m21;
  101. m(2, 2) = m22;
  102. }
  103. ANKI_ENABLE_METHOD(J == 3 && I == 3)
  104. explicit TMat(const TQuat<T>& q)
  105. {
  106. setRotationPart(q);
  107. }
  108. ANKI_ENABLE_METHOD(J == 3 && I == 3)
  109. explicit TMat(const TEuler<T>& e)
  110. {
  111. setRotationPart(e);
  112. }
  113. ANKI_ENABLE_METHOD(J == 3 && I == 3)
  114. explicit TMat(const TAxisang<T>& axisang)
  115. {
  116. setRotationPart(axisang);
  117. }
  118. // 4x4 specific constructors
  119. ANKI_ENABLE_METHOD(J == 4 && I == 4)
  120. TMat(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23, T m30, T m31, T m32, T m33)
  121. {
  122. auto& m = *this;
  123. m(0, 0) = m00;
  124. m(0, 1) = m01;
  125. m(0, 2) = m02;
  126. m(0, 3) = m03;
  127. m(1, 0) = m10;
  128. m(1, 1) = m11;
  129. m(1, 2) = m12;
  130. m(1, 3) = m13;
  131. m(2, 0) = m20;
  132. m(2, 1) = m21;
  133. m(2, 2) = m22;
  134. m(2, 3) = m23;
  135. m(3, 0) = m30;
  136. m(3, 1) = m31;
  137. m(3, 2) = m32;
  138. m(3, 3) = m33;
  139. }
  140. ANKI_ENABLE_METHOD(J == 4 && I == 4)
  141. TMat(const TVec<T, 4>& translation, const TMat<T, 3, 3>& rotation, const T scale = T(1))
  142. {
  143. if(isZero<T>(scale - T(1)))
  144. {
  145. setRotationPart(rotation);
  146. }
  147. else
  148. {
  149. setRotationPart(rotation * scale);
  150. }
  151. setTranslationPart(translation);
  152. auto& m = *this;
  153. m(3, 0) = m(3, 1) = m(3, 2) = T(0);
  154. }
  155. ANKI_ENABLE_METHOD(J == 4 && I == 4)
  156. explicit TMat(const TTransform<T>& t)
  157. : TMat(t.getOrigin().xyz1(), t.getRotation().getRotationPart(), t.getScale())
  158. {
  159. }
  160. // 3x4 specific constructors
  161. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  162. TMat(T m00, T m01, T m02, T m03, T m10, T m11, T m12, T m13, T m20, T m21, T m22, T m23)
  163. {
  164. auto& m = *this;
  165. m(0, 0) = m00;
  166. m(0, 1) = m01;
  167. m(0, 2) = m02;
  168. m(0, 3) = m03;
  169. m(1, 0) = m10;
  170. m(1, 1) = m11;
  171. m(1, 2) = m12;
  172. m(1, 3) = m13;
  173. m(2, 0) = m20;
  174. m(2, 1) = m21;
  175. m(2, 2) = m22;
  176. m(2, 3) = m23;
  177. }
  178. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  179. explicit TMat(const TMat<T, 4, 4>& m4)
  180. {
  181. auto& m = *this;
  182. m(0, 0) = m4(0, 0);
  183. m(0, 1) = m4(0, 1);
  184. m(0, 2) = m4(0, 2);
  185. m(0, 3) = m4(0, 3);
  186. m(1, 0) = m4(1, 0);
  187. m(1, 1) = m4(1, 1);
  188. m(1, 2) = m4(1, 2);
  189. m(1, 3) = m4(1, 3);
  190. m(2, 0) = m4(2, 0);
  191. m(2, 1) = m4(2, 1);
  192. m(2, 2) = m4(2, 2);
  193. m(2, 3) = m4(2, 3);
  194. }
  195. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  196. explicit TMat(const TVec<T, 3>& translation, const TMat<T, 3, 3>& rotation, const T scale = T(1))
  197. {
  198. if(isZero<T>(scale - T(1)))
  199. {
  200. setRotationPart(rotation);
  201. }
  202. else
  203. {
  204. setRotationPart(rotation * scale);
  205. }
  206. setTranslationPart(translation);
  207. }
  208. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  209. explicit TMat(const TVec<T, 3>& translation, const TQuat<T>& q, const T scale = T(1))
  210. : TMat(translation, TMat<T, 3, 3>(q), scale)
  211. {
  212. }
  213. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  214. explicit TMat(const TVec<T, 3>& translation, const TEuler<T>& b, const T scale = T(1))
  215. : TMat(translation, TMat<T, 3, 3>(b), scale)
  216. {
  217. }
  218. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  219. explicit TMat(const TVec<T, 3>& translation, const TAxisang<T>& b, const T scale = T(1))
  220. : TMat(translation, TMat<T, 3, 3>(b), scale)
  221. {
  222. }
  223. ANKI_ENABLE_METHOD(J == 3 && I == 4)
  224. explicit TMat(const TTransform<T>& t)
  225. : TMat(t.getOrigin().xyz(), t.getRotation().getRotationPart(), t.getScale())
  226. {
  227. }
  228. /// @}
  229. /// @name Accessors
  230. /// @{
  231. T& operator()(const U j, const U i)
  232. {
  233. return m_arr2[j][i];
  234. }
  235. T operator()(const U j, const U i) const
  236. {
  237. return m_arr2[j][i];
  238. }
  239. T& operator[](const U n)
  240. {
  241. return m_arr1[n];
  242. }
  243. T operator[](const U n) const
  244. {
  245. return m_arr1[n];
  246. }
  247. /// @}
  248. /// @name Operators with same type
  249. /// @{
  250. /// Copy.
  251. TMat& operator=(ANKI_ENABLE_ARG(const TMat&, !HAS_SIMD) b)
  252. {
  253. for(U n = 0; n < N; n++)
  254. {
  255. m_arr1[n] = b.m_arr1[n];
  256. }
  257. return *this;
  258. }
  259. /// Copy.
  260. TMat& operator=(ANKI_ENABLE_ARG(const TMat&, HAS_SIMD) b)
  261. {
  262. for(U i = 0; i < J; i++)
  263. {
  264. m_simd[i] = b.m_simd[i];
  265. }
  266. return *this;
  267. }
  268. ANKI_ENABLE_METHOD(!HAS_SIMD)
  269. TMat operator+(const TMat& b) const
  270. {
  271. TMat c;
  272. for(U n = 0; n < N; n++)
  273. {
  274. c.m_arr1[n] = m_arr1[n] + b.m_arr1[n];
  275. }
  276. return c;
  277. }
  278. #if ANKI_ENABLE_SIMD
  279. ANKI_ENABLE_METHOD(HAS_SIMD)
  280. TMat operator+(const TMat& b) const
  281. {
  282. TMat c;
  283. for(U i = 0; i < J; i++)
  284. {
  285. # if ANKI_SIMD_SSE
  286. c.m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
  287. # else
  288. c.m_simd[i] = m_simd[i] + b.m_simd[i];
  289. # endif
  290. }
  291. return c;
  292. }
  293. #endif
  294. ANKI_ENABLE_METHOD(!HAS_SIMD)
  295. TMat& operator+=(const TMat& b)
  296. {
  297. for(U n = 0; n < N; n++)
  298. {
  299. m_arr1[n] += b.m_arr1[n];
  300. }
  301. return *this;
  302. }
  303. #if ANKI_ENABLE_SIMD
  304. ANKI_ENABLE_METHOD(HAS_SIMD)
  305. TMat& operator+=(const TMat& b)
  306. {
  307. for(U i = 0; i < J; i++)
  308. {
  309. # if ANKI_SIMD_SSE
  310. m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
  311. # else
  312. m_simd[i] += b.m_simd[i];
  313. # endif
  314. }
  315. return *this;
  316. }
  317. #endif
  318. ANKI_ENABLE_METHOD(!HAS_SIMD)
  319. TMat operator-(const TMat& b) const
  320. {
  321. TMat c;
  322. for(U n = 0; n < N; n++)
  323. {
  324. c.m_arr1[n] = m_arr1[n] - b.m_arr1[n];
  325. }
  326. return c;
  327. }
  328. #if ANKI_ENABLE_SIMD
  329. ANKI_ENABLE_METHOD(HAS_SIMD)
  330. TMat operator-(const TMat& b) const
  331. {
  332. TMat c;
  333. for(U i = 0; i < J; i++)
  334. {
  335. # if ANKI_SIMD_SSE
  336. c.m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
  337. # else
  338. c.m_simd[i] = m_simd[i] - b.m_simd[i];
  339. # endif
  340. }
  341. return c;
  342. }
  343. #endif
  344. ANKI_ENABLE_METHOD(!HAS_SIMD)
  345. TMat& operator-=(const TMat& b)
  346. {
  347. for(U n = 0; n < N; n++)
  348. {
  349. m_arr1[n] -= b.m_arr1[n];
  350. }
  351. return *this;
  352. }
  353. #if ANKI_ENABLE_SIMD
  354. ANKI_ENABLE_METHOD(HAS_SIMD)
  355. TMat& operator-=(const TMat& b)
  356. {
  357. for(U i = 0; i < J; i++)
  358. {
  359. # if ANKI_SIMD_SSE
  360. m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
  361. # else
  362. m_simd[i] -= b.m_simd[i];
  363. # endif
  364. }
  365. return *this;
  366. }
  367. #endif
  368. ANKI_ENABLE_METHOD(J == I && !HAS_MAT4_SIMD)
  369. TMat operator*(const TMat& b) const
  370. {
  371. TMat out;
  372. const TMat& a = *this;
  373. for(U j = 0; j < J; j++)
  374. {
  375. for(U i = 0; i < I; i++)
  376. {
  377. out(j, i) = T(0);
  378. for(U k = 0; k < I; k++)
  379. {
  380. out(j, i) += a(j, k) * b(k, i);
  381. }
  382. }
  383. }
  384. return out;
  385. }
  386. #if ANKI_ENABLE_SIMD
  387. ANKI_ENABLE_METHOD(HAS_MAT4_SIMD)
  388. TMat operator*(const TMat& b) const
  389. {
  390. TMat out;
  391. const auto& m = *this;
  392. for(U i = 0; i < 4; i++)
  393. {
  394. # if ANKI_SIMD_SSE
  395. __m128 t1, t2;
  396. t1 = _mm_set1_ps(m(i, 0));
  397. t2 = _mm_mul_ps(b.m_simd[0], t1);
  398. t1 = _mm_set1_ps(m(i, 1));
  399. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[1], t1), t2);
  400. t1 = _mm_set1_ps(m(i, 2));
  401. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[2], t1), t2);
  402. t1 = _mm_set1_ps(m(i, 3));
  403. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[3], t1), t2);
  404. out.m_simd[i] = t2;
  405. # else
  406. float32x4_t t1, t2;
  407. t1 = vmovq_n_f32(m(i, 0));
  408. t2 = b.m_simd[0] * t1;
  409. t1 = vmovq_n_f32(m(i, 1));
  410. t2 = b.m_simd[1] * t1 + t2;
  411. t1 = vmovq_n_f32(m(i, 2));
  412. t2 = b.m_simd[2] * t1 + t2;
  413. t1 = vmovq_n_f32(m(i, 3));
  414. t2 = b.m_simd[3] * t1 + t2;
  415. out.m_simd[i] = t2;
  416. # endif
  417. }
  418. return out;
  419. }
  420. #endif
  421. TMat& operator*=(const TMat& b)
  422. {
  423. (*this) = (*this) * b;
  424. return *this;
  425. }
  426. Bool operator==(const TMat& b) const
  427. {
  428. for(U i = 0; i < N; i++)
  429. {
  430. if(!isZero<T>(m_arr1[i] - b.m_arr1[i]))
  431. {
  432. return false;
  433. }
  434. }
  435. return true;
  436. }
  437. Bool operator!=(const TMat& b) const
  438. {
  439. for(U i = 0; i < N; i++)
  440. {
  441. if(!isZero<T>(m_arr1[i] - b.m_arr1[i]))
  442. {
  443. return true;
  444. }
  445. }
  446. return false;
  447. }
  448. /// @}
  449. /// @name Operators with T
  450. /// @{
  451. TMat operator+(const T f) const
  452. {
  453. TMat out;
  454. for(U i = 0; i < N; i++)
  455. {
  456. out.m_arr1[i] = m_arr1[i] + f;
  457. }
  458. return out;
  459. }
  460. TMat& operator+=(const T f)
  461. {
  462. for(U i = 0; i < N; i++)
  463. {
  464. m_arr1[i] += f;
  465. }
  466. return *this;
  467. }
  468. TMat operator-(const T f) const
  469. {
  470. TMat out;
  471. for(U i = 0; i < N; i++)
  472. {
  473. out.m_arr1[i] = m_arr1[i] - f;
  474. }
  475. return out;
  476. }
  477. TMat& operator-=(const T f)
  478. {
  479. for(U i = 0; i < N; i++)
  480. {
  481. m_arr1[i] -= f;
  482. }
  483. return *this;
  484. }
  485. TMat operator*(const T f) const
  486. {
  487. TMat out;
  488. for(U i = 0; i < N; i++)
  489. {
  490. out.m_arr1[i] = m_arr1[i] * f;
  491. }
  492. return out;
  493. }
  494. TMat& operator*=(const T f)
  495. {
  496. for(U i = 0; i < N; i++)
  497. {
  498. m_arr1[i] *= f;
  499. }
  500. return *this;
  501. }
  502. TMat operator/(const T f) const
  503. {
  504. ANKI_ASSERT(f != T(0));
  505. TMat out;
  506. for(U i = 0; i < N; i++)
  507. {
  508. out.m_arr1[i] = m_arr1[i] / f;
  509. }
  510. return out;
  511. }
  512. TMat& operator/=(const T f)
  513. {
  514. ANKI_ASSERT(f != T(0));
  515. for(U i = 0; i < N; i++)
  516. {
  517. m_arr1[i] /= f;
  518. }
  519. return *this;
  520. }
  521. /// @}
  522. /// @name Operators with other types
  523. /// @{
  524. ANKI_ENABLE_METHOD(!HAS_SIMD)
  525. ColumnVec operator*(const RowVec& v) const
  526. {
  527. const TMat& m = *this;
  528. ColumnVec out;
  529. for(U j = 0; j < J; j++)
  530. {
  531. T sum = T(0);
  532. for(U i = 0; i < I; i++)
  533. {
  534. sum += m(j, i) * v[i];
  535. }
  536. out[j] = sum;
  537. }
  538. return out;
  539. }
  540. #if ANKI_ENABLE_SIMD
  541. ANKI_ENABLE_METHOD(HAS_SIMD)
  542. ColumnVec operator*(const RowVec& v) const
  543. {
  544. ColumnVec out;
  545. # if ANKI_SIMD_SSE
  546. for(U i = 0; i < J; i++)
  547. {
  548. _mm_store_ss(&out[i], _mm_dp_ps(m_simd[i], v.getSimd(), 0xF1));
  549. }
  550. # else
  551. for(U i = 0; i < J; i++)
  552. {
  553. out[i] = RowVec(m_simd[i]).dot(v);
  554. }
  555. # endif
  556. return out;
  557. }
  558. #endif
  559. /// @}
  560. /// @name Other
  561. /// @{
  562. ANKI_ENABLE_METHOD(!HAS_SIMD)
  563. void setRow(const U j, const RowVec& v)
  564. {
  565. for(U i = 0; i < I; i++)
  566. {
  567. m_arr2[j][i] = v[i];
  568. }
  569. }
  570. ANKI_ENABLE_METHOD(HAS_SIMD)
  571. void setRow(const U j, const RowVec& v)
  572. {
  573. m_simd[j] = v.getSimd();
  574. }
  575. void setRows(const RowVec& a, const RowVec& b, const RowVec& c)
  576. {
  577. setRow(0, a);
  578. setRow(1, b);
  579. setRow(2, c);
  580. }
  581. ANKI_ENABLE_METHOD(J > 3)
  582. void setRows(const RowVec& a, const RowVec& b, const RowVec& c, const RowVec& d)
  583. {
  584. setRows(a, b, c);
  585. setRow(3, d);
  586. }
  587. RowVec getRow(const U j) const
  588. {
  589. RowVec out;
  590. for(U i = 0; i < I; i++)
  591. {
  592. out[i] = m_arr2[j][i];
  593. }
  594. return out;
  595. }
  596. void getRows(RowVec& a, RowVec& b, RowVec& c) const
  597. {
  598. a = getRow(0);
  599. b = getRow(1);
  600. c = getRow(2);
  601. }
  602. void getRows(RowVec& a, RowVec& b, RowVec& c, RowVec& d) const
  603. {
  604. static_assert(J > 3, "Wrong matrix");
  605. getRows(a, b, c);
  606. d = getRow(3);
  607. }
  608. void setColumn(const U i, const ColumnVec& v)
  609. {
  610. for(U j = 0; j < J; j++)
  611. {
  612. m_arr2[j][i] = v[j];
  613. }
  614. }
  615. void setColumns(const ColumnVec& a, const ColumnVec& b, const ColumnVec& c)
  616. {
  617. setColumn(0, a);
  618. setColumn(1, b);
  619. setColumn(2, c);
  620. }
  621. void setColumns(const ColumnVec& a, const ColumnVec& b, const ColumnVec& c, const ColumnVec& d)
  622. {
  623. static_assert(I > 3, "Check column number");
  624. setColumns(a, b, c);
  625. setColumn(3, d);
  626. }
  627. ColumnVec getColumn(const U i) const
  628. {
  629. ColumnVec out;
  630. for(U j = 0; j < J; j++)
  631. {
  632. out[j] = m_arr2[j][i];
  633. }
  634. return out;
  635. }
  636. void getColumns(ColumnVec& a, ColumnVec& b, ColumnVec& c) const
  637. {
  638. a = getColumn(0);
  639. b = getColumn(1);
  640. c = getColumn(2);
  641. }
  642. void getColumns(ColumnVec& a, ColumnVec& b, ColumnVec& c, ColumnVec& d) const
  643. {
  644. static_assert(I > 3, "Check column number");
  645. getColumns(a, b, c);
  646. d = getColumn(3);
  647. }
  648. /// Get 1st column
  649. ColumnVec getXAxis() const
  650. {
  651. return getColumn(0);
  652. }
  653. /// Get 2nd column
  654. ColumnVec getYAxis() const
  655. {
  656. return getColumn(1);
  657. }
  658. /// Get 3rd column
  659. ColumnVec getZAxis() const
  660. {
  661. return getColumn(2);
  662. }
  663. /// Set 1st column
  664. void setXAxis(const ColumnVec& v)
  665. {
  666. setColumn(0, v);
  667. }
  668. /// Set 2nd column
  669. void setYAxis(const ColumnVec& v)
  670. {
  671. setColumn(1, v);
  672. }
  673. /// Set 3rd column
  674. void setZAxis(const ColumnVec& v)
  675. {
  676. setColumn(2, v);
  677. }
  678. void setRotationX(const T rad)
  679. {
  680. TMat& m = *this;
  681. T sintheta, costheta;
  682. sinCos(rad, sintheta, costheta);
  683. m(0, 0) = T(1);
  684. m(0, 1) = T(0);
  685. m(0, 2) = T(0);
  686. m(1, 0) = T(0);
  687. m(1, 1) = costheta;
  688. m(1, 2) = -sintheta;
  689. m(2, 0) = T(0);
  690. m(2, 1) = sintheta;
  691. m(2, 2) = costheta;
  692. }
  693. void setRotationY(const T rad)
  694. {
  695. TMat& m = *this;
  696. T sintheta, costheta;
  697. sinCos(rad, sintheta, costheta);
  698. m(0, 0) = costheta;
  699. m(0, 1) = T(0);
  700. m(0, 2) = sintheta;
  701. m(1, 0) = T(0);
  702. m(1, 1) = T(1);
  703. m(1, 2) = T(0);
  704. m(2, 0) = -sintheta;
  705. m(2, 1) = T(0);
  706. m(2, 2) = costheta;
  707. }
  708. void setRotationZ(const T rad)
  709. {
  710. TMat& m = *this;
  711. T sintheta, costheta;
  712. sinCos(rad, sintheta, costheta);
  713. m(0, 0) = costheta;
  714. m(0, 1) = -sintheta;
  715. m(0, 2) = T(0);
  716. m(1, 0) = sintheta;
  717. m(1, 1) = costheta;
  718. m(1, 2) = T(0);
  719. m(2, 0) = T(0);
  720. m(2, 1) = T(0);
  721. m(2, 2) = T(1);
  722. }
  723. /// It rotates "this" in the axis defined by the rotation AND not the
  724. /// world axis
  725. void rotateXAxis(const T rad)
  726. {
  727. TMat& m = *this;
  728. // If we analize the mat3 we can extract the 3 unit vectors rotated by the mat3. The 3 rotated vectors are in
  729. // mat's columns. This means that: mat3.colomn[0] == i * mat3. rotateXAxis() rotates rad angle not from i
  730. // vector (aka x axis) but from the vector from colomn 0
  731. // NOTE: See the clean code from < r664
  732. T sina, cosa;
  733. sinCos(rad, sina, cosa);
  734. // zAxis = zAxis*cosa - yAxis*sina;
  735. m(0, 2) = m(0, 2) * cosa - m(0, 1) * sina;
  736. m(1, 2) = m(1, 2) * cosa - m(1, 1) * sina;
  737. m(2, 2) = m(2, 2) * cosa - m(2, 1) * sina;
  738. // zAxis.normalize();
  739. T len = sqrt(m(0, 2) * m(0, 2) + m(1, 2) * m(1, 2) + m(2, 2) * m(2, 2));
  740. m(0, 2) /= len;
  741. m(1, 2) /= len;
  742. m(2, 2) /= len;
  743. // yAxis = zAxis * xAxis;
  744. m(0, 1) = m(1, 2) * m(2, 0) - m(2, 2) * m(1, 0);
  745. m(1, 1) = m(2, 2) * m(0, 0) - m(0, 2) * m(2, 0);
  746. m(2, 1) = m(0, 2) * m(1, 0) - m(1, 2) * m(0, 0);
  747. // yAxis.normalize();
  748. }
  749. /// @copybrief rotateXAxis
  750. void rotateYAxis(const T rad)
  751. {
  752. TMat& m = *this;
  753. // NOTE: See the clean code from < r664
  754. T sina, cosa;
  755. sinCos(rad, sina, cosa);
  756. // zAxis = zAxis*cosa + xAxis*sina;
  757. m(0, 2) = m(0, 2) * cosa + m(0, 0) * sina;
  758. m(1, 2) = m(1, 2) * cosa + m(1, 0) * sina;
  759. m(2, 2) = m(2, 2) * cosa + m(2, 0) * sina;
  760. // zAxis.normalize();
  761. T len = sqrt(m(0, 2) * m(0, 2) + m(1, 2) * m(1, 2) + m(2, 2) * m(2, 2));
  762. m(0, 2) /= len;
  763. m(1, 2) /= len;
  764. m(2, 2) /= len;
  765. // xAxis = (zAxis*yAxis) * -1.0f;
  766. m(0, 0) = m(2, 2) * m(1, 1) - m(1, 2) * m(2, 1);
  767. m(1, 0) = m(0, 2) * m(2, 1) - m(2, 2) * m(0, 1);
  768. m(2, 0) = m(1, 2) * m(0, 1) - m(0, 2) * m(1, 1);
  769. }
  770. /// @copybrief rotateXAxis
  771. void rotateZAxis(const T rad)
  772. {
  773. TMat& m = *this;
  774. // NOTE: See the clean code from < r664
  775. T sina, cosa;
  776. sinCos(rad, sina, cosa);
  777. // xAxis = xAxis*cosa + yAxis*sina;
  778. m(0, 0) = m(0, 0) * cosa + m(0, 1) * sina;
  779. m(1, 0) = m(1, 0) * cosa + m(1, 1) * sina;
  780. m(2, 0) = m(2, 0) * cosa + m(2, 1) * sina;
  781. // xAxis.normalize();
  782. T len = sqrt(m(0, 0) * m(0, 0) + m(1, 0) * m(1, 0) + m(2, 0) * m(2, 0));
  783. m(0, 0) /= len;
  784. m(1, 0) /= len;
  785. m(2, 0) /= len;
  786. // yAxis = zAxis*xAxis;
  787. m(0, 1) = m(1, 2) * m(2, 0) - m(2, 2) * m(1, 0);
  788. m(1, 1) = m(2, 2) * m(0, 0) - m(0, 2) * m(2, 0);
  789. m(2, 1) = m(0, 2) * m(1, 0) - m(1, 2) * m(0, 0);
  790. }
  791. void setRotationPart(const TMat<T, 3, 3>& m3)
  792. {
  793. TMat& m = *this;
  794. for(U j = 0; j < 3; j++)
  795. {
  796. for(U i = 0; i < 3; i++)
  797. {
  798. m(j, i) = m3(j, i);
  799. }
  800. }
  801. }
  802. void setRotationPart(const TQuat<T>& q)
  803. {
  804. TMat& m = *this;
  805. // If length is > 1 + 0.002 or < 1 - 0.002 then not normalized quat
  806. ANKI_ASSERT(absolute(T(1) - q.getLength()) <= 0.002);
  807. T xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz;
  808. xs = q.x() + q.x();
  809. ys = q.y() + q.y();
  810. zs = q.z() + q.z();
  811. wx = q.w() * xs;
  812. wy = q.w() * ys;
  813. wz = q.w() * zs;
  814. xx = q.x() * xs;
  815. xy = q.x() * ys;
  816. xz = q.x() * zs;
  817. yy = q.y() * ys;
  818. yz = q.y() * zs;
  819. zz = q.z() * zs;
  820. m(0, 0) = T(1) - (yy + zz);
  821. m(0, 1) = xy - wz;
  822. m(0, 2) = xz + wy;
  823. m(1, 0) = xy + wz;
  824. m(1, 1) = T(1) - (xx + zz);
  825. m(1, 2) = yz - wx;
  826. m(2, 0) = xz - wy;
  827. m(2, 1) = yz + wx;
  828. m(2, 2) = T(1) - (xx + yy);
  829. }
  830. void setRotationPart(const TEuler<T>& e)
  831. {
  832. TMat& m = *this;
  833. T ch, sh, ca, sa, cb, sb;
  834. sinCos(e.y(), sh, ch);
  835. sinCos(e.z(), sa, ca);
  836. sinCos(e.x(), sb, cb);
  837. m(0, 0) = ch * ca;
  838. m(0, 1) = sh * sb - ch * sa * cb;
  839. m(0, 2) = ch * sa * sb + sh * cb;
  840. m(1, 0) = sa;
  841. m(1, 1) = ca * cb;
  842. m(1, 2) = -ca * sb;
  843. m(2, 0) = -sh * ca;
  844. m(2, 1) = sh * sa * cb + ch * sb;
  845. m(2, 2) = -sh * sa * sb + ch * cb;
  846. }
  847. void setRotationPart(const TAxisang<T>& axisang)
  848. {
  849. TMat& m = *this;
  850. // Not normalized axis
  851. ANKI_ASSERT(isZero<T>(T(1) - axisang.getAxis().getLength()));
  852. T c, s;
  853. sinCos(axisang.getAngle(), s, c);
  854. T t = T(1) - c;
  855. const TVec<T, 3>& axis = axisang.getAxis();
  856. m(0, 0) = c + axis.x() * axis.x() * t;
  857. m(1, 1) = c + axis.y() * axis.y() * t;
  858. m(2, 2) = c + axis.z() * axis.z() * t;
  859. T tmp1 = axis.x() * axis.y() * t;
  860. T tmp2 = axis.z() * s;
  861. m(1, 0) = tmp1 + tmp2;
  862. m(0, 1) = tmp1 - tmp2;
  863. tmp1 = axis.x() * axis.z() * t;
  864. tmp2 = axis.y() * s;
  865. m(2, 0) = tmp1 - tmp2;
  866. m(0, 2) = tmp1 + tmp2;
  867. tmp1 = axis.y() * axis.z() * t;
  868. tmp2 = axis.x() * s;
  869. m(2, 1) = tmp1 + tmp2;
  870. m(1, 2) = tmp1 - tmp2;
  871. }
  872. TMat<T, 3, 3> getRotationPart() const
  873. {
  874. const TMat& m = *this;
  875. TMat<T, 3, 3> m3;
  876. m3(0, 0) = m(0, 0);
  877. m3(0, 1) = m(0, 1);
  878. m3(0, 2) = m(0, 2);
  879. m3(1, 0) = m(1, 0);
  880. m3(1, 1) = m(1, 1);
  881. m3(1, 2) = m(1, 2);
  882. m3(2, 0) = m(2, 0);
  883. m3(2, 1) = m(2, 1);
  884. m3(2, 2) = m(2, 2);
  885. return m3;
  886. }
  887. void setTranslationPart(const ColumnVec& v)
  888. {
  889. if(ROW_SIZE == 4)
  890. {
  891. ANKI_ASSERT(isZero<T>(v[3] - T(1)) && "w should be 1");
  892. }
  893. setColumn(3, v);
  894. }
  895. ColumnVec getTranslationPart() const
  896. {
  897. return getColumn(3);
  898. }
  899. void reorthogonalize()
  900. {
  901. // There are 2 methods, the standard and the Gram-Schmidt method with a twist for zAxis. This uses the 2nd. For
  902. // the first see < r664
  903. ColumnVec xAxis, yAxis, zAxis;
  904. getColumns(xAxis, yAxis, zAxis);
  905. xAxis.normalize();
  906. yAxis = yAxis - (xAxis * xAxis.dot(yAxis));
  907. yAxis.normalize();
  908. zAxis = xAxis.cross(yAxis);
  909. setColumns(xAxis, yAxis, zAxis);
  910. }
  911. ANKI_ENABLE_METHOD(J == I && !HAS_SIMD)
  912. void transpose()
  913. {
  914. for(U j = 0; j < J; j++)
  915. {
  916. for(U i = j + 1; i < I; i++)
  917. {
  918. T tmp = m_arr2[j][i];
  919. m_arr2[j][i] = m_arr2[i][j];
  920. m_arr2[i][j] = tmp;
  921. }
  922. }
  923. }
  924. #if ANKI_ENABLE_SIMD
  925. ANKI_ENABLE_METHOD(J == I && HAS_SIMD)
  926. void transpose()
  927. {
  928. # if ANKI_SIMD_SSE
  929. _MM_TRANSPOSE4_PS(m_simd[0], m_simd[1], m_simd[2], m_simd[3]);
  930. # else
  931. const float32x4x2_t row01 = vtrnq_f32(m_simd[0], m_simd[1]);
  932. const float32x4x2_t row23 = vtrnq_f32(m_simd[2], m_simd[3]);
  933. m_simd[0] = vcombine_f32(vget_low_f32(row01.val[0]), vget_low_f32(row23.val[0]));
  934. m_simd[1] = vcombine_f32(vget_low_f32(row01.val[1]), vget_low_f32(row23.val[1]));
  935. m_simd[2] = vcombine_f32(vget_high_f32(row01.val[0]), vget_high_f32(row23.val[0]));
  936. m_simd[3] = vcombine_f32(vget_high_f32(row01.val[1]), vget_high_f32(row23.val[1]));
  937. # endif
  938. }
  939. #endif
  940. void transposeRotationPart()
  941. {
  942. for(U j = 0; j < 3; j++)
  943. {
  944. for(U i = j + 1; i < 3; i++)
  945. {
  946. const T tmp = m_arr2[j][i];
  947. m_arr2[j][i] = m_arr2[i][j];
  948. m_arr2[i][j] = tmp;
  949. }
  950. }
  951. }
  952. ANKI_ENABLE_METHOD(I == J)
  953. TMat getTransposed() const
  954. {
  955. TMat out;
  956. for(U j = 0; j < J; j++)
  957. {
  958. for(U i = 0; i < I; i++)
  959. {
  960. out.m_arr2[i][j] = m_arr2[j][i];
  961. }
  962. }
  963. return out;
  964. }
  965. ANKI_ENABLE_METHOD(I == 3 && J == 3)
  966. T getDet() const
  967. {
  968. const auto& m = *this;
  969. // For the accurate method see < r664
  970. return m(0, 0) * (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)) - m(0, 1) * (m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0))
  971. + m(0, 2) * (m(0, 1) * m(2, 1) - m(1, 1) * m(2, 0));
  972. }
  973. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  974. T getDet() const
  975. {
  976. const auto& t = *this;
  977. return t(0, 3) * t(1, 2) * t(2, 1) * t(3, 0) - t(0, 2) * t(1, 3) * t(2, 1) * t(3, 0)
  978. - t(0, 3) * t(1, 1) * t(2, 2) * t(3, 0) + t(0, 1) * t(1, 3) * t(2, 2) * t(3, 0)
  979. + t(0, 2) * t(1, 1) * t(2, 3) * t(3, 0) - t(0, 1) * t(1, 2) * t(2, 3) * t(3, 0)
  980. - t(0, 3) * t(1, 2) * t(2, 0) * t(3, 1) + t(0, 2) * t(1, 3) * t(2, 0) * t(3, 1)
  981. + t(0, 3) * t(1, 0) * t(2, 2) * t(3, 1) - t(0, 0) * t(1, 3) * t(2, 2) * t(3, 1)
  982. - t(0, 2) * t(1, 0) * t(2, 3) * t(3, 1) + t(0, 0) * t(1, 2) * t(2, 3) * t(3, 1)
  983. + t(0, 3) * t(1, 1) * t(2, 0) * t(3, 2) - t(0, 1) * t(1, 3) * t(2, 0) * t(3, 2)
  984. - t(0, 3) * t(1, 0) * t(2, 1) * t(3, 2) + t(0, 0) * t(1, 3) * t(2, 1) * t(3, 2)
  985. + t(0, 1) * t(1, 0) * t(2, 3) * t(3, 2) - t(0, 0) * t(1, 1) * t(2, 3) * t(3, 2)
  986. - t(0, 2) * t(1, 1) * t(2, 0) * t(3, 3) + t(0, 1) * t(1, 2) * t(2, 0) * t(3, 3)
  987. + t(0, 2) * t(1, 0) * t(2, 1) * t(3, 3) - t(0, 0) * t(1, 2) * t(2, 1) * t(3, 3)
  988. - t(0, 1) * t(1, 0) * t(2, 2) * t(3, 3) + t(0, 0) * t(1, 1) * t(2, 2) * t(3, 3);
  989. }
  990. ANKI_ENABLE_METHOD(I == 3 && J == 3)
  991. TMat getInverse() const
  992. {
  993. // Using Gramer's method Inv(A) = (1 / getDet(A)) * Adj(A)
  994. const TMat& m = *this;
  995. TMat r;
  996. // compute determinant
  997. const T cofactor0 = m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1);
  998. const T cofactor3 = m(0, 2) * m(2, 1) - m(0, 1) * m(2, 2);
  999. const T cofactor6 = m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1);
  1000. const T det = m(0, 0) * cofactor0 + m(1, 0) * cofactor3 + m(2, 0) * cofactor6;
  1001. ANKI_ASSERT(!isZero<T>(det)); // Cannot invert det == 0
  1002. // create adjoint matrix and multiply by 1/det to get inverse
  1003. const T invDet = T(1) / det;
  1004. r(0, 0) = invDet * cofactor0;
  1005. r(0, 1) = invDet * cofactor3;
  1006. r(0, 2) = invDet * cofactor6;
  1007. r(1, 0) = invDet * (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2));
  1008. r(1, 1) = invDet * (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0));
  1009. r(1, 2) = invDet * (m(0, 2) * m(1, 0) - m(0, 0) * m(1, 2));
  1010. r(2, 0) = invDet * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0));
  1011. r(2, 1) = invDet * (m(0, 1) * m(2, 0) - m(0, 0) * m(2, 1));
  1012. r(2, 2) = invDet * (m(0, 0) * m(1, 1) - m(0, 1) * m(1, 0));
  1013. return r;
  1014. }
  1015. /// Invert using Cramer's rule
  1016. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1017. TMat getInverse() const
  1018. {
  1019. Array<T, 12> tmp;
  1020. const auto& in = (*this);
  1021. TMat m4;
  1022. tmp[0] = in(2, 2) * in(3, 3);
  1023. tmp[1] = in(3, 2) * in(2, 3);
  1024. tmp[2] = in(1, 2) * in(3, 3);
  1025. tmp[3] = in(3, 2) * in(1, 3);
  1026. tmp[4] = in(1, 2) * in(2, 3);
  1027. tmp[5] = in(2, 2) * in(1, 3);
  1028. tmp[6] = in(0, 2) * in(3, 3);
  1029. tmp[7] = in(3, 2) * in(0, 3);
  1030. tmp[8] = in(0, 2) * in(2, 3);
  1031. tmp[9] = in(2, 2) * in(0, 3);
  1032. tmp[10] = in(0, 2) * in(1, 3);
  1033. tmp[11] = in(1, 2) * in(0, 3);
  1034. m4(0, 0) = tmp[0] * in(1, 1) + tmp[3] * in(2, 1) + tmp[4] * in(3, 1);
  1035. m4(0, 0) -= tmp[1] * in(1, 1) + tmp[2] * in(2, 1) + tmp[5] * in(3, 1);
  1036. m4(0, 1) = tmp[1] * in(0, 1) + tmp[6] * in(2, 1) + tmp[9] * in(3, 1);
  1037. m4(0, 1) -= tmp[0] * in(0, 1) + tmp[7] * in(2, 1) + tmp[8] * in(3, 1);
  1038. m4(0, 2) = tmp[2] * in(0, 1) + tmp[7] * in(1, 1) + tmp[10] * in(3, 1);
  1039. m4(0, 2) -= tmp[3] * in(0, 1) + tmp[6] * in(1, 1) + tmp[11] * in(3, 1);
  1040. m4(0, 3) = tmp[5] * in(0, 1) + tmp[8] * in(1, 1) + tmp[11] * in(2, 1);
  1041. m4(0, 3) -= tmp[4] * in(0, 1) + tmp[9] * in(1, 1) + tmp[10] * in(2, 1);
  1042. m4(1, 0) = tmp[1] * in(1, 0) + tmp[2] * in(2, 0) + tmp[5] * in(3, 0);
  1043. m4(1, 0) -= tmp[0] * in(1, 0) + tmp[3] * in(2, 0) + tmp[4] * in(3, 0);
  1044. m4(1, 1) = tmp[0] * in(0, 0) + tmp[7] * in(2, 0) + tmp[8] * in(3, 0);
  1045. m4(1, 1) -= tmp[1] * in(0, 0) + tmp[6] * in(2, 0) + tmp[9] * in(3, 0);
  1046. m4(1, 2) = tmp[3] * in(0, 0) + tmp[6] * in(1, 0) + tmp[11] * in(3, 0);
  1047. m4(1, 2) -= tmp[2] * in(0, 0) + tmp[7] * in(1, 0) + tmp[10] * in(3, 0);
  1048. m4(1, 3) = tmp[4] * in(0, 0) + tmp[9] * in(1, 0) + tmp[10] * in(2, 0);
  1049. m4(1, 3) -= tmp[5] * in(0, 0) + tmp[8] * in(1, 0) + tmp[11] * in(2, 0);
  1050. tmp[0] = in(2, 0) * in(3, 1);
  1051. tmp[1] = in(3, 0) * in(2, 1);
  1052. tmp[2] = in(1, 0) * in(3, 1);
  1053. tmp[3] = in(3, 0) * in(1, 1);
  1054. tmp[4] = in(1, 0) * in(2, 1);
  1055. tmp[5] = in(2, 0) * in(1, 1);
  1056. tmp[6] = in(0, 0) * in(3, 1);
  1057. tmp[7] = in(3, 0) * in(0, 1);
  1058. tmp[8] = in(0, 0) * in(2, 1);
  1059. tmp[9] = in(2, 0) * in(0, 1);
  1060. tmp[10] = in(0, 0) * in(1, 1);
  1061. tmp[11] = in(1, 0) * in(0, 1);
  1062. m4(2, 0) = tmp[0] * in(1, 3) + tmp[3] * in(2, 3) + tmp[4] * in(3, 3);
  1063. m4(2, 0) -= tmp[1] * in(1, 3) + tmp[2] * in(2, 3) + tmp[5] * in(3, 3);
  1064. m4(2, 1) = tmp[1] * in(0, 3) + tmp[6] * in(2, 3) + tmp[9] * in(3, 3);
  1065. m4(2, 1) -= tmp[0] * in(0, 3) + tmp[7] * in(2, 3) + tmp[8] * in(3, 3);
  1066. m4(2, 2) = tmp[2] * in(0, 3) + tmp[7] * in(1, 3) + tmp[10] * in(3, 3);
  1067. m4(2, 2) -= tmp[3] * in(0, 3) + tmp[6] * in(1, 3) + tmp[11] * in(3, 3);
  1068. m4(2, 3) = tmp[5] * in(0, 3) + tmp[8] * in(1, 3) + tmp[11] * in(2, 3);
  1069. m4(2, 3) -= tmp[4] * in(0, 3) + tmp[9] * in(1, 3) + tmp[10] * in(2, 3);
  1070. m4(3, 0) = tmp[2] * in(2, 2) + tmp[5] * in(3, 2) + tmp[1] * in(1, 2);
  1071. m4(3, 0) -= tmp[4] * in(3, 2) + tmp[0] * in(1, 2) + tmp[3] * in(2, 2);
  1072. m4(3, 1) = tmp[8] * in(3, 2) + tmp[0] * in(0, 2) + tmp[7] * in(2, 2);
  1073. m4(3, 1) -= tmp[6] * in(2, 2) + tmp[9] * in(3, 2) + tmp[1] * in(0, 2);
  1074. m4(3, 2) = tmp[6] * in(1, 2) + tmp[11] * in(3, 2) + tmp[3] * in(0, 2);
  1075. m4(3, 2) -= tmp[10] * in(3, 2) + tmp[2] * in(0, 2) + tmp[7] * in(1, 2);
  1076. m4(3, 3) = tmp[10] * in(2, 2) + tmp[4] * in(0, 2) + tmp[9] * in(1, 2);
  1077. m4(3, 3) -= tmp[8] * in(1, 2) + tmp[11] * in(2, 2) + tmp[5] * in(0, 2);
  1078. T det = in(0, 0) * m4(0, 0) + in(1, 0) * m4(0, 1) + in(2, 0) * m4(0, 2) + in(3, 0) * m4(0, 3);
  1079. ANKI_ASSERT(!isZero<T>(det)); // Cannot invert, det == 0
  1080. det = T(1) / det;
  1081. m4 *= det;
  1082. return m4;
  1083. }
  1084. /// See getInverse
  1085. ANKI_ENABLE_METHOD((I == 4 && J == 4) || (I == 3 && J == 3))
  1086. void invert()
  1087. {
  1088. (*this) = getInverse();
  1089. }
  1090. /// 12 muls, 27 adds. Something like m4 = m0 * m1 but without touching the 4rth row and allot faster
  1091. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1092. static TMat combineTransformations(const TMat& m0, const TMat& m1)
  1093. {
  1094. // See the clean code in < r664
  1095. // one of the 2 mat4 doesnt represent transformation
  1096. ANKI_ASSERT(isZero<T>(m0(3, 0) + m0(3, 1) + m0(3, 2) + m0(3, 3) - T(1))
  1097. && isZero<T>(m1(3, 0) + m1(3, 1) + m1(3, 2) + m1(3, 3) - T(1)));
  1098. TMat m4;
  1099. m4(0, 0) = m0(0, 0) * m1(0, 0) + m0(0, 1) * m1(1, 0) + m0(0, 2) * m1(2, 0);
  1100. m4(0, 1) = m0(0, 0) * m1(0, 1) + m0(0, 1) * m1(1, 1) + m0(0, 2) * m1(2, 1);
  1101. m4(0, 2) = m0(0, 0) * m1(0, 2) + m0(0, 1) * m1(1, 2) + m0(0, 2) * m1(2, 2);
  1102. m4(1, 0) = m0(1, 0) * m1(0, 0) + m0(1, 1) * m1(1, 0) + m0(1, 2) * m1(2, 0);
  1103. m4(1, 1) = m0(1, 0) * m1(0, 1) + m0(1, 1) * m1(1, 1) + m0(1, 2) * m1(2, 1);
  1104. m4(1, 2) = m0(1, 0) * m1(0, 2) + m0(1, 1) * m1(1, 2) + m0(1, 2) * m1(2, 2);
  1105. m4(2, 0) = m0(2, 0) * m1(0, 0) + m0(2, 1) * m1(1, 0) + m0(2, 2) * m1(2, 0);
  1106. m4(2, 1) = m0(2, 0) * m1(0, 1) + m0(2, 1) * m1(1, 1) + m0(2, 2) * m1(2, 1);
  1107. m4(2, 2) = m0(2, 0) * m1(0, 2) + m0(2, 1) * m1(1, 2) + m0(2, 2) * m1(2, 2);
  1108. m4(0, 3) = m0(0, 0) * m1(0, 3) + m0(0, 1) * m1(1, 3) + m0(0, 2) * m1(2, 3) + m0(0, 3);
  1109. m4(1, 3) = m0(1, 0) * m1(0, 3) + m0(1, 1) * m1(1, 3) + m0(1, 2) * m1(2, 3) + m0(1, 3);
  1110. m4(2, 3) = m0(2, 0) * m1(0, 3) + m0(2, 1) * m1(1, 3) + m0(2, 2) * m1(2, 3) + m0(2, 3);
  1111. m4(3, 0) = m4(3, 1) = m4(3, 2) = T(0);
  1112. m4(3, 3) = T(1);
  1113. return m4;
  1114. }
  1115. /// Create a new matrix that is equivalent to Mat4(this)*Mat4(b)
  1116. ANKI_ENABLE_METHOD(J == 3 && I == 4 && !HAS_SIMD)
  1117. TMat combineTransformations(const TMat& b) const
  1118. {
  1119. const auto& a = *this;
  1120. TMat c;
  1121. c(0, 0) = a(0, 0) * b(0, 0) + a(0, 1) * b(1, 0) + a(0, 2) * b(2, 0);
  1122. c(0, 1) = a(0, 0) * b(0, 1) + a(0, 1) * b(1, 1) + a(0, 2) * b(2, 1);
  1123. c(0, 2) = a(0, 0) * b(0, 2) + a(0, 1) * b(1, 2) + a(0, 2) * b(2, 2);
  1124. c(1, 0) = a(1, 0) * b(0, 0) + a(1, 1) * b(1, 0) + a(1, 2) * b(2, 0);
  1125. c(1, 1) = a(1, 0) * b(0, 1) + a(1, 1) * b(1, 1) + a(1, 2) * b(2, 1);
  1126. c(1, 2) = a(1, 0) * b(0, 2) + a(1, 1) * b(1, 2) + a(1, 2) * b(2, 2);
  1127. c(2, 0) = a(2, 0) * b(0, 0) + a(2, 1) * b(1, 0) + a(2, 2) * b(2, 0);
  1128. c(2, 1) = a(2, 0) * b(0, 1) + a(2, 1) * b(1, 1) + a(2, 2) * b(2, 1);
  1129. c(2, 2) = a(2, 0) * b(0, 2) + a(2, 1) * b(1, 2) + a(2, 2) * b(2, 2);
  1130. c(0, 3) = a(0, 0) * b(0, 3) + a(0, 1) * b(1, 3) + a(0, 2) * b(2, 3) + a(0, 3);
  1131. c(1, 3) = a(1, 0) * b(0, 3) + a(1, 1) * b(1, 3) + a(1, 2) * b(2, 3) + a(1, 3);
  1132. c(2, 3) = a(2, 0) * b(0, 3) + a(2, 1) * b(1, 3) + a(2, 2) * b(2, 3) + a(2, 3);
  1133. return c;
  1134. }
  1135. #if ANKI_ENABLE_SIMD
  1136. ANKI_ENABLE_METHOD(J == 3 && I == 4 && HAS_SIMD)
  1137. TMat combineTransformations(const TMat& b) const
  1138. {
  1139. TMat c;
  1140. const auto& a = *this;
  1141. # if ANKI_SIMD_SSE
  1142. for(U i = 0; i < 3; i++)
  1143. {
  1144. __m128 t1, t2;
  1145. t1 = _mm_set1_ps(a(i, 0));
  1146. t2 = _mm_mul_ps(b.m_simd[0], t1);
  1147. t1 = _mm_set1_ps(a(i, 1));
  1148. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[1], t1), t2);
  1149. t1 = _mm_set1_ps(a(i, 2));
  1150. t2 = _mm_add_ps(_mm_mul_ps(b.m_simd[2], t1), t2);
  1151. TVec<T, 4> v4(T(0), T(0), T(0), a(i, 3));
  1152. t2 = _mm_add_ps(v4.getSimd(), t2);
  1153. c.m_simd[i] = t2;
  1154. }
  1155. # else
  1156. for(U i = 0; i < 3; i++)
  1157. {
  1158. float32x4_t t1, t2;
  1159. t1 = vdupq_n_f32(a(i, 0));
  1160. t2 = b.m_simd[0] * t1;
  1161. t1 = vdupq_n_f32(a(i, 1));
  1162. t2 = b.m_simd[1] * t1 + t2;
  1163. t1 = vdupq_n_f32(a(i, 2));
  1164. t2 = b.m_simd[2] * t1 + t2;
  1165. TVec<T, 4> v4(T(0), T(0), T(0), a(i, 3));
  1166. t2 += v4.getSimd();
  1167. c.m_simd[i] = t2;
  1168. }
  1169. # endif
  1170. return c;
  1171. }
  1172. #endif
  1173. /// Calculate a perspective projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
  1174. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1175. static ANKI_USE_RESULT TMat calculatePerspectiveProjectionMatrix(T fovX, T fovY, T near, T far)
  1176. {
  1177. ANKI_ASSERT(fovX > T(0) && fovY > T(0) && near > T(0) && far > T(0));
  1178. const T g = near - far;
  1179. const T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
  1180. TMat proj;
  1181. proj(0, 0) = f * (fovY / fovX); // = f/aspectRatio;
  1182. proj(0, 1) = T(0);
  1183. proj(0, 2) = T(0);
  1184. proj(0, 3) = T(0);
  1185. proj(1, 0) = T(0);
  1186. proj(1, 1) = f;
  1187. proj(1, 2) = T(0);
  1188. proj(1, 3) = T(0);
  1189. proj(2, 0) = T(0);
  1190. proj(2, 1) = T(0);
  1191. proj(2, 2) = far / g;
  1192. proj(2, 3) = (far * near) / g;
  1193. proj(3, 0) = T(0);
  1194. proj(3, 1) = T(0);
  1195. proj(3, 2) = T(-1);
  1196. proj(3, 3) = T(0);
  1197. return proj;
  1198. }
  1199. /// Calculate an orthographic projection matrix. The z is mapped in [0, 1] range just like DX and Vulkan.
  1200. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1201. static ANKI_USE_RESULT TMat calculateOrthographicProjectionMatrix(T right, T left, T top, T bottom, T near, T far)
  1202. {
  1203. ANKI_ASSERT(right != T(0) && left != T(0) && top != T(0) && bottom != T(0) && near != T(0) && far != T(0));
  1204. const T difx = right - left;
  1205. const T dify = top - bottom;
  1206. const T difz = far - near;
  1207. const T tx = -(right + left) / difx;
  1208. const T ty = -(top + bottom) / dify;
  1209. const T tz = -near / difz;
  1210. TMat m;
  1211. m(0, 0) = T(2) / difx;
  1212. m(0, 1) = T(0);
  1213. m(0, 2) = T(0);
  1214. m(0, 3) = tx;
  1215. m(1, 0) = T(0);
  1216. m(1, 1) = T(2) / dify;
  1217. m(1, 2) = T(0);
  1218. m(1, 3) = ty;
  1219. m(2, 0) = T(0);
  1220. m(2, 1) = T(0);
  1221. m(2, 2) = T(-1) / difz;
  1222. m(2, 3) = tz;
  1223. m(3, 0) = T(0);
  1224. m(3, 1) = T(0);
  1225. m(3, 2) = T(0);
  1226. m(3, 3) = T(1);
  1227. return m;
  1228. }
  1229. /// Given the parameters that construct a projection matrix extract 4 values that can be used to unproject a point
  1230. /// from NDC to view space.
  1231. /// @code
  1232. /// Vec4 unprojParams = calculatePerspectiveUnprojectionParams(...);
  1233. /// F32 z = unprojParams.z() / (unprojParams.w() + depth);
  1234. /// Vec2 xy = ndc.xy() * unprojParams.xy() * z;
  1235. /// Vec3 posViewSpace(xy, z);
  1236. /// @endcode
  1237. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1238. static TVec<T, 4> calculatePerspectiveUnprojectionParams(T fovX, T fovY, T near, T far)
  1239. {
  1240. TVec<T, 4> out;
  1241. const T g = near - far;
  1242. const T f = T(1) / tan(fovY / T(2)); // f = cot(fovY/2)
  1243. const T m00 = f * (fovY / fovX);
  1244. const T m11 = f;
  1245. const T m22 = far / g;
  1246. const T m23 = (far * near) / g;
  1247. // First, clip = (m * Pv) where Pv is the view space position.
  1248. // ndc.z = clip.z / clip.w = (m22 * Pv.z + m23) / -Pv.z. Note that ndc.z == depth in zero_to_one projection.
  1249. // Solving that for Pv.z we get
  1250. // Pv.z = A / (depth + B)
  1251. // where A = -m23 and B = m22
  1252. // so we save the A and B in the projection params vector
  1253. out.z() = -m23;
  1254. out.w() = m22;
  1255. // Using the same logic the Pv.x = x' * w / m00
  1256. // so Pv.x = x' * Pv.z * (-1 / m00)
  1257. out.x() = -T(T(1)) / m00;
  1258. // Same for y
  1259. out.y() = -T(T(1)) / m11;
  1260. return out;
  1261. }
  1262. /// Assuming this is a projection matrix extract the unprojection parameters. See
  1263. /// calculatePerspectiveUnprojectionParams for more info.
  1264. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1265. TVec<T, 4> extractPerspectiveUnprojectionParams() const
  1266. {
  1267. TVec<T, 4> out;
  1268. const auto& m = *this;
  1269. out.z() = -m(2, 3);
  1270. out.w() = m(2, 2);
  1271. out.x() = -T(T(1)) / m(0, 0);
  1272. out.y() = -T(T(1)) / m(1, 1);
  1273. return out;
  1274. }
  1275. /// If we suppose this matrix represents a transformation, return the inverted transformation
  1276. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1277. TMat getInverseTransformation() const
  1278. {
  1279. const TMat<T, 3, 3> invertedRot = getRotationPart().getTransposed();
  1280. TVec<T, 3> invertedTsl = getTranslationPart().xyz();
  1281. invertedTsl = -(invertedRot * invertedTsl);
  1282. return TMat(invertedTsl.xyz0(), invertedRot);
  1283. }
  1284. /// @note 9 muls, 9 adds
  1285. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1286. TVec<T, 3> transform(const TVec<T, 3>& v) const
  1287. {
  1288. const auto& m = *this;
  1289. return TVec<T, 3>(m(0, 0) * v.x() + m(0, 1) * v.y() + m(0, 2) * v.z() + m(0, 3),
  1290. m(1, 0) * v.x() + m(1, 1) * v.y() + m(1, 2) * v.z() + m(1, 3),
  1291. m(2, 0) * v.x() + m(2, 1) * v.y() + m(2, 2) * v.z() + m(2, 3));
  1292. }
  1293. /// Create a new transform matrix position at eye and looking at refPoint.
  1294. template<U VEC_DIMS, ANKI_ENABLE(J == 3 && I == 4 && VEC_DIMS >= 3)>
  1295. static TMat lookAt(const TVec<T, VEC_DIMS>& eye, const TVec<T, VEC_DIMS>& refPoint, const TVec<T, VEC_DIMS>& up)
  1296. {
  1297. const TVec<T, 3> vdir = (refPoint.xyz() - eye.xyz()).getNormalized();
  1298. const TVec<T, 3> vup = (up.xyz() - vdir * up.xyz().dot(vdir)).getNormalized();
  1299. const TVec<T, 3> vside = vdir.cross(vup);
  1300. TMat out;
  1301. out.setColumns(vside, vup, -vdir, eye.xyz());
  1302. return out;
  1303. }
  1304. /// Create a new transform matrix position at eye and looking at refPoint.
  1305. template<U VEC_DIMS, ANKI_ENABLE(J == 4 && I == 4 && VEC_DIMS >= 3)>
  1306. static TMat lookAt(const TVec<T, VEC_DIMS>& eye, const TVec<T, VEC_DIMS>& refPoint, const TVec<T, VEC_DIMS>& up)
  1307. {
  1308. const TVec<T, 4> vdir = (refPoint.xyz0() - eye.xyz0()).getNormalized();
  1309. const TVec<T, 4> vup = (up.xyz0() - vdir * up.xyz0().dot(vdir)).getNormalized();
  1310. const TVec<T, 4> vside = vdir.cross(vup);
  1311. TMat out;
  1312. out.setColumns(vside, vup, -vdir, eye.xyz1());
  1313. return out;
  1314. }
  1315. TMat lerp(const TMat& b, T t) const
  1316. {
  1317. return ((*this) * (T(1) - t)) + (b * t);
  1318. }
  1319. static TMat getZero()
  1320. {
  1321. return TMat(T(0));
  1322. }
  1323. void setZero()
  1324. {
  1325. *this = getZero();
  1326. }
  1327. ANKI_ENABLE_METHOD(I == 3 && J == 3)
  1328. static TMat getIdentity()
  1329. {
  1330. return TMat(T(1), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(1));
  1331. }
  1332. ANKI_ENABLE_METHOD(I == 4 && J == 4)
  1333. static TMat getIdentity()
  1334. {
  1335. return TMat(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1));
  1336. }
  1337. ANKI_ENABLE_METHOD(I == 4 && J == 3)
  1338. static TMat getIdentity()
  1339. {
  1340. return TMat(T(1), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1), T(0));
  1341. }
  1342. void setIdentity()
  1343. {
  1344. (*this) = getIdentity();
  1345. }
  1346. static constexpr U8 getSize()
  1347. {
  1348. return U8(I * J);
  1349. }
  1350. ANKI_ENABLE_METHOD(std::is_floating_point<T>::value)
  1351. void toString(StringAuto& str) const
  1352. {
  1353. for(U j = 0; j < J; ++j)
  1354. {
  1355. for(U i = 0; i < I; ++i)
  1356. {
  1357. CString fmt;
  1358. if(i == I - 1 && j == J - 1)
  1359. {
  1360. fmt = "%f";
  1361. }
  1362. else if(i == I - 1)
  1363. {
  1364. fmt = "%f\n";
  1365. }
  1366. else
  1367. {
  1368. fmt = "%f ";
  1369. }
  1370. str.append(StringAuto(str.getAllocator()).sprintf(fmt, m_arr2[j][i]));
  1371. }
  1372. }
  1373. }
  1374. /// @}
  1375. protected:
  1376. static constexpr U N = I * J;
  1377. /// @name Data members
  1378. /// @{
  1379. union
  1380. {
  1381. Array<T, N> m_arr1;
  1382. Array2d<T, J, I> m_arr2;
  1383. T m_carr1[N]; ///< For easier debugging with gdb
  1384. T m_carr2[J][I]; ///< For easier debugging with gdb
  1385. SimdArray m_simd;
  1386. };
  1387. /// @}
  1388. };
  1389. /// @memberof TMat
  1390. template<typename T, U J, U I>
  1391. TMat<T, J, I> operator+(const T f, const TMat<T, J, I>& m)
  1392. {
  1393. return m + f;
  1394. }
  1395. /// @memberof TMat
  1396. template<typename T, U J, U I>
  1397. TMat<T, J, I> operator-(const T f, const TMat<T, J, I>& m)
  1398. {
  1399. TMat<T, J, I> out;
  1400. for(U i = 0; i < J * I; i++)
  1401. {
  1402. out[i] = f - m[i];
  1403. }
  1404. return out;
  1405. }
  1406. /// @memberof TMat
  1407. template<typename T, U J, U I>
  1408. TMat<T, J, I> operator*(const T f, const TMat<T, J, I>& m)
  1409. {
  1410. return m * f;
  1411. }
  1412. /// @memberof TMat
  1413. template<typename T, U J, U I>
  1414. TMat<T, J, I> operator/(const T f, const TMat<T, 3, 3>& m3)
  1415. {
  1416. TMat<T, J, I> out;
  1417. for(U i = 0; i < J * I; i++)
  1418. {
  1419. ANKI_ASSERT(m3[i] != T(0));
  1420. out[i] = f / m3[i];
  1421. }
  1422. return out;
  1423. }
  1424. /// F32 3x3 matrix
  1425. using Mat3 = TMat<F32, 3, 3>;
  1426. static_assert(sizeof(Mat3) == sizeof(F32) * 3 * 3, "Incorrect size");
  1427. /// F64 3x3 matrix
  1428. using DMat3 = TMat<F64, 3, 3>;
  1429. static_assert(sizeof(DMat3) == sizeof(F64) * 3 * 3, "Incorrect size");
  1430. /// F32 4x4 matrix
  1431. using Mat4 = TMat<F32, 4, 4>;
  1432. static_assert(sizeof(Mat4) == sizeof(F32) * 4 * 4, "Incorrect size");
  1433. /// F64 4x4 matrix
  1434. using DMat4 = TMat<F64, 4, 4>;
  1435. static_assert(sizeof(DMat4) == sizeof(F64) * 4 * 4, "Incorrect size");
  1436. /// F32 3x4 matrix
  1437. using Mat3x4 = TMat<F32, 3, 4>;
  1438. static_assert(sizeof(Mat3x4) == sizeof(F32) * 3 * 4, "Incorrect size");
  1439. /// F64 3x4 matrix
  1440. using DMat3x4 = TMat<F64, 3, 4>;
  1441. static_assert(sizeof(DMat3x4) == sizeof(F64) * 3 * 4, "Incorrect size");
  1442. /// @}
  1443. } // end namespace anki