main.cpp 57 KB


  1. ///////////////////////////////////////////////////////////////////////////////////////////////////
  2. // OpenGL Mathematics Copyright (c) 2005 - 2010 G-Truc Creation (www.g-truc.net)
  3. ///////////////////////////////////////////////////////////////////////////////////////////////////
  4. // Created : 2005-01-12
  5. // Updated : 2006-12-03
  6. // Licence : This source is under MIT License
  7. // File : main.cpp
  8. ///////////////////////////////////////////////////////////////////////////////////////////////////
  9. #include "precompiled.hpp"
  10. #include <glm/gtc/matrix_operation.hpp>
  11. #include <glm/gtx/simd_vec4.hpp>
  12. #include <glm/core/intrinsic_common.hpp>
  13. #include <glm/core/intrinsic_exponential.hpp>
  14. #include <glm/core/intrinsic_geometric.hpp>
  15. #include <glm/core/intrinsic_matrix.hpp>
  16. #include <glm/core/intrinsic_trigonometric.hpp>
  17. #include <glm/core/intrinsic_vector_relational.hpp>
  18. #include <glm/gtc/matrix_transform.hpp>
  19. #include <glm/gtc/matrix_projection.hpp>
  20. #include <glm/gtc/quaternion.hpp>
  21. #include <glm/gtx/bit.hpp>
  22. #include <glm/gtx/inverse.hpp>
  23. #include <glm/gtx/inverse_transpose.hpp>
  24. #include <glm/gtx/fast_square_root.hpp>
  25. #include <glm/gtx/string_cast.hpp>
  26. #include <glm/gtx/intersect.hpp>
  27. #include <glm/gtx/matrix_operation.hpp>
  28. #include <glm/gtx/integer.hpp>
  29. #include <glm/gtx/random.hpp>
  30. #include <glm/gtx/color_cast.hpp>
  31. #include <glm/gtx/transform2.hpp>
  32. #include <glm/gtx/gradient_paint.hpp>
  33. #include <glm/gtx/perpendicular.hpp>
  34. #include <glm/gtx/vector_angle.hpp>
  35. #include <glm/gtx/rotate_vector.hpp>
  36. #include "core.hpp"
  37. #include "img.hpp"
  38. #include "gtc.hpp"
  39. #include "gtx.hpp"
  40. #include <vector>
  41. //namespace glm
  42. //{
  43. // using GLM_GTX_double_float;
  44. // using GLM_GTX_inverse;
  45. // using GLM_GTX_integer;
  46. // using GLM_GTX_unsigned_int;
  47. //using GLM_GTX_bit;
  48. // using GLM_GTX_fast_square_root;
  49. // using GLM_GTX_number_precision;
  50. // using GLM_GTX_color_cast;
  51. // using GLM_GTX_quaternion;
  52. // using GLM_GTX_inverse_transpose;
  53. // using GLM_GTX_transform;
  54. // using GLM_GTX_transform2;
  55. // using GLM_GTX_intersect;
  56. // using GLM_GTX_random;
  57. // using GLM_GTX_gradient_paint;
  58. // using GLM_GTX_perpendicular;
  59. //using GLM_GTX_compatibility;
  60. //using GLM_GTX_quaternion;
  61. //using GLM_GTX_string_cast;
  62. //using GLM_GTX_fast_square_root;
  63. //using GLM_GTX_half_float;
  64. //using GLM_GTX_vector_angle;
  65. //using GLM_GTX_matrix_projection;
  66. //}
  /// Returns a CPU cycle count for coarse timing.
  ///
  /// The time-stamp-counter read that used to live here (MSVC inline assembly,
  /// kept below for reference) is disabled, so this is currently a stub.
  ///
  /// @return Always 0.
  unsigned int GetCpuCycle()
  {
      // Disabled MSVC-only implementation:
      // unsigned int LowWord = 0;
      // unsigned int HighWord = 0;
      // _asm
      // {
      //     cpuid
      //     // Insert Real Time Stamp Counter opcodes
      //     _emit 0x0F
      //     _emit 0x31
      //     mov HighWord, edx
      //     mov LowWord, eax
      // }
      // return ((__int64)(HighWord) << 32) + LowWord;
      return 0;
  }
  83. //namespace wip
  84. //{
  85. // inline glm::detail::fvec4SIMD rcp(glm::detail::fvec4SIMD const & v)
  86. // {
  87. // return glm::detail::fvec4SIMD(_mm_rcp_ps(v.Data));
  88. // }
  89. //
  90. // inline glm::detail::fvec4SIMD sqrt(glm::detail::fvec4SIMD const & v)
  91. // {
  92. // return glm::detail::fvec4SIMD(_mm_sqrt_ps(v.Data));
  93. // }
  94. //
  95. // inline glm::detail::fvec4SIMD inversesqrt(glm::detail::fvec4SIMD const & v)
  96. // {
  97. // return glm::detail::fvec4SIMD(_mm_rsqrt_ps(v.Data));
  98. // }
  99. //
  100. // inline glm::detail::fvec4SIMD min(glm::detail::fvec4SIMD const & v1, glm::detail::fvec4SIMD const & v2)
  101. // {
  102. // return glm::detail::fvec4SIMD(_mm_min_ps(v1.Data, v2.Data));
  103. // }
  104. //
  105. // inline glm::detail::fvec4SIMD max(glm::detail::fvec4SIMD const & v1, glm::detail::fvec4SIMD const & v2)
  106. // {
  107. // return glm::detail::fvec4SIMD(_mm_max_ps(v1.Data, v2.Data));
  108. // }
  109. //}//namespace wip
  110. void test_simd()
  111. {
  112. glm::detail::fvec4SIMD v1(1.0f, 2.0f, 3.0f, 4.0f);
  113. glm::detail::fvec4SIMD v2(5.0f, 6.0f, 7.0f, 8.0f);
  114. bool end = true;
  115. }
  // Nested-namespace / namespace-alias check: `c` is an alias for `a::b`.
  namespace a
  {
      namespace b
      {
      }
  }

  namespace c = a::b;
  121. //#include <glm/ext/virtrev/gl.hpp>
  122. #include <cstdio>
  123. #include <cstdlib>
  124. #include <ctime>
  125. //#include <windows.hpp>
  126. //#include <boost/static_assert.hpp>
  127. #ifdef min
  128. #undef min
  129. #endif
  130. #ifdef max
  131. #undef max
  132. #endif
  133. //#include "test/sse_vec4.h"
  134. //#include "test/sse_mat4.h"
  135. /*
  136. inline float fastExp0(float x)
  137. {
  138. return 1.0f + x + (x * x * 0.5f) + (x * x * x * 0.1666666667f) + (x * x * x * x * 0.041666667f) + (x * x * x * x * x * 0.008333333333f) + (x * x * x * x * x * x * 0.00138888888888f) + (x * x * x * x * x * x * x * 0.000198412698f) + (x * x * x * x * x * x * x * x * 0.0000248015873f);
  139. }
  140. inline float fastExp1(float x)
  141. {
  142. float x2 = x * x;
  143. float x3 = x2 * x;
  144. float x4 = x3 * x;
  145. float x5 = x4 * x;
  146. float x6 = x5 * x;
  147. float x7 = x6 * x;
  148. float x8 = x7 * x;
  149. return 1.0f + x + (x2 * 0.5f) + (x3 * 0.1666666667f) + (x4 * 0.041666667f) + (x5 * 0.008333333333f) + (x6 * 0.00138888888888f) + (x7 * 0.000198412698f) + (x8 * 0.0000248015873f);
  150. }
  151. inline float fastExp2(float x)
  152. {
  153. float x2 = x * x;
  154. float x3 = x2 * x;
  155. float x4 = x3 * x;
  156. float x5 = x4 * x;
  157. return 1.0f + x + (x2 * 0.5f) + (x3 * 0.1666666667f) + (x4 * 0.041666667f) + (x5 * 0.008333333333f);
  158. }
  159. inline float fastExp3(float x)
  160. {
  161. return 1.0f + x * (1.0f + x * 0.5f * (1.0f + x * 0.3333333333f * (1.0f + x * 0.25 * (1.0f + x * 0.2f))));
  162. }
  163. inline float fastExp4(float x)
  164. {
  165. if(x >= 0.0f && x <= 1.0f)
  166. {
  167. float x2 = x * x;
  168. float x3 = x2 * x;
  169. float x4 = x3 * x;
  170. float x5 = x4 * x;
  171. return 1.0f + x + (x2 * 0.5f) + (x3 * 0.1666666667f) + (x4 * 0.041666667f) + (x5 * 0.008333333333f);
  172. }
  173. else
  174. {
  175. float e = 2.718281828f;
  176. float IntegerPart = glm::floor(x);
  177. float FloatPart = x - IntegerPart;
  178. float z = e;
  179. for(int i = 1; i < int(IntegerPart); ++i)
  180. z *= e;
  181. float x2 = FloatPart * FloatPart;
  182. float x3 = x2 * FloatPart;
  183. float x4 = x3 * FloatPart;
  184. float x5 = x4 * FloatPart;
  185. return z * (1.0f + FloatPart + (x2 * 0.5f) + (x3 * 0.1666666667f) + (x4 * 0.041666667f) + (x5 * 0.008333333333f));
  186. }
  187. }
  188. __forceinline float fastExp5(float x)
  189. {
  190. const float e = 2.718281828f;
  191. const float IntegerPart = glm::floor(x);
  192. const float FloatPart = x - IntegerPart;
  193. float z = 1.f;
  194. //for(int i = 0; i < int(IntegerPart); ++i)
  195. // z *= e;
  196. const float x2 = FloatPart * FloatPart;
  197. const float x3 = x2 * FloatPart;
  198. const float x4 = x3 * FloatPart;
  199. const float x5 = x4 * FloatPart;
  200. return z * (1.0f + FloatPart + (x2 * 0.5f) + (x3 * 0.1666666667f) + (x4 * 0.041666667f) + (x5 * 0.008333333333f));
  201. }
  202. inline float fastLn0(float x)
  203. {
  204. float y1 = (x - 1.0f) / (x + 1.0f);
  205. float y2 = y1 * y1;
  206. float y4 = y2 * y2;
  207. float y6 = y4 * y2;
  208. float y8 = y4 * y4;
  209. return 2.0f * y1 * (1.0f + y2 * 0.3333333333f + y4 * 0.2f + y6 * 0.1428571429f);// + y8 * 0.1111111111f);
  210. }
  211. inline float fastLn1(float x)
  212. {
  213. float y1 = (x - 1.0f) / (x + 1.0f);
  214. float y2 = y1 * y1;
  215. return 2.0f * y1 * (1.0f + y2 * (0.3333333333f + y2 * (0.2f + y2 * 0.1428571429f)));
  216. }
  217. */
  218. using namespace std;
  219. using namespace glm;
  220. void subtitiution()
  221. {
  222. //--------------------------------------------------------------------
  223. //AX=B
  224. mat4 L(0.f);
  225. mat4 U(0.f);
  226. mat4 A = mat4(
  227. vec4(4.f),
  228. vec4(4.f),
  229. vec4(4.f),
  230. vec4(4.f));
  231. for(int i=0;i < 4;i++)
  232. for(int j=0;j < 4;j++)
  233. {
  234. if(i>j)
  235. U[i][j]=0;
  236. else if(i==j)
  237. L[i][j]=1;
  238. else
  239. L[i][j]=0;
  240. }
  241. printf("A:\n");
  242. for(int j = 0; j < 4; ++j)
  243. {
  244. printf("(");
  245. for(int i = 0; i < 4; ++i)
  246. printf("%f ", A[j][i]);
  247. printf(")\n");
  248. }
  249. printf("\n");
  250. //Decomposition of A into L and U
  251. for(int i = 0; i < 4; ++i)
  252. for(int j = 0; j < 4; ++j)
  253. {
  254. float Sum = 0.f;
  255. if(i <= j)
  256. {
  257. for(int k = 0; k < 4; ++k)
  258. if(k != i)
  259. Sum += L[i][k] * U[k][j];
  260. U[i][j] = (A[i][j] - Sum);// / U[j][j];
  261. }
  262. else
  263. {
  264. for(int k = 0; k < 4; k++)
  265. if(k != j)
  266. Sum += L[i][k] * U[k][j];
  267. L[i][j] = (A[i][j] - Sum) / U[j][j];
  268. }
  269. }
  270. printf("L:\n");
  271. for(int j = 0; j < 4; ++j)
  272. {
  273. printf("(");
  274. for(int i = 0; i < 4; ++i)
  275. printf("%f ", L[j][i]);
  276. printf(")\n");
  277. }
  278. printf("\n");
  279. printf("U:\n");
  280. for(int j = 0; j < 4; ++j)
  281. {
  282. printf("(");
  283. for(int i = 0; i < 4; ++i)
  284. printf("%f ", U[j][i]);
  285. printf(")\n");
  286. }
  287. printf("\n");
  288. system("pause");
  289. }
  290. void LUDecompsition(
  291. const detail::tmat4x4<float>& m,
  292. detail::tmat4x4<float>& l,
  293. detail::tmat4x4<float>& u)
  294. {
  295. for(int i = 0; i < 4; ++i)
  296. l[i][i] = 1.f;
  297. for(int j = 0; j < 4; ++j)
  298. {
  299. //for(int i = 0; i <= j; ++i)
  300. for(int i = 0; i < j; ++i)
  301. {
  302. u[j][i] = m[j][i];
  303. for(int k = 0; k < i - 1; ++k)
  304. //for(int k = 0; k < i; ++k)
  305. u[j][i] -= l[k][i] * u[j][k];
  306. }
  307. //for(int i = j + 1; i < 4; ++i)
  308. for(int i = j; i < 4; ++i)
  309. {
  310. l[j][i] = m[j][i];
  311. for(int k = 0; k < j - 1; ++k)
  312. //for(int k = 0; k < j; ++k)
  313. l[j][i] -= l[k][i] * u[j][k];
  314. }
  315. }
  316. }
  317. /*
  318. ivec3& operator+=(const ivec3& v, const int s)
  319. {
  320. ivec3 Result = v;
  321. Result.x &= s;
  322. Result.y &= s;
  323. Result.z &= s;
  324. return Result;
  325. }
  326. */
  327. void g()
  328. {
  329. mat4 m = glm::inverse(mat4(4.0f));
  330. }
  331. void test_gtx_bit()
  332. {
  333. int Number1 = 76;
  334. int NumberA = glm::highestBit(Number1);
  335. int NumberB = glm::highestBitValue(Number1);
  336. bool NumberC = glm::isPowerOfTwo(Number1);
  337. int NumberD = glm::powerOfTwoAbove(Number1);
  338. int NumberE = glm::powerOfTwoBelow(Number1);
  339. int NumberF = glm::powerOfTwoNearest(Number1);
  340. int Number2 = 256;
  341. int NumberG = glm::highestBit(Number2);
  342. int NumberH = glm::highestBitValue(Number2);
  343. bool NumberI = glm::isPowerOfTwo(Number2);
  344. int NumberJ = glm::powerOfTwoAbove(Number2);
  345. int NumberK = glm::powerOfTwoBelow(Number2);
  346. int NumberL = glm::powerOfTwoNearest(Number2);
  347. int NumberZ = 0;
  348. }
  349. /*
  350. struct gni
  351. {
  352. void constructor(short i)
  353. {
  354. data = i;
  355. }
  356. gni& assignement(const gni& i)
  357. {
  358. data = i.data;
  359. return *this;
  360. }
  361. short data;
  362. }
  363. union pouet
  364. {
  365. pouet(detail::thalf x, detail::thalf y, detail::thalf z) :
  366. x(x), y(y), z(z)
  367. {}
  368. struct{detail::thalf x, y, z;};
  369. struct{detail::thalf r, g, b;};
  370. struct{detail::thalf s, t, q;};
  371. };
  372. */
  373. //#include GLM_EXTENSION(GLM_GTX_compatibility, glm::required);
  374. vec4 mix_fpu(const vec4& x, const vec4& y, const vec4& a)
  375. {
  376. return x * (vec4(1) - a) + y * a;
  377. }
  378. /*
  379. namespace glm
  380. {
  381. template<class T, int N>
  382. struct traits
  383. {
  384. typedef T value_type;
  385. typedef T* pointer;
  386. typedef T& reference;
  387. typedef std::size_t size_type;
  388. static const size_type value_size;
  389. };
  390. template<class T, int N>
  391. static const traits::size_type traits::value_size = N;
  392. template<class Type>
  393. struct traits<Type*>
  394. {
  395. typedef Type value_type;
  396. typedef Type* pointer;
  397. typedef Type& reference;
  398. };
  399. template<class Type>
  400. struct traits<const Type*>
  401. {
  402. typedef Type value_type;
  403. typedef const Type* pointer;
  404. typedef const Type& reference;
  405. };
  406. }
  407. */
  408. vec2 reference_swizzle(const vec3& a, const vec3& b)
  409. {
  410. return vec2(0.0f);
  411. }
  412. //void test_random()
  413. //{
  414. // float Value = gaussRand1GTX(1.0f, 2.0f);
  415. // detail::_xvecxGTX<8, float> vecN;
  416. // vecN = compRandGTX(vecN, vecN);
  417. //}
  418. void test_matrix()
  419. {
  420. // glm::mat4 Transform = glm::translateGTX(
  421. // glm::rotateGTX(45.f, glm::core::func_geometric::normalize(glm::vec3(1))), glm::vec3(1, 2, 3));
  422. glm::vec3 Normalized = glm::normalize(glm::vec3(1));
  423. glm::mat4 Transform = glm::gtc::matrix_transform::translate(
  424. glm::gtc::matrix_transform::rotate(glm::mat4(1.0f), 45.f, Normalized),
  425. glm::vec3(1, 2, 3));
  426. glm::mat4 TransformA = glm::inverse(Transform);
  427. glm::mat4 TransformB = glm::affineInverse(Transform);
  428. glm::mat4 End;
  429. }
  430. void test_vec4()
  431. {
  432. {
  433. glm::vec4 v(1.0);
  434. glm::vec4 const* const pv = &v;
  435. glm::vec4 const& rv = v;
  436. //float const* pointer1 = &v;
  437. //float const* pointer2 = v;
  438. //float const* pointer3 = pv;
  439. //float const* pointer4 = &rv;
  440. //glm::vec4* p = &v;
  441. //glm::vec4 v6 = v + long(3);
  442. float const* pointer1 = &v[0];
  443. }
  444. //{
  445. // glm::mat4 m;
  446. // glm::mat4 const* const pm = &m;
  447. // glm::mat4 const& rm = m;
  448. // float const* pointer1 = &m;
  449. // float const* pointer2 = m;
  450. // float const* pointer3 = pm;
  451. // float const* pointer4 = &rm;
  452. //}
  453. }
  454. void test_mat4(const glm::mat4& m)
  455. {
  456. //glm::mat4 copy = m;
  457. //glLoadMatrix(GL_MODELVIEW, copy);
  458. }
  459. void test_string_cast()
  460. {
  461. printf("Test string cast: %s\n", glm::string(glm::vec4(1, 2, 3, 4)).c_str());
  462. }
  463. void test_isPowerOfTwo()
  464. {
  465. bool resultA = glm::isPowerOfTwo(unsigned(512));
  466. bool resultB = glm::isPowerOfTwo(unsigned(513));
  467. bool end = true;
  468. }
  469. //#include <glm/glw.hpp>
  470. //
  471. //void test_opengl_wrapper()
  472. //{
  473. // glVertex(glm::vec3(1.0f));
  474. //}
  475. void fast_inverse()
  476. {
  477. float one = fastInverseSqrt(1.0f);
  478. float two = fastInverseSqrt(2.0f);
  479. float result = 1.0f;
  480. }
  481. //void test_desk()
  482. //{
  483. // typedef glm::detail::desc<glm::vec4>::value_type vec4_type;
  484. // vec4_type Test(1.0f);
  485. //
  486. // glm::detail::desc<glm::vec4>::size_type Size = glm::detail::desc<glm::vec4>::value_size();
  487. //
  488. // int end = 0;
  489. //}
  490. //template <typename genType>
  491. //genType TemplateFuncMin(const genType& Type1, const genType& Type2)
  492. //{
  493. // genType Result;
  494. // for
  495. // (
  496. // glm::detail::desc<glm::vec4>::size_type i = glm::detail::desc<glm::vec4>::size_type(0);
  497. // i < glm::detail::desc<glm::vec4>::value_size;
  498. // ++i
  499. // )
  500. // {
  501. // Result[i] = Type1[i] < Type2[i] ? Type1[i] : Type2[i];
  502. // }
  503. //
  504. // return Result;
  505. //}
  506. //
  507. //void test_genType()
  508. //{
  509. // glm::vec1 Result1 = TemplateFuncMin(glm::vec1(1), glm::vec1(2));
  510. //// float ResultF = TemplateFuncMin(1.f, 2.f);
  511. // vec4 Result4 = TemplateFuncMin(glm::vec4(1), glm::vec4(2));
  512. //
  513. // bool end = true;
  514. //}
  // Minimal types for checking implicit conversion during operator lookup:
  // C1 converts implicitly to C2, and operator+ is only defined for C2.
  namespace test_ac
  {
      /// Source type; always holds 76.
      struct C1
      {
          C1() :
              data(76)
          {}

          int data;
      };

      /// Target type, implicitly constructible from C1.
      struct C2
      {
          // Initialise the member so a default-constructed C2 never exposes
          // an indeterminate value.
          C2() :
              data(0)
          {}

          // Deliberately not explicit: the implicit C1 -> C2 conversion is
          // what the test exercises.
          C2(const C1& c1) :
              data(c1.data)
          {}

          int data;
      };

      /// Adds the payloads of two C2 values.
      C2 operator+ (const C2& a, const C2& b)
      {
          C2 result;
          result.data = a.data + b.data;
          return result;
      }
  }
  539. void test_auto_cast()
  540. {
  541. test_ac::C1 c1;
  542. test_ac::C2 c2 = c1;
  543. test_ac::C2 c3 = c2 + c1;
  544. //
  545. // glm::vec3 Data = glm::vec2(1.f)._yxy();
  546. }
  /// Component-wise absolute value.
  ///
  /// genType must provide `size_type`, `value_type`, a `size_value` component
  /// count and operator[].
  /// NOTE(review): other code in this file spells the count `value_size()`;
  /// `size_value` is kept as found -- confirm which one the vector types expose.
  ///
  /// @param v  Input vector.
  /// @return   Copy of `v` with every negative component negated.
  template <typename genType>
  genType abs(const genType& v)
  {
      // Start from the input: the previous version read a default-constructed
      // Result and never looked at v.
      genType Result(v);
      for(typename genType::size_type i = 0; i < genType::size_value; ++i)
      {
          if(Result[i] < typename genType::value_type(0))
              Result[i] = -Result[i];
      }
      return Result;
  }
  555. void test_quaternion()
  556. {
  557. glm::vec3 tan(1.0f);
  558. glm::quat q;
  559. tan = glm::cross(q, tan);
  560. }
  561. void test_swizzle()
  562. {
  563. glm::vec2 a(1, 2);
  564. glm::vec4 b = a.swizzle(X, Y, X, Y);
  565. glm::vec4 c(0);
  566. c.swizzle(X, Y, Z, W) = b.swizzle(X, Y, Z, W);
  567. //a.xyz = b.xyz;
  568. bool end = true;
  569. }
  570. void test_angle()
  571. {
  572. //float angle1 = glm::angle(0.1f, 0.2f);
  573. float angle2 = glm::angle(glm::vec2(0.1f), glm::vec2(0.2f));
  574. return;
  575. }
  576. void test_half()
  577. {
  578. hmat2 hmatrix1(half(1.f));
  579. hmat2 hmatrix2(half(2.f));
  580. hmat2 hmatrix3 = hmatrix1 + hmatrix2;
  581. half hscalar = hmatrix3[0].x;
  582. double fscalar = hscalar;
  583. hvec2::size_type Size = hvec2::value_size();
  584. return;
  585. }
  586. template <typename valType, profile proType>
  587. valType func_profile(glm::detail::tvec3<valType> const & x, valType y)
  588. {
  589. return glm::dot(x, glm::detail::tvec3<valType>(y));
  590. }
  591. template <>
  592. float func_profile<float, fast>(glm::vec3 const & x, float y)
  593. {
  594. return glm::dot(x, glm::vec3(y));
  595. }
  /// Placeholder for the profile tests; currently does nothing.
  void test_profile()
  {
      // Intentionally empty.
  }
  599. //template <typename valType, int C, int R>
  600. //class TestType
  601. //{
  602. //public:
  603. // TestType(valType const & x);
  604. //};
  605. //
  606. //template <typename valType, int C, int R>
  607. //TestType<valType, C, R>::TestType(valType const & x)
  608. //{}
  609. //
  610. //template <typename valType>
  611. //class TestType<valType, 4, 1>
  612. //{
  613. //public:
  614. // TestType(valType const & x);
  615. //};
  616. //
  617. //template <typename valType>
  618. //TestType<valType, 4, 1>::TestType(valType const & x)
  619. //{}
  620. //
  621. //typedef TestType<float, 4, 1> _vec4;
  622. //
  623. //void test_type()
  624. //{
  625. // _vec4 v(1.0);
  626. //}
  627. void test_fast_inverse()
  628. {
  629. printf("fastInvSqrt(1.0f) : %f\n", fastInverseSqrt(1.0f));
  630. printf("fastInvSqrt(1.0f) : %f\n", inversesqrt(1.0f));
  631. printf("fastInvSqrt(76.0f) : %f\n", fastInverseSqrt(76.f));
  632. printf("fastInvSqrt(76.0f) : %f\n", inversesqrt(76.f));
  633. printf("fastInvSqrt(0.01f) : %f\n", fastInverseSqrt(0.01f));
  634. printf("fastInvSqrt(0.01f) : %f\n", inversesqrt(0.01f));
  635. }
  // Experiment: selecting an implementation through empty "hint" tag types,
  // either as an extra function argument or as a template parameter.
  namespace ns_hint
  {
      /// Base tag.
      struct hint
      {};

      /// Tag derived from hint.
      struct see : public hint
      {};

      /// Tag derived from see.
      struct see2 : public see
      {};

      /// Overload taken for any tag convertible to hint that is not a see2.
      /// @return v * v.
      template <typename vecType>
      vecType func(vecType const & v, hint)
      {
          return v * v;
      }

      /// Overload taken for see2 tags (exact match).
      /// @return v * v.
      template <typename vecType>
      vecType func(vecType const & v, see2)
      {
          return v * v;
      }

      /// Single-value holder parameterised on a hint tag (default: hint).
      template <typename valType, typename Hint = hint>
      struct vec4
      {
          // Value-initialise so that data is never left indeterminate.
          vec4() :
              data()
          {}

          valType data;
      };

      /// Partial specialisation for the see2 tag.
      template <typename valType>
      struct vec4<valType, see2>
      {
          // Value-initialise so that data is never left indeterminate.
          vec4() :
              data()
          {}

          valType data;
      };

      // Alternative enum-based design, left disabled by the author:
      //enum hint
      //{
      //    fast,
      //    nice,
      //    see,
      //    see2,
      //    see3,
      //    see4
      //};
      //template <typename vecType, hint Hint = see2>
      //vecType func(vecType const & v);
      //template <hint Hint = see2>
      //struct functor
      //{
      //    template <typename vecType>
      //    vecType operator() (vecType const & v) const
      //    {
      //        return v * v;
      //    }
      //};
  }
  693. void test_hint()
  694. {
  695. glm::vec3 v1 = glm::vec3(2.0f);
  696. glm::vec3 v2 = ns_hint::func(v1, ns_hint::see());
  697. // ns_hint::vec4 v3;
  698. // ns_hint::vec4<see2> v4;
  699. //glm::vec3 v2 = ns_hint::functor<ns_hint::see2>()(v1);
  700. //glm::vec3 v2 = hint::func<glm::vec3>(v1);
  701. }
  702. void test_const_call
  703. (
  704. glm::mat4 const & m,
  705. glm::vec4 const & v
  706. )
  707. {
  708. float const & Value = m[0][0];
  709. glm::vec4 const & Vector = m[0];
  710. float const & VecRef = v[0];
  711. float const * const ValuePtr = &m[0][0];
  712. glm::vec4 const * const VectorPtr = &m[0];
  713. float const * VecPtr = &v[0];
  714. }
  715. void test_const()
  716. {
  717. test_const_call(
  718. glm::mat4(1.0),
  719. glm::vec4(1.0));
  720. }
  721. void test_transform()
  722. {
  723. glm::vec2 QMin(-1024,+1024);
  724. //glm::vec2 QMax(+1024,-1024);
  725. glm::vec2 QMax(-1024 + 640, 1024);
  726. std::size_t const ValueSize = 6;
  727. glm::vec3 ValueOut[ValueSize];
  728. glm::vec3 ValueData[ValueSize];
  729. ValueData[0] = glm::vec3(QMin.x, QMin.y, 1);//(0, 0)
  730. ValueData[1] = glm::vec3(QMax.x, QMin.y, 1);//(1, 0)
  731. ValueData[2] = glm::vec3(QMax.x, QMax.y, 1);//(1, 1)
  732. ValueData[3] = glm::vec3(QMin.x, QMax.y, 1);//(0, 1)
  733. ValueData[4] = glm::vec3(QMin.x, QMax.y / 4.f, 1);//(0, 0.25f)
  734. ValueData[5] = glm::vec3(QMin.x / 4.f, QMax.y, 1);//(0.25f, 0.25f)
  735. glm::mat3 Q2Norm(1.0f);
  736. Q2Norm[0] = glm::vec3(
  737. 1.0f / (QMax.x - QMin.x),
  738. 0.0f,
  739. 0.0f);
  740. Q2Norm[1] = glm::vec3(
  741. 0.0f,
  742. 1.0f / (QMax.y - QMin.y),
  743. 0.0f);
  744. Q2Norm[2] = glm::vec3(
  745. - QMin.x / (QMax.x - QMin.x),
  746. - QMin.y / (QMax.y - QMin.y),
  747. 1.0f);
  748. glm::vec2 FBMin(0, 0);
  749. glm::vec2 FBMax(640, 480);
  750. glm::mat3 Norm2FB(1.0f);
  751. Norm2FB[0] = glm::vec3(FBMax.x - FBMin.x, 0, 0);
  752. Norm2FB[1] = glm::vec3(0, FBMax.y - FBMin.y, 0);
  753. Norm2FB[2] = glm::vec3(0, 0, 1);
  754. for(std::size_t i = 0; i < ValueSize; ++i)
  755. ValueOut[i] = Norm2FB * Q2Norm * ValueData[i];
  756. bool End = true;
  757. }
  758. //namespace
  759. //{
  760. // template <typename valType>
  761. // valType radialGradient(
  762. // glm::detail::tvec2<valType> const & Center,
  763. // valType const & Radius,
  764. // glm::detail::tvec2<valType> const & Focal,
  765. // glm::detail::tvec2<valType> const & Position)
  766. // {
  767. // glm::detail::tvec2<valType> F = Focal - Center;
  768. // glm::detail::tvec2<valType> D = Position - Focal;
  769. //
  770. // valType Numerator = (D.x * F.x + D.y * F.y) + glm::sqrt((Radius * Radius) * (D.x * D.x + D.y * D.y) - (D.x * F.y - D.y * F.x) * (D.x * F.y - D.y * F.x));
  771. // valType Denominator = (Radius * Radius) - (F.x * F.x + F.y * F.y);
  772. // return Numerator / Denominator;
  773. // }
  774. //}
  775. void test_radial()
  776. {
  777. glm::vec2 Center(0);
  778. float Radius = 1.0f;
  779. glm::vec2 Focal(0);
  780. glm::vec2 PositionA(0);
  781. glm::vec2 PositionB(0, 1);
  782. glm::vec2 PositionC(1, 0);
  783. glm::vec2 PositionD(0.5f, 0.0f);
  784. float GradA = glm::radialGradient(Center, Radius, Focal, PositionA);
  785. float GradB = glm::radialGradient(Center, Radius, Focal, PositionB);
  786. float GradC = glm::radialGradient(Center, Radius, Focal, PositionC);
  787. float GradD = glm::radialGradient(Center, Radius, Focal, PositionD);
  788. bool End = true;
  789. }
  790. void test_quat()
  791. {
  792. glm::quat q1(1.0f, glm::vec3(0));
  793. float Roll = glm::roll(q1);
  794. }
  795. //
  796. //void _mm_add_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
  797. //{
  798. // {
  799. // out[0] = _mm_add_ps(in1[0], in2[0]);
  800. // out[1] = _mm_add_ps(in1[1], in2[1]);
  801. // out[2] = _mm_add_ps(in1[2], in2[2]);
  802. // out[3] = _mm_add_ps(in1[3], in2[3]);
  803. // }
  804. //}
  805. //
  806. //void _mm_sub_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
  807. //{
  808. // {
  809. // out[0] = _mm_sub_ps(in1[0], in2[0]);
  810. // out[1] = _mm_sub_ps(in1[1], in2[1]);
  811. // out[2] = _mm_sub_ps(in1[2], in2[2]);
  812. // out[3] = _mm_sub_ps(in1[3], in2[3]);
  813. // }
  814. //}
  815. //
  816. //inline __m128 _mm_mul_ps(__m128 m[4], __m128 v)
  817. //{
  818. // __m128 v0 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
  819. // __m128 v1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
  820. // __m128 v2 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
  821. // __m128 v3 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3));
  822. //
  823. // __m128 m0 = _mm_mul_ps(m[0], v0);
  824. // __m128 m1 = _mm_mul_ps(m[1], v1);
  825. // __m128 m2 = _mm_mul_ps(m[2], v2);
  826. // __m128 m3 = _mm_mul_ps(m[3], v3);
  827. //
  828. // __m128 a0 = _mm_add_ps(m0, m1);
  829. // __m128 a1 = _mm_add_ps(m2, m3);
  830. // __m128 a2 = _mm_add_ps(a0, a1);
  831. //
  832. // return a2;
  833. //}
  834. //
  835. //inline __m128 _mm_mul_ps(__m128 v, __m128 m[4])
  836. //{
  837. // __m128 i0 = m[0];
  838. // __m128 i1 = m[1];
  839. // __m128 i2 = m[2];
  840. // __m128 i3 = m[3];
  841. //
  842. // __m128 m0 = _mm_mul_ps(v, i0);
  843. // __m128 m1 = _mm_mul_ps(v, i1);
  844. // __m128 m2 = _mm_mul_ps(v, i2);
  845. // __m128 m3 = _mm_mul_ps(v, i3);
  846. //
  847. // __m128 u0 = _mm_unpacklo_ps(m0, m1);
  848. // __m128 u1 = _mm_unpackhi_ps(m0, m1);
  849. // __m128 a0 = _mm_add_ps(u0, u1);
  850. //
  851. // __m128 u2 = _mm_unpacklo_ps(m2, m3);
  852. // __m128 u3 = _mm_unpackhi_ps(m2, m3);
  853. // __m128 a1 = _mm_add_ps(u2, u3);
  854. //
  855. // __m128 f0 = _mm_movelh_ps(a0, a1);
  856. // __m128 f1 = _mm_movehl_ps(a1, a0);
  857. // __m128 f2 = _mm_add_ps(f0, f1);
  858. //
  859. // return f2;
  860. //}
  861. //
  862. //inline void _mm_mul_ps(__m128 in1[4], __m128 in2[4], __m128 out[4])
  863. //{
  864. // glm::uint64 TimeStart = GetCpuCycle();
  865. //
  866. // {
  867. // __m128 e0 = _mm_shuffle_ps(in2[0], in2[0], _MM_SHUFFLE(0, 0, 0, 0));
  868. // __m128 e1 = _mm_shuffle_ps(in2[0], in2[0], _MM_SHUFFLE(1, 1, 1, 1));
  869. // __m128 e2 = _mm_shuffle_ps(in2[0], in2[0], _MM_SHUFFLE(2, 2, 2, 2));
  870. // __m128 e3 = _mm_shuffle_ps(in2[0], in2[0], _MM_SHUFFLE(3, 3, 3, 3));
  871. //
  872. // __m128 m0 = _mm_mul_ps(in1[0], e0);
  873. // __m128 m1 = _mm_mul_ps(in1[1], e1);
  874. // __m128 m2 = _mm_mul_ps(in1[2], e2);
  875. // __m128 m3 = _mm_mul_ps(in1[3], e3);
  876. //
  877. // __m128 a0 = _mm_add_ps(m0, m1);
  878. // __m128 a1 = _mm_add_ps(m2, m3);
  879. // __m128 a2 = _mm_add_ps(a0, a1);
  880. //
  881. // out[0] = a2;
  882. // }
  883. //
  884. // {
  885. // __m128 e0 = _mm_shuffle_ps(in2[1], in2[1], _MM_SHUFFLE(0, 0, 0, 0));
  886. // __m128 e1 = _mm_shuffle_ps(in2[1], in2[1], _MM_SHUFFLE(1, 1, 1, 1));
  887. // __m128 e2 = _mm_shuffle_ps(in2[1], in2[1], _MM_SHUFFLE(2, 2, 2, 2));
  888. // __m128 e3 = _mm_shuffle_ps(in2[1], in2[1], _MM_SHUFFLE(3, 3, 3, 3));
  889. //
  890. // __m128 m0 = _mm_mul_ps(in1[0], e0);
  891. // __m128 m1 = _mm_mul_ps(in1[1], e1);
  892. // __m128 m2 = _mm_mul_ps(in1[2], e2);
  893. // __m128 m3 = _mm_mul_ps(in1[3], e3);
  894. //
  895. // __m128 a0 = _mm_add_ps(m0, m1);
  896. // __m128 a1 = _mm_add_ps(m2, m3);
  897. // __m128 a2 = _mm_add_ps(a0, a1);
  898. //
  899. // out[1] = a2;
  900. // }
  901. //
  902. // {
  903. // __m128 e0 = _mm_shuffle_ps(in2[2], in2[2], _MM_SHUFFLE(0, 0, 0, 0));
  904. // __m128 e1 = _mm_shuffle_ps(in2[2], in2[2], _MM_SHUFFLE(1, 1, 1, 1));
  905. // __m128 e2 = _mm_shuffle_ps(in2[2], in2[2], _MM_SHUFFLE(2, 2, 2, 2));
  906. // __m128 e3 = _mm_shuffle_ps(in2[2], in2[2], _MM_SHUFFLE(3, 3, 3, 3));
  907. //
  908. // __m128 m0 = _mm_mul_ps(in1[0], e0);
  909. // __m128 m1 = _mm_mul_ps(in1[1], e1);
  910. // __m128 m2 = _mm_mul_ps(in1[2], e2);
  911. // __m128 m3 = _mm_mul_ps(in1[3], e3);
  912. //
  913. // __m128 a0 = _mm_add_ps(m0, m1);
  914. // __m128 a1 = _mm_add_ps(m2, m3);
  915. // __m128 a2 = _mm_add_ps(a0, a1);
  916. //
  917. // out[2] = a2;
  918. // }
  919. //
  920. // {
  921. // //(__m128&)_mm_shuffle_epi32(__m128i&)in2[0], _MM_SHUFFLE(3, 3, 3, 3))
  922. // __m128 e0 = _mm_shuffle_ps(in2[3], in2[3], _MM_SHUFFLE(0, 0, 0, 0));
  923. // __m128 e1 = _mm_shuffle_ps(in2[3], in2[3], _MM_SHUFFLE(1, 1, 1, 1));
  924. // __m128 e2 = _mm_shuffle_ps(in2[3], in2[3], _MM_SHUFFLE(2, 2, 2, 2));
  925. // __m128 e3 = _mm_shuffle_ps(in2[3], in2[3], _MM_SHUFFLE(3, 3, 3, 3));
  926. //
  927. // __m128 m0 = _mm_mul_ps(in1[0], e0);
  928. // __m128 m1 = _mm_mul_ps(in1[1], e1);
  929. // __m128 m2 = _mm_mul_ps(in1[2], e2);
  930. // __m128 m3 = _mm_mul_ps(in1[3], e3);
  931. //
  932. // __m128 a0 = _mm_add_ps(m0, m1);
  933. // __m128 a1 = _mm_add_ps(m2, m3);
  934. // __m128 a2 = _mm_add_ps(a0, a1);
  935. //
  936. // out[3] = a2;
  937. // }
  938. //
  939. // glm::uint64 TimeEnd = GetCpuCycle();
  940. //
  941. // printf("Ticks mul: %d\n", TimeEnd - TimeStart);
  942. //}
  943. //
  944. //inline void _mm_transpose_ps(__m128 const in[4], __m128 out[4])
  945. //{
  946. // __m128 tmp0 = _mm_shuffle_ps(in[0], in[1], 0x44);
  947. // __m128 tmp2 = _mm_shuffle_ps(in[0], in[1], 0xEE);
  948. // __m128 tmp1 = _mm_shuffle_ps(in[2], in[3], 0x44);
  949. // __m128 tmp3 = _mm_shuffle_ps(in[2], in[3], 0xEE);
  950. //
  951. // out[0] = _mm_shuffle_ps(tmp0, tmp1, 0x88);
  952. // out[1] = _mm_shuffle_ps(tmp0, tmp1, 0xDD);
  953. // out[2] = _mm_shuffle_ps(tmp2, tmp3, 0x88);
  954. // out[3] = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
  955. //}
  956. //void _mm_inverse_ps(__m128 const in[4], __m128 out[4])
  957. //{
  958. // // Swp00 = _mm_shuffle_ps(in[1], in[2], _MM_SHUFFLE(2, 2, 2, 2));
  959. // // SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
  960. // // SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
  961. // // SubFactor06 = m[1][2] * m[3][3] - m[3][2] * m[1][3];
  962. // // SubFactor13 = m[1][2] * m[2][3] - m[2][2] * m[1][3];
  963. // // + (m[1][1] * SubFactor00 - m[1][2] * SubFactor01 + m[1][3] * SubFactor02),
  964. // // - (m[0][1] * SubFactor00 - m[0][2] * SubFactor01 + m[0][3] * SubFactor02),
  965. // // + (m[0][1] * SubFactor06 - m[0][2] * SubFactor07 + m[0][3] * SubFactor08),
  966. // // - (m[0][1] * SubFactor13 - m[0][2] * SubFactor14 + m[0][3] * SubFactor15),
  967. //
  968. // __m128 Sfc00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(1, 1, 1, 1));
  969. // __m128 Sfc0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(3, 2, 3, 2));
  970. // __m128 Sfc01 = _mm_shuffle_ps(Sfc0a, Sfc0a, _MM_SHUFFLE(2, 3, 1, 1));
  971. // __m128 Sfc02 = _mm_shuffle_ps(Sfc0a, Sfc0a, _MM_SHUFFLE(3, 2, 0, 0));
  972. // __m128 Sfc03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(3, 3, 3, 3));
  973. //
  974. // __m128 SfcM0 = _mm_mul_ps(Sfc00, Sfc01);
  975. // __m128 SfcM1 = _mm_mul_ps(Sfc02, Sfc03);
  976. // __m128 SfcM2 = _mm_sub_ps(SfcM0, SfcM1);
  977. // // SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
  978. // // SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
  979. // // SubFactor06 = m[1][2] * m[3][3] - m[3][2] * m[1][3];
  980. // // SubFactor13 = m[1][2] * m[2][3] - m[2][2] * m[1][3];
  981. // // + (m[1][1] * SubFactor00 - m[1][2] * SubFactor01 + m[1][3] * SubFactor02),
  982. // // - (m[0][1] * SubFactor00 - m[0][2] * SubFactor01 + m[0][3] * SubFactor02),
  983. // // + (m[0][1] * SubFactor06 - m[0][2] * SubFactor07 + m[0][3] * SubFactor08),
  984. // // - (m[0][1] * SubFactor13 - m[0][2] * SubFactor14 + m[0][3] * SubFactor15),
  985. //
  986. // __m128 Swp00 = _mm_shuffle_ps(in[2], in[2], _MM_SHUFFLE(3, 0, 1, 2));
  987. // __m128 Swp01 = _mm_shuffle_ps(in[3], in[3], _MM_SHUFFLE(3, 3, 3, 3));
  988. // __m128 Swp02 = _mm_shuffle_ps(in[3], in[3], _MM_SHUFFLE(3, 0, 1, 2));
  989. // __m128 Swp03 = _mm_shuffle_ps(in[2], in[2], _MM_SHUFFLE(3, 3, 3, 3));
  990. //
  991. // __m128 Swp04 = _mm_shuffle_ps(in[2], in[2], _MM_SHUFFLE(3, 0, 0, 1));
  992. // __m128 Swp05 = _mm_shuffle_ps(in[3], in[3], _MM_SHUFFLE(3, 1, 2, 2));
  993. // __m128 Swp06 = _mm_shuffle_ps(in[3], in[3], _MM_SHUFFLE(3, 0, 0, 1));
  994. // __m128 Swp07 = _mm_shuffle_ps(in[2], in[2], _MM_SHUFFLE(3, 1, 2, 2));
  995. //
  996. // __m128 Swp08 = _mm_shuffle_ps(in[1], in[1], _MM_SHUFFLE(3, 0, 1, 2));
  997. // __m128 Swp09 = _mm_shuffle_ps(in[1], in[1], _MM_SHUFFLE(3, 3, 3, 3));
  998. // __m128 Swp10 = _mm_shuffle_ps(in[1], in[1], _MM_SHUFFLE(3, 0, 0, 1));
  999. // __m128 Swp11 = _mm_shuffle_ps(in[1], in[1], _MM_SHUFFLE(3, 1, 2, 2));
  1000. //
  1001. // __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
  1002. // __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
  1003. // __m128 Fac00 = _mm_sub_ps(Mul00, Mul01);
  1004. //
  1005. // //valType SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
  1006. // //valType SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
  1007. // //valType SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
  1008. //
  1009. // __m128 Mul02 = _mm_mul_ps(Swp04, Swp05);
  1010. // __m128 Mul03 = _mm_mul_ps(Swp06, Swp07);
  1011. // __m128 Fac01 = _mm_sub_ps(Mul02, Mul03);
  1012. //
  1013. // //valType SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
  1014. // //valType SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
  1015. // //valType SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];
  1016. //
  1017. // __m128 Mul04 = _mm_mul_ps(Swp08, Swp01);
  1018. // __m128 Mul05 = _mm_mul_ps(Swp02, Swp09);
  1019. // __m128 Fac02 = _mm_sub_ps(Mul04, Mul05);
  1020. //
  1021. // //valType SubFactor06 = m[1][2] * m[3][3] - m[3][2] * m[1][3];
  1022. // //valType SubFactor07 = m[1][1] * m[3][3] - m[3][1] * m[1][3];
  1023. // //valType SubFactor09 = m[1][0] * m[3][3] - m[3][0] * m[1][3];
  1024. //
  1025. // __m128 Mul06 = _mm_mul_ps(Swp10, Swp05);
  1026. // __m128 Mul07 = _mm_mul_ps(Swp06, Swp11);
  1027. // __m128 Fac03 = _mm_sub_ps(Mul06, Mul07);
  1028. //
  1029. // //valType SubFactor08 = m[1][1] * m[3][2] - m[3][1] * m[1][2];
  1030. // //valType SubFactor10 = m[1][0] * m[3][2] - m[3][0] * m[1][2];
  1031. // //valType SubFactor12 = m[1][0] * m[3][1] - m[3][0] * m[1][1];
  1032. //
  1033. // __m128 Mul08 = _mm_mul_ps(Swp08, Swp03);
  1034. // __m128 Mul09 = _mm_mul_ps(Swp00, Swp09);
  1035. // __m128 Fac04 = _mm_sub_ps(Mul08, Mul09);
  1036. //
  1037. // //valType SubFactor13 = m[1][2] * m[2][3] - m[2][2] * m[1][3];
  1038. // //valType SubFactor14 = m[1][1] * m[2][3] - m[2][1] * m[1][3];
  1039. // //valType SubFactor16 = m[1][0] * m[2][3] - m[2][0] * m[1][3];
  1040. //
  1041. // __m128 Mul10 = _mm_mul_ps(Swp10, Swp07);
  1042. // __m128 Mul11 = _mm_mul_ps(Swp04, Swp11);
  1043. // __m128 Fac05 = _mm_sub_ps(Mul10, Mul11);
  1044. //
  1045. // //valType SubFactor15 = m[1][1] * m[2][2] - m[2][1] * m[1][2];
  1046. // //valType SubFactor17 = m[1][0] * m[2][2] - m[2][0] * m[1][2];
  1047. // //valType SubFactor18 = m[1][0] * m[2][1] - m[2][0] * m[1][1];
  1048. //
  1049. // bool end = true;
  1050. //}
  1051. //template <typename valType>
  1052. //inline detail::tmat4x4<valType> inverse
  1053. //(
  1054. // detail::tmat4x4<valType> const & m
  1055. //)
  1056. //{
  1057. //inline __m128 _mm_dot_ps(__m128 v1, __m128 v2)
  1058. //{
  1059. // __m128 mul0 = _mm_mul_ps(v1, v2);
  1060. // __m128 swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
  1061. // __m128 add0 = _mm_add_ps(mul0, swp0);
  1062. // __m128 swp1 = _mm_shuffle_ps(add0, add0, _MM_SHUFFLE(0, 1, 2, 3));
  1063. // __m128 add1 = _mm_add_ps(add0, swp1);
  1064. // return add1;
  1065. //}
  1066. //
  1067. //inline void _mm_inverse_ps(__m128 const in[4], __m128 out[4])
  1068. //{
  1069. // glm::uint64 TimeStart = GetCpuCycle();
  1070. //
  1071. // __m128 Fac0;
  1072. // {
  1073. // // valType SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
  1074. // // valType SubFactor00 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
  1075. // // valType SubFactor06 = m[1][2] * m[3][3] - m[3][2] * m[1][3];
  1076. // // valType SubFactor13 = m[1][2] * m[2][3] - m[2][2] * m[1][3];
  1077. //
  1078. // __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(3, 3, 3, 3));
  1079. // __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(2, 2, 2, 2));
  1080. //
  1081. // __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(2, 2, 2, 2));
  1082. // __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
  1083. // __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
  1084. // __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(3, 3, 3, 3));
  1085. //
  1086. // __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
  1087. // __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
  1088. // Fac0 = _mm_sub_ps(Mul00, Mul01);
  1089. //
  1090. // bool stop = true;
  1091. // }
  1092. //
  1093. // __m128 Fac1;
  1094. // {
  1095. // // valType SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
  1096. // // valType SubFactor01 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
  1097. // // valType SubFactor07 = m[1][1] * m[3][3] - m[3][1] * m[1][3];
  1098. // // valType SubFactor14 = m[1][1] * m[2][3] - m[2][1] * m[1][3];
  1099. //
  1100. // __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(3, 3, 3, 3));
  1101. // __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(1, 1, 1, 1));
  1102. //
  1103. // __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(1, 1, 1, 1));
  1104. // __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
  1105. // __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
  1106. // __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(3, 3, 3, 3));
  1107. //
  1108. // __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
  1109. // __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
  1110. // Fac1 = _mm_sub_ps(Mul00, Mul01);
  1111. //
  1112. // bool stop = true;
  1113. // }
  1114. //
  1115. //
  1116. // __m128 Fac2;
  1117. // {
  1118. // // valType SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
  1119. // // valType SubFactor02 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
  1120. // // valType SubFactor08 = m[1][1] * m[3][2] - m[3][1] * m[1][2];
  1121. // // valType SubFactor15 = m[1][1] * m[2][2] - m[2][1] * m[1][2];
  1122. //
  1123. // __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(2, 2, 2, 2));
  1124. // __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(1, 1, 1, 1));
  1125. //
  1126. // __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(1, 1, 1, 1));
  1127. // __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
  1128. // __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
  1129. // __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(2, 2, 2, 2));
  1130. //
  1131. // __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
  1132. // __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
  1133. // Fac2 = _mm_sub_ps(Mul00, Mul01);
  1134. //
  1135. // bool stop = true;
  1136. // }
  1137. //
  1138. // __m128 Fac3;
  1139. // {
  1140. // // valType SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
  1141. // // valType SubFactor03 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
  1142. // // valType SubFactor09 = m[1][0] * m[3][3] - m[3][0] * m[1][3];
  1143. // // valType SubFactor16 = m[1][0] * m[2][3] - m[2][0] * m[1][3];
  1144. //
  1145. // __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(3, 3, 3, 3));
  1146. // __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(0, 0, 0, 0));
  1147. //
  1148. // __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(0, 0, 0, 0));
  1149. // __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
  1150. // __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
  1151. // __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(3, 3, 3, 3));
  1152. //
  1153. // __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
  1154. // __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
  1155. // Fac3 = _mm_sub_ps(Mul00, Mul01);
  1156. //
  1157. // bool stop = true;
  1158. // }
  1159. //
  1160. // __m128 Fac4;
  1161. // {
  1162. // // valType SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
  1163. // // valType SubFactor04 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
  1164. // // valType SubFactor10 = m[1][0] * m[3][2] - m[3][0] * m[1][2];
  1165. // // valType SubFactor17 = m[1][0] * m[2][2] - m[2][0] * m[1][2];
  1166. //
  1167. // __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(2, 2, 2, 2));
  1168. // __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(0, 0, 0, 0));
  1169. //
  1170. // __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(0, 0, 0, 0));
  1171. // __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
  1172. // __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
  1173. // __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(2, 2, 2, 2));
  1174. //
  1175. // __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
  1176. // __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
  1177. // Fac4 = _mm_sub_ps(Mul00, Mul01);
  1178. //
  1179. // bool stop = true;
  1180. // }
  1181. //
  1182. // __m128 Fac5;
  1183. // {
  1184. // // valType SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];
  1185. // // valType SubFactor05 = m[2][0] * m[3][1] - m[3][0] * m[2][1];
  1186. // // valType SubFactor12 = m[1][0] * m[3][1] - m[3][0] * m[1][1];
  1187. // // valType SubFactor18 = m[1][0] * m[2][1] - m[2][0] * m[1][1];
  1188. //
  1189. // __m128 Swp0a = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(1, 1, 1, 1));
  1190. // __m128 Swp0b = _mm_shuffle_ps(in[3], in[2], _MM_SHUFFLE(0, 0, 0, 0));
  1191. //
  1192. // __m128 Swp00 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(0, 0, 0, 0));
  1193. // __m128 Swp01 = _mm_shuffle_ps(Swp0a, Swp0a, _MM_SHUFFLE(2, 0, 0, 0));
  1194. // __m128 Swp02 = _mm_shuffle_ps(Swp0b, Swp0b, _MM_SHUFFLE(2, 0, 0, 0));
  1195. // __m128 Swp03 = _mm_shuffle_ps(in[2], in[1], _MM_SHUFFLE(1, 1, 1, 1));
  1196. //
  1197. // __m128 Mul00 = _mm_mul_ps(Swp00, Swp01);
  1198. // __m128 Mul01 = _mm_mul_ps(Swp02, Swp03);
  1199. // Fac5 = _mm_sub_ps(Mul00, Mul01);
  1200. //
  1201. // bool stop = true;
  1202. // }
  1203. //
  1204. // __m128 SignA = _mm_set_ps( 1.0f,-1.0f, 1.0f,-1.0f);
  1205. // __m128 SignB = _mm_set_ps(-1.0f, 1.0f,-1.0f, 1.0f);
  1206. //
  1207. // // m[1][0]
  1208. // // m[0][0]
  1209. // // m[0][0]
  1210. // // m[0][0]
  1211. // __m128 Temp0 = _mm_shuffle_ps(in[1], in[0], _MM_SHUFFLE(0, 0, 0, 0));
  1212. // __m128 Vec0 = _mm_shuffle_ps(Temp0, Temp0, _MM_SHUFFLE(2, 2, 2, 0));
  1213. //
  1214. // // m[1][1]
  1215. // // m[0][1]
  1216. // // m[0][1]
  1217. // // m[0][1]
  1218. // __m128 Temp1 = _mm_shuffle_ps(in[1], in[0], _MM_SHUFFLE(1, 1, 1, 1));
  1219. // __m128 Vec1 = _mm_shuffle_ps(Temp1, Temp1, _MM_SHUFFLE(2, 2, 2, 0));
  1220. //
  1221. // // m[1][2]
  1222. // // m[0][2]
  1223. // // m[0][2]
  1224. // // m[0][2]
  1225. // __m128 Temp2 = _mm_shuffle_ps(in[1], in[0], _MM_SHUFFLE(2, 2, 2, 2));
  1226. // __m128 Vec2 = _mm_shuffle_ps(Temp2, Temp2, _MM_SHUFFLE(2, 2, 2, 0));
  1227. //
  1228. // // m[1][3]
  1229. // // m[0][3]
  1230. // // m[0][3]
  1231. // // m[0][3]
  1232. // __m128 Temp3 = _mm_shuffle_ps(in[1], in[0], _MM_SHUFFLE(3, 3, 3, 3));
  1233. // __m128 Vec3 = _mm_shuffle_ps(Temp3, Temp3, _MM_SHUFFLE(2, 2, 2, 0));
  1234. //
  1235. // // col0
  1236. // // + (Vec1[0] * Fac0[0] - Vec2[0] * Fac1[0] + Vec3[0] * Fac2[0]),
  1237. // // - (Vec1[1] * Fac0[1] - Vec2[1] * Fac1[1] + Vec3[1] * Fac2[1]),
  1238. // // + (Vec1[2] * Fac0[2] - Vec2[2] * Fac1[2] + Vec3[2] * Fac2[2]),
  1239. // // - (Vec1[3] * Fac0[3] - Vec2[3] * Fac1[3] + Vec3[3] * Fac2[3]),
  1240. // __m128 Mul00 = _mm_mul_ps(Vec1, Fac0);
  1241. // __m128 Mul01 = _mm_mul_ps(Vec2, Fac1);
  1242. // __m128 Mul02 = _mm_mul_ps(Vec3, Fac2);
  1243. // __m128 Sub00 = _mm_sub_ps(Mul00, Mul01);
  1244. // __m128 Add00 = _mm_add_ps(Sub00, Mul02);
  1245. // __m128 Inv0 = _mm_mul_ps(SignB, Add00);
  1246. //
  1247. // // col1
  1248. // // - (Vec0[0] * Fac0[0] - Vec2[0] * Fac3[0] + Vec3[0] * Fac4[0]),
  1249. // // + (Vec0[0] * Fac0[1] - Vec2[1] * Fac3[1] + Vec3[1] * Fac4[1]),
  1250. // // - (Vec0[0] * Fac0[2] - Vec2[2] * Fac3[2] + Vec3[2] * Fac4[2]),
  1251. // // + (Vec0[0] * Fac0[3] - Vec2[3] * Fac3[3] + Vec3[3] * Fac4[3]),
  1252. // __m128 Mul03 = _mm_mul_ps(Vec0, Fac0);
  1253. // __m128 Mul04 = _mm_mul_ps(Vec2, Fac3);
  1254. // __m128 Mul05 = _mm_mul_ps(Vec3, Fac4);
  1255. // __m128 Sub01 = _mm_sub_ps(Mul03, Mul04);
  1256. // __m128 Add01 = _mm_add_ps(Sub01, Mul05);
  1257. // __m128 Inv1 = _mm_mul_ps(SignA, Add01);
  1258. //
  1259. // // col2
  1260. // // + (Vec0[0] * Fac1[0] - Vec1[0] * Fac3[0] + Vec3[0] * Fac5[0]),
  1261. // // - (Vec0[0] * Fac1[1] - Vec1[1] * Fac3[1] + Vec3[1] * Fac5[1]),
  1262. // // + (Vec0[0] * Fac1[2] - Vec1[2] * Fac3[2] + Vec3[2] * Fac5[2]),
  1263. // // - (Vec0[0] * Fac1[3] - Vec1[3] * Fac3[3] + Vec3[3] * Fac5[3]),
  1264. // __m128 Mul06 = _mm_mul_ps(Vec0, Fac1);
  1265. // __m128 Mul07 = _mm_mul_ps(Vec1, Fac3);
  1266. // __m128 Mul08 = _mm_mul_ps(Vec3, Fac5);
  1267. // __m128 Sub02 = _mm_sub_ps(Mul06, Mul07);
  1268. // __m128 Add02 = _mm_add_ps(Sub02, Mul08);
  1269. // __m128 Inv2 = _mm_mul_ps(SignB, Add02);
  1270. //
  1271. // // col3
  1272. // // - (Vec1[0] * Fac2[0] - Vec1[0] * Fac4[0] + Vec2[0] * Fac5[0]),
  1273. // // + (Vec1[0] * Fac2[1] - Vec1[1] * Fac4[1] + Vec2[1] * Fac5[1]),
  1274. // // - (Vec1[0] * Fac2[2] - Vec1[2] * Fac4[2] + Vec2[2] * Fac5[2]),
  1275. // // + (Vec1[0] * Fac2[3] - Vec1[3] * Fac4[3] + Vec2[3] * Fac5[3]));
  1276. // __m128 Mul09 = _mm_mul_ps(Vec0, Fac2);
  1277. // __m128 Mul10 = _mm_mul_ps(Vec1, Fac4);
  1278. // __m128 Mul11 = _mm_mul_ps(Vec2, Fac5);
  1279. // __m128 Sub03 = _mm_sub_ps(Mul09, Mul10);
  1280. // __m128 Add03 = _mm_add_ps(Sub03, Mul11);
  1281. // __m128 Inv3 = _mm_mul_ps(SignA, Add03);
  1282. //
  1283. // __m128 Row0 = _mm_shuffle_ps(Inv0, Inv1, _MM_SHUFFLE(0, 0, 0, 0));
  1284. // __m128 Row1 = _mm_shuffle_ps(Inv2, Inv3, _MM_SHUFFLE(0, 0, 0, 0));
  1285. // __m128 Row2 = _mm_shuffle_ps(Row0, Row1, _MM_SHUFFLE(2, 0, 2, 0));
  1286. //
  1287. // // valType Determinant = m[0][0] * Inverse[0][0]
  1288. // // + m[0][1] * Inverse[1][0]
  1289. // // + m[0][2] * Inverse[2][0]
  1290. // // + m[0][3] * Inverse[3][0];
  1291. // __m128 Det0 = _mm_dot_ps(in[0], Row2);
  1292. //
  1293. // // Inverse /= Determinant;
  1294. // out[0] = _mm_div_ps(Inv0, Det0);
  1295. // out[1] = _mm_div_ps(Inv1, Det0);
  1296. // out[2] = _mm_div_ps(Inv2, Det0);
  1297. // out[3] = _mm_div_ps(Inv3, Det0);
  1298. //
  1299. // glm::uint64 TimeEnd = GetCpuCycle();
  1300. //
  1301. // printf("Ticks inv: %d\n", TimeEnd - TimeStart);
  1302. //
  1303. // bool stop = true;
  1304. //}
  1305. //#include <intrin.h>
  1306. void test_mat4_mul()
  1307. {
  1308. /*
  1309. {
  1310. __m128 v1 = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
  1311. __m128 m0[4];
  1312. m0[0] = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
  1313. m0[1] = _mm_set_ps(4.0f, 5.0f, 6.0f, 7.0f);
  1314. m0[2] = _mm_set_ps(0.3f, 0.2f, 0.1f, 0.0f);
  1315. m0[3] = _mm_set_ps(0.4f, 0.5f, 0.6f, 0.7f);
  1316. __m128 v2 = _mm_mul_ps(v1, m0);
  1317. bool stop = true;
  1318. }
  1319. {
  1320. glm::vec4 v1(0.0f, 1.0f, 2.0f, 3.0f);
  1321. glm::mat4 m0;
  1322. m0[0] = glm::vec4(0.0f, 1.0f, 2.0f, 3.0f);
  1323. m0[1] = glm::vec4(7.0f, 6.0f, 5.0f, 4.0f);
  1324. m0[2] = glm::vec4(0.0f, 0.1f, 0.2f, 0.3f);
  1325. m0[3] = glm::vec4(0.7f, 0.6f, 0.5f, 0.4f);
  1326. glm::vec4 v2 = v1 * m0;
  1327. bool stop = true;
  1328. }
  1329. */
  1330. {
  1331. __m128 a = _mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f);
  1332. __m128 b = _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 2, 1, 0));
  1333. bool stop = true;
  1334. }
  1335. {
  1336. __m128 v1 = _mm_setr_ps(0.3f, 0.2f, 0.1f, 1.0f);
  1337. __m128 m0[4];
  1338. m0[0] = _mm_setr_ps(2.0f, 0.2f, 0.1f,-1.4f);
  1339. m0[1] = _mm_setr_ps(0.5f, 2.0f, 0.3f,-1.2f);
  1340. m0[2] = _mm_setr_ps(0.6f, 0.4f, 2.0f,-1.1f);
  1341. m0[3] = _mm_setr_ps(1.4f, 1.2f, 1.1f, 1.0f);
  1342. __m128 v2 = _mm_mul_ps(v1, m0);
  1343. bool stop = true;
  1344. }
  1345. {
  1346. glm::vec4 v1(0.3f, 0.2f, 0.1f, 1.0f);
  1347. glm::mat4 m0;
  1348. m0[0] = glm::vec4(2.0f, 0.2f, 0.1f,-1.4f);
  1349. m0[1] = glm::vec4(0.5f, 2.0f, 0.3f,-1.2f);
  1350. m0[2] = glm::vec4(0.6f, 0.4f, 2.0f,-1.1f);
  1351. m0[3] = glm::vec4(1.4f, 1.2f, 1.1f, 1.0f);
  1352. glm::vec4 v2 = v1 * m0;
  1353. bool stop = true;
  1354. }
  1355. {
  1356. glm::vec4 v1(1.0f, 0.5f,-0.5f,-1.0f);
  1357. glm::mat4 m0;
  1358. m0[0] = glm::vec4(2.0f,-1.0f,-1.0f, 4.0f);
  1359. m0[1] = glm::vec4(2.0f, 1.0f, 4.0f, 1.0f);
  1360. m0[2] = glm::vec4(2.0f, 4.0f, 1.0f, 1.0f);
  1361. m0[3] = glm::vec4(4.0f, 1.0f, 1.0f, 1.0f);
  1362. m0 = glm::transpose(m0);
  1363. glm::vec4 v2 = m0 * v1;
  1364. bool stop = true;
  1365. }
  1366. {
  1367. glm::mat4 MatA;
  1368. MatA[0] = glm::vec4(0.2f, 0.1f, 0.3f, 0.4f);
  1369. MatA[1] = glm::vec4(1.0f, 0.3f, 0.2f, 0.3f);
  1370. MatA[2] = glm::vec4(3.0f, 2.0f, 0.4f, 0.2f);
  1371. MatA[3] = glm::vec4(4.0f, 3.0f, 2.0f, 1.0f);
  1372. glm::mat4 InvA = glm::inverse(MatA);
  1373. glm::mat4 IndA = MatA * InvA;
  1374. // glm::mat4 InvB = glm::inverseOgre(MatA);
  1375. // glm::mat4 IndB = MatA * InvB;
  1376. // glm::mat4 InvD = glm::inverseSIMD(MatA);
  1377. // glm::mat4 IndD = MatA * InvD;
  1378. // glm::mat4 InvE = glm::inverseDev(MatA);
  1379. // glm::mat4 IndE = MatA * InvE;
  1380. __m128 MatC[4];
  1381. MatC[0] = _mm_setr_ps(0.2f, 0.1f, 0.3f, 0.4f);
  1382. MatC[1] = _mm_setr_ps(1.0f, 0.3f, 0.2f, 0.3f);
  1383. MatC[2] = _mm_setr_ps(3.0f, 2.0f, 0.4f, 0.2f);
  1384. MatC[3] = _mm_setr_ps(4.0f, 3.0f, 2.0f, 1.0f);
  1385. __m128 InvC[4];
  1386. _mm_inverse_ps(MatC, InvC);
  1387. __m128 IndC[4];
  1388. _mm_mul_ps(MatC, InvC, IndC);
  1389. __m128 Mul2[4];
  1390. _mm_mul_ps(MatC, MatC, Mul2);
  1391. bool stop = true;
  1392. }
  1393. {
  1394. glm::dmat4 Mat;
  1395. Mat[0] = glm::dvec4(0.0f, 1.0f, 2.0f, 3.0f);
  1396. Mat[1] = glm::dvec4(0.1f, 1.1f, 2.1f, 3.1f);
  1397. Mat[2] = glm::dvec4(0.2f, 1.2f, 2.2f, 3.2f);
  1398. Mat[3] = glm::dvec4(0.3f, 1.3f, 2.3f, 3.3f);
  1399. glm::dmat4 Inv = glm::inverse(Mat);
  1400. glm::dmat4 Ind = Mat * Inv;
  1401. glm::dmat4 MatB;
  1402. MatB[0] = glm::dvec4(0.0f, 1.0f, 2.0f, 3.0f);
  1403. MatB[1] = glm::dvec4(0.1f, 1.1f, 2.1f, 3.1f);
  1404. MatB[2] = glm::dvec4(0.2f, 1.2f, 2.2f, 3.2f);
  1405. MatB[3] = glm::dvec4(0.3f, 1.3f, 2.3f, 3.3f);
  1406. // glm::dmat4 InvB = glm::inverseOgre(MatB);
  1407. // glm::dmat4 IndB = MatB * InvB;
  1408. bool stop = true;
  1409. }
  1410. {
  1411. glm::mat3 Mat;
  1412. Mat[0] = glm::vec3(0.0f, 1.0f, 2.0f);
  1413. Mat[1] = glm::vec3(0.1f, 1.1f, 2.1f);
  1414. Mat[2] = glm::vec3(0.2f, 1.2f, 2.2f);
  1415. glm::mat3 Inv = glm::inverse(Mat);
  1416. glm::mat3 Ind = Mat * Inv;
  1417. bool stop = true;
  1418. }
  1419. bool stop = true;
  1420. }
  1421. void test_vec4_mul()
  1422. {
  1423. glm::vec4 v1(1.0f, 2.0f, 3.0f, 4.0f);
  1424. glm::mat4 m1;
  1425. m1[0] = glm::vec4(1.0f, 2.0f, 3.0f, 4.0f) * 0.1f;
  1426. m1[1] = glm::vec4(1.0f, 2.0f, 3.0f, 4.0f) * 0.2f;
  1427. m1[2] = glm::vec4(1.0f, 2.0f, 3.0f, 4.0f) * 0.3f;
  1428. m1[3] = glm::vec4(1.0f, 2.0f, 3.0f, 4.0f) * 0.4f;
  1429. glm::vec4 vA = m1 * v1;
  1430. glm::vec4 vC = v1 * m1;
  1431. __m128 v2 = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  1432. __m128 m2[4];
  1433. m2[0] = _mm_mul_ps(_mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f), _mm_set_ps1(0.1f));
  1434. m2[1] = _mm_mul_ps(_mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f), _mm_set_ps1(0.2f));
  1435. m2[2] = _mm_mul_ps(_mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f), _mm_set_ps1(0.3f));
  1436. m2[3] = _mm_mul_ps(_mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f), _mm_set_ps1(0.4f));
  1437. __m128 vB = _mm_mul_ps(m2, v2);
  1438. __m128 vD = _mm_mul_ps(v2, m2);
  1439. bool stop = true;
  1440. }
  1441. void test_mat4_tick()
  1442. {
  1443. __m128 MatC[4];
  1444. MatC[0] = _mm_setr_ps(0.2f, 0.1f, 0.3f, 0.4f);
  1445. MatC[1] = _mm_setr_ps(1.0f, 0.3f, 0.2f, 0.3f);
  1446. MatC[2] = _mm_setr_ps(3.0f, 2.0f, 0.4f, 0.2f);
  1447. MatC[3] = _mm_setr_ps(4.0f, 3.0f, 2.0f, 1.0f);
  1448. int CPUInfo[4];
  1449. int InfoType;
  1450. for(std::size_t i = 0; i < 10; ++i)
  1451. {
  1452. __m128 InvC[4];
  1453. __m128 IndC[4];
  1454. {
  1455. // __cpuid(CPUInfo, InfoType);
  1456. // __rdtsc();
  1457. // __cpuid(CPUInfo, InfoType);
  1458. // __rdtsc();
  1459. // __cpuid(CPUInfo, InfoType);
  1460. //glm::uint64 ClockStart = GetCpuCycle();
  1461. _mm_inverse_ps(MatC, InvC);
  1462. //glm::uint64 ClockEnd = GetCpuCycle();
  1463. //printf("inv: %d\n", ClockEnd - ClockStart);
  1464. }
  1465. {
  1466. // __cpuid(CPUInfo, InfoType);
  1467. // __rdtsc();
  1468. // __cpuid(CPUInfo, InfoType);
  1469. // __rdtsc();
  1470. // __cpuid(CPUInfo, InfoType);
  1471. //glm::uint64 ClockStart = GetCpuCycle();
  1472. _mm_mul_ps(MatC, InvC, IndC);
  1473. //glm::uint64 ClockEnd = GetCpuCycle();
  1474. //printf("mul: %d\n", ClockEnd - ClockStart);
  1475. }
  1476. }
  1477. }
  // Returns the zero-based index of the lowest set bit of Binary, looking at
  // bits 0 through 31.
  //
  // Binary must be non-zero: for zero the assert fires, and when asserts are
  // compiled out the function returns 0.
  int firstBit(int Binary)
  {
      // Scan an unsigned copy so that testing bit 31 never shifts a signed 1
      // into the sign bit.
      unsigned int const Bits = static_cast<unsigned int>(Binary);

      for(int i = 0; i < 32; ++i)
          if(Bits & (1u << i))
              return i;

      assert(0);
      return 0;
  }
  1486. void test_exp_golomb()
  1487. {
  1488. //0 => 1 => 1
  1489. //1 => 10 => 010
  1490. //2 => 11 => 011
  1491. //3 => 100 => 00100
  1492. //4 => 101 => 00101
  1493. //5 => 110 => 00110
  1494. //6 => 111 => 00111
  1495. //7 => 1000 => 0001000
  1496. //8 => 1001 => 0001001
  1497. // - Take the number in binary except for the last k digits
  1498. // and add 1 to it (arithmetically). Write this down.
  1499. // - Count the bits written, subtract one,
  1500. // - Write that number of starting zero bits preceding the previous bit string.
  1501. // - Write the last k bits in binary.
  1502. std::size_t Bit00 = glm::highestBit(0);
  1503. std::size_t Bit01 = glm::highestBit(1);
  1504. std::size_t Bit02 = glm::highestBit(2);
  1505. std::size_t Bit04 = glm::highestBit(4);
  1506. std::size_t Bit08 = glm::highestBit(8);
  1507. std::size_t Bit16 = glm::highestBit(16);
  1508. std::size_t Bit32 = glm::highestBit(32);
  1509. std::vector<glm::uint32> Size(256);
  1510. std::vector<glm::uint32> Data(256);
  1511. std::vector<glm::uint32> Conv(256);
  1512. for(std::size_t i = 0; i < 256; ++i)
  1513. {
  1514. glm::uint32 Binary = i + 1;
  1515. glm::uint32 HighestBit = glm::highestBit(Binary);
  1516. glm::uint32 CodeSize = (HighestBit << 1) + 1;
  1517. glm::uint32 Swift = 32 - CodeSize;
  1518. glm::uint32 Temp = glm::bitRevert(Binary << Swift);
  1519. //glm::uint32 Temp = Binary << Swift;
  1520. Data[i] = Temp;
  1521. Size[i] = HighestBit * 2 + 1;
  1522. printf("Binary(%d, %d): ", Swift, CodeSize);
  1523. for(std::size_t k = 0; k < 32; ++k)
  1524. printf("%d", glm::uint32((Binary & (1 << k)) != 0));
  1525. printf(" (%d)\n", Binary);
  1526. printf("Temp(%d, %d) : ", Swift, CodeSize);
  1527. for(std::size_t k = 0; k < 32; ++k)
  1528. printf("%d", glm::uint32((Temp & (1 << k)) != 0));
  1529. printf(" (%d)\n", Temp);
  1530. printf("Value(%d): ", i);
  1531. for(std::size_t k = 0; k < 8; ++k)
  1532. printf("%d", glm::uint32((i & (1 << k)) != 0));
  1533. printf("; ");
  1534. printf("Code: ");
  1535. for(std::size_t k = 0; k < Size[i]; ++k)
  1536. printf("%d", glm::uint32((Data[i] & (1 << k)) != 0));
  1537. printf("\n");
  1538. {
  1539. glm::uint32 Temp = Data[i];
  1540. glm::uint32 CodeSize2 = (firstBit(Temp) << 1) + 1;
  1541. glm::uint32 Swift2 = 32 - CodeSize2;
  1542. glm::uint32 Binary = glm::bitRevert(Temp) >> Swift2;
  1543. Conv[i] = Binary - 1;
  1544. }
  1545. printf("Conv (%d, %d): ", Swift, CodeSize);
  1546. for(std::size_t k = 0; k < 32; ++k)
  1547. printf("%d", glm::uint32((Conv[i] & (1 << k)) != 0));
  1548. printf(" (%d)\n\n", Conv[i]);
  1549. bool stop = true;
  1550. }
  1551. bool stop = true;
  1552. }
  1553. /*
  1554. int main(int argc, char* argv[])
  1555. {
  1556. test_mat4_mul();
  1557. test_exp_golomb();
  1558. //test_mat4_tick();
  1559. test_quat();
  1560. test_simd();
  1561. test_transform();
  1562. test_const();
  1563. test_radial();
  1564. test_vec4_mul();
  1565. glm::uint LowestBit = glm::lowestBit(8);
  1566. glm::test::main_core_func_common();
  1567. glm::test::main_core_func_exponential();
  1568. glm::test::main_core_func_geometric();
  1569. glm::test::main_core_func_matrix();
  1570. glm::test::main_core_func_noise();
  1571. glm::test::main_core_func_trigonometric();
  1572. glm::test::main_core_func_vector_relational();
  1573. glm::test::main_gtx_intesect();
  1574. {
  1575. glm::vec3 Normal(1, 0, 0);
  1576. glm::vec3 x = glm::rotate<glm::vec3::value_type>(glm::vec3(1,0,0),180.f,Normal);
  1577. glm::vec3 y = glm::rotate<glm::vec3::value_type>(glm::vec3(1,1,0),180.f,Normal);
  1578. bool Stop = true;
  1579. }
  1580. glm::mat4 m1 = glm::inverseTranspose(
  1581. glm::gtc::matrix_transform::translate(
  1582. glm::gtc::matrix_transform::rotate(
  1583. glm::mat4(1.0f),
  1584. 90.f,
  1585. glm::normalize(glm::vec3(1.0f, 0.5f, 0.2f))),
  1586. glm::vec3(1.f, 2.f, 3.f)));
  1587. glm::mat4 m2 = glm::transpose(
  1588. glm::inverse(
  1589. glm::gtc::matrix_transform::translate(
  1590. glm::gtc::matrix_transform::rotate(
  1591. glm::mat4(1.0f),
  1592. 90.f,
  1593. glm::normalize(glm::vec3(1.0f, 0.5f, 0.2f))),
  1594. glm::vec3(1.f, 2.f, 3.f))));
  1595. test_hint();
  1596. test_fast_inverse();
  1597. //test_type();
  1598. test_profile();
  1599. // glm::greaterThan();
  1600. test_angle();
  1601. test_half();
  1602. test_swizzle();
  1603. test::main_vec2();
  1604. test_quaternion();
  1605. test_auto_cast();
  1606. //detail::test_half_type();
  1607. glm::vec2 Result = glm::perp(glm::vec2(1.0, 0.0), glm::vec2(0.0, 1.0));
  1608. // test_genType();
  1609. // test_desk();
  1610. //radians(1);
  1611. glm::mat4 LookAt = glm::lookAt(glm::vec3(1), glm::vec3(1), glm::vec3(1));
  1612. fast_inverse();
  1613. test_string_cast();
  1614. test_isPowerOfTwo();
  1615. {
  1616. float fast0 = glm::fastLength(glm::vec3(1.0f));
  1617. float fast1 = 0.0f;
  1618. }
  1619. vec4 v76 = 1.f * vec4(1.0f);
  1620. test_matrix();
  1621. //test_random();
  1622. //test_sse_vec4();
  1623. //test_sse_mat4();
  1624. {
  1625. float f1 = glm::mix(100.f, 1000.f, 0.5f);
  1626. vec3 f2 = glm::mix(vec3(100.f), vec3(1000.f), 0.5f);
  1627. ivec3 f3 = glm::mix(ivec3(100), ivec3(1000), 0.5f);
  1628. ivec3 f4 = glm::mix(ivec3(100), ivec3(1000), vec3(0.5f));
  1629. }
  1630. {
  1631. mat4 Projection = glm::frustum(-1.0f, 1.0f, -1.0f, 1.0f, 0.1f, 1.0f);
  1632. //glm_traits<glm::vec3>::value_type vec(1.0f);
  1633. }
  1634. {
  1635. glm::vec2 v1(1.0f);
  1636. glm::vec2 v2(2.0f);
  1637. glm::vec2 v3(3.0f);
  1638. // v1.xy += v2 + v3.xy;
  1639. }
  1640. {
  1641. bool b = true;
  1642. glm::vec4 v4(b);
  1643. }
  1644. {
  1645. glm::quat q1;
  1646. q1 = glm::inverse(q1);
  1647. }
  1648. //test_cast();
  1649. //test_half_full();
  1650. //test_sse();
  1651. //__hvec2GTX Pouet;
  1652. //pouet Pouet(1, 2, 3);
  1653. glm::dmat4 dm = glm::dmat4(1.0);
  1654. glm::mat4 fm1 = glm::mat4(dm);
  1655. glm::mat4 fm2(dm);
  1656. //glm::hmat4 hm = glm::hmat4(1.0);
  1657. //glm::mat4 fm3(hm);
  1658. //glm::dmat4 dm2(hm);
  1659. {
  1660. glm::quat q1;
  1661. glm::vec4 v1;
  1662. glm::vec4 r1;
  1663. r1 = q1 * v1;
  1664. r1++;
  1665. //q1 = glm::absGTX(q1);
  1666. }
  1667. {
  1668. glm::vec3 v1(1.0f);
  1669. ++v1;
  1670. v1++;
  1671. }
  1672. {
  1673. glm::vec4 v1(1.0f);
  1674. --v1;
  1675. v1--;
  1676. }
  1677. {
  1678. glm::vec4 v1(1.0f);
  1679. ++v1;
  1680. v1++;
  1681. }
  1682. {
  1683. ivec3 testVec = ivec3(3,35,4);
  1684. const ivec3 testVec2 = ivec3(13,15,14);
  1685. //testVec = testVec2.zyx;
  1686. //testVec = testVec2.xyy;
  1687. //testVec = testVec2.rrr;
  1688. //ivec4 textVec4 = testVec2.zzxx;
  1689. //ivec2 textVec2 = testVec2.gr;
  1690. //half h1 = half(1.0f);
  1691. //hvec2 hv2 = hvec2(h1);
  1692. //hvec3 hv3 = hvec3(h1);
  1693. //hvec4 hv4 = hvec4(h1);
  1694. }
  1695. vec2 Stuff = glm::mix(vec2(1), vec2(0), 0.5f);
  1696. test_gtx_bit();
  1697. vec2 Max = glm::max(vec2(0.f), vec2(1.f));
  1698. vec2 Truc = radians(vec2(1));
  1699. // ivec3 v(1);
  1700. // v = operator &=((1 << 0));
  1701. ivec3 v(1);
  1702. v &= (1 << 0);
  1703. vec4 Color = glm::f32_rgba_cast<glm::uint32>(glm::uint32(0));
  1704. vec3 Normal = glm::normalizedRand3<float>();
  1705. //__hvec3GTX hCos = cos(__hvec3GTX(1.0f));
  1706. //__hvec3GTX hNormal = normalizedRand3<detail::thalf>();
  1707. //quat Quat;
  1708. //Quat = sqrt(Quat);
  1709. #if(defined(GLM_SWIZZLE) && GLM_SWIZZLE & GLM_SWIZZLE_FUNC)
  1710. vec4 ColorRGBA = vec4(1.0, 0.5, 0.0, 1.0)._xyzw();
  1711. #endif//GLM_SWIZZLE
  1712. hvec2 v1(1.0f, 2.0f);
  1713. hvec2 v2(3.0f, 4.0f);
  1714. v2 = v1;
  1715. v2 += half(1.0f);
  1716. v2 += detail::thalf(1.0f);
  1717. v2 = v1 - v2;
  1718. half hcos1 = glm::cos(half(1.0));
  1719. hvec2 hcos2 = glm::cos(hvec2(1.0));
  1720. //test_associated_min_max();
  1721. system("pause");
  1722. return 0;
  1723. }
  1724. */
  1725. #define CPUID __asm __emit 0fh __asm __emit 0a2h
  1726. #define RDTSC __asm __emit 0fh __asm __emit 031h
  1727. unsigned test_sse()
  1728. {
  1729. glm::mat4 MatA;
  1730. MatA[0] = glm::vec4(0.2f, 0.1f, 0.3f, 0.4f);
  1731. MatA[1] = glm::vec4(1.0f, 0.3f, 0.2f, 0.3f);
  1732. MatA[2] = glm::vec4(3.0f, 2.0f, 0.4f, 0.2f);
  1733. MatA[3] = glm::vec4(4.0f, 3.0f, 2.0f, 1.0f);
  1734. glm::mat4 MulA;
  1735. glm::mat4 InvA;
  1736. unsigned cycles;
  1737. __m128 MatC[4];
  1738. MatC[0] = _mm_setr_ps(0.2f, 0.1f, 0.3f, 0.4f);
  1739. MatC[1] = _mm_setr_ps(1.0f, 0.3f, 0.2f, 0.3f);
  1740. MatC[2] = _mm_setr_ps(3.0f, 2.0f, 0.4f, 0.2f);
  1741. MatC[3] = _mm_setr_ps(4.0f, 3.0f, 2.0f, 1.0f);
  1742. __m128 MatR[4];
  1743. /*
  1744. __asm
  1745. {
  1746. pushad
  1747. CPUID
  1748. RDTSC
  1749. mov cycles, eax
  1750. popad
  1751. }
  1752. */
  1753. //MulA = MatA * MatA;
  1754. //_mm_mul_ps(MatC, MatC, MatR);
  1755. //InvA = glm::inverse(MatA);
  1756. //_mm_inverse_ps(MatC, MatR);
  1757. _mm_inverse_fast_ps(MatC, MatR);
  1758. //glm::mat4 IndA = MatA * InvA;
  1759. //glm::mat4 InvB = glm::inverseOgre(MatA);
  1760. //glm::mat4 IndB = MatA * InvB;
  1761. // glm::mat4 InvD = glm::inverseSIMD(MatA);
  1762. // glm::mat4 IndD = MatA * InvD;
  1763. // glm::mat4 InvE = glm::inverseDev(MatA);
  1764. // glm::mat4 IndE = MatA * InvE;
  1765. //__m128 MatC[4];
  1766. //MatC[0] = _mm_setr_ps(0.2f, 0.1f, 0.3f, 0.4f);
  1767. //MatC[1] = _mm_setr_ps(1.0f, 0.3f, 0.2f, 0.3f);
  1768. //MatC[2] = _mm_setr_ps(3.0f, 2.0f, 0.4f, 0.2f);
  1769. //MatC[3] = _mm_setr_ps(4.0f, 3.0f, 2.0f, 1.0f);
  1770. //__m128 InvC[4];
  1771. //_mm_inverse_ps(MatC, InvC);
  1772. //__m128 IndC[4];
  1773. //_mm_mul_ps(MatC, InvC, IndC);
  1774. //__m128 Mul2[4];
  1775. //_mm_mul_ps(MatC, MatC, Mul2);
  1776. /*
  1777. __asm
  1778. {
  1779. pushad
  1780. CPUID
  1781. RDTSC
  1782. sub eax, cycles
  1783. mov cycles, eax
  1784. popad
  1785. }
  1786. */
  1787. return cycles;
  1788. }
  1789. unsigned test_mat4_translate()
  1790. {
  1791. glm::mat4 MatA;
  1792. MatA[0] = glm::vec4(0.2f, 0.1f, 0.3f, 0.4f);
  1793. MatA[1] = glm::vec4(1.0f, 0.3f, 0.2f, 0.3f);
  1794. MatA[2] = glm::vec4(3.0f, 2.0f, 0.4f, 0.2f);
  1795. MatA[3] = glm::vec4(4.0f, 3.0f, 2.0f, 1.0f);
  1796. glm::mat4 Transform;
  1797. __m128 MatC[4];
  1798. MatC[0] = _mm_setr_ps(0.2f, 0.1f, 0.3f, 0.4f);
  1799. MatC[1] = _mm_setr_ps(1.0f, 0.3f, 0.2f, 0.3f);
  1800. MatC[2] = _mm_setr_ps(3.0f, 2.0f, 0.4f, 0.2f);
  1801. MatC[3] = _mm_setr_ps(4.0f, 3.0f, 2.0f, 1.0f);
  1802. __m128 MatR[4];
  1803. float v[] = {1, 2, 3};
  1804. unsigned cycles;
  1805. /*
  1806. __asm
  1807. {
  1808. pushad
  1809. CPUID
  1810. RDTSC
  1811. mov cycles, eax
  1812. popad
  1813. }
  1814. */
  1815. _mm_rotate_ps(MatC, 45.f, v, MatR);
  1816. //Transform = glm::rotate(MatA, 45.f, glm::vec3(1, 2, 3));
  1817. /*
  1818. __asm
  1819. {
  1820. pushad
  1821. CPUID
  1822. RDTSC
  1823. sub eax, cycles
  1824. mov cycles, eax
  1825. popad
  1826. }
  1827. */
  1828. return cycles;
  1829. }
  1830. void test_vec4_simd()
  1831. {
  1832. glm::vec4SIMD v1(1.0f, 2.0f, 3.0f, 4.0f);
  1833. glm::vec4SIMD v2(5.0f, 6.0f, 7.0f, 8.0f);
  1834. glm::vec4SIMD v3 = v1 + v2;
  1835. bool end = true;
  1836. }
  1837. bool main_test()
  1838. {
  1839. bool Result = true;
  1840. Result = Result && glm::test::main_bug();
  1841. assert(Result);
  1842. Result = Result && glm::test::main_type_half();
  1843. assert(Result);
  1844. Result = Result && glm::test::main_img();
  1845. assert(Result);
  1846. return true;
  1847. }
  1848. /*
  1849. template<template <typename valType> genType>
  1850. valType templateTemplateParameter(genType const & v)
  1851. {
  1852. valType Result = valType(0);
  1853. for(typename gentype::size_type i = 0; i < genType::value_size(); ++i)
  1854. {
  1855. Result += v[i];
  1856. }
  1857. return Result;
  1858. }
  1859. */
  /// Minimal single-parameter class template holding one private int.
  /// Used in main() as the template-template argument of B1.
  template<typename T>
  class A1
  {
  private:
      int x;
  };
  /// Empty class template taking a template-template parameter, i.e. a
  /// class template that itself accepts exactly one type argument.
  template<template<typename> class U>
  class B1
  {
  };
  1869. //#include <glm/glm.hpp>
  1870. //void computeFaceNormals(mesh & Mesh)
  1871. //{
  1872. // for(mesh::iteractor it = Mesh.Faces.begin(); it != Mesh.Faces.end(); ++it)
  1873. // {
  1874. // glm::vec3 const & a = Mesh.Points[it->Vertices[0].Index].Position;
  1875. // glm::vec3 const & b = Mesh.Points[it->Vertices[1].Index].Position;
  1876. // glm::vec3 const & c = Mesh.Points[it->Vertices[2].Index].Position;
  1877. // it->Normal = glm::normalize(glm::cross(c - a, b - a));
  1878. // }
  1879. //}
  /// Common base for the experimental fixed-size vector types.
  ///
  /// @tparam N Number of components.
  /// @tparam T Component type.
  template <std::size_t N, typename T>
  struct vecBase
  {
      typedef T value_type;
      typedef std::size_t size_type;

      // Initialised in-class so it is a compile-time constant usable as an
      // array bound in derived types.
      static size_type const value_size = N;
  };

  // Out-of-class definition, still required when value_size is odr-used.
  template <std::size_t N, typename T>
  typename vecBase<N, T>::size_type const vecBase<N, T>::value_size;

  /// Generic N-component vector: adds nothing beyond the base typedefs.
  template <std::size_t N, typename T>
  struct vecDEV : public vecBase<N, T>
  {
      typedef T value_type;
  };

  /// 4-component specialisation that actually stores its components.
  template <typename T>
  struct vecDEV<4, T> : public vecBase<4, T>
  {
      // Names inherited from a dependent base class are not found by
      // unqualified lookup, so re-declare the typedef here (as the primary
      // template does) and qualify value_size explicitly.
      typedef T value_type;

      value_type Data[vecBase<4, T>::value_size];
  };
  1899. void main_core_func_integer()
  1900. {
  1901. std::size_t ValueA = 1;
  1902. std::size_t ValueB = glm::bitfieldReverse(ValueA);
  1903. assert(ValueA != ValueB);
  1904. std::size_t ValueC = glm::bitfieldReverse(ValueB);
  1905. assert(ValueA == ValueC);
  1906. bool stop = 76;
  1907. }
  1908. int main()
  1909. {
  1910. float MuxesA = glm::log2(136.f) * 136.f * 2.f;
  1911. float MuxesB = glm::log2(120.f) * 120.f * 2.f;
  1912. float Ratio = MuxesB * 100 / MuxesA;
  1913. B1<A1> c;
  1914. //templateTemplateParameter<glm::detail::tvec3>();
  1915. main_core_func_integer();
  1916. assert(main_test());
  1917. glm::test::main_core_func_common();
  1918. glm::test::main_core_func_exponential();
  1919. glm::test::main_core_func_geometric();
  1920. glm::test::main_core_func_matrix();
  1921. glm::test::main_core_func_noise();
  1922. glm::test::main_core_func_trigonometric();
  1923. glm::test::main_core_func_vector_relational();
  1924. glm::test::main_gtx_intesect();
  1925. //test_sse_vec4();
  1926. //test_sse_mat4();
  1927. test_vec4_simd();
  1928. glm::quat q;
  1929. glm::vec3 v;
  1930. glm::vec3 w = glm::cross(q, v);
  1931. glm::vec3 u = glm::cross(v, q);
  1932. unsigned int base, base1, base2, base3, base4, base5;
  1933. unsigned int clock;
  1934. // Warm up cpuid & rdtsc
  1935. /*
  1936. */
  1937. /*
  1938. __asm
  1939. {
  1940. pushad;
  1941. cpuid;
  1942. rdtsc;
  1943. mov clock, eax;
  1944. cpuid;
  1945. rdtsc;
  1946. sub eax, clock;
  1947. mov base1, eax;
  1948. cpuid;
  1949. rdtsc;
  1950. mov clock, eax;
  1951. cpuid;
  1952. rdtsc;
  1953. sub eax, clock;
  1954. mov base2, eax;
  1955. cpuid;
  1956. rdtsc;
  1957. mov clock, eax;
  1958. cpuid;
  1959. rdtsc;
  1960. sub eax, clock;
  1961. mov base3, eax;
  1962. cpuid;
  1963. rdtsc;
  1964. mov clock, eax;
  1965. cpuid;
  1966. rdtsc;
  1967. sub eax, clock;
  1968. mov base4, eax;
  1969. cpuid;
  1970. rdtsc;
  1971. mov clock, eax;
  1972. cpuid;
  1973. rdtsc;
  1974. sub eax, clock;
  1975. mov base5, eax;
  1976. popad;
  1977. }
  1978. */
  1979. base = base1;
  1980. if (base > base2)
  1981. base = base2;
  1982. if (base > base3)
  1983. base = base3;
  1984. if (base > base4)
  1985. base = base4;
  1986. if (base > base5)
  1987. base = base5;
  1988. {
  1989. const unsigned size = 16;
  1990. unsigned cycles[size];
  1991. for(unsigned i = 0; i < size; ++i)
  1992. cycles[i] = test_mat4_translate();//test_sse();
  1993. // By the second or third run, both data and instruction
  1994. // cache effects should have been eliminated, and results
  1995. // will be consistent.
  1996. printf("SSE\nBase : %d\n", base);
  1997. printf("Cycle counts:\n");
  1998. for(unsigned i = 0; i < size; ++i)
  1999. printf("%d\n", cycles[i] - base);
  2000. }
  2001. glm::mat4 m(1.0);
  2002. glm::mat4 m1 = m + 2.0f;
  2003. system("pause");
  2004. }
  2005. //#include <glm/glm.hpp> // vec3, vec4, ivec4, mat4
  2006. //#include <glm/gtc/matrix_projection.hpp> // glm::perspective
  2007. //#include <glm/gtc/matrix_transform.hpp> // glm::translate, glm::rotate, glm::scale
  2008. //#include <glm/gtx/type_ptr.hpp> // glm::value_ptr
  2009. //
  2010. //glm::vec4 const ClearColor(glm::vec3(0.0f), 1.0f);
  2011. //glm::ivec4 const Viewport(0, 0, 640, 480);
  2012. //
  2013. //void render()
  2014. //{
  2015. // glClearColor(ClearColor.r, ClearColor.g, ClearColor.b, ClearColor.a);
  2016. // glClear(GL_COLOR_BUFFER_BIT);
  2017. //
  2018. // glViewport(Viewport.x, Viewport.y, Viewport.z, Viewport.w);
  2019. //
  2020. // glm::mat4 Projection = glm::perspective(45.0f, 4.0f / 3.0f, 0.1f, 100.0f);
  2021. // glm::mat4 ViewTranslate = glm::translate(glm::vec3(0.0f, 0.0f, -4.0f));
  2022. // glm::mat4 ViewRotateX = glm::rotate(ViewTranslate, 45.0f, glm::vec3(-1.0f, 0.0f, 0.0f));
  2023. // glm::mat4 View = glm::rotate(ViewRotateX, 45.0f, glm::vec3(0.0f, 1.0f, 0.0f));
  2024. // glm::mat4 Model = glm::scale(glm::mat4(1.0f), glm::vec3(0.5f));
  2025. //
  2026. // glMatrixMode(GL_PROJECTION);
  2027. // glLoadMatrixf(glm::value_ptr(Projection));
  2028. //
  2029. // glMatrixMode(GL_MODELVIEW);
  2030. // glLoadMatrixf(glm::value_ptr(View * Model));
  2031. //}