gtc_bitfield.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  1. ///////////////////////////////////////////////////////////////////////////////////
  2. /// OpenGL Mathematics (glm.g-truc.net)
  3. ///
  4. /// Copyright (c) 2005 - 2015 G-Truc Creation (www.g-truc.net)
  5. /// Permission is hereby granted, free of charge, to any person obtaining a copy
  6. /// of this software and associated documentation files (the "Software"), to deal
  7. /// in the Software without restriction, including without limitation the rights
  8. /// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. /// copies of the Software, and to permit persons to whom the Software is
  10. /// furnished to do so, subject to the following conditions:
  11. ///
  12. /// The above copyright notice and this permission notice shall be included in
  13. /// all copies or substantial portions of the Software.
  14. ///
  15. /// Restrictions:
  16. /// By making use of the Software for military purposes, you choose to make
  17. /// a Bunny unhappy.
  18. ///
  19. /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. /// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. /// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. /// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. /// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  25. /// THE SOFTWARE.
  26. ///
  27. /// @file test/gtc/gtc_bitfield.cpp
  28. /// @date 2014-10-25 / 2014-11-25
  29. /// @author Christophe Riccio
  30. ///////////////////////////////////////////////////////////////////////////////////
  31. #include <glm/gtc/bitfield.hpp>
  32. #include <glm/gtc/type_precision.hpp>
  33. #include <glm/vector_relational.hpp>
  34. #if GLM_ARCH != GLM_ARCH_PURE
  35. # include <glm/detail/intrinsic_integer.hpp>
  36. #endif
  37. #include <ctime>
  38. #include <cstdio>
  39. #include <vector>
  40. namespace mask
  41. {
  /// One row of a mask test table: an input paired with the result expected for it.
  /// Kept as a plain aggregate so tables can be written with brace initialisers.
  template <typename genType>
  struct type
  {
      /// Argument handed to the function under test.
      genType Value;

      /// Result the function under test is expected to produce for Value.
      genType Return;
  };
  /// Builds a mask with the low `Bits` bits set by shifting an all-ones word left and inverting it.
  /// The shift is done on an unsigned value because left-shifting the negative int `~0`
  /// is undefined behaviour before C++20.
  /// @param Bits Number of low bits to set. Must be non-negative and smaller than the bit width
  ///             of int: a shift by the full width is undefined, so this branch-free variant
  ///             cannot produce the all-ones mask.
  /// @return The mask, converted to int.
  inline int mask_zero(int Bits)
  {
      unsigned int const Fill = ~0u;
      return static_cast<int>(~(Fill << Bits));
  }
  /// Builds a mask with the low `Bits` bits set, using a branch for the full-width case and
  /// `(1 << Bits) - 1` otherwise.
  /// The arithmetic is unsigned so that Bits == 31 no longer overflows a signed int.
  /// @param Bits Number of low bits to set. Any count at or above the bit width of int yields
  ///             an all-ones mask; negative counts are treated the same way, as before.
  /// @return The mask, converted to int (-1 when every bit is set).
  inline int mask_mix(int Bits)
  {
      unsigned int const Width = static_cast<unsigned int>(sizeof(int) * 8);

      // Converting to unsigned turns a negative count into a huge one, so it saturates as well.
      if(static_cast<unsigned int>(Bits) >= Width)
          return static_cast<int>(~0u);

      return static_cast<int>((1u << Bits) - 1u);
  }
  /// Builds a mask with the low `Bits` bits set, splitting the shift into two steps so that a
  /// request for every bit of the word never shifts by the full word width at once.
  /// The arithmetic is unsigned because left-shifting the negative int `~0` is undefined
  /// behaviour before C++20.
  /// @param Bits Number of low bits to set, from 0 up to the bit width of int.
  /// @return The mask, converted to int (-1 when every bit is set).
  inline int mask_half(int Bits)
  {
      // The two partial shifts add up to Bits, and each one stays below the word width.
      int const Half = Bits >> 1;
      int const Rest = Bits - Half;

      unsigned int const Fill = ~0u;
      unsigned int const ShiftHalf = Fill << Half;
      unsigned int const Reversed = ShiftHalf << Rest;
      return static_cast<int>(~Reversed);
  }
  /// Reference mask builder: sets the low `Bits` bits one at a time.
  /// @param Bits Number of low bits to set; a count of zero or less yields 0.
  /// @return The accumulated mask.
  inline int mask_loop(int Bits)
  {
      int Accumulated = 0;
      int Position = 0;
      while(Position < Bits)
      {
          Accumulated = Accumulated | (static_cast<int>(1) << Position);
          ++Position;
      }
      return Accumulated;
  }
  73. int perf()
  74. {
  75. int const Count = 100000000;
  76. std::clock_t Timestamp1 = std::clock();
  77. {
  78. std::vector<int> Mask;
  79. Mask.resize(Count);
  80. for(int i = 0; i < Count; ++i)
  81. Mask[i] = mask_mix(i % 32);
  82. }
  83. std::clock_t Timestamp2 = std::clock();
  84. {
  85. std::vector<int> Mask;
  86. Mask.resize(Count);
  87. for(int i = 0; i < Count; ++i)
  88. Mask[i] = mask_loop(i % 32);
  89. }
  90. std::clock_t Timestamp3 = std::clock();
  91. {
  92. std::vector<int> Mask;
  93. Mask.resize(Count);
  94. for(int i = 0; i < Count; ++i)
  95. Mask[i] = glm::mask(i % 32);
  96. }
  97. std::clock_t Timestamp4 = std::clock();
  98. {
  99. std::vector<int> Mask;
  100. Mask.resize(Count);
  101. for(int i = 0; i < Count; ++i)
  102. Mask[i] = mask_zero(i % 32);
  103. }
  104. std::clock_t Timestamp5 = std::clock();
  105. {
  106. std::vector<int> Mask;
  107. Mask.resize(Count);
  108. for(int i = 0; i < Count; ++i)
  109. Mask[i] = mask_half(i % 32);
  110. }
  111. std::clock_t Timestamp6 = std::clock();
  112. std::clock_t TimeMix = Timestamp2 - Timestamp1;
  113. std::clock_t TimeLoop = Timestamp3 - Timestamp2;
  114. std::clock_t TimeDefault = Timestamp4 - Timestamp3;
  115. std::clock_t TimeZero = Timestamp5 - Timestamp4;
  116. std::clock_t TimeHalf = Timestamp6 - Timestamp5;
  117. printf("mask[mix]: %d\n", static_cast<unsigned int>(TimeMix));
  118. printf("mask[loop]: %d\n", static_cast<unsigned int>(TimeLoop));
  119. printf("mask[default]: %d\n", static_cast<unsigned int>(TimeDefault));
  120. printf("mask[zero]: %d\n", static_cast<unsigned int>(TimeZero));
  121. printf("mask[half]: %d\n", static_cast<unsigned int>(TimeHalf));
  122. return TimeDefault < TimeLoop ? 0 : 1;
  123. }
  124. int test_uint()
  125. {
  126. type<glm::uint> const Data[] =
  127. {
  128. { 0, 0x00000000},
  129. { 1, 0x00000001},
  130. { 2, 0x00000003},
  131. { 3, 0x00000007},
  132. {31, 0x7fffffff},
  133. {32, 0xffffffff}
  134. };
  135. int Error(0);
  136. /* mask_zero is sadly not a correct code
  137. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  138. {
  139. int Result = mask_zero(Data[i].Value);
  140. Error += Data[i].Return == Result ? 0 : 1;
  141. }
  142. */
  143. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  144. {
  145. int Result = mask_mix(Data[i].Value);
  146. Error += Data[i].Return == Result ? 0 : 1;
  147. }
  148. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  149. {
  150. int Result = mask_half(Data[i].Value);
  151. Error += Data[i].Return == Result ? 0 : 1;
  152. }
  153. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  154. {
  155. int Result = mask_loop(Data[i].Value);
  156. Error += Data[i].Return == Result ? 0 : 1;
  157. }
  158. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  159. {
  160. int Result = glm::mask(Data[i].Value);
  161. Error += Data[i].Return == Result ? 0 : 1;
  162. }
  163. return Error;
  164. }
  165. int test_uvec4()
  166. {
  167. type<glm::ivec4> const Data[] =
  168. {
  169. {glm::ivec4( 0), glm::ivec4(0x00000000)},
  170. {glm::ivec4( 1), glm::ivec4(0x00000001)},
  171. {glm::ivec4( 2), glm::ivec4(0x00000003)},
  172. {glm::ivec4( 3), glm::ivec4(0x00000007)},
  173. {glm::ivec4(31), glm::ivec4(0x7fffffff)},
  174. {glm::ivec4(32), glm::ivec4(0xffffffff)}
  175. };
  176. int Error(0);
  177. for(std::size_t i = 0, n = sizeof(Data) / sizeof(type<glm::ivec4>); i < n; ++i)
  178. {
  179. glm::ivec4 Result = glm::mask(Data[i].Value);
  180. Error += glm::all(glm::equal(Data[i].Return, Result)) ? 0 : 1;
  181. }
  182. return Error;
  183. }
  184. int test()
  185. {
  186. int Error(0);
  187. Error += test_uint();
  188. Error += test_uvec4();
  189. return Error;
  190. }
  191. }//namespace mask
  192. namespace bitfieldInterleave3
  193. {
  /// Reference three-way bit interleave: bit i of x, y and z ends up at bit 3i, 3i + 1 and
  /// 3i + 2 of the result.
  /// @tparam PARAM Integer type of the inputs; its size fixes how many bits are interleaved.
  /// @tparam RET   Integer type the interleaved bits are accumulated in.
  /// @return The interleaved value.
  template <typename PARAM, typename RET>
  inline RET refBitfieldInterleave(PARAM x, PARAM y, PARAM z)
  {
      RET const Source[3] = {RET(x), RET(y), RET(z)};

      RET Interleaved = 0;
      for(RET Bit = 0; Bit < sizeof(PARAM) * 8; ++Bit)
      {
          // Bit `Bit` already sits at position Bit; moving it up by 2 * Bit + Lane puts it at
          // 3 * Bit + Lane.
          for(int Lane = 0; Lane < 3; ++Lane)
              Interleaved |= ((Source[Lane] & (RET(1U) << Bit)) << ((Bit << 1) + Lane));
      }
      return Interleaved;
  }
  206. int test()
  207. {
  208. int Error(0);
  209. glm::uint16 x_max = 1 << 11;
  210. glm::uint16 y_max = 1 << 11;
  211. glm::uint16 z_max = 1 << 11;
  212. for(glm::uint16 z = 0; z < z_max; z += 27)
  213. for(glm::uint16 y = 0; y < y_max; y += 27)
  214. for(glm::uint16 x = 0; x < x_max; x += 27)
  215. {
  216. glm::uint64 ResultA = refBitfieldInterleave<glm::uint16, glm::uint64>(x, y, z);
  217. glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z);
  218. Error += ResultA == ResultB ? 0 : 1;
  219. }
  220. return Error;
  221. }
  222. }
  223. namespace bitfieldInterleave4
  224. {
  /// Reference four-way bit interleave: bit i of x, y, z and w ends up at bit 4i, 4i + 1,
  /// 4i + 2 and 4i + 3 of the result.
  /// @tparam PARAM Integer type of the inputs; its size fixes how many bits are interleaved.
  /// @tparam RET   Integer type the interleaved bits are accumulated in.
  /// @return The interleaved value.
  template <typename PARAM, typename RET>
  inline RET loopBitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w)
  {
      RET const Source[4] = {x, y, z, w};

      RET Interleaved = 0;
      for(RET Bit = 0; Bit < sizeof(PARAM) * 8; Bit++)
      {
          // Isolate bit `Bit` of each input and drop it into its slot of the 4-bit group.
          for(int Lane = 0; Lane < 4; ++Lane)
              Interleaved |= ((((Source[Lane] >> Bit) & 1U)) << ((Bit << 2) + Lane));
      }
      return Interleaved;
  }
  239. int test()
  240. {
  241. int Error(0);
  242. glm::uint16 x_max = 1 << 11;
  243. glm::uint16 y_max = 1 << 11;
  244. glm::uint16 z_max = 1 << 11;
  245. glm::uint16 w_max = 1 << 11;
  246. for(glm::uint16 w = 0; w < w_max; w += 27)
  247. for(glm::uint16 z = 0; z < z_max; z += 27)
  248. for(glm::uint16 y = 0; y < y_max; y += 27)
  249. for(glm::uint16 x = 0; x < x_max; x += 27)
  250. {
  251. glm::uint64 ResultA = loopBitfieldInterleave<glm::uint16, glm::uint64>(x, y, z, w);
  252. glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z, w);
  253. Error += ResultA == ResultB ? 0 : 1;
  254. }
  255. return Error;
  256. }
  257. }
  258. namespace bitfieldInterleave
  259. {
  260. inline glm::uint64 fastBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  261. {
  262. glm::uint64 REG1;
  263. glm::uint64 REG2;
  264. REG1 = x;
  265. REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  266. REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  267. REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  268. REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  269. REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  270. REG2 = y;
  271. REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  272. REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  273. REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  274. REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  275. REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  276. return REG1 | (REG2 << 1);
  277. }
  278. inline glm::uint64 interleaveBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  279. {
  280. glm::uint64 REG1;
  281. glm::uint64 REG2;
  282. REG1 = x;
  283. REG2 = y;
  284. REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  285. REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  286. REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  287. REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  288. REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  289. REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  290. REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  291. REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  292. REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  293. REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  294. return REG1 | (REG2 << 1);
  295. }
  296. inline glm::uint64 loopBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  297. {
  298. static glm::uint64 const Mask[5] =
  299. {
  300. 0x5555555555555555,
  301. 0x3333333333333333,
  302. 0x0F0F0F0F0F0F0F0F,
  303. 0x00FF00FF00FF00FF,
  304. 0x0000FFFF0000FFFF
  305. };
  306. glm::uint64 REG1 = x;
  307. glm::uint64 REG2 = y;
  308. for(int i = 4; i >= 0; --i)
  309. {
  310. REG1 = ((REG1 << (1 << i)) | REG1) & Mask[i];
  311. REG2 = ((REG2 << (1 << i)) | REG2) & Mask[i];
  312. }
  313. return REG1 | (REG2 << 1);
  314. }
  315. #if(GLM_ARCH != GLM_ARCH_PURE)
  316. inline glm::uint64 sseBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  317. {
  318. GLM_ALIGN(16) glm::uint32 const Array[4] = {x, 0, y, 0};
  319. __m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
  320. __m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
  321. __m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
  322. __m128i const Mask1 = _mm_set1_epi32(0x33333333);
  323. __m128i const Mask0 = _mm_set1_epi32(0x55555555);
  324. __m128i Reg1;
  325. __m128i Reg2;
  326. // REG1 = x;
  327. // REG2 = y;
  328. Reg1 = _mm_load_si128((__m128i*)Array);
  329. //REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  330. //REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  331. Reg2 = _mm_slli_si128(Reg1, 2);
  332. Reg1 = _mm_or_si128(Reg2, Reg1);
  333. Reg1 = _mm_and_si128(Reg1, Mask4);
  334. //REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  335. //REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  336. Reg2 = _mm_slli_si128(Reg1, 1);
  337. Reg1 = _mm_or_si128(Reg2, Reg1);
  338. Reg1 = _mm_and_si128(Reg1, Mask3);
  339. //REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  340. //REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  341. Reg2 = _mm_slli_epi32(Reg1, 4);
  342. Reg1 = _mm_or_si128(Reg2, Reg1);
  343. Reg1 = _mm_and_si128(Reg1, Mask2);
  344. //REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  345. //REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  346. Reg2 = _mm_slli_epi32(Reg1, 2);
  347. Reg1 = _mm_or_si128(Reg2, Reg1);
  348. Reg1 = _mm_and_si128(Reg1, Mask1);
  349. //REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  350. //REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  351. Reg2 = _mm_slli_epi32(Reg1, 1);
  352. Reg1 = _mm_or_si128(Reg2, Reg1);
  353. Reg1 = _mm_and_si128(Reg1, Mask0);
  354. //return REG1 | (REG2 << 1);
  355. Reg2 = _mm_slli_epi32(Reg1, 1);
  356. Reg2 = _mm_srli_si128(Reg2, 8);
  357. Reg1 = _mm_or_si128(Reg1, Reg2);
  358. GLM_ALIGN(16) glm::uint64 Result[2];
  359. _mm_store_si128((__m128i*)Result, Reg1);
  360. return Result[0];
  361. }
  362. inline glm::uint64 sseUnalignedBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  363. {
  364. glm::uint32 const Array[4] = {x, 0, y, 0};
  365. __m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
  366. __m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
  367. __m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
  368. __m128i const Mask1 = _mm_set1_epi32(0x33333333);
  369. __m128i const Mask0 = _mm_set1_epi32(0x55555555);
  370. __m128i Reg1;
  371. __m128i Reg2;
  372. // REG1 = x;
  373. // REG2 = y;
  374. Reg1 = _mm_loadu_si128((__m128i*)Array);
  375. //REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  376. //REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  377. Reg2 = _mm_slli_si128(Reg1, 2);
  378. Reg1 = _mm_or_si128(Reg2, Reg1);
  379. Reg1 = _mm_and_si128(Reg1, Mask4);
  380. //REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  381. //REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  382. Reg2 = _mm_slli_si128(Reg1, 1);
  383. Reg1 = _mm_or_si128(Reg2, Reg1);
  384. Reg1 = _mm_and_si128(Reg1, Mask3);
  385. //REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  386. //REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  387. Reg2 = _mm_slli_epi32(Reg1, 4);
  388. Reg1 = _mm_or_si128(Reg2, Reg1);
  389. Reg1 = _mm_and_si128(Reg1, Mask2);
  390. //REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  391. //REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  392. Reg2 = _mm_slli_epi32(Reg1, 2);
  393. Reg1 = _mm_or_si128(Reg2, Reg1);
  394. Reg1 = _mm_and_si128(Reg1, Mask1);
  395. //REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  396. //REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  397. Reg2 = _mm_slli_epi32(Reg1, 1);
  398. Reg1 = _mm_or_si128(Reg2, Reg1);
  399. Reg1 = _mm_and_si128(Reg1, Mask0);
  400. //return REG1 | (REG2 << 1);
  401. Reg2 = _mm_slli_epi32(Reg1, 1);
  402. Reg2 = _mm_srli_si128(Reg2, 8);
  403. Reg1 = _mm_or_si128(Reg1, Reg2);
  404. glm::uint64 Result[2];
  405. _mm_storeu_si128((__m128i*)Result, Reg1);
  406. return Result[0];
  407. }
  408. #endif//(GLM_ARCH != GLM_ARCH_PURE)
  409. int test()
  410. {
  411. {
  412. for(glm::uint32 y = 0; y < (1 << 10); ++y)
  413. for(glm::uint32 x = 0; x < (1 << 10); ++x)
  414. {
  415. glm::uint64 A = glm::bitfieldInterleave(x, y);
  416. glm::uint64 B = fastBitfieldInterleave(x, y);
  417. glm::uint64 C = loopBitfieldInterleave(x, y);
  418. glm::uint64 D = interleaveBitfieldInterleave(x, y);
  419. assert(A == B);
  420. assert(A == C);
  421. assert(A == D);
  422. # if(GLM_ARCH != GLM_ARCH_PURE)
  423. glm::uint64 E = sseBitfieldInterleave(x, y);
  424. glm::uint64 F = sseUnalignedBitfieldInterleave(x, y);
  425. assert(A == E);
  426. assert(A == F);
  427. __m128i G = glm::detail::_mm_bit_interleave_si128(_mm_set_epi32(0, y, 0, x));
  428. glm::uint64 Result[2];
  429. _mm_storeu_si128((__m128i*)Result, G);
  430. assert(A == Result[0]);
  431. # endif//(GLM_ARCH != GLM_ARCH_PURE)
  432. }
  433. }
  434. {
  435. for(glm::uint8 y = 0; y < 127; ++y)
  436. for(glm::uint8 x = 0; x < 127; ++x)
  437. {
  438. glm::uint64 A(glm::bitfieldInterleave(glm::uint8(x), glm::uint8(y)));
  439. glm::uint64 B(glm::bitfieldInterleave(glm::uint16(x), glm::uint16(y)));
  440. glm::uint64 C(glm::bitfieldInterleave(glm::uint32(x), glm::uint32(y)));
  441. glm::int64 D(glm::bitfieldInterleave(glm::int8(x), glm::int8(y)));
  442. glm::int64 E(glm::bitfieldInterleave(glm::int16(x), glm::int16(y)));
  443. glm::int64 F(glm::bitfieldInterleave(glm::int32(x), glm::int32(y)));
  444. assert(D == E);
  445. assert(D == F);
  446. }
  447. }
  448. return 0;
  449. }
  450. int perf()
  451. {
  452. glm::uint32 x_max = 1 << 11;
  453. glm::uint32 y_max = 1 << 10;
  454. // ALU
  455. std::vector<glm::uint64> Data(x_max * y_max);
  456. std::vector<glm::u32vec2> Param(x_max * y_max);
  457. for(glm::uint32 i = 0; i < Param.size(); ++i)
  458. Param[i] = glm::u32vec2(i % x_max, i / y_max);
  459. {
  460. std::clock_t LastTime = std::clock();
  461. for(std::size_t i = 0; i < Data.size(); ++i)
  462. Data[i] = glm::bitfieldInterleave(Param[i].x, Param[i].y);
  463. std::clock_t Time = std::clock() - LastTime;
  464. std::printf("glm::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  465. }
  466. {
  467. std::clock_t LastTime = std::clock();
  468. for(std::size_t i = 0; i < Data.size(); ++i)
  469. Data[i] = fastBitfieldInterleave(Param[i].x, Param[i].y);
  470. std::clock_t Time = std::clock() - LastTime;
  471. std::printf("fastBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  472. }
  473. {
  474. std::clock_t LastTime = std::clock();
  475. for(std::size_t i = 0; i < Data.size(); ++i)
  476. Data[i] = loopBitfieldInterleave(Param[i].x, Param[i].y);
  477. std::clock_t Time = std::clock() - LastTime;
  478. std::printf("loopBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  479. }
  480. {
  481. std::clock_t LastTime = std::clock();
  482. for(std::size_t i = 0; i < Data.size(); ++i)
  483. Data[i] = interleaveBitfieldInterleave(Param[i].x, Param[i].y);
  484. std::clock_t Time = std::clock() - LastTime;
  485. std::printf("interleaveBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  486. }
  487. # if(GLM_ARCH != GLM_ARCH_PURE)
  488. {
  489. std::clock_t LastTime = std::clock();
  490. for(std::size_t i = 0; i < Data.size(); ++i)
  491. Data[i] = sseBitfieldInterleave(Param[i].x, Param[i].y);
  492. std::clock_t Time = std::clock() - LastTime;
  493. std::printf("sseBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  494. }
  495. {
  496. std::clock_t LastTime = std::clock();
  497. for(std::size_t i = 0; i < Data.size(); ++i)
  498. Data[i] = sseUnalignedBitfieldInterleave(Param[i].x, Param[i].y);
  499. std::clock_t Time = std::clock() - LastTime;
  500. std::printf("sseUnalignedBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  501. }
  502. # endif//(GLM_ARCH != GLM_ARCH_PURE)
  503. {
  504. std::clock_t LastTime = std::clock();
  505. for(std::size_t i = 0; i < Data.size(); ++i)
  506. Data[i] = glm::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x);
  507. std::clock_t Time = std::clock() - LastTime;
  508. std::printf("glm::detail::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  509. }
  510. # if(GLM_ARCH != GLM_ARCH_PURE && !(GLM_COMPILER & GLM_COMPILER_GCC))
  511. {
  512. // SIMD
  513. std::vector<__m128i> SimdData;
  514. SimdData.resize(x_max * y_max);
  515. std::vector<__m128i> SimdParam;
  516. SimdParam.resize(x_max * y_max);
  517. for(int i = 0; i < SimdParam.size(); ++i)
  518. SimdParam[i] = _mm_set_epi32(i % x_max, 0, i / y_max, 0);
  519. std::clock_t LastTime = std::clock();
  520. for(std::size_t i = 0; i < SimdData.size(); ++i)
  521. SimdData[i] = glm::detail::_mm_bit_interleave_si128(SimdParam[i]);
  522. std::clock_t Time = std::clock() - LastTime;
  523. std::printf("_mm_bit_interleave_si128 Time %d clocks\n", static_cast<unsigned int>(Time));
  524. }
  525. # endif//(GLM_ARCH != GLM_ARCH_PURE)
  526. return 0;
  527. }
  528. }//namespace bitfieldInterleave
  529. int main()
  530. {
  531. int Error(0);
  532. Error += ::mask::test();
  533. Error += ::bitfieldInterleave3::test();
  534. Error += ::bitfieldInterleave4::test();
  535. Error += ::bitfieldInterleave::test();
  536. //Error += ::bitRevert::test();
  537. # ifdef NDEBUG
  538. Error += ::mask::perf();
  539. Error += ::bitfieldInterleave::perf();
  540. # endif//NDEBUG
  541. return Error;
  542. }