gtc_bitfield.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673
  1. ///////////////////////////////////////////////////////////////////////////////////
  2. /// OpenGL Mathematics (glm.g-truc.net)
  3. ///
  4. /// Copyright (c) 2005 - 2015 G-Truc Creation (www.g-truc.net)
  5. /// Permission is hereby granted, free of charge, to any person obtaining a copy
  6. /// of this software and associated documentation files (the "Software"), to deal
  7. /// in the Software without restriction, including without limitation the rights
  8. /// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. /// copies of the Software, and to permit persons to whom the Software is
  10. /// furnished to do so, subject to the following conditions:
  11. ///
  12. /// The above copyright notice and this permission notice shall be included in
  13. /// all copies or substantial portions of the Software.
  14. ///
  15. /// Restrictions:
  16. /// By making use of the Software for military purposes, you choose to make
  17. /// a Bunny unhappy.
  18. ///
  19. /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. /// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. /// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. /// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. /// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  25. /// THE SOFTWARE.
  26. ///
  27. /// @file test/gtc/gtc_bitfield.cpp
  28. /// @date 2014-10-25 / 2014-11-25
  29. /// @author Christophe Riccio
  30. ///////////////////////////////////////////////////////////////////////////////////
  31. #include <glm/gtc/bitfield.hpp>
  32. #include <glm/gtc/type_precision.hpp>
  33. #include <glm/vector_relational.hpp>
  34. #include <glm/integer.hpp>
  35. #include <ctime>
  36. #include <cstdio>
  37. #include <vector>
  38. namespace mask
  39. {
  /// One row of a test table: an input together with the result expected for it.
  template <class T>
  struct type
  {
      T Value;   ///< Input handed to the function under test.
      T Return;  ///< Result the function under test is expected to produce.
  };
  /// Builds a mask of the `Bits` lowest bits by shifting an all-ones word left
  /// and inverting the result.
  /// A shift count equal to the width of int is undefined, so this variant is
  /// not valid for Bits == 32.
  inline int mask_zero(int Bits)
  {
      int const AllOnes = ~0;
      int const HighBits = AllOnes << Bits;
      return ~HighBits;
  }
  /// Builds a mask of the `Bits` lowest bits, saturating to all ones once
  /// `Bits` reaches the width of int.
  ///
  /// The arithmetic is carried out on unsigned values: with signed operands
  /// `(1 << 31) - 1` overflows, which is undefined behaviour. A negative
  /// `Bits` converts to a huge unsigned count and therefore yields all ones,
  /// exactly as the former implicit signed/unsigned comparison did.
  ///
  /// @param Bits Number of low bits to set.
  /// @return The mask, reinterpreted as int.
  inline int mask_mix(int Bits)
  {
      unsigned int const Width = static_cast<unsigned int>(sizeof(int) * 8);
      unsigned int const Count = static_cast<unsigned int>(Bits);
      unsigned int const Mask = Count >= Width ? ~0u : (1u << Count) - 1u;
      return static_cast<int>(Mask);
  }
  /// Builds a mask of the `Bits` lowest bits using two partial shifts.
  ///
  /// The shift is split in two because shifting by the full width of the type
  /// (e.g. by 32) is undefined; each half is at most 16 for Bits <= 32.
  /// The shifts operate on unsigned values, since left-shifting a negative
  /// signed value is undefined before C++20.
  ///
  /// @param Bits Number of low bits to set, expected in [0, 32].
  /// @return The mask, reinterpreted as int.
  inline int mask_half(int Bits)
  {
      int const Half = Bits >> 1;
      int const Rest = Bits - Half;
      unsigned int const Fill = ~0u;
      unsigned int const ShiftHalf = Fill << Half;
      unsigned int const Reversed = ShiftHalf << Rest;
      return static_cast<int>(~Reversed);
  }
  /// Builds a mask of the `Bits` lowest bits by setting them one at a time.
  /// Returns 0 when `Bits` is zero or negative.
  inline int mask_loop(int Bits)
  {
      int Accumulated = 0;
      int Position = 0;
      while(Position < Bits)
      {
          Accumulated |= (static_cast<int>(1) << Position);
          ++Position;
      }
      return Accumulated;
  }
  71. int perf()
  72. {
  73. int const Count = 100000000;
  74. std::clock_t Timestamp1 = std::clock();
  75. {
  76. std::vector<int> Mask;
  77. Mask.resize(Count);
  78. for(int i = 0; i < Count; ++i)
  79. Mask[i] = mask_mix(i % 32);
  80. }
  81. std::clock_t Timestamp2 = std::clock();
  82. {
  83. std::vector<int> Mask;
  84. Mask.resize(Count);
  85. for(int i = 0; i < Count; ++i)
  86. Mask[i] = mask_loop(i % 32);
  87. }
  88. std::clock_t Timestamp3 = std::clock();
  89. {
  90. std::vector<int> Mask;
  91. Mask.resize(Count);
  92. for(int i = 0; i < Count; ++i)
  93. Mask[i] = glm::mask(i % 32);
  94. }
  95. std::clock_t Timestamp4 = std::clock();
  96. {
  97. std::vector<int> Mask;
  98. Mask.resize(Count);
  99. for(int i = 0; i < Count; ++i)
  100. Mask[i] = mask_zero(i % 32);
  101. }
  102. std::clock_t Timestamp5 = std::clock();
  103. {
  104. std::vector<int> Mask;
  105. Mask.resize(Count);
  106. for(int i = 0; i < Count; ++i)
  107. Mask[i] = mask_half(i % 32);
  108. }
  109. std::clock_t Timestamp6 = std::clock();
  110. std::clock_t TimeMix = Timestamp2 - Timestamp1;
  111. std::clock_t TimeLoop = Timestamp3 - Timestamp2;
  112. std::clock_t TimeDefault = Timestamp4 - Timestamp3;
  113. std::clock_t TimeZero = Timestamp5 - Timestamp4;
  114. std::clock_t TimeHalf = Timestamp6 - Timestamp5;
  115. printf("mask[mix]: %d\n", static_cast<unsigned int>(TimeMix));
  116. printf("mask[loop]: %d\n", static_cast<unsigned int>(TimeLoop));
  117. printf("mask[default]: %d\n", static_cast<unsigned int>(TimeDefault));
  118. printf("mask[zero]: %d\n", static_cast<unsigned int>(TimeZero));
  119. printf("mask[half]: %d\n", static_cast<unsigned int>(TimeHalf));
  120. return TimeDefault < TimeLoop ? 0 : 1;
  121. }
  122. int test_uint()
  123. {
  124. type<glm::uint> const Data[] =
  125. {
  126. { 0, 0x00000000},
  127. { 1, 0x00000001},
  128. { 2, 0x00000003},
  129. { 3, 0x00000007},
  130. {31, 0x7fffffff},
  131. {32, 0xffffffff}
  132. };
  133. int Error(0);
  134. /* mask_zero is sadly not a correct code
  135. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  136. {
  137. int Result = mask_zero(Data[i].Value);
  138. Error += Data[i].Return == Result ? 0 : 1;
  139. }
  140. */
  141. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  142. {
  143. int Result = mask_mix(Data[i].Value);
  144. Error += Data[i].Return == Result ? 0 : 1;
  145. }
  146. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  147. {
  148. int Result = mask_half(Data[i].Value);
  149. Error += Data[i].Return == Result ? 0 : 1;
  150. }
  151. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  152. {
  153. int Result = mask_loop(Data[i].Value);
  154. Error += Data[i].Return == Result ? 0 : 1;
  155. }
  156. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int>); ++i)
  157. {
  158. int Result = glm::mask(Data[i].Value);
  159. Error += Data[i].Return == Result ? 0 : 1;
  160. }
  161. return Error;
  162. }
  163. int test_uvec4()
  164. {
  165. type<glm::ivec4> const Data[] =
  166. {
  167. {glm::ivec4( 0), glm::ivec4(0x00000000)},
  168. {glm::ivec4( 1), glm::ivec4(0x00000001)},
  169. {glm::ivec4( 2), glm::ivec4(0x00000003)},
  170. {glm::ivec4( 3), glm::ivec4(0x00000007)},
  171. {glm::ivec4(31), glm::ivec4(0x7fffffff)},
  172. {glm::ivec4(32), glm::ivec4(0xffffffff)}
  173. };
  174. int Error(0);
  175. for(std::size_t i = 0, n = sizeof(Data) / sizeof(type<glm::ivec4>); i < n; ++i)
  176. {
  177. glm::ivec4 Result = glm::mask(Data[i].Value);
  178. Error += glm::all(glm::equal(Data[i].Return, Result)) ? 0 : 1;
  179. }
  180. return Error;
  181. }
  182. int test()
  183. {
  184. int Error(0);
  185. Error += test_uint();
  186. Error += test_uvec4();
  187. return Error;
  188. }
  189. }//namespace mask
  190. namespace bitfieldInterleave3
  191. {
  /// Reference 3-way bit interleave: bit i of x, y and z is written to bit
  /// 3*i, 3*i + 1 and 3*i + 2 of the result respectively.
  ///
  /// @tparam PARAM Type of the inputs; its size fixes the number of bits visited.
  /// @tparam RET   Type of the result and of all intermediate values.
  template <typename PARAM, typename RET>
  inline RET refBitfieldInterleave(PARAM x, PARAM y, PARAM z)
  {
      RET Interleaved = 0;
      RET Bit = 0;
      while(Bit < sizeof(PARAM) * 8)
      {
          // The isolated bit already sits at position Bit, so shifting it by
          // a further 2*Bit (+0, +1, +2) lands it in its interleaved slot.
          Interleaved = Interleaved
              | ((RET(x) & (RET(1U) << Bit)) << ((Bit << 1) + 0))
              | ((RET(y) & (RET(1U) << Bit)) << ((Bit << 1) + 1))
              | ((RET(z) & (RET(1U) << Bit)) << ((Bit << 1) + 2));
          ++Bit;
      }
      return Interleaved;
  }
  204. int test()
  205. {
  206. int Error(0);
  207. glm::uint16 x_max = 1 << 11;
  208. glm::uint16 y_max = 1 << 11;
  209. glm::uint16 z_max = 1 << 11;
  210. for(glm::uint16 z = 0; z < z_max; z += 27)
  211. for(glm::uint16 y = 0; y < y_max; y += 27)
  212. for(glm::uint16 x = 0; x < x_max; x += 27)
  213. {
  214. glm::uint64 ResultA = refBitfieldInterleave<glm::uint16, glm::uint64>(x, y, z);
  215. glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z);
  216. Error += ResultA == ResultB ? 0 : 1;
  217. }
  218. return Error;
  219. }
  220. }
  221. namespace bitfieldInterleave4
  222. {
  /// Reference 4-way bit interleave: bit i of x, y, z and w is written to bit
  /// 4*i, 4*i + 1, 4*i + 2 and 4*i + 3 of the result respectively.
  ///
  /// @tparam PARAM Type of the inputs; its size fixes the number of bits visited.
  /// @tparam RET   Type of the result and of all intermediate values.
  template <typename PARAM, typename RET>
  inline RET loopBitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w)
  {
      RET const Source[4] = {x, y, z, w};
      RET Interleaved = 0;

      for(RET Bit = 0; Bit < sizeof(PARAM) * 8; Bit++)
      {
          // Lane is the index of the input and also its offset inside each
          // group of four output bits.
          for(int Lane = 0; Lane < 4; ++Lane)
              Interleaved |= ((((Source[Lane] >> Bit) & 1U)) << ((Bit << 2) + Lane));
      }

      return Interleaved;
  }
  237. int test()
  238. {
  239. int Error(0);
  240. glm::uint16 x_max = 1 << 11;
  241. glm::uint16 y_max = 1 << 11;
  242. glm::uint16 z_max = 1 << 11;
  243. glm::uint16 w_max = 1 << 11;
  244. for(glm::uint16 w = 0; w < w_max; w += 27)
  245. for(glm::uint16 z = 0; z < z_max; z += 27)
  246. for(glm::uint16 y = 0; y < y_max; y += 27)
  247. for(glm::uint16 x = 0; x < x_max; x += 27)
  248. {
  249. glm::uint64 ResultA = loopBitfieldInterleave<glm::uint16, glm::uint64>(x, y, z, w);
  250. glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z, w);
  251. Error += ResultA == ResultB ? 0 : 1;
  252. }
  253. return Error;
  254. }
  255. }
  256. namespace bitfieldInterleave
  257. {
  258. inline glm::uint64 fastBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  259. {
  260. glm::uint64 REG1;
  261. glm::uint64 REG2;
  262. REG1 = x;
  263. REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  264. REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  265. REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  266. REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  267. REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  268. REG2 = y;
  269. REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  270. REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  271. REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  272. REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  273. REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  274. return REG1 | (REG2 << 1);
  275. }
  276. inline glm::uint64 interleaveBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  277. {
  278. glm::uint64 REG1;
  279. glm::uint64 REG2;
  280. REG1 = x;
  281. REG2 = y;
  282. REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  283. REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  284. REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  285. REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  286. REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  287. REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  288. REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  289. REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  290. REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  291. REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  292. return REG1 | (REG2 << 1);
  293. }
  294. inline glm::uint64 loopBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  295. {
  296. static glm::uint64 const Mask[5] =
  297. {
  298. 0x5555555555555555,
  299. 0x3333333333333333,
  300. 0x0F0F0F0F0F0F0F0F,
  301. 0x00FF00FF00FF00FF,
  302. 0x0000FFFF0000FFFF
  303. };
  304. glm::uint64 REG1 = x;
  305. glm::uint64 REG2 = y;
  306. for(int i = 4; i >= 0; --i)
  307. {
  308. REG1 = ((REG1 << (1 << i)) | REG1) & Mask[i];
  309. REG2 = ((REG2 << (1 << i)) | REG2) & Mask[i];
  310. }
  311. return REG1 | (REG2 << 1);
  312. }
  313. #if(GLM_ARCH != GLM_ARCH_PURE)
  314. inline glm::uint64 sseBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  315. {
  316. GLM_ALIGN(16) glm::uint32 const Array[4] = {x, 0, y, 0};
  317. __m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
  318. __m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
  319. __m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
  320. __m128i const Mask1 = _mm_set1_epi32(0x33333333);
  321. __m128i const Mask0 = _mm_set1_epi32(0x55555555);
  322. __m128i Reg1;
  323. __m128i Reg2;
  324. // REG1 = x;
  325. // REG2 = y;
  326. Reg1 = _mm_load_si128((__m128i*)Array);
  327. //REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  328. //REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  329. Reg2 = _mm_slli_si128(Reg1, 2);
  330. Reg1 = _mm_or_si128(Reg2, Reg1);
  331. Reg1 = _mm_and_si128(Reg1, Mask4);
  332. //REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  333. //REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  334. Reg2 = _mm_slli_si128(Reg1, 1);
  335. Reg1 = _mm_or_si128(Reg2, Reg1);
  336. Reg1 = _mm_and_si128(Reg1, Mask3);
  337. //REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  338. //REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  339. Reg2 = _mm_slli_epi32(Reg1, 4);
  340. Reg1 = _mm_or_si128(Reg2, Reg1);
  341. Reg1 = _mm_and_si128(Reg1, Mask2);
  342. //REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  343. //REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  344. Reg2 = _mm_slli_epi32(Reg1, 2);
  345. Reg1 = _mm_or_si128(Reg2, Reg1);
  346. Reg1 = _mm_and_si128(Reg1, Mask1);
  347. //REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  348. //REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  349. Reg2 = _mm_slli_epi32(Reg1, 1);
  350. Reg1 = _mm_or_si128(Reg2, Reg1);
  351. Reg1 = _mm_and_si128(Reg1, Mask0);
  352. //return REG1 | (REG2 << 1);
  353. Reg2 = _mm_slli_epi32(Reg1, 1);
  354. Reg2 = _mm_srli_si128(Reg2, 8);
  355. Reg1 = _mm_or_si128(Reg1, Reg2);
  356. GLM_ALIGN(16) glm::uint64 Result[2];
  357. _mm_store_si128((__m128i*)Result, Reg1);
  358. return Result[0];
  359. }
  360. inline glm::uint64 sseUnalignedBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  361. {
  362. glm::uint32 const Array[4] = {x, 0, y, 0};
  363. __m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
  364. __m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
  365. __m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
  366. __m128i const Mask1 = _mm_set1_epi32(0x33333333);
  367. __m128i const Mask0 = _mm_set1_epi32(0x55555555);
  368. __m128i Reg1;
  369. __m128i Reg2;
  370. // REG1 = x;
  371. // REG2 = y;
  372. Reg1 = _mm_loadu_si128((__m128i*)Array);
  373. //REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  374. //REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  375. Reg2 = _mm_slli_si128(Reg1, 2);
  376. Reg1 = _mm_or_si128(Reg2, Reg1);
  377. Reg1 = _mm_and_si128(Reg1, Mask4);
  378. //REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  379. //REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  380. Reg2 = _mm_slli_si128(Reg1, 1);
  381. Reg1 = _mm_or_si128(Reg2, Reg1);
  382. Reg1 = _mm_and_si128(Reg1, Mask3);
  383. //REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  384. //REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  385. Reg2 = _mm_slli_epi32(Reg1, 4);
  386. Reg1 = _mm_or_si128(Reg2, Reg1);
  387. Reg1 = _mm_and_si128(Reg1, Mask2);
  388. //REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  389. //REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  390. Reg2 = _mm_slli_epi32(Reg1, 2);
  391. Reg1 = _mm_or_si128(Reg2, Reg1);
  392. Reg1 = _mm_and_si128(Reg1, Mask1);
  393. //REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  394. //REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  395. Reg2 = _mm_slli_epi32(Reg1, 1);
  396. Reg1 = _mm_or_si128(Reg2, Reg1);
  397. Reg1 = _mm_and_si128(Reg1, Mask0);
  398. //return REG1 | (REG2 << 1);
  399. Reg2 = _mm_slli_epi32(Reg1, 1);
  400. Reg2 = _mm_srli_si128(Reg2, 8);
  401. Reg1 = _mm_or_si128(Reg1, Reg2);
  402. glm::uint64 Result[2];
  403. _mm_storeu_si128((__m128i*)Result, Reg1);
  404. return Result[0];
  405. }
  406. #endif//(GLM_ARCH != GLM_ARCH_PURE)
  407. int test()
  408. {
  409. {
  410. for(glm::uint32 y = 0; y < (1 << 10); ++y)
  411. for(glm::uint32 x = 0; x < (1 << 10); ++x)
  412. {
  413. glm::uint64 A = glm::bitfieldInterleave(x, y);
  414. glm::uint64 B = fastBitfieldInterleave(x, y);
  415. glm::uint64 C = loopBitfieldInterleave(x, y);
  416. glm::uint64 D = interleaveBitfieldInterleave(x, y);
  417. assert(A == B);
  418. assert(A == C);
  419. assert(A == D);
  420. # if(GLM_ARCH != GLM_ARCH_PURE)
  421. glm::uint64 E = sseBitfieldInterleave(x, y);
  422. glm::uint64 F = sseUnalignedBitfieldInterleave(x, y);
  423. assert(A == E);
  424. assert(A == F);
  425. __m128i G = glm::detail::_mm_bit_interleave_si128(_mm_set_epi32(0, y, 0, x));
  426. glm::uint64 Result[2];
  427. _mm_storeu_si128((__m128i*)Result, G);
  428. assert(A == Result[0]);
  429. # endif//(GLM_ARCH != GLM_ARCH_PURE)
  430. }
  431. }
  432. {
  433. for(glm::uint8 y = 0; y < 127; ++y)
  434. for(glm::uint8 x = 0; x < 127; ++x)
  435. {
  436. glm::uint64 A(glm::bitfieldInterleave(glm::uint8(x), glm::uint8(y)));
  437. glm::uint64 B(glm::bitfieldInterleave(glm::uint16(x), glm::uint16(y)));
  438. glm::uint64 C(glm::bitfieldInterleave(glm::uint32(x), glm::uint32(y)));
  439. glm::int64 D(glm::bitfieldInterleave(glm::int8(x), glm::int8(y)));
  440. glm::int64 E(glm::bitfieldInterleave(glm::int16(x), glm::int16(y)));
  441. glm::int64 F(glm::bitfieldInterleave(glm::int32(x), glm::int32(y)));
  442. assert(D == E);
  443. assert(D == F);
  444. }
  445. }
  446. return 0;
  447. }
  448. int perf()
  449. {
  450. glm::uint32 x_max = 1 << 11;
  451. glm::uint32 y_max = 1 << 10;
  452. // ALU
  453. std::vector<glm::uint64> Data(x_max * y_max);
  454. std::vector<glm::u32vec2> Param(x_max * y_max);
  455. for(glm::uint32 i = 0; i < Param.size(); ++i)
  456. Param[i] = glm::u32vec2(i % x_max, i / y_max);
  457. {
  458. std::clock_t LastTime = std::clock();
  459. for(std::size_t i = 0; i < Data.size(); ++i)
  460. Data[i] = glm::bitfieldInterleave(Param[i].x, Param[i].y);
  461. std::clock_t Time = std::clock() - LastTime;
  462. std::printf("glm::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  463. }
  464. {
  465. std::clock_t LastTime = std::clock();
  466. for(std::size_t i = 0; i < Data.size(); ++i)
  467. Data[i] = fastBitfieldInterleave(Param[i].x, Param[i].y);
  468. std::clock_t Time = std::clock() - LastTime;
  469. std::printf("fastBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  470. }
  471. {
  472. std::clock_t LastTime = std::clock();
  473. for(std::size_t i = 0; i < Data.size(); ++i)
  474. Data[i] = loopBitfieldInterleave(Param[i].x, Param[i].y);
  475. std::clock_t Time = std::clock() - LastTime;
  476. std::printf("loopBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  477. }
  478. {
  479. std::clock_t LastTime = std::clock();
  480. for(std::size_t i = 0; i < Data.size(); ++i)
  481. Data[i] = interleaveBitfieldInterleave(Param[i].x, Param[i].y);
  482. std::clock_t Time = std::clock() - LastTime;
  483. std::printf("interleaveBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  484. }
  485. # if(GLM_ARCH != GLM_ARCH_PURE)
  486. {
  487. std::clock_t LastTime = std::clock();
  488. for(std::size_t i = 0; i < Data.size(); ++i)
  489. Data[i] = sseBitfieldInterleave(Param[i].x, Param[i].y);
  490. std::clock_t Time = std::clock() - LastTime;
  491. std::printf("sseBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  492. }
  493. {
  494. std::clock_t LastTime = std::clock();
  495. for(std::size_t i = 0; i < Data.size(); ++i)
  496. Data[i] = sseUnalignedBitfieldInterleave(Param[i].x, Param[i].y);
  497. std::clock_t Time = std::clock() - LastTime;
  498. std::printf("sseUnalignedBitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  499. }
  500. # endif//(GLM_ARCH != GLM_ARCH_PURE)
  501. {
  502. std::clock_t LastTime = std::clock();
  503. for(std::size_t i = 0; i < Data.size(); ++i)
  504. Data[i] = glm::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x);
  505. std::clock_t Time = std::clock() - LastTime;
  506. std::printf("glm::detail::bitfieldInterleave Time %d clocks\n", static_cast<unsigned int>(Time));
  507. }
  508. # if(GLM_ARCH != GLM_ARCH_PURE && !(GLM_COMPILER & GLM_COMPILER_GCC))
  509. {
  510. // SIMD
  511. std::vector<__m128i> SimdData;
  512. SimdData.resize(x_max * y_max);
  513. std::vector<__m128i> SimdParam;
  514. SimdParam.resize(x_max * y_max);
  515. for(int i = 0; i < SimdParam.size(); ++i)
  516. SimdParam[i] = _mm_set_epi32(i % x_max, 0, i / y_max, 0);
  517. std::clock_t LastTime = std::clock();
  518. for(std::size_t i = 0; i < SimdData.size(); ++i)
  519. SimdData[i] = glm::detail::_mm_bit_interleave_si128(SimdParam[i]);
  520. std::clock_t Time = std::clock() - LastTime;
  521. std::printf("_mm_bit_interleave_si128 Time %d clocks\n", static_cast<unsigned int>(Time));
  522. }
  523. # endif//(GLM_ARCH != GLM_ARCH_PURE)
  524. return 0;
  525. }
  526. }//namespace bitfieldInterleave
  527. int main()
  528. {
  529. int Error(0);
  530. Error += ::mask::test();
  531. Error += ::bitfieldInterleave3::test();
  532. Error += ::bitfieldInterleave4::test();
  533. Error += ::bitfieldInterleave::test();
  534. //Error += ::bitRevert::test();
  535. # ifdef NDEBUG
  536. Error += ::mask::perf();
  537. Error += ::bitfieldInterleave::perf();
  538. # endif//NDEBUG
  539. return Error;
  540. }