2
0

gtx_bit.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. ///////////////////////////////////////////////////////////////////////////////////////////////////
  2. // OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
  3. ///////////////////////////////////////////////////////////////////////////////////////////////////
  4. // Created : 2010-09-16
  5. // Updated : 2010-09-16
  6. // Licence : This source is under MIT licence
  7. // File : test/gtx/bit.cpp
  8. ///////////////////////////////////////////////////////////////////////////////////////////////////
  9. #define GLM_FORCE_RADIANS
  10. #include <glm/gtx/bit.hpp>
  11. #include <glm/gtc/type_precision.hpp>
  12. #include <emmintrin.h>
  13. #if(GLM_ARCH != GLM_ARCH_PURE)
  14. # include <glm/detail/intrinsic_integer.hpp>
  15. #endif
  16. #include <iostream>
  17. #include <vector>
  18. #include <ctime>
  // Expected outcome attached to a test vector.
  // bitRevert::test() treats SUCCESS as "computed value must equal the expected value"
  // and FAIL as "computed value must differ". ASSERT and STATIC_ASSERT are not used by
  // any code visible in this file.
  enum result
  {
      SUCCESS = 0,
      FAIL = 1,
      ASSERT = 2,
      STATIC_ASSERT = 3
  };
  26. namespace bitRevert
  27. {
  28. template <typename genType>
  29. struct type
  30. {
  31. genType Value;
  32. genType Return;
  33. result Result;
  34. };
  35. typedef type<glm::uint64> typeU64;
  36. #if(((GLM_COMPILER & GLM_COMPILER_GCC) == GLM_COMPILER_GCC) && (GLM_COMPILER < GLM_COMPILER_GCC44))
  37. typeU64 const Data64[] =
  38. {
  39. {0xffffffffffffffffLLU, 0xffffffffffffffffLLU, SUCCESS},
  40. {0x0000000000000000LLU, 0x0000000000000000LLU, SUCCESS},
  41. {0xf000000000000000LLU, 0x000000000000000fLLU, SUCCESS},
  42. };
  43. #else
  44. typeU64 const Data64[] =
  45. {
  46. {0xffffffffffffffff, 0xffffffffffffffff, SUCCESS},
  47. {0x0000000000000000, 0x0000000000000000, SUCCESS},
  48. {0xf000000000000000, 0x000000000000000f, SUCCESS},
  49. };
  50. #endif
  51. int test()
  52. {
  53. glm::uint32 count = sizeof(Data64) / sizeof(typeU64);
  54. for(glm::uint32 i = 0; i < count; ++i)
  55. {
  56. glm::uint64 Return = glm::bitRevert(
  57. Data64[i].Value);
  58. bool Compare = Data64[i].Return == Return;
  59. if(Data64[i].Result == SUCCESS && Compare)
  60. continue;
  61. else if(Data64[i].Result == FAIL && !Compare)
  62. continue;
  63. std::cout << "glm::extractfield test fail on test " << i << std::endl;
  64. return 1;
  65. }
  66. return 0;
  67. }
  68. }//bitRevert
  69. namespace bitfieldInterleave
  70. {
  71. inline glm::uint64 fastBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  72. {
  73. glm::uint64 REG1;
  74. glm::uint64 REG2;
  75. REG1 = x;
  76. REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  77. REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  78. REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  79. REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  80. REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  81. REG2 = y;
  82. REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  83. REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  84. REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  85. REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  86. REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  87. return REG1 | (REG2 << 1);
  88. }
  89. inline glm::uint64 interleaveBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  90. {
  91. glm::uint64 REG1;
  92. glm::uint64 REG2;
  93. REG1 = x;
  94. REG2 = y;
  95. REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  96. REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  97. REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  98. REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  99. REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  100. REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  101. REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  102. REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  103. REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  104. REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  105. return REG1 | (REG2 << 1);
  106. }
  107. inline glm::uint64 loopBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  108. {
  109. static glm::uint64 const Mask[5] =
  110. {
  111. 0x5555555555555555,
  112. 0x3333333333333333,
  113. 0x0F0F0F0F0F0F0F0F,
  114. 0x00FF00FF00FF00FF,
  115. 0x0000FFFF0000FFFF
  116. };
  117. glm::uint64 REG1 = x;
  118. glm::uint64 REG2 = y;
  119. for(int i = 4; i >= 0; --i)
  120. {
  121. REG1 = ((REG1 << (1 << i)) | REG1) & Mask[i];
  122. REG2 = ((REG2 << (1 << i)) | REG2) & Mask[i];
  123. }
  124. return REG1 | (REG2 << 1);
  125. }
  126. inline glm::uint64 sseBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  127. {
  128. GLM_ALIGN(16) glm::uint32 const Array[4] = {x, 0, y, 0};
  129. __m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
  130. __m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
  131. __m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
  132. __m128i const Mask1 = _mm_set1_epi32(0x33333333);
  133. __m128i const Mask0 = _mm_set1_epi32(0x55555555);
  134. __m128i Reg1;
  135. __m128i Reg2;
  136. // REG1 = x;
  137. // REG2 = y;
  138. Reg1 = _mm_load_si128((__m128i*)Array);
  139. //REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  140. //REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  141. Reg2 = _mm_slli_si128(Reg1, 2);
  142. Reg1 = _mm_or_si128(Reg2, Reg1);
  143. Reg1 = _mm_and_si128(Reg1, Mask4);
  144. //REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  145. //REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  146. Reg2 = _mm_slli_si128(Reg1, 1);
  147. Reg1 = _mm_or_si128(Reg2, Reg1);
  148. Reg1 = _mm_and_si128(Reg1, Mask3);
  149. //REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  150. //REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  151. Reg2 = _mm_slli_epi32(Reg1, 4);
  152. Reg1 = _mm_or_si128(Reg2, Reg1);
  153. Reg1 = _mm_and_si128(Reg1, Mask2);
  154. //REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  155. //REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  156. Reg2 = _mm_slli_epi32(Reg1, 2);
  157. Reg1 = _mm_or_si128(Reg2, Reg1);
  158. Reg1 = _mm_and_si128(Reg1, Mask1);
  159. //REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  160. //REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  161. Reg2 = _mm_slli_epi32(Reg1, 1);
  162. Reg1 = _mm_or_si128(Reg2, Reg1);
  163. Reg1 = _mm_and_si128(Reg1, Mask0);
  164. //return REG1 | (REG2 << 1);
  165. Reg2 = _mm_slli_epi32(Reg1, 1);
  166. Reg2 = _mm_srli_si128(Reg2, 8);
  167. Reg1 = _mm_or_si128(Reg1, Reg2);
  168. GLM_ALIGN(16) glm::uint64 Result[2];
  169. _mm_store_si128((__m128i*)Result, Reg1);
  170. return Result[0];
  171. }
  172. inline glm::uint64 sseUnalignedBitfieldInterleave(glm::uint32 x, glm::uint32 y)
  173. {
  174. glm::uint32 const Array[4] = {x, 0, y, 0};
  175. __m128i const Mask4 = _mm_set1_epi32(0x0000FFFF);
  176. __m128i const Mask3 = _mm_set1_epi32(0x00FF00FF);
  177. __m128i const Mask2 = _mm_set1_epi32(0x0F0F0F0F);
  178. __m128i const Mask1 = _mm_set1_epi32(0x33333333);
  179. __m128i const Mask0 = _mm_set1_epi32(0x55555555);
  180. __m128i Reg1;
  181. __m128i Reg2;
  182. // REG1 = x;
  183. // REG2 = y;
  184. Reg1 = _mm_loadu_si128((__m128i*)Array);
  185. //REG1 = ((REG1 << 16) | REG1) & glm::uint64(0x0000FFFF0000FFFF);
  186. //REG2 = ((REG2 << 16) | REG2) & glm::uint64(0x0000FFFF0000FFFF);
  187. Reg2 = _mm_slli_si128(Reg1, 2);
  188. Reg1 = _mm_or_si128(Reg2, Reg1);
  189. Reg1 = _mm_and_si128(Reg1, Mask4);
  190. //REG1 = ((REG1 << 8) | REG1) & glm::uint64(0x00FF00FF00FF00FF);
  191. //REG2 = ((REG2 << 8) | REG2) & glm::uint64(0x00FF00FF00FF00FF);
  192. Reg2 = _mm_slli_si128(Reg1, 1);
  193. Reg1 = _mm_or_si128(Reg2, Reg1);
  194. Reg1 = _mm_and_si128(Reg1, Mask3);
  195. //REG1 = ((REG1 << 4) | REG1) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  196. //REG2 = ((REG2 << 4) | REG2) & glm::uint64(0x0F0F0F0F0F0F0F0F);
  197. Reg2 = _mm_slli_epi32(Reg1, 4);
  198. Reg1 = _mm_or_si128(Reg2, Reg1);
  199. Reg1 = _mm_and_si128(Reg1, Mask2);
  200. //REG1 = ((REG1 << 2) | REG1) & glm::uint64(0x3333333333333333);
  201. //REG2 = ((REG2 << 2) | REG2) & glm::uint64(0x3333333333333333);
  202. Reg2 = _mm_slli_epi32(Reg1, 2);
  203. Reg1 = _mm_or_si128(Reg2, Reg1);
  204. Reg1 = _mm_and_si128(Reg1, Mask1);
  205. //REG1 = ((REG1 << 1) | REG1) & glm::uint64(0x5555555555555555);
  206. //REG2 = ((REG2 << 1) | REG2) & glm::uint64(0x5555555555555555);
  207. Reg2 = _mm_slli_epi32(Reg1, 1);
  208. Reg1 = _mm_or_si128(Reg2, Reg1);
  209. Reg1 = _mm_and_si128(Reg1, Mask0);
  210. //return REG1 | (REG2 << 1);
  211. Reg2 = _mm_slli_epi32(Reg1, 1);
  212. Reg2 = _mm_srli_si128(Reg2, 8);
  213. Reg1 = _mm_or_si128(Reg1, Reg2);
  214. glm::uint64 Result[2];
  215. _mm_storeu_si128((__m128i*)Result, Reg1);
  216. return Result[0];
  217. }
  218. int test()
  219. {
  220. glm::uint32 x_max = 1 << 11;
  221. glm::uint32 y_max = 1 << 10;
  222. // ALU
  223. std::vector<glm::uint64> Data(x_max * y_max);
  224. std::vector<glm::u32vec2> Param(x_max * y_max);
  225. for(glm::uint32 i = 0; i < Param.size(); ++i)
  226. Param[i] = glm::u32vec2(i % x_max, i / y_max);
  227. {
  228. for(glm::uint32 y = 0; y < (1 << 10); ++y)
  229. for(glm::uint32 x = 0; x < (1 << 10); ++x)
  230. {
  231. glm::uint64 A = glm::bitfieldInterleave(x, y);
  232. glm::uint64 B = fastBitfieldInterleave(x, y);
  233. glm::uint64 C = loopBitfieldInterleave(x, y);
  234. glm::uint64 D = interleaveBitfieldInterleave(x, y);
  235. glm::uint64 E = sseBitfieldInterleave(x, y);
  236. glm::uint64 F = sseUnalignedBitfieldInterleave(x, y);
  237. assert(A == B);
  238. assert(A == C);
  239. assert(A == D);
  240. assert(A == E);
  241. assert(A == F);
  242. # if(GLM_ARCH != GLM_ARCH_PURE)
  243. __m128i G = glm::detail::_mm_bit_interleave_si128(_mm_set_epi32(0, y, 0, x));
  244. glm::uint64 Result[2];
  245. _mm_storeu_si128((__m128i*)Result, G);
  246. assert(A == Result[0]);
  247. # endif//(GLM_ARCH != GLM_ARCH_PURE)
  248. }
  249. }
  250. {
  251. for(glm::uint8 y = 0; y < 127; ++y)
  252. for(glm::uint8 x = 0; x < 127; ++x)
  253. {
  254. glm::uint64 A(glm::bitfieldInterleave(glm::uint8(x), glm::uint8(y)));
  255. glm::uint64 B(glm::bitfieldInterleave(glm::uint16(x), glm::uint16(y)));
  256. glm::uint64 C(glm::bitfieldInterleave(glm::uint32(x), glm::uint32(y)));
  257. glm::int64 D(glm::bitfieldInterleave(glm::int8(x), glm::int8(y)));
  258. glm::int64 E(glm::bitfieldInterleave(glm::int16(x), glm::int16(y)));
  259. glm::int64 F(glm::bitfieldInterleave(glm::int32(x), glm::int32(y)));
  260. assert(D == E);
  261. assert(D == F);
  262. }
  263. }
  264. {
  265. std::clock_t LastTime = std::clock();
  266. for(std::size_t i = 0; i < Data.size(); ++i)
  267. Data[i] = glm::bitfieldInterleave(Param[i].x, Param[i].y);
  268. std::clock_t Time = std::clock() - LastTime;
  269. std::cout << "glm::bitfieldInterleave Time " << Time << " clocks" << std::endl;
  270. }
  271. {
  272. std::clock_t LastTime = std::clock();
  273. for(std::size_t i = 0; i < Data.size(); ++i)
  274. Data[i] = fastBitfieldInterleave(Param[i].x, Param[i].y);
  275. std::clock_t Time = std::clock() - LastTime;
  276. std::cout << "fastBitfieldInterleave Time " << Time << " clocks" << std::endl;
  277. }
  278. {
  279. std::clock_t LastTime = std::clock();
  280. for(std::size_t i = 0; i < Data.size(); ++i)
  281. Data[i] = loopBitfieldInterleave(Param[i].x, Param[i].y);
  282. std::clock_t Time = std::clock() - LastTime;
  283. std::cout << "loopBitfieldInterleave Time " << Time << " clocks" << std::endl;
  284. }
  285. {
  286. std::clock_t LastTime = std::clock();
  287. for(std::size_t i = 0; i < Data.size(); ++i)
  288. Data[i] = interleaveBitfieldInterleave(Param[i].x, Param[i].y);
  289. std::clock_t Time = std::clock() - LastTime;
  290. std::cout << "interleaveBitfieldInterleave Time " << Time << " clocks" << std::endl;
  291. }
  292. {
  293. std::clock_t LastTime = std::clock();
  294. for(std::size_t i = 0; i < Data.size(); ++i)
  295. Data[i] = sseBitfieldInterleave(Param[i].x, Param[i].y);
  296. std::clock_t Time = std::clock() - LastTime;
  297. std::cout << "sseBitfieldInterleave Time " << Time << " clocks" << std::endl;
  298. }
  299. {
  300. std::clock_t LastTime = std::clock();
  301. for(std::size_t i = 0; i < Data.size(); ++i)
  302. Data[i] = sseUnalignedBitfieldInterleave(Param[i].x, Param[i].y);
  303. std::clock_t Time = std::clock() - LastTime;
  304. std::cout << "sseUnalignedBitfieldInterleave Time " << Time << " clocks" << std::endl;
  305. }
  306. {
  307. std::clock_t LastTime = std::clock();
  308. for(std::size_t i = 0; i < Data.size(); ++i)
  309. Data[i] = glm::bitfieldInterleave(Param[i].x, Param[i].y, Param[i].x);
  310. std::clock_t Time = std::clock() - LastTime;
  311. std::cout << "glm::detail::bitfieldInterleave Time " << Time << " clocks" << std::endl;
  312. }
  313. # if(GLM_ARCH != GLM_ARCH_PURE)
  314. {
  315. // SIMD
  316. std::vector<__m128i> SimdData(x_max * y_max);
  317. std::vector<__m128i> SimdParam(x_max * y_max);
  318. for(int i = 0; i < SimdParam.size(); ++i)
  319. SimdParam[i] = _mm_set_epi32(i % x_max, 0, i / y_max, 0);
  320. std::clock_t LastTime = std::clock();
  321. for(std::size_t i = 0; i < SimdData.size(); ++i)
  322. SimdData[i] = glm::detail::_mm_bit_interleave_si128(SimdParam[i]);
  323. std::clock_t Time = std::clock() - LastTime;
  324. std::cout << "_mm_bit_interleave_si128 Time " << Time << " clocks" << std::endl;
  325. }
  326. # endif//(GLM_ARCH != GLM_ARCH_PURE)
  327. return 0;
  328. }
  329. }
  330. namespace bitfieldInterleave3
  331. {
  // Reference three-way bit interleave: bit i of x, y and z is placed at bit
  // 3*i, 3*i + 1 and 3*i + 2 of the result respectively.
  //
  // PARAM is the operand type, RET the (integer) result type. Source bits whose
  // destination does not fit in RET are dropped. They are skipped explicitly because
  // shifting by the full width of RET or more is undefined behaviour, which the
  // previous formulation hit for e.g. 32-bit operands with a 64-bit result.
  template <typename PARAM, typename RET>
  inline RET refBitfieldInterleave(PARAM x, PARAM y, PARAM z)
  {
      std::size_t const ParamBits = sizeof(PARAM) * 8;
      std::size_t const ResultBits = sizeof(RET) * 8;

      RET const Source[3] = {RET(x), RET(y), RET(z)};

      RET Result = 0;
      for(std::size_t Bit = 0; Bit < ParamBits; ++Bit)
      for(std::size_t Component = 0; Component < 3; ++Component)
      {
          std::size_t const Target = Bit * 3 + Component;
          if(Target >= ResultBits)
              continue; // destination lies outside RET (Target >= Bit, so Bit is in range below)

          Result |= ((Source[Component] >> Bit) & RET(1)) << Target;
      }
      return Result;
  }
  344. int test()
  345. {
  346. int Error(0);
  347. glm::uint16 x_max = 1 << 11;
  348. glm::uint16 y_max = 1 << 11;
  349. glm::uint16 z_max = 1 << 11;
  350. for(glm::uint16 z = 0; z < z_max; z += 27)
  351. for(glm::uint16 y = 0; y < y_max; y += 27)
  352. for(glm::uint16 x = 0; x < x_max; x += 27)
  353. {
  354. glm::uint64 ResultA = refBitfieldInterleave<glm::uint16, glm::uint64>(x, y, z);
  355. glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z);
  356. Error += ResultA == ResultB ? 0 : 1;
  357. }
  358. return Error;
  359. }
  360. }
  361. namespace bitfieldInterleave4
  362. {
  // Reference four-way bit interleave: bit i of x, y, z and w is placed at bit
  // 4*i, 4*i + 1, 4*i + 2 and 4*i + 3 of the result respectively.
  //
  // PARAM is the operand type, RET the (integer) result type. Source bits whose
  // destination does not fit in RET are dropped. They are skipped explicitly because
  // shifting by the full width of RET or more is undefined behaviour, which the
  // previous formulation hit as soon as PARAM was wider than a quarter of RET.
  template <typename PARAM, typename RET>
  inline RET loopBitfieldInterleave(PARAM x, PARAM y, PARAM z, PARAM w)
  {
      std::size_t const ParamBits = sizeof(PARAM) * 8;
      std::size_t const ResultBits = sizeof(RET) * 8;

      RET const v[4] = {RET(x), RET(y), RET(z), RET(w)};

      RET Result = 0;
      for(std::size_t Bit = 0; Bit < ParamBits; ++Bit)
      for(std::size_t Component = 0; Component < 4; ++Component)
      {
          std::size_t const Target = Bit * 4 + Component;
          if(Target >= ResultBits)
              continue; // destination lies outside RET (Target >= Bit, so Bit is in range below)

          Result |= ((v[Component] >> Bit) & RET(1)) << Target;
      }
      return Result;
  }
  377. int test()
  378. {
  379. int Error(0);
  380. glm::uint16 x_max = 1 << 11;
  381. glm::uint16 y_max = 1 << 11;
  382. glm::uint16 z_max = 1 << 11;
  383. glm::uint16 w_max = 1 << 11;
  384. for(glm::uint16 w = 0; w < w_max; w += 27)
  385. for(glm::uint16 z = 0; z < z_max; z += 27)
  386. for(glm::uint16 y = 0; y < y_max; y += 27)
  387. for(glm::uint16 x = 0; x < x_max; x += 27)
  388. {
  389. glm::uint64 ResultA = loopBitfieldInterleave<glm::uint16, glm::uint64>(x, y, z, w);
  390. glm::uint64 ResultB = glm::bitfieldInterleave(x, y, z, w);
  391. Error += ResultA == ResultB ? 0 : 1;
  392. }
  393. return Error;
  394. }
  395. }
  396. int main()
  397. {
  398. int Error(0);
  399. Error += ::bitfieldInterleave3::test();
  400. Error += ::bitfieldInterleave4::test();
  401. Error += ::bitfieldInterleave::test();
  402. Error += ::bitRevert::test();
  403. return Error;
  404. }