core_func_integer.cpp 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587
  1. #include <glm/integer.hpp>
  2. #include <glm/vector_relational.hpp>
  3. #include <glm/ext/vector_int1.hpp>
  4. #include <glm/ext/vector_int2.hpp>
  5. #include <glm/ext/vector_int3.hpp>
  6. #include <glm/ext/vector_int4.hpp>
  7. #include <glm/ext/vector_uint1.hpp>
  8. #include <glm/ext/vector_uint2.hpp>
  9. #include <glm/ext/vector_uint3.hpp>
  10. #include <glm/ext/vector_uint4.hpp>
  11. #include <glm/ext/scalar_int_sized.hpp>
  12. #include <glm/ext/scalar_uint_sized.hpp>
  13. #include <vector>
  14. #include <ctime>
  15. #include <cstdio>
  16. enum result
  17. {
  18. SUCCESS,
  19. FAIL,
  20. ASSERT,
  21. STATIC_ASSERT
  22. };
  23. namespace bitfieldInsert
  24. {
  25. template<typename genType>
  26. struct type
  27. {
  28. genType Base;
  29. genType Insert;
  30. int Offset;
  31. int Bits;
  32. genType Return;
  33. };
  34. typedef type<glm::uint> typeU32;
  35. typeU32 const Data32[] =
  36. {
  37. {0x00000000, 0xffffffff, 0, 32, 0xffffffff},
  38. {0x00000000, 0xffffffff, 0, 31, 0x7fffffff},
  39. {0x00000000, 0xffffffff, 0, 0, 0x00000000},
  40. {0xff000000, 0x000000ff, 8, 8, 0xff00ff00},
  41. {0xffff0000, 0xffff0000, 16, 16, 0x00000000},
  42. {0x0000ffff, 0x0000ffff, 16, 16, 0xffffffff}
  43. };
  44. static int test()
  45. {
  46. int Error = 0;
  47. glm::uint count = sizeof(Data32) / sizeof(typeU32);
  48. for(glm::uint i = 0; i < count; ++i)
  49. {
  50. glm::uint Return = glm::bitfieldInsert(
  51. Data32[i].Base,
  52. Data32[i].Insert,
  53. Data32[i].Offset,
  54. Data32[i].Bits);
  55. Error += Data32[i].Return == Return ? 0 : 1;
  56. }
  57. return Error;
  58. }
  59. }//bitfieldInsert
  60. namespace bitfieldExtract
  61. {
  62. template<typename genType>
  63. struct type
  64. {
  65. genType Value;
  66. int Offset;
  67. int Bits;
  68. genType Return;
  69. result Result;
  70. };
  71. typedef type<glm::uint> typeU32;
  72. typeU32 const Data32[] =
  73. {
  74. {0xffffffff, 0,32, 0xffffffff, SUCCESS},
  75. {0xffffffff, 8, 0, 0x00000000, SUCCESS},
  76. {0x00000000, 0,32, 0x00000000, SUCCESS},
  77. {0x0f0f0f0f, 0,32, 0x0f0f0f0f, SUCCESS},
  78. {0x00000000, 8, 0, 0x00000000, SUCCESS},
  79. {0x80000000,31, 1, 0x00000001, SUCCESS},
  80. {0x7fffffff,31, 1, 0x00000000, SUCCESS},
  81. {0x00000300, 8, 8, 0x00000003, SUCCESS},
  82. {0x0000ff00, 8, 8, 0x000000ff, SUCCESS},
  83. {0xfffffff0, 0, 5, 0x00000010, SUCCESS},
  84. {0x000000ff, 1, 3, 0x00000007, SUCCESS},
  85. {0x000000ff, 0, 3, 0x00000007, SUCCESS},
  86. {0x00000000, 0, 2, 0x00000000, SUCCESS},
  87. {0xffffffff, 0, 8, 0x000000ff, SUCCESS},
  88. {0xffff0000,16,16, 0x0000ffff, SUCCESS},
  89. {0xfffffff0, 0, 8, 0x00000000, FAIL},
  90. {0xffffffff,16,16, 0x00000000, FAIL},
  91. //{0xffffffff,32, 1, 0x00000000, ASSERT}, // Throw an assert
  92. //{0xffffffff, 0,33, 0x00000000, ASSERT}, // Throw an assert
  93. //{0xffffffff,16,16, 0x00000000, ASSERT}, // Throw an assert
  94. };
  95. static int test()
  96. {
  97. int Error = 0;
  98. glm::uint count = sizeof(Data32) / sizeof(typeU32);
  99. for(glm::uint i = 0; i < count; ++i)
  100. {
  101. glm::uint Return = glm::bitfieldExtract(
  102. Data32[i].Value,
  103. Data32[i].Offset,
  104. Data32[i].Bits);
  105. bool Compare = Data32[i].Return == Return;
  106. if(Data32[i].Result == SUCCESS && Compare)
  107. continue;
  108. else if(Data32[i].Result == FAIL && !Compare)
  109. continue;
  110. Error += 1;
  111. }
  112. return Error;
  113. }
  114. }//extractField
  115. namespace bitfieldReverse
  116. {
  117. /*
  118. GLM_FUNC_QUALIFIER unsigned int bitfieldReverseLoop(unsigned int v)
  119. {
  120. unsigned int Result(0);
  121. unsigned int const BitSize = static_cast<unsigned int>(sizeof(unsigned int) * 8);
  122. for(unsigned int i = 0; i < BitSize; ++i)
  123. {
  124. unsigned int const BitSet(v & (static_cast<unsigned int>(1) << i));
  125. unsigned int const BitFirst(BitSet >> i);
  126. Result |= BitFirst << (BitSize - 1 - i);
  127. }
  128. return Result;
  129. }
  130. GLM_FUNC_QUALIFIER glm::uint64_t bitfieldReverseLoop(glm::uint64_t v)
  131. {
  132. glm::uint64_t Result(0);
  133. glm::uint64_t const BitSize = static_cast<glm::uint64_t>(sizeof(unsigned int) * 8);
  134. for(glm::uint64_t i = 0; i < BitSize; ++i)
  135. {
  136. glm::uint64_t const BitSet(v & (static_cast<glm::uint64_t>(1) << i));
  137. glm::uint64_t const BitFirst(BitSet >> i);
  138. Result |= BitFirst << (BitSize - 1 - i);
  139. }
  140. return Result;
  141. }
  142. */
  143. template<glm::length_t L, typename T, glm::qualifier Q>
  144. GLM_FUNC_QUALIFIER glm::vec<L, T, Q> bitfieldReverseLoop(glm::vec<L, T, Q> const& v)
  145. {
  146. static_assert(std::numeric_limits<T>::is_integer, "'bitfieldReverse' only accept integer values");
  147. glm::vec<L, T, Q> Result(0);
  148. T const BitSize = static_cast<T>(sizeof(T) * 8);
  149. for(T i = 0; i < BitSize; ++i)
  150. {
  151. glm::vec<L, T, Q> const BitSet(v & (static_cast<T>(1) << i));
  152. glm::vec<L, T, Q> const BitFirst(BitSet >> i);
  153. Result |= BitFirst << (BitSize - 1 - i);
  154. }
  155. return Result;
  156. }
  157. template<typename T>
  158. GLM_FUNC_QUALIFIER T bitfieldReverseLoop(T v)
  159. {
  160. return bitfieldReverseLoop(glm::vec<1, T>(v)).x;
  161. }
  162. GLM_FUNC_QUALIFIER glm::uint32 bitfieldReverseUint32(glm::uint32 x)
  163. {
  164. x = (x & 0x55555555) << 1 | (x & 0xAAAAAAAA) >> 1;
  165. x = (x & 0x33333333) << 2 | (x & 0xCCCCCCCC) >> 2;
  166. x = (x & 0x0F0F0F0F) << 4 | (x & 0xF0F0F0F0) >> 4;
  167. x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;
  168. x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16;
  169. return x;
  170. }
  171. GLM_FUNC_QUALIFIER glm::uint64 bitfieldReverseUint64(glm::uint64 x)
  172. {
  173. x = (x & 0x5555555555555555) << 1 | (x & 0xAAAAAAAAAAAAAAAA) >> 1;
  174. x = (x & 0x3333333333333333) << 2 | (x & 0xCCCCCCCCCCCCCCCC) >> 2;
  175. x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x & 0xF0F0F0F0F0F0F0F0) >> 4;
  176. x = (x & 0x00FF00FF00FF00FF) << 8 | (x & 0xFF00FF00FF00FF00) >> 8;
  177. x = (x & 0x0000FFFF0000FFFF) << 16 | (x & 0xFFFF0000FFFF0000) >> 16;
  178. x = (x & 0x00000000FFFFFFFF) << 32 | (x & 0xFFFFFFFF00000000) >> 32;
  179. return x;
  180. }
  181. template<bool EXEC = false>
  182. struct compute_bitfieldReverseStep
  183. {
  184. template<glm::length_t L, typename T, glm::qualifier Q>
  185. GLM_FUNC_QUALIFIER static glm::vec<L, T, Q> call(glm::vec<L, T, Q> const& v, T, T)
  186. {
  187. return v;
  188. }
  189. };
  190. template<>
  191. struct compute_bitfieldReverseStep<true>
  192. {
  193. template<glm::length_t L, typename T, glm::qualifier Q>
  194. GLM_FUNC_QUALIFIER static glm::vec<L, T, Q> call(glm::vec<L, T, Q> const& v, T Mask, T Shift)
  195. {
  196. return (v & Mask) << Shift | (v & (~Mask)) >> Shift;
  197. }
  198. };
  199. # if GLM_COMPILER & GLM_COMPILER_VC
  200. # pragma warning(push)
  201. # pragma warning(disable : 4309)
  202. # endif
  203. template<glm::length_t L, typename T, glm::qualifier Q>
  204. GLM_FUNC_QUALIFIER glm::vec<L, T, Q> bitfieldReverseOps(glm::vec<L, T, Q> const& v)
  205. {
  206. glm::vec<L, T, Q> x(v);
  207. x = compute_bitfieldReverseStep<sizeof(T) * 8 >= 2>::call(x, static_cast<T>(0x5555555555555555ull), static_cast<T>( 1));
  208. x = compute_bitfieldReverseStep<sizeof(T) * 8 >= 4>::call(x, static_cast<T>(0x3333333333333333ull), static_cast<T>( 2));
  209. x = compute_bitfieldReverseStep<sizeof(T) * 8 >= 8>::call(x, static_cast<T>(0x0F0F0F0F0F0F0F0Full), static_cast<T>( 4));
  210. x = compute_bitfieldReverseStep<sizeof(T) * 8 >= 16>::call(x, static_cast<T>(0x00FF00FF00FF00FFull), static_cast<T>( 8));
  211. x = compute_bitfieldReverseStep<sizeof(T) * 8 >= 32>::call(x, static_cast<T>(0x0000FFFF0000FFFFull), static_cast<T>(16));
  212. x = compute_bitfieldReverseStep<sizeof(T) * 8 >= 64>::call(x, static_cast<T>(0x00000000FFFFFFFFull), static_cast<T>(32));
  213. return x;
  214. }
  215. # if GLM_COMPILER & GLM_COMPILER_VC
  216. # pragma warning(pop)
  217. # endif
  218. template<typename genType>
  219. GLM_FUNC_QUALIFIER genType bitfieldReverseOps(genType x)
  220. {
  221. return bitfieldReverseOps(glm::vec<1, genType, glm::defaultp>(x)).x;
  222. }
  223. #if GLM_COMPILER & GLM_COMPILER_CLANG
  224. # pragma clang diagnostic push
  225. # pragma clang diagnostic ignored "-Wpadded"
  226. #endif
  227. template<typename genType>
  228. struct type
  229. {
  230. genType Value;
  231. genType Return;
  232. result Result;
  233. };
  234. #if GLM_COMPILER & GLM_COMPILER_CLANG
  235. # pragma clang diagnostic pop
  236. #endif
  237. typedef type<glm::uint> typeU32;
  238. typeU32 const Data32[] =
  239. {
  240. {0x00000001, 0x80000000, SUCCESS},
  241. {0x0000000f, 0xf0000000, SUCCESS},
  242. {0x000000ff, 0xff000000, SUCCESS},
  243. {0xf0000000, 0x0000000f, SUCCESS},
  244. {0xff000000, 0x000000ff, SUCCESS},
  245. {0xffffffff, 0xffffffff, SUCCESS},
  246. {0x00000000, 0x00000000, SUCCESS}
  247. };
  248. typedef type<glm::uint64> typeU64;
  249. typeU64 const Data64[] =
  250. {
  251. {0x00000000000000ff, 0xff00000000000000, SUCCESS},
  252. {0x000000000000000f, 0xf000000000000000, SUCCESS},
  253. {0xf000000000000000, 0x000000000000000f, SUCCESS},
  254. {0xffffffffffffffff, 0xffffffffffffffff, SUCCESS},
  255. {0x0000000000000000, 0x0000000000000000, SUCCESS}
  256. };
  257. static int test32_bitfieldReverse()
  258. {
  259. int Error = 0;
  260. std::size_t const Count = sizeof(Data32) / sizeof(typeU32);
  261. for(std::size_t i = 0; i < Count; ++i)
  262. {
  263. glm::uint Return = glm::bitfieldReverse(Data32[i].Value);
  264. bool Compare = Data32[i].Return == Return;
  265. if(Data32[i].Result == SUCCESS)
  266. Error += Compare ? 0 : 1;
  267. else
  268. Error += Compare ? 1 : 0;
  269. }
  270. return Error;
  271. }
  272. static int test32_bitfieldReverseLoop()
  273. {
  274. int Error = 0;
  275. std::size_t const Count = sizeof(Data32) / sizeof(typeU32);
  276. for(std::size_t i = 0; i < Count; ++i)
  277. {
  278. glm::uint Return = bitfieldReverseLoop(Data32[i].Value);
  279. bool Compare = Data32[i].Return == Return;
  280. if(Data32[i].Result == SUCCESS)
  281. Error += Compare ? 0 : 1;
  282. else
  283. Error += Compare ? 1 : 0;
  284. }
  285. return Error;
  286. }
  287. static int test32_bitfieldReverseUint32()
  288. {
  289. int Error = 0;
  290. std::size_t const Count = sizeof(Data32) / sizeof(typeU32);
  291. for(std::size_t i = 0; i < Count; ++i)
  292. {
  293. glm::uint Return = bitfieldReverseUint32(Data32[i].Value);
  294. bool Compare = Data32[i].Return == Return;
  295. if(Data32[i].Result == SUCCESS)
  296. Error += Compare ? 0 : 1;
  297. else
  298. Error += Compare ? 1 : 0;
  299. }
  300. return Error;
  301. }
  302. static int test32_bitfieldReverseOps()
  303. {
  304. int Error = 0;
  305. std::size_t const Count = sizeof(Data32) / sizeof(typeU32);
  306. for(std::size_t i = 0; i < Count; ++i)
  307. {
  308. glm::uint Return = bitfieldReverseOps(Data32[i].Value);
  309. bool Compare = Data32[i].Return == Return;
  310. if(Data32[i].Result == SUCCESS)
  311. Error += Compare ? 0 : 1;
  312. else
  313. Error += Compare ? 1 : 0;
  314. }
  315. return Error;
  316. }
  317. static int test64_bitfieldReverse()
  318. {
  319. int Error = 0;
  320. std::size_t const Count = sizeof(Data64) / sizeof(typeU64);
  321. for(std::size_t i = 0; i < Count; ++i)
  322. {
  323. glm::uint64 Return = glm::bitfieldReverse(Data64[i].Value);
  324. bool Compare = Data64[i].Return == Return;
  325. if(Data64[i].Result == SUCCESS)
  326. Error += Compare ? 0 : 1;
  327. else
  328. Error += Compare ? 1 : 0;
  329. }
  330. return Error;
  331. }
  332. static int test64_bitfieldReverseLoop()
  333. {
  334. int Error = 0;
  335. std::size_t const Count = sizeof(Data64) / sizeof(typeU64);
  336. for(std::size_t i = 0; i < Count; ++i)
  337. {
  338. glm::uint64 Return = bitfieldReverseLoop(Data64[i].Value);
  339. bool Compare = Data64[i].Return == Return;
  340. if(Data32[i].Result == SUCCESS)
  341. Error += Compare ? 0 : 1;
  342. else
  343. Error += Compare ? 1 : 0;
  344. }
  345. return Error;
  346. }
  347. static int test64_bitfieldReverseUint64()
  348. {
  349. int Error = 0;
  350. std::size_t const Count = sizeof(Data64) / sizeof(typeU64);
  351. for(std::size_t i = 0; i < Count; ++i)
  352. {
  353. glm::uint64 Return = bitfieldReverseUint64(Data64[i].Value);
  354. bool Compare = Data64[i].Return == Return;
  355. if(Data64[i].Result == SUCCESS)
  356. Error += Compare ? 0 : 1;
  357. else
  358. Error += Compare ? 1 : 0;
  359. }
  360. return Error;
  361. }
  362. static int test64_bitfieldReverseOps()
  363. {
  364. int Error = 0;
  365. std::size_t const Count = sizeof(Data64) / sizeof(typeU64);
  366. for(std::size_t i = 0; i < Count; ++i)
  367. {
  368. glm::uint64 Return = bitfieldReverseOps(Data64[i].Value);
  369. bool Compare = Data64[i].Return == Return;
  370. if(Data64[i].Result == SUCCESS)
  371. Error += Compare ? 0 : 1;
  372. else
  373. Error += Compare ? 1 : 0;
  374. }
  375. return Error;
  376. }
  377. static int test()
  378. {
  379. int Error = 0;
  380. Error += test32_bitfieldReverse();
  381. Error += test32_bitfieldReverseLoop();
  382. Error += test32_bitfieldReverseUint32();
  383. Error += test32_bitfieldReverseOps();
  384. Error += test64_bitfieldReverse();
  385. Error += test64_bitfieldReverseLoop();
  386. Error += test64_bitfieldReverseUint64();
  387. Error += test64_bitfieldReverseOps();
  388. return Error;
  389. }
  390. static int perf32(glm::uint32 Count)
  391. {
  392. int Error = 0;
  393. std::vector<glm::uint32> Data;
  394. Data.resize(static_cast<std::size_t>(Count));
  395. std::clock_t Timestamps0 = std::clock();
  396. for(glm::uint32 k = 0; k < Count; ++k)
  397. Data[k] = glm::bitfieldReverse(k);
  398. std::clock_t Timestamps1 = std::clock();
  399. for(glm::uint32 k = 0; k < Count; ++k)
  400. Data[k] = bitfieldReverseLoop(k);
  401. std::clock_t Timestamps2 = std::clock();
  402. for(glm::uint32 k = 0; k < Count; ++k)
  403. Data[k] = bitfieldReverseUint32(k);
  404. std::clock_t Timestamps3 = std::clock();
  405. for(glm::uint32 k = 0; k < Count; ++k)
  406. Data[k] = bitfieldReverseOps(k);
  407. std::clock_t Timestamps4 = std::clock();
  408. std::printf("glm::bitfieldReverse: %d clocks\n", static_cast<int>(Timestamps1 - Timestamps0));
  409. std::printf("bitfieldReverseLoop: %d clocks\n", static_cast<int>(Timestamps2 - Timestamps1));
  410. std::printf("bitfieldReverseUint32: %d clocks\n", static_cast<int>(Timestamps3 - Timestamps2));
  411. std::printf("bitfieldReverseOps: %d clocks\n", static_cast<int>(Timestamps4 - Timestamps3));
  412. return Error;
  413. }
  414. static int perf64(glm::uint64 Count)
  415. {
  416. int Error = 0;
  417. std::vector<glm::uint64> Data;
  418. Data.resize(static_cast<std::size_t>(Count));
  419. std::clock_t Timestamps0 = std::clock();
  420. for(glm::uint64 k = 0; k < Count; ++k)
  421. Data[static_cast<std::size_t>(k)] = glm::bitfieldReverse(k);
  422. std::clock_t Timestamps1 = std::clock();
  423. for(glm::uint64 k = 0; k < Count; ++k)
  424. Data[static_cast<std::size_t>(k)] = bitfieldReverseLoop<glm::uint64>(k);
  425. std::clock_t Timestamps2 = std::clock();
  426. for(glm::uint64 k = 0; k < Count; ++k)
  427. Data[static_cast<std::size_t>(k)] = bitfieldReverseUint64(k);
  428. std::clock_t Timestamps3 = std::clock();
  429. for(glm::uint64 k = 0; k < Count; ++k)
  430. Data[static_cast<std::size_t>(k)] = bitfieldReverseOps(k);
  431. std::clock_t Timestamps4 = std::clock();
  432. std::printf("glm::bitfieldReverse - 64: %d clocks\n", static_cast<int>(Timestamps1 - Timestamps0));
  433. std::printf("bitfieldReverseLoop - 64: %d clocks\n", static_cast<int>(Timestamps2 - Timestamps1));
  434. std::printf("bitfieldReverseUint - 64: %d clocks\n", static_cast<int>(Timestamps3 - Timestamps2));
  435. std::printf("bitfieldReverseOps - 64: %d clocks\n", static_cast<int>(Timestamps4 - Timestamps3));
  436. return Error;
  437. }
  438. static int perf(std::size_t Samples)
  439. {
  440. int Error = 0;
  441. Error += perf32(static_cast<glm::uint32>(Samples));
  442. Error += perf64(static_cast<glm::uint64>(Samples));
  443. return Error;
  444. }
  445. }//bitfieldReverse
  446. namespace findMSB
  447. {
  448. template<typename genType, typename retType>
  449. struct type
  450. {
  451. genType Value;
  452. retType Return;
  453. };
  454. # if GLM_HAS_BITSCAN_WINDOWS
  455. template<typename genIUType>
  456. static int findMSB_intrinsic(genIUType Value)
  457. {
  458. static_assert(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
  459. if(Value == 0)
  460. return -1;
  461. unsigned long Result(0);
  462. _BitScanReverse(&Result, Value);
  463. return int(Result);
  464. }
  465. # endif//GLM_HAS_BITSCAN_WINDOWS
  466. # if ((GLM_ARCH & GLM_ARCH_AVX_BIT) && (GLM_COMPILER & GLM_COMPILER_VC))
  467. template<typename genIUType>
  468. static int findMSB_avx(genIUType Value)
  469. {
  470. static_assert(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
  471. if(Value == 0)
  472. return -1;
  473. return int(_tzcnt_u32(Value));
  474. }
  475. # endif//((GLM_ARCH & GLM_ARCH_AVX_BIT) && (GLM_PLATFORM & GLM_PLATFORM_WINDOWS))
  476. template<typename genIUType>
  477. static int findMSB_095(genIUType Value)
  478. {
  479. static_assert(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
  480. if(Value == genIUType(0) || Value == genIUType(-1))
  481. return -1;
  482. else if(Value > 0)
  483. {
  484. genIUType Bit = genIUType(-1);
  485. for(genIUType tmp = Value; tmp > 0; tmp >>= 1, ++Bit){}
  486. return static_cast<int>(Bit);
  487. }
  488. else //if(Value < 0)
  489. {
  490. int const BitCount(sizeof(genIUType) * 8);
  491. int MostSignificantBit(-1);
  492. for(int BitIndex(0); BitIndex < BitCount; ++BitIndex)
  493. MostSignificantBit = (Value & (1 << BitIndex)) ? MostSignificantBit : BitIndex;
  494. assert(MostSignificantBit >= 0);
  495. return MostSignificantBit;
  496. }
  497. }
  498. template<typename genIUType>
  499. static int findMSB_nlz1(genIUType x)
  500. {
  501. static_assert(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
  502. if (x == 0)
  503. return -1;
  504. int n = 0;
  505. if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
  506. if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
  507. if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
  508. if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
  509. if (x <= 0x7FFFFFFF) {n = n + 1;}
  510. return 31 - n;
  511. }
  512. static int findMSB_nlz2(unsigned int x)
  513. {
  514. unsigned int y;
  515. int n = 32;
  516. y = x >>16; if (y != 0) {n = n -16; x = y;}
  517. y = x >> 8; if (y != 0) {n = n - 8; x = y;}
  518. y = x >> 4; if (y != 0) {n = n - 4; x = y;}
  519. y = x >> 2; if (y != 0) {n = n - 2; x = y;}
  520. y = x >> 1; if (y != 0) return n - 2;
  521. return 32 - (n - static_cast<int>(x));
  522. }
  523. static int findMSB_pop(unsigned int x)
  524. {
  525. x = x | (x >> 1);
  526. x = x | (x >> 2);
  527. x = x | (x >> 4);
  528. x = x | (x >> 8);
  529. x = x | (x >>16);
  530. return 31 - glm::bitCount(~x);
  531. }
  532. static int perf_int(std::size_t Count)
  533. {
  534. type<int, int> const Data[] =
  535. {
  536. {0x00000000, -1},
  537. {0x00000001, 0},
  538. {0x00000002, 1},
  539. {0x00000003, 1},
  540. {0x00000004, 2},
  541. {0x00000005, 2},
  542. {0x00000007, 2},
  543. {0x00000008, 3},
  544. {0x00000010, 4},
  545. {0x00000020, 5},
  546. {0x00000040, 6},
  547. {0x00000080, 7},
  548. {0x00000100, 8},
  549. {0x00000200, 9},
  550. {0x00000400, 10},
  551. {0x00000800, 11},
  552. {0x00001000, 12},
  553. {0x00002000, 13},
  554. {0x00004000, 14},
  555. {0x00008000, 15},
  556. {0x00010000, 16},
  557. {0x00020000, 17},
  558. {0x00040000, 18},
  559. {0x00080000, 19},
  560. {0x00100000, 20},
  561. {0x00200000, 21},
  562. {0x00400000, 22},
  563. {0x00800000, 23},
  564. {0x01000000, 24},
  565. {0x02000000, 25},
  566. {0x04000000, 26},
  567. {0x08000000, 27},
  568. {0x10000000, 28},
  569. {0x20000000, 29},
  570. {0x40000000, 30}
  571. };
  572. int Error(0);
  573. std::clock_t Timestamps0 = std::clock();
  574. for(std::size_t k = 0; k < Count; ++k)
  575. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int, int>); ++i)
  576. {
  577. int Result = glm::findMSB(Data[i].Value);
  578. Error += Data[i].Return == Result ? 0 : 1;
  579. }
  580. std::clock_t Timestamps1 = std::clock();
  581. for(std::size_t k = 0; k < Count; ++k)
  582. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int, int>); ++i)
  583. {
  584. int Result = findMSB_nlz1(Data[i].Value);
  585. Error += Data[i].Return == Result ? 0 : 1;
  586. }
  587. std::clock_t Timestamps2 = std::clock();
  588. for(std::size_t k = 0; k < Count; ++k)
  589. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int, int>); ++i)
  590. {
  591. int Result = findMSB_nlz2(static_cast<unsigned int>(Data[i].Value));
  592. Error += Data[i].Return == Result ? 0 : 1;
  593. }
  594. std::clock_t Timestamps3 = std::clock();
  595. for(std::size_t k = 0; k < Count; ++k)
  596. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int, int>); ++i)
  597. {
  598. int Result = findMSB_095(static_cast<unsigned int>(Data[i].Value));
  599. Error += Data[i].Return == Result ? 0 : 1;
  600. }
  601. std::clock_t Timestamps4 = std::clock();
  602. # if GLM_HAS_BITSCAN_WINDOWS
  603. for(std::size_t k = 0; k < Count; ++k)
  604. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int, int>); ++i)
  605. {
  606. int Result = findMSB_intrinsic(Data[i].Value);
  607. Error += Data[i].Return == Result ? 0 : 1;
  608. }
  609. # endif//GLM_HAS_BITSCAN_WINDOWS
  610. std::clock_t Timestamps5 = std::clock();
  611. for(std::size_t k = 0; k < Count; ++k)
  612. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int, int>); ++i)
  613. {
  614. int Result = findMSB_pop(static_cast<unsigned int>(Data[i].Value));
  615. Error += Data[i].Return == Result ? 0 : 1;
  616. }
  617. std::clock_t Timestamps6 = std::clock();
  618. # if ((GLM_ARCH & GLM_ARCH_AVX_BIT) && (GLM_COMPILER & GLM_COMPILER_VC))
  619. for(std::size_t k = 0; k < Count; ++k)
  620. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<int, int>); ++i)
  621. {
  622. int Result = findMSB_avx(Data[i].Value);
  623. Error += Data[i].Return == Result ? 0 : 1;
  624. }
  625. std::clock_t Timestamps7 = std::clock();
  626. # endif//((GLM_ARCH & GLM_ARCH_AVX_BIT) && (GLM_COMPILER & GLM_COMPILER_VC))
  627. std::printf("glm::findMSB: %d clocks\n", static_cast<int>(Timestamps1 - Timestamps0));
  628. std::printf("findMSB - nlz1: %d clocks\n", static_cast<int>(Timestamps2 - Timestamps1));
  629. std::printf("findMSB - nlz2: %d clocks\n", static_cast<int>(Timestamps3 - Timestamps2));
  630. std::printf("findMSB - 0.9.5: %d clocks\n", static_cast<int>(Timestamps4 - Timestamps3));
  631. # if GLM_HAS_BITSCAN_WINDOWS
  632. std::printf("findMSB - intrinsics: %d clocks\n", static_cast<int>(Timestamps5 - Timestamps4));
  633. # endif//GLM_HAS_BITSCAN_WINDOWS
  634. std::printf("findMSB - pop: %d clocks\n", static_cast<int>(Timestamps6 - Timestamps5));
  635. # if ((GLM_ARCH & GLM_ARCH_AVX_BIT) && (GLM_COMPILER & GLM_COMPILER_VC))
  636. std::printf("findMSB - avx tzcnt: %d clocks\n", static_cast<int>(Timestamps7 - Timestamps6));
  637. # endif//((GLM_ARCH & GLM_ARCH_AVX_BIT) && (GLM_COMPILER & GLM_COMPILER_VC))
  638. return Error;
  639. }
  640. static int test_ivec4()
  641. {
  642. type<glm::ivec4, glm::ivec4> const Data[] =
  643. {
  644. {glm::ivec4(0x00000000), glm::ivec4(-1)},
  645. {glm::ivec4(0x00000001), glm::ivec4( 0)},
  646. {glm::ivec4(0x00000002), glm::ivec4( 1)},
  647. {glm::ivec4(0x00000003), glm::ivec4( 1)},
  648. {glm::ivec4(0x00000004), glm::ivec4( 2)},
  649. {glm::ivec4(0x00000005), glm::ivec4( 2)},
  650. {glm::ivec4(0x00000007), glm::ivec4( 2)},
  651. {glm::ivec4(0x00000008), glm::ivec4( 3)},
  652. {glm::ivec4(0x00000010), glm::ivec4( 4)},
  653. {glm::ivec4(0x00000020), glm::ivec4( 5)},
  654. {glm::ivec4(0x00000040), glm::ivec4( 6)},
  655. {glm::ivec4(0x00000080), glm::ivec4( 7)},
  656. {glm::ivec4(0x00000100), glm::ivec4( 8)},
  657. {glm::ivec4(0x00000200), glm::ivec4( 9)},
  658. {glm::ivec4(0x00000400), glm::ivec4(10)},
  659. {glm::ivec4(0x00000800), glm::ivec4(11)},
  660. {glm::ivec4(0x00001000), glm::ivec4(12)},
  661. {glm::ivec4(0x00002000), glm::ivec4(13)},
  662. {glm::ivec4(0x00004000), glm::ivec4(14)},
  663. {glm::ivec4(0x00008000), glm::ivec4(15)},
  664. {glm::ivec4(0x00010000), glm::ivec4(16)},
  665. {glm::ivec4(0x00020000), glm::ivec4(17)},
  666. {glm::ivec4(0x00040000), glm::ivec4(18)},
  667. {glm::ivec4(0x00080000), glm::ivec4(19)},
  668. {glm::ivec4(0x00100000), glm::ivec4(20)},
  669. {glm::ivec4(0x00200000), glm::ivec4(21)},
  670. {glm::ivec4(0x00400000), glm::ivec4(22)},
  671. {glm::ivec4(0x00800000), glm::ivec4(23)},
  672. {glm::ivec4(0x01000000), glm::ivec4(24)},
  673. {glm::ivec4(0x02000000), glm::ivec4(25)},
  674. {glm::ivec4(0x04000000), glm::ivec4(26)},
  675. {glm::ivec4(0x08000000), glm::ivec4(27)},
  676. {glm::ivec4(0x10000000), glm::ivec4(28)},
  677. {glm::ivec4(0x20000000), glm::ivec4(29)},
  678. {glm::ivec4(0x40000000), glm::ivec4(30)}
  679. };
  680. int Error(0);
  681. for(std::size_t i = 0; i < sizeof(Data) / sizeof(type<glm::ivec4, glm::ivec4>); ++i)
  682. {
  683. glm::ivec4 Result0 = glm::findMSB(Data[i].Value);
  684. Error += glm::all(glm::equal(Data[i].Return, Result0)) ? 0 : 1;
  685. }
  686. return Error;
  687. }
  688. static int test_int()
  689. {
  690. typedef type<glm::uint, int> entry;
  691. entry const Data[] =
  692. {
  693. {0x00000000, -1},
  694. {0x00000001, 0},
  695. {0x00000002, 1},
  696. {0x00000003, 1},
  697. {0x00000004, 2},
  698. {0x00000005, 2},
  699. {0x00000007, 2},
  700. {0x00000008, 3},
  701. {0x00000010, 4},
  702. {0x00000020, 5},
  703. {0x00000040, 6},
  704. {0x00000080, 7},
  705. {0x00000100, 8},
  706. {0x00000200, 9},
  707. {0x00000400, 10},
  708. {0x00000800, 11},
  709. {0x00001000, 12},
  710. {0x00002000, 13},
  711. {0x00004000, 14},
  712. {0x00008000, 15},
  713. {0x00010000, 16},
  714. {0x00020000, 17},
  715. {0x00040000, 18},
  716. {0x00080000, 19},
  717. {0x00100000, 20},
  718. {0x00200000, 21},
  719. {0x00400000, 22},
  720. {0x00800000, 23},
  721. {0x01000000, 24},
  722. {0x02000000, 25},
  723. {0x04000000, 26},
  724. {0x08000000, 27},
  725. {0x10000000, 28},
  726. {0x20000000, 29},
  727. {0x40000000, 30}
  728. };
  729. int Error(0);
  730. for(std::size_t i = 0; i < sizeof(Data) / sizeof(entry); ++i)
  731. {
  732. int Result0 = glm::findMSB(Data[i].Value);
  733. Error += Data[i].Return == Result0 ? 0 : 1;
  734. }
  735. for(std::size_t i = 0; i < sizeof(Data) / sizeof(entry); ++i)
  736. {
  737. int Result0 = findMSB_nlz1(Data[i].Value);
  738. Error += Data[i].Return == Result0 ? 0 : 1;
  739. }
  740. /*
  741. for(std::size_t i = 0; i < sizeof(Data) / sizeof(entry); ++i)
  742. {
  743. int Result0 = findMSB_nlz2(Data[i].Value);
  744. Error += Data[i].Return == Result0 ? 0 : 1;
  745. }
  746. */
  747. for(std::size_t i = 0; i < sizeof(Data) / sizeof(entry); ++i)
  748. {
  749. int Result0 = findMSB_095(Data[i].Value);
  750. Error += Data[i].Return == Result0 ? 0 : 1;
  751. }
  752. # if GLM_HAS_BITSCAN_WINDOWS
  753. for(std::size_t i = 0; i < sizeof(Data) / sizeof(entry); ++i)
  754. {
  755. int Result0 = findMSB_intrinsic(Data[i].Value);
  756. Error += Data[i].Return == Result0 ? 0 : 1;
  757. }
  758. # endif//GLM_HAS_BITSCAN_WINDOWS
  759. for(std::size_t i = 0; i < sizeof(Data) / sizeof(entry); ++i)
  760. {
  761. int Result0 = findMSB_pop(Data[i].Value);
  762. Error += Data[i].Return == Result0 ? 0 : 1;
  763. }
  764. return Error;
  765. }
  766. static int test()
  767. {
  768. int Error(0);
  769. Error += test_ivec4();
  770. Error += test_int();
  771. return Error;
  772. }
  773. static int perf(std::size_t Samples)
  774. {
  775. int Error(0);
  776. Error += perf_int(Samples);
  777. return Error;
  778. }
  779. }//findMSB
  780. namespace findLSB
  781. {
  // One test case: an input value together with the result expected for it.
  template<typename valueType, typename resultType>
  struct type
  {
      valueType Value;    // input handed to the function under test
      resultType Return;  // result the function is expected to produce
  };

  typedef type<int, int> entry;

  // Known answers shared by the findLSB tests and benchmarks below:
  // {input, expected index of the lowest set bit}, with -1 expected for zero.
  entry const DataI32[] =
  {
      {0x00000001,  0},
      {0x00000003,  0},
      {0x00000002,  1},
      // {0x80000000, 31}, // Clang generates an error with this
      {0x00010000, 16},
      {0x7FFF0000, 16},
      {0x7F000000, 24},
      {0x7F00FF00,  8},
      {0x00000000, -1}
  };
  # if GLM_HAS_BITSCAN_WINDOWS
  // findLSB variant built on the _BitScanForward intrinsic; only compiled
  // when GLM_HAS_BITSCAN_WINDOWS is set.
  // Returns -1 for a zero input, otherwise the index written by the intrinsic.
  template<typename genIUType>
  static int findLSB_intrinsic(genIUType Value)
  {
      static_assert(std::numeric_limits<genIUType>::is_integer, "'findLSB' only accept integer values");

      unsigned long Index = 0;
      if(Value != 0)
      {
          _BitScanForward(&Index, Value);
          return static_cast<int>(Index);
      }

      // Zero is answered here without calling the intrinsic.
      return -1;
  }
  # endif
  // Reference findLSB using a linear scan from bit 0 upward.
  //
  // Value: any integer (static_assert enforces an integer type).
  // Returns the index of the lowest set bit, or -1 when Value is zero.
  template<typename genIUType>
  static int findLSB_095(genIUType Value)
  {
      static_assert(std::numeric_limits<genIUType>::is_integer, "'findLSB' only accept integer values");

      if(Value == 0)
          return -1;

      // Shift Value down and test bit 0 instead of building a mask with
      // `1 << Bit`: the literal 1 is an int, so that shift is undefined as soon
      // as Bit reaches the width of int and breaks for 64-bit inputs.
      // Value is non-zero here, so the scan stops before Bit reaches the width
      // of genIUType.
      int Bit = 0;
      while(((Value >> Bit) & static_cast<genIUType>(1)) == 0)
          ++Bit;
      return Bit;
  }
  823. template<typename genIUType>
  824. static int findLSB_ntz2(genIUType x)
  825. {
  826. if(x == 0)
  827. return -1;
  828. return glm::bitCount(~x & (x - static_cast<genIUType>(1)));
  829. }
  830. template<typename genIUType>
  831. static int findLSB_branchfree(genIUType x)
  832. {
  833. bool IsNull(x == 0);
  834. int const Keep(!IsNull);
  835. int const Discard(IsNull);
  836. return static_cast<int>(glm::bitCount(~x & (x - static_cast<genIUType>(1)))) * Keep + Discard * -1;
  837. }
  838. static int test_int()
  839. {
  840. int Error(0);
  841. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  842. {
  843. int Result = glm::findLSB(DataI32[i].Value);
  844. Error += DataI32[i].Return == Result ? 0 : 1;
  845. }
  846. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  847. {
  848. int Result = findLSB_095(DataI32[i].Value);
  849. Error += DataI32[i].Return == Result ? 0 : 1;
  850. }
  851. # if GLM_HAS_BITSCAN_WINDOWS
  852. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  853. {
  854. int Result = findLSB_intrinsic(DataI32[i].Value);
  855. Error += DataI32[i].Return == Result ? 0 : 1;
  856. }
  857. # endif
  858. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  859. {
  860. int Result = findLSB_ntz2(DataI32[i].Value);
  861. Error += DataI32[i].Return == Result ? 0 : 1;
  862. }
  863. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  864. {
  865. int Result = findLSB_branchfree(DataI32[i].Value);
  866. Error += DataI32[i].Return == Result ? 0 : 1;
  867. }
  868. return Error;
  869. }
  870. static int test()
  871. {
  872. int Error(0);
  873. Error += test_int();
  874. return Error;
  875. }
  876. static int perf_int(std::size_t Count)
  877. {
  878. int Error(0);
  879. std::clock_t Timestamps0 = std::clock();
  880. for(std::size_t k = 0; k < Count; ++k)
  881. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  882. {
  883. int Result = glm::findLSB(DataI32[i].Value);
  884. Error += DataI32[i].Return == Result ? 0 : 1;
  885. }
  886. std::clock_t Timestamps1 = std::clock();
  887. for(std::size_t k = 0; k < Count; ++k)
  888. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  889. {
  890. int Result = findLSB_095(DataI32[i].Value);
  891. Error += DataI32[i].Return == Result ? 0 : 1;
  892. }
  893. std::clock_t Timestamps2 = std::clock();
  894. # if GLM_HAS_BITSCAN_WINDOWS
  895. for(std::size_t k = 0; k < Count; ++k)
  896. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  897. {
  898. int Result = findLSB_intrinsic(DataI32[i].Value);
  899. Error += DataI32[i].Return == Result ? 0 : 1;
  900. }
  901. # endif
  902. std::clock_t Timestamps3 = std::clock();
  903. for(std::size_t k = 0; k < Count; ++k)
  904. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  905. {
  906. int Result = findLSB_ntz2(DataI32[i].Value);
  907. Error += DataI32[i].Return == Result ? 0 : 1;
  908. }
  909. std::clock_t Timestamps4 = std::clock();
  910. for(std::size_t k = 0; k < Count; ++k)
  911. for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(entry); ++i)
  912. {
  913. int Result = findLSB_branchfree(DataI32[i].Value);
  914. Error += DataI32[i].Return == Result ? 0 : 1;
  915. }
  916. std::clock_t Timestamps5 = std::clock();
  917. std::printf("glm::findLSB: %d clocks\n", static_cast<int>(Timestamps1 - Timestamps0));
  918. std::printf("findLSB - 0.9.5: %d clocks\n", static_cast<int>(Timestamps2 - Timestamps1));
  919. # if GLM_HAS_BITSCAN_WINDOWS
  920. std::printf("findLSB - intrinsics: %d clocks\n", static_cast<int>(Timestamps3 - Timestamps2));
  921. # endif
  922. std::printf("findLSB - ntz2: %d clocks\n", static_cast<int>(Timestamps4 - Timestamps3));
  923. std::printf("findLSB - branchfree: %d clocks\n", static_cast<int>(Timestamps5 - Timestamps4));
  924. return Error;
  925. }
  926. static int perf(std::size_t Samples)
  927. {
  928. int Error(0);
  929. Error += perf_int(Samples);
  930. return Error;
  931. }
  932. }//findLSB
  933. namespace uaddCarry
  934. {
  935. static int test()
  936. {
  937. int Error(0);
  938. {
  939. glm::uint x = std::numeric_limits<glm::uint>::max();
  940. glm::uint y = 0;
  941. glm::uint Carry = 0;
  942. glm::uint Result = glm::uaddCarry(x, y, Carry);
  943. Error += Carry == 0 ? 0 : 1;
  944. Error += Result == std::numeric_limits<glm::uint>::max() ? 0 : 1;
  945. }
  946. {
  947. glm::uint x = std::numeric_limits<glm::uint>::max();
  948. glm::uint y = 1;
  949. glm::uint Carry = 0;
  950. glm::uint Result = glm::uaddCarry(x, y, Carry);
  951. Error += Carry == 1 ? 0 : 1;
  952. Error += Result == 0 ? 0 : 1;
  953. }
  954. {
  955. glm::uvec1 x(std::numeric_limits<glm::uint>::max());
  956. glm::uvec1 y(0);
  957. glm::uvec1 Carry(0);
  958. glm::uvec1 Result(glm::uaddCarry(x, y, Carry));
  959. Error += glm::all(glm::equal(Carry, glm::uvec1(0))) ? 0 : 1;
  960. Error += glm::all(glm::equal(Result, glm::uvec1(std::numeric_limits<glm::uint>::max()))) ? 0 : 1;
  961. }
  962. {
  963. glm::uvec1 x(std::numeric_limits<glm::uint>::max());
  964. glm::uvec1 y(1);
  965. glm::uvec1 Carry(0);
  966. glm::uvec1 Result(glm::uaddCarry(x, y, Carry));
  967. Error += glm::all(glm::equal(Carry, glm::uvec1(1))) ? 0 : 1;
  968. Error += glm::all(glm::equal(Result, glm::uvec1(0))) ? 0 : 1;
  969. }
  970. return Error;
  971. }
  972. }//namespace uaddCarry
  973. namespace usubBorrow
  974. {
  975. static int test()
  976. {
  977. int Error(0);
  978. {
  979. glm::uint x = 16;
  980. glm::uint y = 17;
  981. glm::uint Borrow = 0;
  982. glm::uint Result = glm::usubBorrow(x, y, Borrow);
  983. Error += Borrow == 1 ? 0 : 1;
  984. Error += Result == glm::uint(x-y) ? 0 : 1;
  985. }
  986. {
  987. glm::uvec1 x(16);
  988. glm::uvec1 y(17);
  989. glm::uvec1 Borrow(0);
  990. glm::uvec1 Result(glm::usubBorrow(x, y, Borrow));
  991. Error += glm::all(glm::equal(Borrow, glm::uvec1(1))) ? 0 : 1;
  992. Error += glm::all(glm::equal(Result, glm::uvec1(x-y))) ? 0 : 1;
  993. }
  994. {
  995. glm::uvec2 x(16);
  996. glm::uvec2 y(17);
  997. glm::uvec2 Borrow(0);
  998. glm::uvec2 Result(glm::usubBorrow(x, y, Borrow));
  999. Error += glm::all(glm::equal(Borrow, glm::uvec2(1))) ? 0 : 1;
  1000. Error += glm::all(glm::equal(Result, glm::uvec2(x-y))) ? 0 : 1;
  1001. }
  1002. {
  1003. glm::uvec3 x(16);
  1004. glm::uvec3 y(17);
  1005. glm::uvec3 Borrow(0);
  1006. glm::uvec3 Result(glm::usubBorrow(x, y, Borrow));
  1007. Error += glm::all(glm::equal(Borrow, glm::uvec3(1))) ? 0 : 1;
  1008. Error += glm::all(glm::equal(Result, glm::uvec3(x-y))) ? 0 : 1;
  1009. }
  1010. {
  1011. glm::uvec4 x(16);
  1012. glm::uvec4 y(17);
  1013. glm::uvec4 Borrow(0);
  1014. glm::uvec4 Result(glm::usubBorrow(x, y, Borrow));
  1015. Error += glm::all(glm::equal(Borrow, glm::uvec4(1))) ? 0 : 1;
  1016. Error += glm::all(glm::equal(Result, glm::uvec4(x-y))) ? 0 : 1;
  1017. }
  1018. return Error;
  1019. }
  1020. }//namespace usubBorrow
  1021. namespace umulExtended
  1022. {
  1023. static int test()
  1024. {
  1025. int Error(0);
  1026. {
  1027. glm::uint x = 2;
  1028. glm::uint y = 3;
  1029. glm::uint msb = 0;
  1030. glm::uint lsb = 0;
  1031. glm::umulExtended(x, y, msb, lsb);
  1032. Error += msb == 0 ? 0 : 1;
  1033. Error += lsb == 6 ? 0 : 1;
  1034. }
  1035. {
  1036. glm::uvec1 x(2);
  1037. glm::uvec1 y(3);
  1038. glm::uvec1 msb(0);
  1039. glm::uvec1 lsb(0);
  1040. glm::umulExtended(x, y, msb, lsb);
  1041. Error += glm::all(glm::equal(msb, glm::uvec1(0))) ? 0 : 1;
  1042. Error += glm::all(glm::equal(lsb, glm::uvec1(6))) ? 0 : 1;
  1043. }
  1044. {
  1045. glm::uvec2 x(2);
  1046. glm::uvec2 y(3);
  1047. glm::uvec2 msb(0);
  1048. glm::uvec2 lsb(0);
  1049. glm::umulExtended(x, y, msb, lsb);
  1050. Error += glm::all(glm::equal(msb, glm::uvec2(0))) ? 0 : 1;
  1051. Error += glm::all(glm::equal(lsb, glm::uvec2(6))) ? 0 : 1;
  1052. }
  1053. {
  1054. glm::uvec3 x(2);
  1055. glm::uvec3 y(3);
  1056. glm::uvec3 msb(0);
  1057. glm::uvec3 lsb(0);
  1058. glm::umulExtended(x, y, msb, lsb);
  1059. Error += glm::all(glm::equal(msb, glm::uvec3(0))) ? 0 : 1;
  1060. Error += glm::all(glm::equal(lsb, glm::uvec3(6))) ? 0 : 1;
  1061. }
  1062. {
  1063. glm::uvec4 x(2);
  1064. glm::uvec4 y(3);
  1065. glm::uvec4 msb(0);
  1066. glm::uvec4 lsb(0);
  1067. glm::umulExtended(x, y, msb, lsb);
  1068. Error += glm::all(glm::equal(msb, glm::uvec4(0))) ? 0 : 1;
  1069. Error += glm::all(glm::equal(lsb, glm::uvec4(6))) ? 0 : 1;
  1070. }
  1071. return Error;
  1072. }
  1073. }//namespace umulExtended
  1074. namespace imulExtended
  1075. {
  1076. static int test()
  1077. {
  1078. int Error(0);
  1079. {
  1080. int x = 2;
  1081. int y = 3;
  1082. int msb = 0;
  1083. int lsb = 0;
  1084. glm::imulExtended(x, y, msb, lsb);
  1085. Error += msb == 0 ? 0 : 1;
  1086. Error += lsb == 6 ? 0 : 1;
  1087. }
  1088. {
  1089. glm::ivec1 x(2);
  1090. glm::ivec1 y(3);
  1091. glm::ivec1 msb(0);
  1092. glm::ivec1 lsb(0);
  1093. glm::imulExtended(x, y, msb, lsb);
  1094. Error += glm::all(glm::equal(msb, glm::ivec1(0))) ? 0 : 1;
  1095. Error += glm::all(glm::equal(lsb, glm::ivec1(6))) ? 0 : 1;
  1096. }
  1097. {
  1098. glm::ivec2 x(2);
  1099. glm::ivec2 y(3);
  1100. glm::ivec2 msb(0);
  1101. glm::ivec2 lsb(0);
  1102. glm::imulExtended(x, y, msb, lsb);
  1103. Error += glm::all(glm::equal(msb, glm::ivec2(0))) ? 0 : 1;
  1104. Error += glm::all(glm::equal(lsb, glm::ivec2(6))) ? 0 : 1;
  1105. }
  1106. {
  1107. glm::ivec3 x(2);
  1108. glm::ivec3 y(3);
  1109. glm::ivec3 msb(0);
  1110. glm::ivec3 lsb(0);
  1111. glm::imulExtended(x, y, msb, lsb);
  1112. Error += glm::all(glm::equal(msb, glm::ivec3(0))) ? 0 : 1;
  1113. Error += glm::all(glm::equal(lsb, glm::ivec3(6))) ? 0 : 1;
  1114. }
  1115. {
  1116. glm::ivec4 x(2);
  1117. glm::ivec4 y(3);
  1118. glm::ivec4 msb(0);
  1119. glm::ivec4 lsb(0);
  1120. glm::imulExtended(x, y, msb, lsb);
  1121. Error += glm::all(glm::equal(msb, glm::ivec4(0))) ? 0 : 1;
  1122. Error += glm::all(glm::equal(lsb, glm::ivec4(6))) ? 0 : 1;
  1123. }
  1124. return Error;
  1125. }
  1126. }//namespace imulExtended
  1127. namespace bitCount
  1128. {
  // One test case: an input value and the result expected for it, both of the
  // same type.
  template<typename valueType>
  struct type
  {
      valueType Value;   // input handed to the function under test
      valueType Return;  // result the function is expected to produce
  };

  // Known answers for the bitCount tests: {input, number of set bits}.
  type<int> const DataI32[] =
  {
      {0x00000001,  1},
      {0x00000003,  2},
      {0x00000002,  1},
      {0x7fffffff, 31},
      {0x00000000,  0}
  };
  // Reference bitCount that tests every bit position with an `if`.
  //
  // v: any integer (static_assert enforces an integer type).
  // Returns the number of set bits among the sizeof(T) * 8 bits of v.
  template<typename T>
  inline int bitCount_if(T v)
  {
      static_assert(std::numeric_limits<T>::is_integer, "'bitCount' only accept integer values");

      int const BitWidth = static_cast<int>(sizeof(T) * 8);

      int Count(0);
      for(int i = 0; i < BitWidth; ++i)
      {
          // Shift v down and test bit 0 rather than masking with `1 << i`:
          // the literal 1 is an int, so that shift is undefined once i reaches
          // the width of int and miscounts types wider than int.
          if((v >> i) & static_cast<T>(1))
              ++Count;
      }
      return Count;
  }
  // Reference bitCount without a per-bit branch: for every bit position the
  // value is shifted down and its lowest bit is added to the running total.
  // Returns the number of set bits among the sizeof(T) * 8 bits of v.
  template<typename T>
  inline int bitCount_vec(T v)
  {
      static_assert(std::numeric_limits<T>::is_integer, "'bitCount' only accept integer values");

      int const BitWidth = static_cast<int>(sizeof(T) * 8);

      int Total = 0;
      int Position = 0;
      while(Position < BitWidth)
      {
          Total += static_cast<int>((v >> Position) & static_cast<T>(1));
          ++Position;
      }
      return Total;
  }
  1166. template<bool EXEC = false>
  1167. struct compute_bitfieldBitCountStep
  1168. {
  1169. template<glm::length_t L, typename T, glm::qualifier Q>
  1170. GLM_FUNC_QUALIFIER static glm::vec<L, T, Q> call(glm::vec<L, T, Q> const& v, T, T)
  1171. {
  1172. return v;
  1173. }
  1174. };
  1175. template<>
  1176. struct compute_bitfieldBitCountStep<true>
  1177. {
  1178. template<glm::length_t L, typename T, glm::qualifier Q>
  1179. GLM_FUNC_QUALIFIER static glm::vec<L, T, Q> call(glm::vec<L, T, Q> const& v, T Mask, T Shift)
  1180. {
  1181. return (v & Mask) + ((v >> Shift) & Mask);
  1182. }
  1183. };
  1184. # if GLM_COMPILER & GLM_COMPILER_VC
  1185. # pragma warning(push)
  1186. # pragma warning(disable : 4309)
  1187. # endif
  1188. template<glm::length_t L, typename T, glm::qualifier Q>
  1189. static glm::vec<L, int, Q> bitCount_bitfield(glm::vec<L, T, Q> const& v)
  1190. {
  1191. glm::vec<L, typename glm::detail::make_unsigned<T>::type, Q> x(v);
  1192. x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 2>::call(x, static_cast<typename glm::detail::make_unsigned<T>::type>(0x5555555555555555ull), static_cast<typename glm::detail::make_unsigned<T>::type>( 1));
  1193. x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 4>::call(x, static_cast<typename glm::detail::make_unsigned<T>::type>(0x3333333333333333ull), static_cast<typename glm::detail::make_unsigned<T>::type>( 2));
  1194. x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 8>::call(x, static_cast<typename glm::detail::make_unsigned<T>::type>(0x0F0F0F0F0F0F0F0Full), static_cast<typename glm::detail::make_unsigned<T>::type>( 4));
  1195. x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 16>::call(x, static_cast<typename glm::detail::make_unsigned<T>::type>(0x00FF00FF00FF00FFull), static_cast<typename glm::detail::make_unsigned<T>::type>( 8));
  1196. x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 32>::call(x, static_cast<typename glm::detail::make_unsigned<T>::type>(0x0000FFFF0000FFFFull), static_cast<typename glm::detail::make_unsigned<T>::type>(16));
  1197. x = compute_bitfieldBitCountStep<sizeof(T) * 8 >= 64>::call(x, static_cast<typename glm::detail::make_unsigned<T>::type>(0x00000000FFFFFFFFull), static_cast<typename glm::detail::make_unsigned<T>::type>(32));
  1198. return glm::vec<L, int, Q>(x);
  1199. }
  1200. # if GLM_COMPILER & GLM_COMPILER_VC
  1201. # pragma warning(pop)
  1202. # endif
  1203. template<typename genType>
  1204. static int bitCount_bitfield(genType x)
  1205. {
  1206. return bitCount_bitfield(glm::vec<1, genType, glm::defaultp>(x)).x;
  1207. }
  1208. static int perf(std::size_t Size)
  1209. {
  1210. int Error(0);
  1211. std::vector<int> v;
  1212. v.resize(Size);
  1213. std::vector<glm::ivec4> w;
  1214. w.resize(Size);
  1215. std::clock_t TimestampsA = std::clock();
  1216. // bitCount - TimeIf
  1217. {
  1218. for(std::size_t i = 0, n = v.size(); i < n; ++i)
  1219. v[i] = bitCount_if(static_cast<int>(i));
  1220. }
  1221. std::clock_t TimestampsB = std::clock();
  1222. // bitCount - TimeVec
  1223. {
  1224. for(std::size_t i = 0, n = v.size(); i < n; ++i)
  1225. v[i] = bitCount_vec(i);
  1226. }
  1227. std::clock_t TimestampsC = std::clock();
  1228. // bitCount - TimeDefault
  1229. {
  1230. for(std::size_t i = 0, n = v.size(); i < n; ++i)
  1231. v[i] = glm::bitCount(i);
  1232. }
  1233. std::clock_t TimestampsD = std::clock();
  1234. // bitCount - TimeVec4
  1235. {
  1236. for(std::size_t i = 0, n = v.size(); i < n; ++i)
  1237. w[i] = glm::bitCount(glm::ivec4(static_cast<int>(i)));
  1238. }
  1239. std::clock_t TimestampsE = std::clock();
  1240. {
  1241. for(std::size_t i = 0, n = v.size(); i < n; ++i)
  1242. v[i] = bitCount_bitfield(static_cast<int>(i));
  1243. }
  1244. std::clock_t TimestampsF = std::clock();
  1245. std::printf("bitCount - TimeIf %d\n", static_cast<int>(TimestampsB - TimestampsA));
  1246. std::printf("bitCount - TimeVec %d\n", static_cast<int>(TimestampsC - TimestampsB));
  1247. std::printf("bitCount - TimeDefault %d\n", static_cast<int>(TimestampsD - TimestampsC));
  1248. std::printf("bitCount - TimeVec4 %d\n", static_cast<int>(TimestampsE - TimestampsD));
  1249. std::printf("bitCount - bitfield %d\n", static_cast<int>(TimestampsF - TimestampsE));
  1250. return Error;
  1251. }
  1252. static int test()
  1253. {
  1254. int Error(0);
  1255. for(std::size_t i = 0, n = sizeof(DataI32) / sizeof(type<int>); i < n; ++i)
  1256. {
  1257. int ResultA = glm::bitCount(DataI32[i].Value);
  1258. Error += DataI32[i].Return == ResultA ? 0 : 1;
  1259. assert(!Error);
  1260. int ResultB = bitCount_if(DataI32[i].Value);
  1261. Error += DataI32[i].Return == ResultB ? 0 : 1;
  1262. assert(!Error);
  1263. int ResultC = bitCount_vec(DataI32[i].Value);
  1264. Error += DataI32[i].Return == ResultC ? 0 : 1;
  1265. assert(!Error);
  1266. int ResultE = bitCount_bitfield(DataI32[i].Value);
  1267. Error += DataI32[i].Return == ResultE ? 0 : 1;
  1268. assert(!Error);
  1269. }
  1270. return Error;
  1271. }
  1272. }//bitCount
  1273. int main()
  1274. {
  1275. int Error = 0;
  1276. Error += ::bitCount::test();
  1277. Error += ::bitfieldReverse::test();
  1278. Error += ::findMSB::test();
  1279. Error += ::findLSB::test();
  1280. Error += ::umulExtended::test();
  1281. Error += ::imulExtended::test();
  1282. Error += ::uaddCarry::test();
  1283. Error += ::usubBorrow::test();
  1284. Error += ::bitfieldInsert::test();
  1285. Error += ::bitfieldExtract::test();
  1286. # ifdef NDEBUG
  1287. std::size_t const Samples = 1000;
  1288. # else
  1289. std::size_t const Samples = 1;
  1290. # endif
  1291. ::bitCount::perf(Samples);
  1292. ::bitfieldReverse::perf(Samples);
  1293. ::findMSB::perf(Samples);
  1294. ::findLSB::perf(Samples);
  1295. return Error;
  1296. }