simd_test.cpp 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. /*
  2. * Copyright 2010-2016 Branimir Karadzic. All rights reserved.
  3. * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
  4. */
  5. #include "test.h"
  6. #include <bx/simd_t.h>
  7. #include <bx/fpumath.h>
  8. #include <string.h>
  // Trace hook used by the simd_check_* helpers below.
  // Change `#if 0` to `#if 1` to forward SIMD_DBG to DBG. In the default
  // configuration the format string is dropped and the remaining arguments
  // are only handed to BX_UNUSED.
  #if 0
  #   define SIMD_DBG DBG
  #else
  // No trailing semicolon here: every call site already ends with `;`, which
  // keeps both variants of the macro consistent and avoids expanding each call
  // into an extra empty statement.
  #   define SIMD_DBG(_fmt, ...) BX_UNUSED(__VA_ARGS__)
  #endif // 0
  14. using namespace bx;
  15. union simd_cast
  16. {
  17. bx::simd256_t simd256;
  18. bx::simd128_t simd128;
  19. float f[8];
  20. uint32_t ui[8];
  21. int32_t i[8];
  22. char c[32];
  23. };
  24. void simd_check_bool(const char* _str, bool _a, bool _0)
  25. {
  26. SIMD_DBG("%s %d == %d"
  27. , _str
  28. , _a
  29. , _0
  30. );
  31. REQUIRE(_a == _0);
  32. }
  33. void simd_check_int32(
  34. const char* _str
  35. , bx::simd128_t _a
  36. , int32_t _0
  37. , int32_t _1
  38. , int32_t _2
  39. , int32_t _3
  40. )
  41. {
  42. simd_cast c; c.simd128 = _a;
  43. SIMD_DBG("%s (%d, %d, %d, %d) == (%d, %d, %d, %d)"
  44. , _str
  45. , c.i[0], c.i[1], c.i[2], c.i[3]
  46. , _0, _1, _2, _3
  47. );
  48. REQUIRE(c.i[0] == _0);
  49. REQUIRE(c.i[1] == _1);
  50. REQUIRE(c.i[2] == _2);
  51. REQUIRE(c.i[3] == _3);
  52. }
  // Eight-element (simd256_t) overload of simd_check_int32.
  // Compiled out by the `#if 0` guard.
  #if 0
  void simd_check_int32(
        const char* _str
      , bx::simd256_t _a
      , int32_t _0
      , int32_t _1
      , int32_t _2
      , int32_t _3
      , int32_t _4
      , int32_t _5
      , int32_t _6
      , int32_t _7
      )
  {
      // Store the SIMD value, then read it back through the int32_t view.
      simd_cast lanes;
      lanes.simd256 = _a;

      SIMD_DBG("%s (%d, %d, %d, %d, %d, %d, %d, %d) == (%d, %d, %d, %d, %d, %d, %d, %d)"
          , _str
          , lanes.i[0], lanes.i[1], lanes.i[2], lanes.i[3], lanes.i[4], lanes.i[5], lanes.i[6], lanes.i[7]
          , _0, _1, _2, _3, _4, _5, _6, _7
          );

      REQUIRE(lanes.i[0] == _0);
      REQUIRE(lanes.i[1] == _1);
      REQUIRE(lanes.i[2] == _2);
      REQUIRE(lanes.i[3] == _3);
      REQUIRE(lanes.i[4] == _4);
      REQUIRE(lanes.i[5] == _5);
      REQUIRE(lanes.i[6] == _6);
      REQUIRE(lanes.i[7] == _7);
  }
  #endif // 0
  83. void simd_check_uint32(
  84. const char* _str
  85. , bx::simd128_t _a
  86. , uint32_t _0
  87. , uint32_t _1
  88. , uint32_t _2
  89. , uint32_t _3
  90. )
  91. {
  92. simd_cast c; c.simd128 = _a;
  93. SIMD_DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x)"
  94. , _str
  95. , c.ui[0], c.ui[1], c.ui[2], c.ui[3]
  96. , _0, _1, _2, _3
  97. );
  98. REQUIRE(c.ui[0] == _0);
  99. REQUIRE(c.ui[1] == _1);
  100. REQUIRE(c.ui[2] == _2);
  101. REQUIRE(c.ui[3] == _3);
  102. }
  // Eight-element (simd256_t) overload of simd_check_uint32.
  // Compiled out by the `#if 0` guard.
  #if 0
  void simd_check_uint32(
        const char* _str
      , bx::simd256_t _a
      , uint32_t _0
      , uint32_t _1
      , uint32_t _2
      , uint32_t _3
      , uint32_t _4
      , uint32_t _5
      , uint32_t _6
      , uint32_t _7
      )
  {
      // Store the SIMD value, then read it back through the uint32_t view.
      simd_cast lanes;
      lanes.simd256 = _a;

      SIMD_DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x)"
          , _str
          , lanes.ui[0], lanes.ui[1], lanes.ui[2], lanes.ui[3], lanes.ui[4], lanes.ui[5], lanes.ui[6], lanes.ui[7]
          , _0, _1, _2, _3, _4, _5, _6, _7
          );

      REQUIRE(lanes.ui[0] == _0);
      REQUIRE(lanes.ui[1] == _1);
      REQUIRE(lanes.ui[2] == _2);
      REQUIRE(lanes.ui[3] == _3);
      REQUIRE(lanes.ui[4] == _4);
      REQUIRE(lanes.ui[5] == _5);
      REQUIRE(lanes.ui[6] == _6);
      REQUIRE(lanes.ui[7] == _7);
  }
  #endif // 0
  133. void simd_check_float(
  134. const char* _str
  135. , bx::simd128_t _a
  136. , float _0
  137. , float _1
  138. , float _2
  139. , float _3
  140. )
  141. {
  142. simd_cast c; c.simd128 = _a;
  143. SIMD_DBG("%s (%f, %f, %f, %f) == (%f, %f, %f, %f)"
  144. , _str
  145. , c.f[0], c.f[1], c.f[2], c.f[3]
  146. , _0, _1, _2, _3
  147. );
  148. CHECK(bx::fequal(c.f[0], _0, 0.0001f) );
  149. CHECK(bx::fequal(c.f[1], _1, 0.0001f) );
  150. CHECK(bx::fequal(c.f[2], _2, 0.0001f) );
  151. CHECK(bx::fequal(c.f[3], _3, 0.0001f) );
  152. }
  // Eight-element (simd256_t) overload of simd_check_float.
  // Compiled out by the `#if 0` guard.
  #if 0
  void simd_check_float(
        const char* _str
      , bx::simd256_t _a
      , float _0
      , float _1
      , float _2
      , float _3
      , float _4
      , float _5
      , float _6
      , float _7
      )
  {
      const float tolerance = 0.0001f;

      // Store the SIMD value, then read it back through the float view.
      simd_cast lanes;
      lanes.simd256 = _a;

      SIMD_DBG("%s (%f, %f, %f, %f, %f, %f, %f, %f) == (%f, %f, %f, %f, %f, %f, %f, %f)"
          , _str
          , lanes.f[0], lanes.f[1], lanes.f[2], lanes.f[3], lanes.f[4], lanes.f[5], lanes.f[6], lanes.f[7]
          , _0, _1, _2, _3, _4, _5, _6, _7
          );

      CHECK(bx::fequal(lanes.f[0], _0, tolerance) );
      CHECK(bx::fequal(lanes.f[1], _1, tolerance) );
      CHECK(bx::fequal(lanes.f[2], _2, tolerance) );
      CHECK(bx::fequal(lanes.f[3], _3, tolerance) );
      CHECK(bx::fequal(lanes.f[4], _4, tolerance) );
      CHECK(bx::fequal(lanes.f[5], _5, tolerance) );
      CHECK(bx::fequal(lanes.f[6], _6, tolerance) );
      CHECK(bx::fequal(lanes.f[7], _7, tolerance) );
  }
  #endif // 0
  183. void simd_check_string(const char* _str, bx::simd128_t _a)
  184. {
  185. simd_cast c; c.simd128 = _a;
  186. const char test[5] = { c.c[0], c.c[4], c.c[8], c.c[12], '\0' };
  187. SIMD_DBG("%s %s", _str, test);
  188. CHECK(0 == strcmp(_str, test) );
  189. }
  190. TEST_CASE("simd_swizzle", "")
  191. {
  192. const simd128_t xyzw = simd_ild(0x78787878, 0x79797979, 0x7a7a7a7a, 0x77777777);
  193. #define ELEMx 0
  194. #define ELEMy 1
  195. #define ELEMz 2
  196. #define ELEMw 3
  197. #define BX_SIMD128_IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
  198. simd_check_string("" #_x #_y #_z #_w "", simd_swiz_##_x##_y##_z##_w(xyzw) ); \
  199. #include <bx/simd128_swizzle.inl>
  200. #undef BX_SIMD128_IMPLEMENT_SWIZZLE
  201. #undef ELEMw
  202. #undef ELEMz
  203. #undef ELEMy
  204. #undef ELEMx
  205. }
  206. TEST_CASE("simd_shuffle", "")
  207. {
  208. const simd128_t xyzw = simd_ild(0x78787878, 0x79797979, 0x7a7a7a7a, 0x77777777);
  209. const simd128_t ABCD = simd_ild(0x41414141, 0x42424242, 0x43434343, 0x44444444);
  210. simd_check_string("xyAB", simd_shuf_xyAB(xyzw, ABCD) );
  211. simd_check_string("ABxy", simd_shuf_ABxy(xyzw, ABCD) );
  212. simd_check_string("zwCD", simd_shuf_zwCD(xyzw, ABCD) );
  213. simd_check_string("CDzw", simd_shuf_CDzw(xyzw, ABCD) );
  214. simd_check_string("xAyB", simd_shuf_xAyB(xyzw, ABCD) );
  215. simd_check_string("zCwD", simd_shuf_zCwD(xyzw, ABCD) );
  216. simd_check_string("xAzC", simd_shuf_xAzC(xyzw, ABCD) );
  217. simd_check_string("yBwD", simd_shuf_yBwD(xyzw, ABCD) );
  218. simd_check_string("CzDw", simd_shuf_CzDw(xyzw, ABCD) );
  219. }
  220. TEST_CASE("simd_compare", "")
  221. {
  222. simd_check_uint32("cmpeq"
  223. , simd_cmpeq(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  224. , 0, 0xffffffff, 0, 0
  225. );
  226. simd_check_uint32("cmplt"
  227. , simd_cmplt(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  228. , 0, 0, 0, 0
  229. );
  230. simd_check_uint32("cmple"
  231. , simd_cmple(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  232. , 0, 0xffffffff, 0, 0
  233. );
  234. simd_check_uint32("cmpgt"
  235. , simd_cmpgt(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  236. , 0xffffffff, 0, 0xffffffff, 0xffffffff
  237. );
  238. simd_check_uint32("cmpge"
  239. , simd_cmpge(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  240. , 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
  241. );
  242. simd_check_uint32("icmpeq"
  243. , simd_icmpeq(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  244. , 0xffffffff, 0, 0, 0xffffffff
  245. );
  246. simd_check_uint32("icmplt"
  247. , simd_icmplt(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  248. , 0, 0, 0, 0
  249. );
  250. simd_check_uint32("icmpgt"
  251. , simd_icmpgt(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  252. , 0, 0xffffffff, 0xffffffff, 0
  253. );
  254. }
  255. TEST_CASE("simd_test", "")
  256. {
  257. simd_check_bool("test_any_xyzw"
  258. , simd_test_any_xyzw(simd_ild(0xffffffff, 0, 0, 0) )
  259. , true
  260. );
  261. simd_check_bool("test_all_xyzw"
  262. , simd_test_all_xyzw(simd_ild(0xffffffff, 0, 0xffffffff, 0) )
  263. , false
  264. );
  265. simd_check_bool("test_all_xyzw"
  266. , simd_test_all_xyzw(simd_ild(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff) )
  267. , true
  268. );
  269. simd_check_bool("test_all_xw"
  270. , simd_test_all_xw(simd_ild(0xffffffff, 0, 0, 0xffffffff) )
  271. , true
  272. );
  273. simd_check_bool("test_all_xzw"
  274. , simd_test_all_xzw(simd_ild(0xffffffff, 0, 0, 0xffffffff) )
  275. , false
  276. );
  277. }
  278. TEST_CASE("simd_load", "")
  279. {
  280. simd_check_float("ld"
  281. , simd_ld(0.0f, 1.0f, 2.0f, 3.0f)
  282. , 0.0f, 1.0f, 2.0f, 3.0f
  283. );
  284. // simd_check_float("ld"
  285. // , simd_ld<simd256_t>(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f)
  286. // , 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
  287. // );
  288. simd_check_int32("ild"
  289. , simd_ild(uint32_t(-1), 0, 1, 2)
  290. , uint32_t(-1), 0, 1, 2
  291. );
  292. // simd_check_int32("ild"
  293. // , simd_ild<simd256_t>(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6)
  294. // , uint32_t(-1), 0, 1, 2, 3, 4, 5, 6
  295. // );
  296. simd_check_int32("ild"
  297. , simd_ild(uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4) )
  298. , uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4)
  299. );
  300. simd_check_uint32("zero", simd_zero()
  301. , 0, 0, 0, 0
  302. );
  303. simd_check_uint32("isplat", simd_isplat(0x80000001)
  304. , 0x80000001, 0x80000001, 0x80000001, 0x80000001
  305. );
  306. simd_check_float("isplat", simd_splat(1.0f)
  307. , 1.0f, 1.0f, 1.0f, 1.0f
  308. );
  309. }
  310. TEST_CASE("simd_arithmetic", "")
  311. {
  312. simd_check_float("madd"
  313. , simd_madd(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(4.0f, 5.0f, 6.0f, 7.0f), simd_ld(8.0f, 9.0f, 10.0f, 11.0f) )
  314. , 8.0f, 14.0f, 22.0f, 32.0f
  315. );
  316. simd_check_float("cross3"
  317. , simd_cross3(simd_ld(1.0f, 0.0f, 0.0f, 0.0f), simd_ld(0.0f, 1.0f, 0.0f, 0.0f) )
  318. , 0.0f, 0.0f, 1.0f, 0.0f
  319. );
  320. }
  321. TEST_CASE("simd_sqrt", "")
  322. {
  323. simd_check_float("simd_sqrt"
  324. , simd_sqrt(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  325. , 1.0f, 4.0f, 256.0f, 351.363060096f
  326. );
  327. simd_check_float("simd_sqrt_nr_ni"
  328. , simd_sqrt_nr_ni(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  329. , 1.0f, 4.0f, 256.0f, 351.363060096f
  330. );
  331. simd_check_float("simd_sqrt_nr1_ni"
  332. , simd_sqrt_nr1_ni(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  333. , 1.0f, 4.0f, 256.0f, 351.363060096f
  334. );
  335. }
  336. TEST_CASE("float4", "")
  337. {
  338. const simd128_t isplat = simd_isplat(0x80000001);
  339. simd_check_uint32("sll"
  340. , simd_sll(isplat, 1)
  341. , 0x00000002, 0x00000002, 0x00000002, 0x00000002
  342. );
  343. simd_check_uint32("srl"
  344. , simd_srl(isplat, 1)
  345. , 0x40000000, 0x40000000, 0x40000000, 0x40000000
  346. );
  347. simd_check_uint32("sra"
  348. , simd_sra(isplat, 1)
  349. , 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000
  350. );
  351. simd_check_uint32("and"
  352. , simd_and(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  353. , 0, 0, 0, 0
  354. );
  355. simd_check_uint32("or "
  356. , simd_or(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  357. , uint32_t(-1), uint32_t(-1), uint32_t(-1), uint32_t(-1)
  358. );
  359. simd_check_uint32("xor"
  360. , simd_or(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  361. , uint32_t(-1), uint32_t(-1), uint32_t(-1), uint32_t(-1)
  362. );
  363. simd_check_int32("imin"
  364. , simd_imin(simd_ild(0, 1, 2, 3), simd_ild(uint32_t(-1), 2, uint32_t(-2), 1) )
  365. , uint32_t(-1), 1, uint32_t(-2), 1
  366. );
  367. simd_check_float("min"
  368. , simd_min(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(-1.0f, 2.0f, -2.0f, 1.0f) )
  369. , -1.0f, 1.0f, -2.0f, 1.0f
  370. );
  371. simd_check_int32("imax"
  372. , simd_imax(simd_ild(0, 1, 2, 3), simd_ild(uint32_t(-1), 2, uint32_t(-2), 1) )
  373. , 0, 2, 2, 3
  374. );
  375. simd_check_float("max"
  376. , simd_max(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(-1.0f, 2.0f, -2.0f, 1.0f) )
  377. , 0.0f, 2.0f, 2.0f, 3.0f
  378. );
  379. }