simd_t.cpp 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. /*
  2. * Copyright 2010-2016 Branimir Karadzic. All rights reserved.
  3. * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
  4. */
  5. #include "test.h"
  6. #include <bx/simd_t.h>
  7. #include <bx/fpumath.h>
  8. #include <string.h>
  9. using namespace bx;
  10. union simd_cast
  11. {
  12. bx::simd256_t simd256;
  13. bx::simd128_t simd128;
  14. float f[8];
  15. uint32_t ui[8];
  16. int32_t i[8];
  17. char c[32];
  18. };
  19. void simd_check_bool(const char* _str, bool _a, bool _0)
  20. {
  21. DBG("%s %d == %d"
  22. , _str
  23. , _a
  24. , _0
  25. );
  26. CHECK_EQUAL(_a, _0);
  27. }
  28. void simd_check_int32(
  29. const char* _str
  30. , bx::simd128_t _a
  31. , int32_t _0
  32. , int32_t _1
  33. , int32_t _2
  34. , int32_t _3
  35. )
  36. {
  37. simd_cast c; c.simd128 = _a;
  38. DBG("%s (%d, %d, %d, %d) == (%d, %d, %d, %d)"
  39. , _str
  40. , c.i[0], c.i[1], c.i[2], c.i[3]
  41. , _0, _1, _2, _3
  42. );
  43. CHECK_EQUAL(c.i[0], _0);
  44. CHECK_EQUAL(c.i[1], _1);
  45. CHECK_EQUAL(c.i[2], _2);
  46. CHECK_EQUAL(c.i[3], _3);
  47. }
  48. void simd_check_int32(
  49. const char* _str
  50. , bx::simd256_t _a
  51. , int32_t _0
  52. , int32_t _1
  53. , int32_t _2
  54. , int32_t _3
  55. , int32_t _4
  56. , int32_t _5
  57. , int32_t _6
  58. , int32_t _7
  59. )
  60. {
  61. simd_cast c; c.simd256 = _a;
  62. DBG("%s (%d, %d, %d, %d, %d, %d, %d, %d) == (%d, %d, %d, %d, %d, %d, %d, %d)"
  63. , _str
  64. , c.i[0], c.i[1], c.i[2], c.i[3], c.i[4], c.i[5], c.i[6], c.i[7]
  65. , _0, _1, _2, _3, _4, _5, _6, _7
  66. );
  67. CHECK_EQUAL(c.i[0], _0);
  68. CHECK_EQUAL(c.i[1], _1);
  69. CHECK_EQUAL(c.i[2], _2);
  70. CHECK_EQUAL(c.i[3], _3);
  71. CHECK_EQUAL(c.i[4], _4);
  72. CHECK_EQUAL(c.i[5], _5);
  73. CHECK_EQUAL(c.i[6], _6);
  74. CHECK_EQUAL(c.i[7], _7);
  75. }
  76. void simd_check_uint32(
  77. const char* _str
  78. , bx::simd128_t _a
  79. , uint32_t _0
  80. , uint32_t _1
  81. , uint32_t _2
  82. , uint32_t _3
  83. )
  84. {
  85. simd_cast c; c.simd128 = _a;
  86. DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x)"
  87. , _str
  88. , c.ui[0], c.ui[1], c.ui[2], c.ui[3]
  89. , _0, _1, _2, _3
  90. );
  91. CHECK_EQUAL(c.ui[0], _0);
  92. CHECK_EQUAL(c.ui[1], _1);
  93. CHECK_EQUAL(c.ui[2], _2);
  94. CHECK_EQUAL(c.ui[3], _3);
  95. }
  96. void simd_check_uint32(
  97. const char* _str
  98. , bx::simd256_t _a
  99. , uint32_t _0
  100. , uint32_t _1
  101. , uint32_t _2
  102. , uint32_t _3
  103. , uint32_t _4
  104. , uint32_t _5
  105. , uint32_t _6
  106. , uint32_t _7
  107. )
  108. {
  109. simd_cast c; c.simd256 = _a;
  110. DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x)"
  111. , _str
  112. , c.ui[0], c.ui[1], c.ui[2], c.ui[3], c.ui[4], c.ui[5], c.ui[6], c.ui[7]
  113. , _0, _1, _2, _3, _4, _5, _6, _7
  114. );
  115. CHECK_EQUAL(c.ui[0], _0);
  116. CHECK_EQUAL(c.ui[1], _1);
  117. CHECK_EQUAL(c.ui[2], _2);
  118. CHECK_EQUAL(c.ui[3], _3);
  119. CHECK_EQUAL(c.ui[4], _4);
  120. CHECK_EQUAL(c.ui[5], _5);
  121. CHECK_EQUAL(c.ui[6], _6);
  122. CHECK_EQUAL(c.ui[7], _7);
  123. }
  124. void simd_check_float(
  125. const char* _str
  126. , bx::simd128_t _a
  127. , float _0
  128. , float _1
  129. , float _2
  130. , float _3
  131. )
  132. {
  133. simd_cast c; c.simd128 = _a;
  134. DBG("%s (%f, %f, %f, %f) == (%f, %f, %f, %f)"
  135. , _str
  136. , c.f[0], c.f[1], c.f[2], c.f[3]
  137. , _0, _1, _2, _3
  138. );
  139. CHECK(bx::fequal(c.f[0], _0, 0.0001f) );
  140. CHECK(bx::fequal(c.f[1], _1, 0.0001f) );
  141. CHECK(bx::fequal(c.f[2], _2, 0.0001f) );
  142. CHECK(bx::fequal(c.f[3], _3, 0.0001f) );
  143. }
  144. void simd_check_float(
  145. const char* _str
  146. , bx::simd256_t _a
  147. , float _0
  148. , float _1
  149. , float _2
  150. , float _3
  151. , float _4
  152. , float _5
  153. , float _6
  154. , float _7
  155. )
  156. {
  157. simd_cast c; c.simd256 = _a;
  158. DBG("%s (%f, %f, %f, %f, %f, %f, %f, %f) == (%f, %f, %f, %f, %f, %f, %f, %f)"
  159. , _str
  160. , c.f[0], c.f[1], c.f[2], c.f[3], c.f[4], c.f[5], c.f[6], c.f[7]
  161. , _0, _1, _2, _3, _4, _5, _6, _7
  162. );
  163. CHECK(bx::fequal(c.f[0], _0, 0.0001f) );
  164. CHECK(bx::fequal(c.f[1], _1, 0.0001f) );
  165. CHECK(bx::fequal(c.f[2], _2, 0.0001f) );
  166. CHECK(bx::fequal(c.f[3], _3, 0.0001f) );
  167. CHECK(bx::fequal(c.f[4], _4, 0.0001f) );
  168. CHECK(bx::fequal(c.f[5], _5, 0.0001f) );
  169. CHECK(bx::fequal(c.f[6], _6, 0.0001f) );
  170. CHECK(bx::fequal(c.f[7], _7, 0.0001f) );
  171. }
  172. void simd_check_string(const char* _str, bx::simd128_t _a)
  173. {
  174. simd_cast c; c.simd128 = _a;
  175. const char test[5] = { c.c[0], c.c[4], c.c[8], c.c[12], '\0' };
  176. DBG("%s %s", _str, test);
  177. CHECK(0 == strcmp(_str, test) );
  178. }
  179. TEST(simd_swizzle)
  180. {
  181. const simd128_t xyzw = simd_ild(0x78787878, 0x79797979, 0x7a7a7a7a, 0x77777777);
  182. #define ELEMx 0
  183. #define ELEMy 1
  184. #define ELEMz 2
  185. #define ELEMw 3
  186. #define BX_SIMD128_IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
  187. simd_check_string("" #_x #_y #_z #_w "", simd_swiz_##_x##_y##_z##_w(xyzw) ); \
  188. #include <bx/simd128_swizzle.inl>
  189. #undef BX_SIMD128_IMPLEMENT_SWIZZLE
  190. #undef ELEMw
  191. #undef ELEMz
  192. #undef ELEMy
  193. #undef ELEMx
  194. }
  195. TEST(simd_shuffle)
  196. {
  197. const simd128_t xyzw = simd_ild(0x78787878, 0x79797979, 0x7a7a7a7a, 0x77777777);
  198. const simd128_t ABCD = simd_ild(0x41414141, 0x42424242, 0x43434343, 0x44444444);
  199. simd_check_string("xyAB", simd_shuf_xyAB(xyzw, ABCD) );
  200. simd_check_string("ABxy", simd_shuf_ABxy(xyzw, ABCD) );
  201. simd_check_string("zwCD", simd_shuf_zwCD(xyzw, ABCD) );
  202. simd_check_string("CDzw", simd_shuf_CDzw(xyzw, ABCD) );
  203. simd_check_string("xAyB", simd_shuf_xAyB(xyzw, ABCD) );
  204. simd_check_string("zCwD", simd_shuf_zCwD(xyzw, ABCD) );
  205. simd_check_string("xAzC", simd_shuf_xAzC(xyzw, ABCD) );
  206. simd_check_string("yBwD", simd_shuf_yBwD(xyzw, ABCD) );
  207. simd_check_string("CzDw", simd_shuf_CzDw(xyzw, ABCD) );
  208. }
  209. TEST(simd_compare)
  210. {
  211. simd_check_uint32("cmpeq"
  212. , simd_cmpeq(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  213. , 0, 0xffffffff, 0, 0
  214. );
  215. simd_check_uint32("cmplt"
  216. , simd_cmplt(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  217. , 0, 0, 0, 0
  218. );
  219. simd_check_uint32("cmple"
  220. , simd_cmple(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  221. , 0, 0xffffffff, 0, 0
  222. );
  223. simd_check_uint32("cmpgt"
  224. , simd_cmpgt(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  225. , 0xffffffff, 0, 0xffffffff, 0xffffffff
  226. );
  227. simd_check_uint32("cmpge"
  228. , simd_cmpge(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  229. , 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
  230. );
  231. simd_check_uint32("icmpeq"
  232. , simd_icmpeq(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  233. , 0xffffffff, 0, 0, 0xffffffff
  234. );
  235. simd_check_uint32("icmplt"
  236. , simd_icmplt(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  237. , 0, 0, 0, 0
  238. );
  239. simd_check_uint32("icmpgt"
  240. , simd_icmpgt(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  241. , 0, 0xffffffff, 0xffffffff, 0
  242. );
  243. }
  244. TEST(simd_test)
  245. {
  246. simd_check_bool("test_any_xyzw"
  247. , simd_test_any_xyzw(simd_ild(0xffffffff, 0, 0, 0) )
  248. , true
  249. );
  250. simd_check_bool("test_all_xyzw"
  251. , simd_test_all_xyzw(simd_ild(0xffffffff, 0, 0xffffffff, 0) )
  252. , false
  253. );
  254. simd_check_bool("test_all_xyzw"
  255. , simd_test_all_xyzw(simd_ild(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff) )
  256. , true
  257. );
  258. simd_check_bool("test_all_xw"
  259. , simd_test_all_xw(simd_ild(0xffffffff, 0, 0, 0xffffffff) )
  260. , true
  261. );
  262. simd_check_bool("test_all_xzw"
  263. , simd_test_all_xzw(simd_ild(0xffffffff, 0, 0, 0xffffffff) )
  264. , false
  265. );
  266. }
  267. TEST(simd_load)
  268. {
  269. simd_check_float("ld"
  270. , simd_ld(0.0f, 1.0f, 2.0f, 3.0f)
  271. , 0.0f, 1.0f, 2.0f, 3.0f
  272. );
  273. simd_check_float("ld"
  274. , simd_ld<simd256_t>(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f)
  275. , 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
  276. );
  277. simd_check_int32("ild"
  278. , simd_ild(uint32_t(-1), 0, 1, 2)
  279. , uint32_t(-1), 0, 1, 2
  280. );
  281. simd_check_int32("ild"
  282. , simd_ild<simd256_t>(uint32_t(-1), 0, 1, 2, 3, 4, 5, 6)
  283. , uint32_t(-1), 0, 1, 2, 3, 4, 5, 6
  284. );
  285. simd_check_int32("ild"
  286. , simd_ild(uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4) )
  287. , uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4)
  288. );
  289. simd_check_uint32("zero", simd_zero()
  290. , 0, 0, 0, 0
  291. );
  292. simd_check_uint32("isplat", simd_isplat(0x80000001)
  293. , 0x80000001, 0x80000001, 0x80000001, 0x80000001
  294. );
  295. simd_check_float("isplat", simd_splat(1.0f)
  296. , 1.0f, 1.0f, 1.0f, 1.0f
  297. );
  298. }
  299. TEST(simd_arithmetic)
  300. {
  301. simd_check_float("madd"
  302. , simd_madd(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(4.0f, 5.0f, 6.0f, 7.0f), simd_ld(8.0f, 9.0f, 10.0f, 11.0f) )
  303. , 8.0f, 14.0f, 22.0f, 32.0f
  304. );
  305. simd_check_float("cross3"
  306. , simd_cross3(simd_ld(1.0f, 0.0f, 0.0f, 0.0f), simd_ld(0.0f, 1.0f, 0.0f, 0.0f) )
  307. , 0.0f, 0.0f, 1.0f, 0.0f
  308. );
  309. }
  310. TEST(simd_sqrt)
  311. {
  312. simd_check_float("simd_sqrt"
  313. , simd_sqrt(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  314. , 1.0f, 4.0f, 256.0f, 351.363060096f
  315. );
  316. simd_check_float("simd_sqrt_nr_ni"
  317. , simd_sqrt_nr_ni(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  318. , 1.0f, 4.0f, 256.0f, 351.363060096f
  319. );
  320. simd_check_float("simd_sqrt_nr1_ni"
  321. , simd_sqrt_nr1_ni(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  322. , 1.0f, 4.0f, 256.0f, 351.363060096f
  323. );
  324. }
  325. TEST(float4)
  326. {
  327. const simd128_t isplat = simd_isplat(0x80000001);
  328. simd_check_uint32("sll"
  329. , simd_sll(isplat, 1)
  330. , 0x00000002, 0x00000002, 0x00000002, 0x00000002
  331. );
  332. simd_check_uint32("srl"
  333. , simd_srl(isplat, 1)
  334. , 0x40000000, 0x40000000, 0x40000000, 0x40000000
  335. );
  336. simd_check_uint32("sra"
  337. , simd_sra(isplat, 1)
  338. , 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000
  339. );
  340. simd_check_uint32("and"
  341. , simd_and(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  342. , 0, 0, 0, 0
  343. );
  344. simd_check_uint32("or "
  345. , simd_or(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  346. , uint32_t(-1), uint32_t(-1), uint32_t(-1), uint32_t(-1)
  347. );
  348. simd_check_uint32("xor"
  349. , simd_or(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  350. , uint32_t(-1), uint32_t(-1), uint32_t(-1), uint32_t(-1)
  351. );
  352. simd_check_int32("imin"
  353. , simd_imin(simd_ild(0, 1, 2, 3), simd_ild(uint32_t(-1), 2, uint32_t(-2), 1) )
  354. , uint32_t(-1), 1, uint32_t(-2), 1
  355. );
  356. simd_check_float("min"
  357. , simd_min(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(-1.0f, 2.0f, -2.0f, 1.0f) )
  358. , -1.0f, 1.0f, -2.0f, 1.0f
  359. );
  360. simd_check_int32("imax"
  361. , simd_imax(simd_ild(0, 1, 2, 3), simd_ild(uint32_t(-1), 2, uint32_t(-2), 1) )
  362. , 0, 2, 2, 3
  363. );
  364. simd_check_float("max"
  365. , simd_max(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(-1.0f, 2.0f, -2.0f, 1.0f) )
  366. , 0.0f, 2.0f, 2.0f, 3.0f
  367. );
  368. }