simd_t.cpp 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. /*
  2. * Copyright 2010-2016 Branimir Karadzic. All rights reserved.
  3. * License: https://github.com/bkaradzic/bx#license-bsd-2-clause
  4. */
  5. #include "test.h"
  6. #include <bx/simd_t.h>
  7. #include <bx/fpumath.h>
  8. #include <string.h>
  9. using namespace bx;
  10. union simd_cast
  11. {
  12. bx::simd128_t f4;
  13. float f[4];
  14. uint32_t ui[4];
  15. int32_t i[4];
  16. char c[16];
  17. };
  18. void simd_check_bool(const char* _str, bool _a, bool _0)
  19. {
  20. DBG("%s %d == %d"
  21. , _str
  22. , _a
  23. , _0
  24. );
  25. CHECK_EQUAL(_a, _0);
  26. }
  27. void simd_check_int32(const char* _str, bx::simd128_t _a, int32_t _0, int32_t _1, int32_t _2, int32_t _3)
  28. {
  29. simd_cast c; c.f4 = _a;
  30. DBG("%s (%d, %d, %d, %d) == (%d, %d, %d, %d)"
  31. , _str
  32. , c.i[0], c.i[1], c.i[2], c.i[3]
  33. , _0, _1, _2, _3
  34. );
  35. CHECK_EQUAL(c.i[0], _0);
  36. CHECK_EQUAL(c.i[1], _1);
  37. CHECK_EQUAL(c.i[2], _2);
  38. CHECK_EQUAL(c.i[3], _3);
  39. }
  40. void simd_check_uint32(const char* _str, bx::simd128_t _a, uint32_t _0, uint32_t _1, uint32_t _2, uint32_t _3)
  41. {
  42. simd_cast c; c.f4 = _a;
  43. DBG("%s (0x%08x, 0x%08x, 0x%08x, 0x%08x) == (0x%08x, 0x%08x, 0x%08x, 0x%08x)"
  44. , _str
  45. , c.ui[0], c.ui[1], c.ui[2], c.ui[3]
  46. , _0, _1, _2, _3
  47. );
  48. CHECK_EQUAL(c.ui[0], _0);
  49. CHECK_EQUAL(c.ui[1], _1);
  50. CHECK_EQUAL(c.ui[2], _2);
  51. CHECK_EQUAL(c.ui[3], _3);
  52. }
  53. void simd_check_float(const char* _str, bx::simd128_t _a, float _0, float _1, float _2, float _3)
  54. {
  55. simd_cast c; c.f4 = _a;
  56. DBG("%s (%f, %f, %f, %f) == (%f, %f, %f, %f)"
  57. , _str
  58. , c.f[0], c.f[1], c.f[2], c.f[3]
  59. , _0, _1, _2, _3
  60. );
  61. CHECK(bx::fequal(c.f[0], _0, 0.0001f) );
  62. CHECK(bx::fequal(c.f[1], _1, 0.0001f) );
  63. CHECK(bx::fequal(c.f[2], _2, 0.0001f) );
  64. CHECK(bx::fequal(c.f[3], _3, 0.0001f) );
  65. }
  66. void simd_check_string(const char* _str, bx::simd128_t _a)
  67. {
  68. simd_cast c; c.f4 = _a;
  69. const char test[5] = { c.c[0], c.c[4], c.c[8], c.c[12], '\0' };
  70. DBG("%s %s", _str, test);
  71. CHECK(0 == strcmp(_str, test) );
  72. }
  73. TEST(simd_swizzle)
  74. {
  75. const simd128_t xyzw = simd_ild(0x78787878, 0x79797979, 0x7a7a7a7a, 0x77777777);
  76. #define ELEMx 0
  77. #define ELEMy 1
  78. #define ELEMz 2
  79. #define ELEMw 3
  80. #define BX_SIMD128_IMPLEMENT_SWIZZLE(_x, _y, _z, _w) \
  81. simd_check_string("" #_x #_y #_z #_w "", simd_swiz_##_x##_y##_z##_w(xyzw) ); \
  82. #include <bx/simd_swizzle.inl>
  83. #undef BX_SIMD128_IMPLEMENT_SWIZZLE
  84. #undef ELEMw
  85. #undef ELEMz
  86. #undef ELEMy
  87. #undef ELEMx
  88. }
  89. TEST(simd_shuffle)
  90. {
  91. const simd128_t xyzw = simd_ild(0x78787878, 0x79797979, 0x7a7a7a7a, 0x77777777);
  92. const simd128_t ABCD = simd_ild(0x41414141, 0x42424242, 0x43434343, 0x44444444);
  93. simd_check_string("xyAB", simd_shuf_xyAB(xyzw, ABCD) );
  94. simd_check_string("ABxy", simd_shuf_ABxy(xyzw, ABCD) );
  95. simd_check_string("zwCD", simd_shuf_zwCD(xyzw, ABCD) );
  96. simd_check_string("CDzw", simd_shuf_CDzw(xyzw, ABCD) );
  97. simd_check_string("xAyB", simd_shuf_xAyB(xyzw, ABCD) );
  98. simd_check_string("zCwD", simd_shuf_zCwD(xyzw, ABCD) );
  99. simd_check_string("xAzC", simd_shuf_xAzC(xyzw, ABCD) );
  100. simd_check_string("yBwD", simd_shuf_yBwD(xyzw, ABCD) );
  101. simd_check_string("CzDw", simd_shuf_CzDw(xyzw, ABCD) );
  102. }
  103. TEST(simd_compare)
  104. {
  105. simd_check_uint32("cmpeq"
  106. , simd_cmpeq(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  107. , 0, 0xffffffff, 0, 0
  108. );
  109. simd_check_uint32("cmplt"
  110. , simd_cmplt(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  111. , 0, 0, 0, 0
  112. );
  113. simd_check_uint32("cmple"
  114. , simd_cmple(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  115. , 0, 0xffffffff, 0, 0
  116. );
  117. simd_check_uint32("cmpgt"
  118. , simd_cmpgt(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  119. , 0xffffffff, 0, 0xffffffff, 0xffffffff
  120. );
  121. simd_check_uint32("cmpge"
  122. , simd_cmpge(simd_ld(1.0f, 2.0f, 3.0f, 4.0f), simd_ld(0.0f, 2.0f, 0.0f, 3.0f) )
  123. , 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
  124. );
  125. simd_check_uint32("icmpeq"
  126. , simd_icmpeq(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  127. , 0xffffffff, 0, 0, 0xffffffff
  128. );
  129. simd_check_uint32("icmplt"
  130. , simd_icmplt(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  131. , 0, 0, 0, 0
  132. );
  133. simd_check_uint32("icmpgt"
  134. , simd_icmpgt(simd_ild(0, 1, 2, 3), simd_ild(0, uint32_t(-2), 1, 3) )
  135. , 0, 0xffffffff, 0xffffffff, 0
  136. );
  137. }
  138. TEST(simd_test)
  139. {
  140. simd_check_bool("test_any_xyzw"
  141. , simd_test_any_xyzw(simd_ild(0xffffffff, 0, 0, 0) )
  142. , true
  143. );
  144. simd_check_bool("test_all_xyzw"
  145. , simd_test_all_xyzw(simd_ild(0xffffffff, 0, 0xffffffff, 0) )
  146. , false
  147. );
  148. simd_check_bool("test_all_xyzw"
  149. , simd_test_all_xyzw(simd_ild(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff) )
  150. , true
  151. );
  152. simd_check_bool("test_all_xw"
  153. , simd_test_all_xw(simd_ild(0xffffffff, 0, 0, 0xffffffff) )
  154. , true
  155. );
  156. simd_check_bool("test_all_xzw"
  157. , simd_test_all_xzw(simd_ild(0xffffffff, 0, 0, 0xffffffff) )
  158. , false
  159. );
  160. }
  161. TEST(simd_load)
  162. {
  163. simd_check_float("ld"
  164. , simd_ld(0.0f, 1.0f, 2.0f, 3.0f)
  165. , 0.0f, 1.0f, 2.0f, 3.0f
  166. );
  167. simd_check_int32("ild"
  168. , simd_ild(uint32_t(-1), 0, 1, 2)
  169. , uint32_t(-1), 0, 1, 2
  170. );
  171. simd_check_int32("ild"
  172. , simd_ild(uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4) )
  173. , uint32_t(-1), uint32_t(-2), uint32_t(-3), uint32_t(-4)
  174. );
  175. simd_check_uint32("zero", simd_zero()
  176. , 0, 0, 0, 0
  177. );
  178. simd_check_uint32("isplat", simd_isplat(0x80000001)
  179. , 0x80000001, 0x80000001, 0x80000001, 0x80000001
  180. );
  181. simd_check_float("isplat", simd_splat(1.0f)
  182. , 1.0f, 1.0f, 1.0f, 1.0f
  183. );
  184. }
  185. TEST(simd_arithmetic)
  186. {
  187. simd_check_float("madd"
  188. , simd_madd(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(4.0f, 5.0f, 6.0f, 7.0f), simd_ld(8.0f, 9.0f, 10.0f, 11.0f) )
  189. , 8.0f, 14.0f, 22.0f, 32.0f
  190. );
  191. simd_check_float("cross3"
  192. , simd_cross3(simd_ld(1.0f, 0.0f, 0.0f, 0.0f), simd_ld(0.0f, 1.0f, 0.0f, 0.0f) )
  193. , 0.0f, 0.0f, 1.0f, 0.0f
  194. );
  195. }
  196. TEST(simd_sqrt)
  197. {
  198. simd_check_float("simd_sqrt"
  199. , simd_sqrt(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  200. , 1.0f, 4.0f, 256.0f, 351.363060096f
  201. );
  202. simd_check_float("simd_sqrt_nr_ni"
  203. , simd_sqrt_nr_ni(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  204. , 1.0f, 4.0f, 256.0f, 351.363060096f
  205. );
  206. simd_check_float("simd_sqrt_nr1_ni"
  207. , simd_sqrt_nr1_ni(simd_ld(1.0f, 16.0f, 65536.0f, 123456.0f) )
  208. , 1.0f, 4.0f, 256.0f, 351.363060096f
  209. );
  210. }
  211. TEST(float4)
  212. {
  213. const simd128_t isplat = simd_isplat(0x80000001);
  214. simd_check_uint32("sll"
  215. , simd_sll(isplat, 1)
  216. , 0x00000002, 0x00000002, 0x00000002, 0x00000002
  217. );
  218. simd_check_uint32("srl"
  219. , simd_srl(isplat, 1)
  220. , 0x40000000, 0x40000000, 0x40000000, 0x40000000
  221. );
  222. simd_check_uint32("sra"
  223. , simd_sra(isplat, 1)
  224. , 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000
  225. );
  226. simd_check_uint32("and"
  227. , simd_and(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  228. , 0, 0, 0, 0
  229. );
  230. simd_check_uint32("or "
  231. , simd_or(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  232. , uint32_t(-1), uint32_t(-1), uint32_t(-1), uint32_t(-1)
  233. );
  234. simd_check_uint32("xor"
  235. , simd_or(simd_isplat(0x55555555), simd_isplat(0xaaaaaaaa) )
  236. , uint32_t(-1), uint32_t(-1), uint32_t(-1), uint32_t(-1)
  237. );
  238. simd_check_int32("imin"
  239. , simd_imin(simd_ild(0, 1, 2, 3), simd_ild(uint32_t(-1), 2, uint32_t(-2), 1) )
  240. , uint32_t(-1), 1, uint32_t(-2), 1
  241. );
  242. simd_check_float("min"
  243. , simd_min(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(-1.0f, 2.0f, -2.0f, 1.0f) )
  244. , -1.0f, 1.0f, -2.0f, 1.0f
  245. );
  246. simd_check_int32("imax"
  247. , simd_imax(simd_ild(0, 1, 2, 3), simd_ild(uint32_t(-1), 2, uint32_t(-2), 1) )
  248. , 0, 2, 2, 3
  249. );
  250. simd_check_float("max"
  251. , simd_max(simd_ld(0.0f, 1.0f, 2.0f, 3.0f), simd_ld(-1.0f, 2.0f, -2.0f, 1.0f) )
  252. , 0.0f, 2.0f, 2.0f, 3.0f
  253. );
  254. }