testautomation_intrinsics.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717
  1. /**
  2. * Intrinsics test suite
  3. */
  4. #include <SDL3/SDL.h>
  5. #include <SDL3/SDL_intrin.h>
  6. #include <SDL3/SDL_test.h>
  7. #include "testautomation_suites.h"
  8. // FIXME: missing tests for loongarch lsx/lasx
  9. // FIXME: missing tests for powerpc altivec
  10. /* ================= Test Case Implementation ================== */
  11. /* Helper functions */
  12. static int allocate_random_int_arrays(Sint32 **dest, Sint32 **a, Sint32 **b, size_t *size) {
  13. size_t i;
  14. *size = (size_t)SDLTest_RandomIntegerInRange(127, 999);
  15. *dest = SDL_malloc(sizeof(Sint32) * *size);
  16. *a = SDL_malloc(sizeof(Sint32) * *size);
  17. *b = SDL_malloc(sizeof(Sint32) * *size);
  18. if (!*dest || !*a || !*b) {
  19. SDLTest_AssertCheck(SDL_FALSE, "SDL_malloc failed");
  20. return -1;
  21. }
  22. for (i = 0; i < *size; ++i) {
  23. (*a)[i] = SDLTest_RandomSint32();
  24. (*b)[i] = SDLTest_RandomSint32();
  25. }
  26. return 0;
  27. }
  28. static int allocate_random_float_arrays(float **dest, float **a, float **b, size_t *size) {
  29. size_t i;
  30. *size = (size_t)SDLTest_RandomIntegerInRange(127, 999);
  31. *dest = SDL_malloc(sizeof(float) * *size);
  32. *a = SDL_malloc(sizeof(float) * *size);
  33. *b = SDL_malloc(sizeof(float) * *size);
  34. if (!*dest || !*a || !*b) {
  35. SDLTest_AssertCheck(SDL_FALSE, "SDL_malloc failed");
  36. return -1;
  37. }
  38. for (i = 0; i < *size; ++i) {
  39. (*a)[i] = SDLTest_RandomUnitFloat();
  40. (*b)[i] = SDLTest_RandomUnitFloat();
  41. }
  42. return 0;
  43. }
  44. static int allocate_random_double_arrays(double **dest, double **a, double **b, size_t *size) {
  45. size_t i;
  46. *size = (size_t)SDLTest_RandomIntegerInRange(127, 999);
  47. *dest = SDL_malloc(sizeof(double) * *size);
  48. *a = SDL_malloc(sizeof(double) * *size);
  49. *b = SDL_malloc(sizeof(double) * *size);
  50. if (!*dest || !*a || !*b) {
  51. SDLTest_AssertCheck(SDL_FALSE, "SDL_malloc failed");
  52. return -1;
  53. }
  54. for (i = 0; i < *size; ++i) {
  55. (*a)[i] = SDLTest_RandomUnitDouble();
  56. (*b)[i] = SDLTest_RandomUnitDouble();
  57. }
  58. return 0;
  59. }
  60. static void free_arrays(void *dest, void *a, void *b) {
  61. SDL_free(dest);
  62. SDL_free(a);
  63. SDL_free(b);
  64. }
  65. /**
  66. * \brief Verify element-wise addition of 2 int arrays.
  67. */
  68. static void verify_ints_addition(const Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size, const char *desc) {
  69. size_t i;
  70. int all_good = 1;
  71. for (i = 0; i < size; ++i) {
  72. Sint32 expected = a[i] + b[i];
  73. if (dest[i] != expected) {
  74. SDLTest_AssertCheck(SDL_FALSE, "%"SDL_PRIs32" + %"SDL_PRIs32" = %"SDL_PRIs32", expected %"SDL_PRIs32" ([%"SDL_PRIu32"/%"SDL_PRIu32"] %s)",
  75. a[i], b[i], dest[i], expected, (Uint32)i, (Uint32)size, desc);
  76. all_good = 0;
  77. }
  78. }
  79. if (all_good) {
  80. SDLTest_AssertCheck(SDL_TRUE, "All int additions were correct (%s)", desc);
  81. }
  82. }
  83. /**
  84. * \brief Verify element-wise multiplication of 2 int arrays.
  85. */
  86. static void verify_ints_multiplication(const Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size, const char *desc) {
  87. size_t i;
  88. int all_good = 1;
  89. for (i = 0; i < size; ++i) {
  90. Sint32 expected = a[i] * b[i];
  91. if (dest[i] != expected) {
  92. SDLTest_AssertCheck(SDL_FALSE, "%"SDL_PRIs32" * %"SDL_PRIs32" = %"SDL_PRIs32", expected %"SDL_PRIs32" ([%"SDL_PRIu32"/%"SDL_PRIu32"] %s)",
  93. a[i], b[i], dest[i], expected, (Uint32)i, (Uint32)size, desc);
  94. all_good = 0;
  95. }
  96. }
  97. if (all_good) {
  98. SDLTest_AssertCheck(SDL_TRUE, "All int multiplication were correct (%s)", desc);
  99. }
  100. }
  101. /**
  102. * \brief Verify element-wise addition of 2 float arrays.
  103. */
  104. static void verify_floats_addition(const float *dest, const float *a, const float *b, size_t size, const char *desc) {
  105. size_t i;
  106. int all_good = 1;
  107. for (i = 0; i < size; ++i) {
  108. float expected = a[i] + b[i];
  109. float abs_error = SDL_fabsf(dest[i] - expected);
  110. if (abs_error > 1.0e-5f) {
  111. SDLTest_AssertCheck(SDL_FALSE, "%g + %g = %g, expected %g (error = %g) ([%"SDL_PRIu32"/%"SDL_PRIu32"] %s)",
  112. a[i], b[i], dest[i], expected, abs_error, (Uint32) i, (Uint32) size, desc);
  113. all_good = 0;
  114. }
  115. }
  116. if (all_good) {
  117. SDLTest_AssertCheck(SDL_TRUE, "All float additions were correct (%s)", desc);
  118. }
  119. }
  120. /**
  121. * \brief Verify element-wise addition of 2 double arrays.
  122. */
  123. static void verify_doubles_addition(const double *dest, const double *a, const double *b, size_t size, const char *desc) {
  124. size_t i;
  125. int all_good = 1;
  126. for (i = 0; i < size; ++i) {
  127. double expected = a[i] + b[i];
  128. double abs_error = SDL_fabs(dest[i] - expected);
  129. if (abs_error > 1.0e-5) {
  130. SDLTest_AssertCheck(abs_error < 1.0e-5f, "%g + %g = %g, expected %g (error = %g) ([%"SDL_PRIu32"/%"SDL_PRIu32"] %s)",
  131. a[i], b[i], dest[i], expected, abs_error, (Uint32) i, (Uint32) size, desc);
  132. all_good = SDL_FALSE;
  133. }
  134. }
  135. if (all_good) {
  136. SDLTest_AssertCheck(SDL_TRUE, "All double additions were correct (%s)", desc);
  137. }
  138. }
  139. /* Intrinsic kernels */
  140. static void kernel_ints_add_cpu(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  141. for (; size; --size, ++dest, ++a, ++b) {
  142. *dest = *a + *b;
  143. }
  144. }
  145. static void kernel_ints_mul_cpu(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  146. for (; size; --size, ++dest, ++a, ++b) {
  147. *dest = *a * *b;
  148. }
  149. }
  150. static void kernel_floats_add_cpu(float *dest, const float *a, const float *b, size_t size) {
  151. for (; size; --size, ++dest, ++a, ++b) {
  152. *dest = *a + *b;
  153. }
  154. }
  155. static void kernel_doubles_add_cpu(double *dest, const double *a, const double *b, size_t size) {
  156. for (; size; --size, ++dest, ++a, ++b) {
  157. *dest = *a + *b;
  158. }
  159. }
  160. #if SDL_MMX_INTRINSICS
  161. SDL_TARGETING("mmx") static void kernel_ints_add_mmx(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  162. for (; size >= 2; size -= 2, dest += 2, a += 2, b += 2) {
  163. *(__m64*)dest = _mm_add_pi32(*(__m64*)a, *(__m64*)b);
  164. }
  165. if (size) {
  166. *dest = *a + *b;
  167. }
  168. _mm_empty();
  169. }
  170. #endif
  171. #if SDL_SSE_INTRINSICS
  172. SDL_TARGETING("sse") static void kernel_floats_add_sse(float *dest, const float *a, const float *b, size_t size) {
  173. for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
  174. _mm_storeu_ps(dest, _mm_add_ps(_mm_loadu_ps(a), _mm_loadu_ps (b)));
  175. }
  176. for (; size; size--, ++dest, ++a, ++b) {
  177. *dest = *a + *b;
  178. }
  179. }
  180. #endif
  181. #if SDL_SSE2_INTRINSICS
  182. SDL_TARGETING("sse2") static void kernel_doubles_add_sse2(double *dest, const double *a, const double *b, size_t size) {
  183. for (; size >= 2; size -= 2, dest += 2, a += 2, b += 2) {
  184. _mm_store_pd(dest, _mm_add_pd(_mm_loadu_pd(a), _mm_loadu_pd(b)));
  185. }
  186. if (size) {
  187. *dest = *a + *b;
  188. }
  189. }
  190. #endif
  191. #if SDL_SSE3_INTRINSICS
  192. SDL_TARGETING("sse3") static void kernel_ints_add_sse3(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  193. for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
  194. _mm_storeu_si128((__m128i*)dest, _mm_add_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b)));
  195. }
  196. for (;size; --size, ++dest, ++a, ++b) {
  197. *dest = *a + *b;
  198. }
  199. }
  200. #endif
  201. #if SDL_SSE4_1_INTRINSICS
  202. SDL_TARGETING("sse4.1") static void kernel_ints_mul_sse4_1(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  203. for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
  204. _mm_storeu_si128((__m128i*)dest, _mm_mullo_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b)));
  205. }
  206. for (;size; --size, ++dest, ++a, ++b) {
  207. *dest = *a * *b;
  208. }
  209. }
  210. #endif
  211. #if SDL_SSE4_2_INTRINSICS
  212. SDL_TARGETING("sse4.2") static Uint32 calculate_crc32c_sse4_2(const char *text) {
  213. Uint32 crc32c = ~0;
  214. size_t len = SDL_strlen(text);
  215. #if defined(__x86_64__) || defined(_M_X64)
  216. for (; len >= 8; len -= 8, text += 8) {
  217. crc32c = (Uint32)_mm_crc32_u64(crc32c, *(Sint64*)text);
  218. }
  219. if (len >= 4) {
  220. crc32c = (Uint32)_mm_crc32_u32(crc32c, *(Sint32*)text);
  221. len -= 4;
  222. text += 4;
  223. }
  224. #else
  225. for (; len >= 4; len -= 4, text += 4) {
  226. crc32c = (Uint32)_mm_crc32_u32(crc32c, *(Sint32*)text);
  227. }
  228. #endif
  229. if (len >= 2) {
  230. crc32c = (Uint32)_mm_crc32_u16(crc32c, *(Sint16*)text);
  231. len -= 2;
  232. text += 2;
  233. }
  234. if (len) {
  235. crc32c = (Uint32)_mm_crc32_u8(crc32c, *text);
  236. }
  237. return ~crc32c;
  238. }
  239. #endif
  240. #if SDL_AVX_INTRINSICS
  241. SDL_TARGETING("avx") static void kernel_floats_add_avx(float *dest, const float *a, const float *b, size_t size) {
  242. for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
  243. _mm256_storeu_ps(dest, _mm256_add_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b)));
  244. }
  245. for (; size; size--, ++dest, ++a, ++b) {
  246. *dest = *a + *b;
  247. }
  248. }
  249. #endif
  250. #if SDL_AVX2_INTRINSICS
  251. SDL_TARGETING("avx2") static void kernel_ints_add_avx2(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  252. for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
  253. _mm256_storeu_si256((__m256i*)dest, _mm256_add_epi32(_mm256_loadu_si256((__m256i*)a), _mm256_loadu_si256((__m256i*)b)));
  254. }
  255. for (; size; size--, ++dest, ++a, ++b) {
  256. *dest = *a + *b;
  257. }
  258. }
  259. #endif
  260. #if SDL_AVX512F_INTRINSICS
  261. SDL_TARGETING("avx512f") static void kernel_floats_add_avx512f(float *dest, const float *a, const float *b, size_t size) {
  262. for (; size >= 16; size -= 16, dest += 16, a += 16, b += 16) {
  263. _mm512_storeu_ps(dest, _mm512_add_ps(_mm512_loadu_ps(a), _mm512_loadu_ps(b)));
  264. }
  265. for (; size; --size) {
  266. *dest++ = *a++ + *b++;
  267. }
  268. }
  269. #endif
  270. /* Test case functions */
  271. static int intrinsics_selftest(void *arg)
  272. {
  273. {
  274. size_t size;
  275. Sint32 *dest, *a, *b;
  276. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  277. return TEST_ABORTED;
  278. }
  279. kernel_ints_mul_cpu(dest, a, b, size);
  280. verify_ints_multiplication(dest, a, b, size, "CPU");
  281. free_arrays(dest, a, b);
  282. }
  283. {
  284. size_t size;
  285. Sint32 *dest, *a, *b;
  286. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  287. return TEST_ABORTED;
  288. }
  289. kernel_ints_add_cpu(dest, a, b, size);
  290. verify_ints_addition(dest, a, b, size, "CPU");
  291. free_arrays(dest, a, b);
  292. }
  293. {
  294. size_t size;
  295. float *dest, *a, *b;
  296. if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) {
  297. return TEST_ABORTED;
  298. }
  299. kernel_floats_add_cpu(dest, a, b, size);
  300. verify_floats_addition(dest, a, b, size, "CPU");
  301. free_arrays(dest, a, b);
  302. }
  303. {
  304. size_t size;
  305. double *dest, *a, *b;
  306. if (allocate_random_double_arrays(&dest, &a, &b, &size) < 0) {
  307. return TEST_ABORTED;
  308. }
  309. kernel_doubles_add_cpu(dest, a, b, size);
  310. verify_doubles_addition(dest, a, b, size, "CPU");
  311. free_arrays(dest, a, b);
  312. }
  313. return TEST_COMPLETED;
  314. }
  315. static int intrinsics_testRDTSC(void *arg)
  316. {
  317. if (SDL_HasRDTSC()) {
  318. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has RDTSC support.");
  319. #if SDL_RDTSC_INTRINSICS
  320. {
  321. Sint64 ticks;
  322. #if defined(_MSC_VER) || defined(__clang__)
  323. ticks = __rdtsc();
  324. #else
  325. ticks = _rdtsc();
  326. #endif
  327. SDLTest_AssertCheck(SDL_TRUE, "rdtsc returned: %" SDL_PRIu64 " ticks", ticks);
  328. return TEST_COMPLETED;
  329. }
  330. #else
  331. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use RDTSC intrinsics.");
  332. #endif
  333. } else {
  334. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO RDTSC support.");
  335. }
  336. return TEST_SKIPPED;
  337. }
  338. static int intrinsics_testMMX(void *arg)
  339. {
  340. if (SDL_HasMMX()) {
  341. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has MMX support.");
  342. #if SDL_MMX_INTRINSICS
  343. {
  344. size_t size;
  345. Sint32 *dest, *a, *b;
  346. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses MMX intrinsics.");
  347. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  348. return TEST_ABORTED;
  349. }
  350. kernel_ints_add_mmx(dest, a, b, size);
  351. verify_ints_addition(dest, a, b, size, "MMX");
  352. free_arrays(dest, a, b);
  353. return TEST_COMPLETED;
  354. }
  355. #else
  356. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use MMX intrinsics.");
  357. #endif
  358. } else {
  359. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO MMX support.");
  360. }
  361. return TEST_SKIPPED;
  362. }
  363. static int intrinsics_testSSE(void *arg)
  364. {
  365. if (SDL_HasSSE()) {
  366. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE support.");
  367. #if SDL_SSE_INTRINSICS
  368. {
  369. size_t size;
  370. float *dest, *a, *b;
  371. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE intrinsics.");
  372. if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) {
  373. return TEST_ABORTED;
  374. }
  375. kernel_floats_add_sse(dest, a, b, size);
  376. verify_floats_addition(dest, a, b, size, "SSE");
  377. free_arrays(dest, a, b);
  378. return TEST_COMPLETED;
  379. }
  380. #else
  381. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE intrinsics.");
  382. #endif
  383. } else {
  384. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE support.");
  385. }
  386. return TEST_SKIPPED;
  387. }
  388. static int intrinsics_testSSE2(void *arg)
  389. {
  390. if (SDL_HasSSE2()) {
  391. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE2 support.");
  392. #if SDL_SSE2_INTRINSICS
  393. {
  394. size_t size;
  395. double *dest, *a, *b;
  396. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE2 intrinsics.");
  397. if (allocate_random_double_arrays(&dest, &a, &b, &size) < 0) {
  398. return TEST_ABORTED;
  399. }
  400. kernel_doubles_add_sse2(dest, a, b, size);
  401. verify_doubles_addition(dest, a, b, size, "SSE2");
  402. free_arrays(dest, a, b);
  403. return TEST_COMPLETED;
  404. }
  405. #else
  406. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE2 intrinsics.");
  407. #endif
  408. } else {
  409. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE2 support.");
  410. }
  411. return TEST_SKIPPED;
  412. }
  413. static int intrinsics_testSSE3(void *arg)
  414. {
  415. if (SDL_HasSSE3()) {
  416. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE3 support.");
  417. #if SDL_SSE3_INTRINSICS
  418. {
  419. size_t size;
  420. Sint32 *dest, *a, *b;
  421. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE3 intrinsics.");
  422. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  423. return TEST_ABORTED;
  424. }
  425. kernel_ints_add_sse3(dest, a, b, size);
  426. verify_ints_addition(dest, a, b, size, "SSE3");
  427. free_arrays(dest, a, b);
  428. return TEST_COMPLETED;
  429. }
  430. #else
  431. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE3 intrinsics.");
  432. #endif
  433. } else {
  434. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE3 support.");
  435. }
  436. return TEST_SKIPPED;
  437. }
  438. static int intrinsics_testSSE4_1(void *arg)
  439. {
  440. if (SDL_HasSSE41()) {
  441. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE4.1 support.");
  442. #if SDL_SSE4_1_INTRINSICS
  443. {
  444. size_t size;
  445. Sint32 *dest, *a, *b;
  446. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE4.1 intrinsics.");
  447. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  448. return TEST_ABORTED;
  449. }
  450. kernel_ints_mul_sse4_1(dest, a, b, size);
  451. verify_ints_multiplication(dest, a, b, size, "SSE4.1");
  452. free_arrays(dest, a, b);
  453. return TEST_COMPLETED;
  454. }
  455. #else
  456. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE4.1 intrinsics.");
  457. #endif
  458. } else {
  459. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE4.1 support.");
  460. }
  461. return TEST_SKIPPED;
  462. }
  463. static int intrinsics_testSSE4_2(void *arg)
  464. {
  465. if (SDL_HasSSE42()) {
  466. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE4.2 support.");
  467. #if SDL_SSE4_2_INTRINSICS
  468. {
  469. struct {
  470. const char *input;
  471. Uint32 crc32c;
  472. } references[] = {
  473. {"", 0x00000000},
  474. {"Hello world", 0x72b51f78},
  475. {"Simple DirectMedia Layer", 0x56f85341, },
  476. };
  477. size_t i;
  478. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE4.2 intrinsics.");
  479. for (i = 0; i < SDL_arraysize(references); ++i) {
  480. Uint32 actual = calculate_crc32c_sse4_2(references[i].input);
  481. SDLTest_AssertCheck(actual == references[i].crc32c, "CRC32-C(\"%s\")=0x%08x, got 0x%08x",
  482. references[i].input, references[i].crc32c, actual);
  483. }
  484. return TEST_COMPLETED;
  485. }
  486. #else
  487. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE4.2 intrinsics.");
  488. #endif
  489. } else {
  490. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE4.2 support.");
  491. }
  492. return TEST_SKIPPED;
  493. }
  494. static int intrinsics_testAVX(void *arg)
  495. {
  496. if (SDL_HasAVX()) {
  497. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX support.");
  498. #if SDL_AVX_INTRINSICS
  499. {
  500. size_t size;
  501. float *dest, *a, *b;
  502. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses AVX intrinsics.");
  503. if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) {
  504. return TEST_ABORTED;
  505. }
  506. kernel_floats_add_avx(dest, a, b, size);
  507. verify_floats_addition(dest, a, b, size, "AVX");
  508. free_arrays(dest, a, b);
  509. return TEST_COMPLETED;
  510. }
  511. #else
  512. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use AVX intrinsics.");
  513. #endif
  514. } else {
  515. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO AVX support.");
  516. }
  517. return TEST_SKIPPED;
  518. }
  519. static int intrinsics_testAVX2(void *arg)
  520. {
  521. if (SDL_HasAVX2()) {
  522. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX2 support.");
  523. #if SDL_AVX2_INTRINSICS
  524. {
  525. size_t size;
  526. Sint32 *dest, *a, *b;
  527. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses AVX2 intrinsics.");
  528. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  529. return TEST_ABORTED;
  530. }
  531. kernel_ints_add_avx2(dest, a, b, size);
  532. verify_ints_addition(dest, a, b, size, "AVX2");
  533. free_arrays(dest, a, b);
  534. return TEST_COMPLETED;
  535. }
  536. #else
  537. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use AVX2 intrinsics.");
  538. #endif
  539. } else {
  540. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO AVX2 support.");
  541. }
  542. return TEST_SKIPPED;
  543. }
  544. static int intrinsics_testAVX512F(void *arg)
  545. {
  546. if (SDL_HasAVX512F()) {
  547. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX512F support.");
  548. #if SDL_AVX512F_INTRINSICS
  549. {
  550. size_t size;
  551. float *dest, *a, *b;
  552. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses AVX512F intrinsics.");
  553. if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) {
  554. return TEST_ABORTED;
  555. }
  556. kernel_floats_add_avx512f(dest, a, b, size);
  557. verify_floats_addition(dest, a, b, size, "AVX512F");
  558. free_arrays(dest, a, b);
  559. return TEST_COMPLETED;
  560. }
  561. #else
  562. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use AVX512F intrinsics.");
  563. #endif
  564. } else {
  565. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO AVX512F support.");
  566. }
  567. return TEST_SKIPPED;
  568. }
  569. /* ================= Test References ================== */
  570. /* Intrinsics test cases */
  571. static const SDLTest_TestCaseReference intrinsicsTest1 = {
  572. (SDLTest_TestCaseFp)intrinsics_selftest, "intrinsics_selftest", "Intrinsics testautomation selftest", TEST_ENABLED
  573. };
  574. static const SDLTest_TestCaseReference intrinsicsTest2 = {
  575. (SDLTest_TestCaseFp)intrinsics_testRDTSC, "intrinsics_rdtsc", "Tests RDTC intrinsic", TEST_ENABLED
  576. };
  577. static const SDLTest_TestCaseReference intrinsicsTest3 = {
  578. (SDLTest_TestCaseFp)intrinsics_testMMX, "intrinsics_testMMX", "Tests MMX intrinsics", TEST_ENABLED
  579. };
  580. static const SDLTest_TestCaseReference intrinsicsTest4 = {
  581. (SDLTest_TestCaseFp)intrinsics_testSSE, "intrinsics_testSSE", "Tests SSE intrinsics", TEST_ENABLED
  582. };
  583. static const SDLTest_TestCaseReference intrinsicsTest5 = {
  584. (SDLTest_TestCaseFp)intrinsics_testSSE2, "intrinsics_testSSE2", "Tests SSE2 intrinsics", TEST_ENABLED
  585. };
  586. static const SDLTest_TestCaseReference intrinsicsTest6 = {
  587. (SDLTest_TestCaseFp)intrinsics_testSSE3, "intrinsics_testSSE3", "Tests SSE3 intrinsics", TEST_ENABLED
  588. };
  589. static const SDLTest_TestCaseReference intrinsicsTest7 = {
  590. (SDLTest_TestCaseFp)intrinsics_testSSE4_1, "intrinsics_testSSE4.1", "Tests SSE4.1 intrinsics", TEST_ENABLED
  591. };
  592. static const SDLTest_TestCaseReference intrinsicsTest8 = {
  593. (SDLTest_TestCaseFp)intrinsics_testSSE4_2, "intrinsics_testSSE4.2", "Tests SSE4.2 intrinsics", TEST_ENABLED
  594. };
  595. static const SDLTest_TestCaseReference intrinsicsTest9 = {
  596. (SDLTest_TestCaseFp)intrinsics_testAVX, "intrinsics_testAVX", "Tests AVX intrinsics", TEST_ENABLED
  597. };
  598. static const SDLTest_TestCaseReference intrinsicsTest10 = {
  599. (SDLTest_TestCaseFp)intrinsics_testAVX2, "intrinsics_testAVX2", "Tests AVX2 intrinsics", TEST_ENABLED
  600. };
  601. static const SDLTest_TestCaseReference intrinsicsTest11 = {
  602. (SDLTest_TestCaseFp)intrinsics_testAVX512F, "intrinsics_testAVX512F", "Tests AVX512F intrinsics", TEST_ENABLED
  603. };
/* Sequence of Intrinsics test cases, terminated by NULL.
 * The array is still called platformTests although every entry is an
 * intrinsics test case. */
static const SDLTest_TestCaseReference *platformTests[] = {
    &intrinsicsTest1,
    &intrinsicsTest2,
    &intrinsicsTest3,
    &intrinsicsTest4,
    &intrinsicsTest5,
    &intrinsicsTest6,
    &intrinsicsTest7,
    &intrinsicsTest8,
    &intrinsicsTest9,
    &intrinsicsTest10,
    &intrinsicsTest11,
    NULL
};
/* Intrinsics test suite (global) */
SDLTest_TestSuiteReference intrinsicsTestSuite = {
    "Intrinsics", /* suite name */
    NULL,         /* presumably the suite set-up hook (none) - confirm against SDL_test_harness.h */
    platformTests, /* the NULL-terminated list of intrinsics test cases */
    NULL          /* presumably the suite tear-down hook (none) - confirm against SDL_test_harness.h */
};