testautomation_intrinsics.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688
  1. /**
  2. * Intrinsics test suite
  3. */
  4. /* Disable intrinsics that are unsupported by the current compiler */
  5. #include <build_config/SDL_build_config.h>
  6. #include <SDL3/SDL.h>
  7. #include <SDL3/SDL_intrin.h>
  8. #include <SDL3/SDL_test.h>
  9. #include "testautomation_suites.h"
  10. // FIXME: missing tests for loongarch lsx/lasx
  11. // FIXME: missing tests for powerpc altivec
  12. /* ================= Test Case Implementation ================== */
  13. /* Helper functions */
  14. static int allocate_random_int_arrays(Sint32 **dest, Sint32 **a, Sint32 **b, size_t *size) {
  15. size_t i;
  16. *size = (size_t)SDLTest_RandomIntegerInRange(127, 999);
  17. *dest = SDL_malloc(sizeof(Sint32) * *size);
  18. *a = SDL_malloc(sizeof(Sint32) * *size);
  19. *b = SDL_malloc(sizeof(Sint32) * *size);
  20. if (!*dest || !*a || !*b) {
  21. SDLTest_AssertCheck(SDL_FALSE, "SDL_malloc failed");
  22. return -1;
  23. }
  24. for (i = 0; i < *size; ++i) {
  25. (*a)[i] = SDLTest_RandomSint32();
  26. (*b)[i] = SDLTest_RandomSint32();
  27. }
  28. return 0;
  29. }
  30. static int allocate_random_float_arrays(float **dest, float **a, float **b, size_t *size) {
  31. size_t i;
  32. *size = (size_t)SDLTest_RandomIntegerInRange(127, 999);
  33. *dest = SDL_malloc(sizeof(float) * *size);
  34. *a = SDL_malloc(sizeof(float) * *size);
  35. *b = SDL_malloc(sizeof(float) * *size);
  36. if (!*dest || !*a || !*b) {
  37. SDLTest_AssertCheck(SDL_FALSE, "SDL_malloc failed");
  38. return -1;
  39. }
  40. for (i = 0; i < *size; ++i) {
  41. (*a)[i] = SDLTest_RandomUnitFloat();
  42. (*b)[i] = SDLTest_RandomUnitFloat();
  43. }
  44. return 0;
  45. }
  46. static int allocate_random_double_arrays(double **dest, double **a, double **b, size_t *size) {
  47. size_t i;
  48. *size = (size_t)SDLTest_RandomIntegerInRange(127, 999);
  49. *dest = SDL_malloc(sizeof(double) * *size);
  50. *a = SDL_malloc(sizeof(double) * *size);
  51. *b = SDL_malloc(sizeof(double) * *size);
  52. if (!*dest || !*a || !*b) {
  53. SDLTest_AssertCheck(SDL_FALSE, "SDL_malloc failed");
  54. return -1;
  55. }
  56. for (i = 0; i < *size; ++i) {
  57. (*a)[i] = SDLTest_RandomUnitDouble();
  58. (*b)[i] = SDLTest_RandomUnitDouble();
  59. }
  60. return 0;
  61. }
  62. static void free_arrays(void *dest, void *a, void *b) {
  63. SDL_free(dest);
  64. SDL_free(a);
  65. SDL_free(b);
  66. }
  67. /**
  68. * Verify element-wise addition of 2 int arrays.
  69. */
  70. static void verify_ints_addition(const Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size, const char *desc) {
  71. size_t i;
  72. int all_good = 1;
  73. for (i = 0; i < size; ++i) {
  74. Sint32 expected = a[i] + b[i];
  75. if (dest[i] != expected) {
  76. SDLTest_AssertCheck(SDL_FALSE, "%"SDL_PRIs32" + %"SDL_PRIs32" = %"SDL_PRIs32", expected %"SDL_PRIs32" ([%"SDL_PRIu32"/%"SDL_PRIu32"] %s)",
  77. a[i], b[i], dest[i], expected, (Uint32)i, (Uint32)size, desc);
  78. all_good = 0;
  79. }
  80. }
  81. if (all_good) {
  82. SDLTest_AssertCheck(SDL_TRUE, "All int additions were correct (%s)", desc);
  83. }
  84. }
  85. /**
  86. * Verify element-wise multiplication of 2 int arrays.
  87. */
  88. static void verify_ints_multiplication(const Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size, const char *desc) {
  89. size_t i;
  90. int all_good = 1;
  91. for (i = 0; i < size; ++i) {
  92. Sint32 expected = a[i] * b[i];
  93. if (dest[i] != expected) {
  94. SDLTest_AssertCheck(SDL_FALSE, "%"SDL_PRIs32" * %"SDL_PRIs32" = %"SDL_PRIs32", expected %"SDL_PRIs32" ([%"SDL_PRIu32"/%"SDL_PRIu32"] %s)",
  95. a[i], b[i], dest[i], expected, (Uint32)i, (Uint32)size, desc);
  96. all_good = 0;
  97. }
  98. }
  99. if (all_good) {
  100. SDLTest_AssertCheck(SDL_TRUE, "All int multiplication were correct (%s)", desc);
  101. }
  102. }
  103. /**
  104. * Verify element-wise addition of 2 float arrays.
  105. */
  106. static void verify_floats_addition(const float *dest, const float *a, const float *b, size_t size, const char *desc) {
  107. size_t i;
  108. int all_good = 1;
  109. for (i = 0; i < size; ++i) {
  110. float expected = a[i] + b[i];
  111. float abs_error = SDL_fabsf(dest[i] - expected);
  112. if (abs_error > 1.0e-5f) {
  113. SDLTest_AssertCheck(SDL_FALSE, "%g + %g = %g, expected %g (error = %g) ([%"SDL_PRIu32"/%"SDL_PRIu32"] %s)",
  114. a[i], b[i], dest[i], expected, abs_error, (Uint32) i, (Uint32) size, desc);
  115. all_good = 0;
  116. }
  117. }
  118. if (all_good) {
  119. SDLTest_AssertCheck(SDL_TRUE, "All float additions were correct (%s)", desc);
  120. }
  121. }
  122. /**
  123. * Verify element-wise addition of 2 double arrays.
  124. */
  125. static void verify_doubles_addition(const double *dest, const double *a, const double *b, size_t size, const char *desc) {
  126. size_t i;
  127. int all_good = 1;
  128. for (i = 0; i < size; ++i) {
  129. double expected = a[i] + b[i];
  130. double abs_error = SDL_fabs(dest[i] - expected);
  131. if (abs_error > 1.0e-5) {
  132. SDLTest_AssertCheck(abs_error < 1.0e-5f, "%g + %g = %g, expected %g (error = %g) ([%"SDL_PRIu32"/%"SDL_PRIu32"] %s)",
  133. a[i], b[i], dest[i], expected, abs_error, (Uint32) i, (Uint32) size, desc);
  134. all_good = SDL_FALSE;
  135. }
  136. }
  137. if (all_good) {
  138. SDLTest_AssertCheck(SDL_TRUE, "All double additions were correct (%s)", desc);
  139. }
  140. }
  141. /* Intrinsic kernels */
  142. static void kernel_ints_add_cpu(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  143. for (; size; --size, ++dest, ++a, ++b) {
  144. *dest = *a + *b;
  145. }
  146. }
  147. static void kernel_ints_mul_cpu(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  148. for (; size; --size, ++dest, ++a, ++b) {
  149. *dest = *a * *b;
  150. }
  151. }
  152. static void kernel_floats_add_cpu(float *dest, const float *a, const float *b, size_t size) {
  153. for (; size; --size, ++dest, ++a, ++b) {
  154. *dest = *a + *b;
  155. }
  156. }
  157. static void kernel_doubles_add_cpu(double *dest, const double *a, const double *b, size_t size) {
  158. for (; size; --size, ++dest, ++a, ++b) {
  159. *dest = *a + *b;
  160. }
  161. }
  162. #ifdef SDL_MMX_INTRINSICS
  163. SDL_TARGETING("mmx") static void kernel_ints_add_mmx(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  164. for (; size >= 2; size -= 2, dest += 2, a += 2, b += 2) {
  165. *(__m64*)dest = _mm_add_pi32(*(__m64*)a, *(__m64*)b);
  166. }
  167. if (size) {
  168. *dest = *a + *b;
  169. }
  170. _mm_empty();
  171. }
  172. #endif
  173. #ifdef SDL_SSE_INTRINSICS
  174. SDL_TARGETING("sse") static void kernel_floats_add_sse(float *dest, const float *a, const float *b, size_t size) {
  175. for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
  176. _mm_storeu_ps(dest, _mm_add_ps(_mm_loadu_ps(a), _mm_loadu_ps (b)));
  177. }
  178. for (; size; size--, ++dest, ++a, ++b) {
  179. *dest = *a + *b;
  180. }
  181. }
  182. #endif
  183. #ifdef SDL_SSE2_INTRINSICS
  184. SDL_TARGETING("sse2") static void kernel_doubles_add_sse2(double *dest, const double *a, const double *b, size_t size) {
  185. for (; size >= 2; size -= 2, dest += 2, a += 2, b += 2) {
  186. _mm_storeu_pd(dest, _mm_add_pd(_mm_loadu_pd(a), _mm_loadu_pd(b)));
  187. }
  188. if (size) {
  189. *dest = *a + *b;
  190. }
  191. }
  192. #endif
  193. #ifdef SDL_SSE3_INTRINSICS
  194. SDL_TARGETING("sse3") static void kernel_ints_add_sse3(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  195. for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
  196. _mm_storeu_si128((__m128i*)dest, _mm_add_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b)));
  197. }
  198. for (;size; --size, ++dest, ++a, ++b) {
  199. *dest = *a + *b;
  200. }
  201. }
  202. #endif
  203. #ifdef SDL_SSE4_1_INTRINSICS
  204. SDL_TARGETING("sse4.1") static void kernel_ints_mul_sse4_1(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  205. for (; size >= 4; size -= 4, dest += 4, a += 4, b += 4) {
  206. _mm_storeu_si128((__m128i*)dest, _mm_mullo_epi32(_mm_lddqu_si128((__m128i*)a), _mm_lddqu_si128((__m128i*)b)));
  207. }
  208. for (;size; --size, ++dest, ++a, ++b) {
  209. *dest = *a * *b;
  210. }
  211. }
  212. #endif
  213. #ifdef SDL_SSE4_2_INTRINSICS
  214. SDL_TARGETING("sse4.2") static Uint32 calculate_crc32c_sse4_2(const char *text) {
  215. Uint32 crc32c = ~0;
  216. size_t len = SDL_strlen(text);
  217. #if defined(__x86_64__) || defined(_M_X64)
  218. for (; len >= 8; len -= 8, text += 8) {
  219. crc32c = (Uint32)_mm_crc32_u64(crc32c, *(Sint64*)text);
  220. }
  221. if (len >= 4) {
  222. crc32c = (Uint32)_mm_crc32_u32(crc32c, *(Sint32*)text);
  223. len -= 4;
  224. text += 4;
  225. }
  226. #else
  227. for (; len >= 4; len -= 4, text += 4) {
  228. crc32c = (Uint32)_mm_crc32_u32(crc32c, *(Sint32*)text);
  229. }
  230. #endif
  231. if (len >= 2) {
  232. crc32c = (Uint32)_mm_crc32_u16(crc32c, *(Sint16*)text);
  233. len -= 2;
  234. text += 2;
  235. }
  236. if (len) {
  237. crc32c = (Uint32)_mm_crc32_u8(crc32c, *text);
  238. }
  239. return ~crc32c;
  240. }
  241. #endif
  242. #ifdef SDL_AVX_INTRINSICS
  243. SDL_TARGETING("avx") static void kernel_floats_add_avx(float *dest, const float *a, const float *b, size_t size) {
  244. for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
  245. _mm256_storeu_ps(dest, _mm256_add_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b)));
  246. }
  247. for (; size; size--, ++dest, ++a, ++b) {
  248. *dest = *a + *b;
  249. }
  250. }
  251. #endif
  252. #ifdef SDL_AVX2_INTRINSICS
  253. SDL_TARGETING("avx2") static void kernel_ints_add_avx2(Sint32 *dest, const Sint32 *a, const Sint32 *b, size_t size) {
  254. for (; size >= 8; size -= 8, dest += 8, a += 8, b += 8) {
  255. _mm256_storeu_si256((__m256i*)dest, _mm256_add_epi32(_mm256_loadu_si256((__m256i*)a), _mm256_loadu_si256((__m256i*)b)));
  256. }
  257. for (; size; size--, ++dest, ++a, ++b) {
  258. *dest = *a + *b;
  259. }
  260. }
  261. #endif
  262. #ifdef SDL_AVX512F_INTRINSICS
  263. SDL_TARGETING("avx512f") static void kernel_floats_add_avx512f(float *dest, const float *a, const float *b, size_t size) {
  264. for (; size >= 16; size -= 16, dest += 16, a += 16, b += 16) {
  265. _mm512_storeu_ps(dest, _mm512_add_ps(_mm512_loadu_ps(a), _mm512_loadu_ps(b)));
  266. }
  267. for (; size; --size) {
  268. *dest++ = *a++ + *b++;
  269. }
  270. }
  271. #endif
  272. /* Test case functions */
  273. static int intrinsics_selftest(void *arg)
  274. {
  275. {
  276. size_t size;
  277. Sint32 *dest, *a, *b;
  278. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  279. return TEST_ABORTED;
  280. }
  281. kernel_ints_mul_cpu(dest, a, b, size);
  282. verify_ints_multiplication(dest, a, b, size, "CPU");
  283. free_arrays(dest, a, b);
  284. }
  285. {
  286. size_t size;
  287. Sint32 *dest, *a, *b;
  288. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  289. return TEST_ABORTED;
  290. }
  291. kernel_ints_add_cpu(dest, a, b, size);
  292. verify_ints_addition(dest, a, b, size, "CPU");
  293. free_arrays(dest, a, b);
  294. }
  295. {
  296. size_t size;
  297. float *dest, *a, *b;
  298. if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) {
  299. return TEST_ABORTED;
  300. }
  301. kernel_floats_add_cpu(dest, a, b, size);
  302. verify_floats_addition(dest, a, b, size, "CPU");
  303. free_arrays(dest, a, b);
  304. }
  305. {
  306. size_t size;
  307. double *dest, *a, *b;
  308. if (allocate_random_double_arrays(&dest, &a, &b, &size) < 0) {
  309. return TEST_ABORTED;
  310. }
  311. kernel_doubles_add_cpu(dest, a, b, size);
  312. verify_doubles_addition(dest, a, b, size, "CPU");
  313. free_arrays(dest, a, b);
  314. }
  315. return TEST_COMPLETED;
  316. }
  317. static int intrinsics_testMMX(void *arg)
  318. {
  319. if (SDL_HasMMX()) {
  320. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has MMX support.");
  321. #ifdef SDL_MMX_INTRINSICS
  322. {
  323. size_t size;
  324. Sint32 *dest, *a, *b;
  325. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses MMX intrinsics.");
  326. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  327. return TEST_ABORTED;
  328. }
  329. kernel_ints_add_mmx(dest, a, b, size);
  330. verify_ints_addition(dest, a, b, size, "MMX");
  331. free_arrays(dest, a, b);
  332. return TEST_COMPLETED;
  333. }
  334. #else
  335. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use MMX intrinsics.");
  336. #endif
  337. } else {
  338. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO MMX support.");
  339. }
  340. return TEST_SKIPPED;
  341. }
  342. static int intrinsics_testSSE(void *arg)
  343. {
  344. if (SDL_HasSSE()) {
  345. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE support.");
  346. #ifdef SDL_SSE_INTRINSICS
  347. {
  348. size_t size;
  349. float *dest, *a, *b;
  350. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE intrinsics.");
  351. if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) {
  352. return TEST_ABORTED;
  353. }
  354. kernel_floats_add_sse(dest, a, b, size);
  355. verify_floats_addition(dest, a, b, size, "SSE");
  356. free_arrays(dest, a, b);
  357. return TEST_COMPLETED;
  358. }
  359. #else
  360. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE intrinsics.");
  361. #endif
  362. } else {
  363. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE support.");
  364. }
  365. return TEST_SKIPPED;
  366. }
  367. static int intrinsics_testSSE2(void *arg)
  368. {
  369. if (SDL_HasSSE2()) {
  370. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE2 support.");
  371. #ifdef SDL_SSE2_INTRINSICS
  372. {
  373. size_t size;
  374. double *dest, *a, *b;
  375. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE2 intrinsics.");
  376. if (allocate_random_double_arrays(&dest, &a, &b, &size) < 0) {
  377. return TEST_ABORTED;
  378. }
  379. kernel_doubles_add_sse2(dest, a, b, size);
  380. verify_doubles_addition(dest, a, b, size, "SSE2");
  381. free_arrays(dest, a, b);
  382. return TEST_COMPLETED;
  383. }
  384. #else
  385. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE2 intrinsics.");
  386. #endif
  387. } else {
  388. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE2 support.");
  389. }
  390. return TEST_SKIPPED;
  391. }
  392. static int intrinsics_testSSE3(void *arg)
  393. {
  394. if (SDL_HasSSE3()) {
  395. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE3 support.");
  396. #ifdef SDL_SSE3_INTRINSICS
  397. {
  398. size_t size;
  399. Sint32 *dest, *a, *b;
  400. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE3 intrinsics.");
  401. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  402. return TEST_ABORTED;
  403. }
  404. kernel_ints_add_sse3(dest, a, b, size);
  405. verify_ints_addition(dest, a, b, size, "SSE3");
  406. free_arrays(dest, a, b);
  407. return TEST_COMPLETED;
  408. }
  409. #else
  410. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE3 intrinsics.");
  411. #endif
  412. } else {
  413. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE3 support.");
  414. }
  415. return TEST_SKIPPED;
  416. }
  417. static int intrinsics_testSSE4_1(void *arg)
  418. {
  419. if (SDL_HasSSE41()) {
  420. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE4.1 support.");
  421. #ifdef SDL_SSE4_1_INTRINSICS
  422. {
  423. size_t size;
  424. Sint32 *dest, *a, *b;
  425. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE4.1 intrinsics.");
  426. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  427. return TEST_ABORTED;
  428. }
  429. kernel_ints_mul_sse4_1(dest, a, b, size);
  430. verify_ints_multiplication(dest, a, b, size, "SSE4.1");
  431. free_arrays(dest, a, b);
  432. return TEST_COMPLETED;
  433. }
  434. #else
  435. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE4.1 intrinsics.");
  436. #endif
  437. } else {
  438. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE4.1 support.");
  439. }
  440. return TEST_SKIPPED;
  441. }
  442. static int intrinsics_testSSE4_2(void *arg)
  443. {
  444. if (SDL_HasSSE42()) {
  445. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has SSE4.2 support.");
  446. #ifdef SDL_SSE4_2_INTRINSICS
  447. {
  448. struct {
  449. const char *input;
  450. Uint32 crc32c;
  451. } references[] = {
  452. {"", 0x00000000},
  453. {"Hello world", 0x72b51f78},
  454. {"Simple DirectMedia Layer", 0x56f85341, },
  455. };
  456. size_t i;
  457. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses SSE4.2 intrinsics.");
  458. for (i = 0; i < SDL_arraysize(references); ++i) {
  459. Uint32 actual = calculate_crc32c_sse4_2(references[i].input);
  460. SDLTest_AssertCheck(actual == references[i].crc32c, "CRC32-C(\"%s\")=0x%08x, got 0x%08x",
  461. references[i].input, references[i].crc32c, actual);
  462. }
  463. return TEST_COMPLETED;
  464. }
  465. #else
  466. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use SSE4.2 intrinsics.");
  467. #endif
  468. } else {
  469. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO SSE4.2 support.");
  470. }
  471. return TEST_SKIPPED;
  472. }
  473. static int intrinsics_testAVX(void *arg)
  474. {
  475. if (SDL_HasAVX()) {
  476. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX support.");
  477. #ifdef SDL_AVX_INTRINSICS
  478. {
  479. size_t size;
  480. float *dest, *a, *b;
  481. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses AVX intrinsics.");
  482. if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) {
  483. return TEST_ABORTED;
  484. }
  485. kernel_floats_add_avx(dest, a, b, size);
  486. verify_floats_addition(dest, a, b, size, "AVX");
  487. free_arrays(dest, a, b);
  488. return TEST_COMPLETED;
  489. }
  490. #else
  491. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use AVX intrinsics.");
  492. #endif
  493. } else {
  494. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO AVX support.");
  495. }
  496. return TEST_SKIPPED;
  497. }
  498. static int intrinsics_testAVX2(void *arg)
  499. {
  500. if (SDL_HasAVX2()) {
  501. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX2 support.");
  502. #ifdef SDL_AVX2_INTRINSICS
  503. {
  504. size_t size;
  505. Sint32 *dest, *a, *b;
  506. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses AVX2 intrinsics.");
  507. if (allocate_random_int_arrays(&dest, &a, &b, &size) < 0) {
  508. return TEST_ABORTED;
  509. }
  510. kernel_ints_add_avx2(dest, a, b, size);
  511. verify_ints_addition(dest, a, b, size, "AVX2");
  512. free_arrays(dest, a, b);
  513. return TEST_COMPLETED;
  514. }
  515. #else
  516. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use AVX2 intrinsics.");
  517. #endif
  518. } else {
  519. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO AVX2 support.");
  520. }
  521. return TEST_SKIPPED;
  522. }
  523. static int intrinsics_testAVX512F(void *arg)
  524. {
  525. if (SDL_HasAVX512F()) {
  526. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has AVX512F support.");
  527. #ifdef SDL_AVX512F_INTRINSICS
  528. {
  529. size_t size;
  530. float *dest, *a, *b;
  531. SDLTest_AssertCheck(SDL_TRUE, "Test executable uses AVX512F intrinsics.");
  532. if (allocate_random_float_arrays(&dest, &a, &b, &size) < 0) {
  533. return TEST_ABORTED;
  534. }
  535. kernel_floats_add_avx512f(dest, a, b, size);
  536. verify_floats_addition(dest, a, b, size, "AVX512F");
  537. free_arrays(dest, a, b);
  538. return TEST_COMPLETED;
  539. }
  540. #else
  541. SDLTest_AssertCheck(SDL_TRUE, "Test executable does NOT use AVX512F intrinsics.");
  542. #endif
  543. } else {
  544. SDLTest_AssertCheck(SDL_TRUE, "CPU of test machine has NO AVX512F support.");
  545. }
  546. return TEST_SKIPPED;
  547. }
  548. /* ================= Test References ================== */
  549. /* Intrinsics test cases */
  550. static const SDLTest_TestCaseReference intrinsicsTest1 = {
  551. (SDLTest_TestCaseFp)intrinsics_selftest, "intrinsics_selftest", "Intrinsics testautomation selftest", TEST_ENABLED
  552. };
  553. static const SDLTest_TestCaseReference intrinsicsTest2 = {
  554. (SDLTest_TestCaseFp)intrinsics_testMMX, "intrinsics_testMMX", "Tests MMX intrinsics", TEST_ENABLED
  555. };
  556. static const SDLTest_TestCaseReference intrinsicsTest3 = {
  557. (SDLTest_TestCaseFp)intrinsics_testSSE, "intrinsics_testSSE", "Tests SSE intrinsics", TEST_ENABLED
  558. };
  559. static const SDLTest_TestCaseReference intrinsicsTest4 = {
  560. (SDLTest_TestCaseFp)intrinsics_testSSE2, "intrinsics_testSSE2", "Tests SSE2 intrinsics", TEST_ENABLED
  561. };
  562. static const SDLTest_TestCaseReference intrinsicsTest5 = {
  563. (SDLTest_TestCaseFp)intrinsics_testSSE3, "intrinsics_testSSE3", "Tests SSE3 intrinsics", TEST_ENABLED
  564. };
  565. static const SDLTest_TestCaseReference intrinsicsTest6 = {
  566. (SDLTest_TestCaseFp)intrinsics_testSSE4_1, "intrinsics_testSSE4.1", "Tests SSE4.1 intrinsics", TEST_ENABLED
  567. };
  568. static const SDLTest_TestCaseReference intrinsicsTest7 = {
  569. (SDLTest_TestCaseFp)intrinsics_testSSE4_2, "intrinsics_testSSE4.2", "Tests SSE4.2 intrinsics", TEST_ENABLED
  570. };
  571. static const SDLTest_TestCaseReference intrinsicsTest8 = {
  572. (SDLTest_TestCaseFp)intrinsics_testAVX, "intrinsics_testAVX", "Tests AVX intrinsics", TEST_ENABLED
  573. };
  574. static const SDLTest_TestCaseReference intrinsicsTest9 = {
  575. (SDLTest_TestCaseFp)intrinsics_testAVX2, "intrinsics_testAVX2", "Tests AVX2 intrinsics", TEST_ENABLED
  576. };
  577. static const SDLTest_TestCaseReference intrinsicsTest10 = {
  578. (SDLTest_TestCaseFp)intrinsics_testAVX512F, "intrinsics_testAVX512F", "Tests AVX512F intrinsics", TEST_ENABLED
  579. };
  580. /* Sequence of Platform test cases */
  581. static const SDLTest_TestCaseReference *platformTests[] = {
  582. &intrinsicsTest1,
  583. &intrinsicsTest2,
  584. &intrinsicsTest3,
  585. &intrinsicsTest4,
  586. &intrinsicsTest5,
  587. &intrinsicsTest6,
  588. &intrinsicsTest7,
  589. &intrinsicsTest8,
  590. &intrinsicsTest9,
  591. &intrinsicsTest10,
  592. NULL
  593. };
  594. /* Platform test suite (global) */
  595. SDLTest_TestSuiteReference intrinsicsTestSuite = {
  596. "Intrinsics",
  597. NULL,
  598. platformTests,
  599. NULL
  600. };