SimdTest.cpp 68 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760
  1. 
  2. #include "../testTools.h"
  3. #include "../../DFPSR/base/simdExtra.h"
  4. START_TEST(Simd)
  5. printText("\nSIMD test is compiled using:\n");
  6. #ifdef USE_SSE2
  7. printText(" * SSE2\n");
  8. #endif
  9. #ifdef USE_SSSE3
  10. printText(" * SSSE3\n");
  11. #endif
  12. #ifdef USE_AVX
  13. printText(" * AVX\n");
  14. #endif
  15. #ifdef USE_AVX2
  16. printText(" * AVX2\n");
  17. #endif
  18. #ifdef USE_NEON
  19. printText(" * NEON\n");
  20. #endif
  21. // F32x4 Comparisons
  22. ASSERT_EQUAL(F32x4(1.5f), F32x4(1.5f, 1.5f, 1.5f, 1.5f));
  23. ASSERT_EQUAL(F32x4(-1.5f), F32x4(-1.5f, -1.5f, -1.5f, -1.5f));
  24. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
  25. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().x, 1.2f);
  26. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().y, 3.4f);
  27. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().z, 5.6f);
  28. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().w, 7.8f);
  29. ASSERT_NOT_EQUAL(F32x4(1.3f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
  30. ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, -1.4f, 5.6f, 7.8f));
  31. ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.5f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
  32. ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, -7.8f));
  33. // I32x4 Comparisons
  34. ASSERT_EQUAL(I32x4(4), I32x4(4, 4, 4, 4));
  35. ASSERT_EQUAL(I32x4(-4), I32x4(-4, -4, -4, -4));
  36. ASSERT_EQUAL(I32x4(-1, 2, -3, 4), I32x4(-1, 2, -3, 4));
  37. ASSERT_NOT_EQUAL(I32x4(-1, 2, 7, 4), I32x4(-1, 2, -3, 4));
  38. // U32x4 Comparisons
  39. ASSERT_EQUAL(U32x4(4), U32x4(4, 4, 4, 4));
  40. ASSERT_EQUAL(U32x4(1, 2, 3, 4), U32x4(1, 2, 3, 4));
  41. ASSERT_NOT_EQUAL(U32x4(1, 2, 7, 4), U32x4(1, 2, 3, 4));
  42. // U16x8 Comparisons
  43. ASSERT_EQUAL(U16x8((uint16_t)8), U16x8(8, 8, 8, 8, 8, 8, 8, 8));
  44. ASSERT_EQUAL(U16x8((uint32_t)8), U16x8(8, 0, 8, 0, 8, 0, 8, 0));
  45. ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  46. ASSERT_NOT_EQUAL(U16x8(0, 2, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  47. ASSERT_NOT_EQUAL(U16x8(1, 0, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  48. ASSERT_NOT_EQUAL(U16x8(1, 2, 0, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  49. ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 0, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  50. ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 0, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  51. ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 0, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  52. ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 0, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  53. ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  54. ASSERT_NOT_EQUAL(U16x8(1, 2, 0, 4, 5, 0, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  55. ASSERT_NOT_EQUAL(U16x8(1, 0, 3, 4, 5, 6, 0, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  56. ASSERT_NOT_EQUAL(U16x8(0, 2, 3, 4, 0, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  57. ASSERT_NOT_EQUAL(U16x8(0, 0, 0, 0, 0, 0, 0, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  58. // U8x16 Comparisons
  59. ASSERT_EQUAL(U8x16((uint8_t)250), U8x16(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250));
  60. ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  61. ASSERT_NOT_EQUAL(U8x16(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  62. ASSERT_NOT_EQUAL(U8x16(1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  63. ASSERT_NOT_EQUAL(U8x16(1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  64. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  65. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  66. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  67. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 0, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  68. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  69. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  70. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  71. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  72. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 0, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  73. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 0, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  74. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 0, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  75. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 0, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  76. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 0), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  77. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 251, 252, 6, 254, 255), U8x16(1, 2, 3, 4, 5, 9, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  78. ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 0, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 4, 8, 9, 10, 250, 251, 252, 253, 254, 255));
  79. // Macros
  80. #ifdef USE_BASIC_SIMD
  81. { // Truncate float to int
  82. SIMD_F32x4 f = LOAD_VECTOR_F32_SIMD(-1.01f, -0.99f, 0.99f, 1.01f);
  83. SIMD_I32x4 i = LOAD_VECTOR_I32_SIMD(-1, 0, 0, 1);
  84. ASSERT_EQUAL(I32x4(F32_TO_I32_SIMD(f)), I32x4(i));
  85. }
  86. { // Int to float
  87. SIMD_I32x4 n = LOAD_VECTOR_I32_SIMD(123 , 456 , 789 , -1000 );
  88. SIMD_F32x4 r = LOAD_VECTOR_F32_SIMD(123.0f, 456.0f, 789.0f, -1000.0f);
  89. ASSERT_EQUAL(F32x4(I32_TO_F32_SIMD(n)), F32x4(r));
  90. }
  91. { // Signed-unsigned cast
  92. ASSERT_EQUAL(I32x4(REINTERPRET_U32_TO_I32_SIMD(U32x4(1, 2, 3, 4).v)), I32x4(1, 2, 3, 4));
  93. ASSERT_EQUAL(U32x4(REINTERPRET_I32_TO_U32_SIMD(I32x4(1, 2, 3, 4).v)), U32x4(1, 2, 3, 4));
  94. }
  95. { // F32x4
  96. SIMD_F32x4 a = LOAD_VECTOR_F32_SIMD(-1.3f, 2.5f, -3.4f, 4.7f);
  97. SIMD_F32x4 b = LOAD_VECTOR_F32_SIMD(5.2f, -2.0f, 0.1f, 1.9f);
  98. SIMD_F32x4 c = LOAD_SCALAR_F32_SIMD(0.5f);
  99. ASSERT_EQUAL(F32x4(ADD_F32_SIMD(a, b)), F32x4(-1.3f + 5.2f, 2.5f + -2.0f, -3.4f + 0.1f, 4.7f + 1.9f));
  100. ASSERT_EQUAL(F32x4(SUB_F32_SIMD(a, b)), F32x4(-1.3f - 5.2f, 2.5f - -2.0f, -3.4f - 0.1f, 4.7f - 1.9f));
  101. ASSERT_EQUAL(F32x4(ADD_F32_SIMD(a, c)), F32x4(-1.3f + 0.5f, 2.5f + 0.5f, -3.4f + 0.5f, 4.7f + 0.5f));
  102. ASSERT_EQUAL(F32x4(SUB_F32_SIMD(a, c)), F32x4(-1.3f - 0.5f, 2.5f - 0.5f, -3.4f - 0.5f, 4.7f - 0.5f));
  103. ASSERT_EQUAL(F32x4(MUL_F32_SIMD(a, c)), F32x4(-1.3f * 0.5f, 2.5f * 0.5f, -3.4f * 0.5f, 4.7f * 0.5f));
  104. ASSERT_EQUAL(F32x4(MIN_F32_SIMD(a, b)), F32x4(-1.3f, -2.0f, -3.4f, 1.9f));
  105. ASSERT_EQUAL(F32x4(MAX_F32_SIMD(a, b)), F32x4(5.2f, 2.5f, 0.1f, 4.7f));
  106. }
  107. { // I32x4
  108. SIMD_I32x4 a = LOAD_VECTOR_I32_SIMD(-1, 2, -3, 4);
  109. SIMD_I32x4 b = LOAD_VECTOR_I32_SIMD(5, -2, 0, 1);
  110. SIMD_I32x4 c = LOAD_SCALAR_I32_SIMD(4);
  111. ASSERT_EQUAL(I32x4(ADD_I32_SIMD(a, b)), I32x4(4, 0, -3, 5));
  112. ASSERT_EQUAL(I32x4(SUB_I32_SIMD(a, b)), I32x4(-6, 4, -3, 3));
  113. ASSERT_EQUAL(I32x4(ADD_I32_SIMD(a, c)), I32x4(3, 6, 1, 8));
  114. ASSERT_EQUAL(I32x4(SUB_I32_SIMD(a, c)), I32x4(-5, -2, -7, 0));
  115. }
  116. { // U32x4
  117. SIMD_U32x4 a = LOAD_VECTOR_U32_SIMD(4, 5, 6, 7);
  118. SIMD_U32x4 b = LOAD_VECTOR_U32_SIMD(6, 5, 4, 3);
  119. SIMD_U32x4 c = LOAD_SCALAR_U32_SIMD(10);
  120. ASSERT_EQUAL(U32x4(ADD_U32_SIMD(a, b)), U32x4(c));
  121. ASSERT_EQUAL(U32x4(ADD_U32_SIMD(a, c)), U32x4(14, 15, 16, 17));
  122. ASSERT_EQUAL(U32x4(SUB_U32_SIMD(c, b)), U32x4(a));
  123. }
  124. { // U16x8
  125. SIMD_U16x8 a = LOAD_VECTOR_U16_SIMD(1, 2, 3, 4, 5, 6, 7, 8);
  126. SIMD_U16x8 b = LOAD_VECTOR_U16_SIMD(9, 8, 7, 6, 5, 4, 3, 2);
  127. SIMD_U16x8 c = LOAD_SCALAR_U16_SIMD(10);
  128. ASSERT_EQUAL(U16x8(ADD_U16_SIMD(a, b)), U16x8(c));
  129. ASSERT_EQUAL(U16x8(ADD_U16_SIMD(a, c)), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
  130. ASSERT_EQUAL(U16x8(SUB_U16_SIMD(c, b)), U16x8(a));
  131. ASSERT_EQUAL(U16x8(MUL_U16_SIMD(a, b)), U16x8(9, 16, 21, 24, 25, 24, 21, 16));
  132. }
  133. #endif
  134. // Reinterpret (Depends on endianness!)
  135. ASSERT_EQUAL(U16x8(U32x4(12, 34, 56, 78)), U16x8(12, 0, 34, 0, 56, 0, 78, 0));
  136. ASSERT_EQUAL(U16x8(12, 0, 34, 0, 56, 0, 78, 0).get_U32(), U32x4(12, 34, 56, 78));
  137. // Reciprocal: 1 / x
  138. ASSERT_EQUAL(F32x4(0.5f, 1.0f, 2.0f, 4.0f).reciprocal(), F32x4(2.0f, 1.0f, 0.5f, 0.25f));
  139. // Square root: sqrt(x)
  140. ASSERT_EQUAL(F32x4(1.0f, 4.0f, 9.0f, 100.0f).squareRoot(), F32x4(1.0f, 2.0f, 3.0f, 10.0f));
  141. // Reciprocal square root: 1 / sqrt(x)
  142. ASSERT_EQUAL(F32x4(1.0f, 4.0f, 16.0f, 100.0f).reciprocalSquareRoot(), F32x4(1.0f, 0.5f, 0.25f, 0.1f));
  143. // Minimum
  144. ASSERT_EQUAL(min(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(1.1f, 2.2f, 1.0f, -1.0f));
  145. // Maximum
  146. ASSERT_EQUAL(max(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(5.0f, 3.0f, 3.3f, 4.4f));
  147. // Clamp
  148. ASSERT_EQUAL(F32x4(-35.1f, 1.0f, 2.0f, 45.7f).clamp(-1.5f, 1.5f), F32x4(-1.5f, 1.0f, 1.5f, 1.5f));
  149. // F32x4 operations
  150. ASSERT_EQUAL(F32x4(1.1f, -2.2f, 3.3f, 4.0f) + F32x4(2.2f, -4.4f, 6.6f, 8.0f), F32x4(3.3f, -6.6f, 9.9f, 12.0f));
  151. ASSERT_EQUAL(F32x4(-1.5f, -0.5f, 0.5f, 1.5f) + 1.0f, F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
  152. ASSERT_EQUAL(1.0f + F32x4(-1.5f, -0.5f, 0.5f, 1.5f), F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
  153. ASSERT_EQUAL(F32x4(1.1f, 2.2f, 3.3f, 4.4f) - F32x4(0.1f, 0.2f, 0.3f, 0.4f), F32x4(1.0f, 2.0f, 3.0f, 4.0f));
  154. ASSERT_EQUAL(F32x4(1.0f, 2.0f, 3.0f, 4.0f) - 0.5f, F32x4(0.5f, 1.5f, 2.5f, 3.5f));
  155. ASSERT_EQUAL(0.5f - F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(-0.5f, -1.5f, -2.5f, -3.5f));
  156. ASSERT_EQUAL(2.0f * F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(2.0f, 4.0f, 6.0f, 8.0f));
  157. ASSERT_EQUAL(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * -2.0f, F32x4(-2.0f, 4.0f, -6.0f, 8.0f));
  158. ASSERT_EQUAL(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(1.0f, 4.0f, 9.0f, 16.0f));
  159. ASSERT_EQUAL(-F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(-1.0f, 2.0f, -3.0f, 4.0f));
  160. // I32x4 operations
  161. ASSERT_EQUAL(I32x4(1, 2, -3, 4) + I32x4(-2, 4, 6, 8), I32x4(-1, 6, 3, 12));
  162. ASSERT_EQUAL(I32x4(1, -2, 3, 4) - 4, I32x4(-3, -6, -1, 0));
  163. ASSERT_EQUAL(10 + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
  164. ASSERT_EQUAL(I32x4(1, 2, 3, 4) + I32x4(4), I32x4(5, 6, 7, 8));
  165. ASSERT_EQUAL(I32x4(10) + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
  166. ASSERT_EQUAL(I32x4(-3, 6, -9, 12) * I32x4(1, 2, -3, -4), I32x4(-3, 12, 27, -48));
  167. ASSERT_EQUAL(-I32x4(1, -2, 3, -4), I32x4(-1, 2, -3, 4));
  168. // U32x4 operations
  169. ASSERT_EQUAL(U32x4(1, 2, 3, 4) + U32x4(2, 4, 6, 8), U32x4(3, 6, 9, 12));
  170. ASSERT_EQUAL(U32x4(1, 2, 3, 4) + 4, U32x4(5, 6, 7, 8));
  171. ASSERT_EQUAL(10 + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
  172. ASSERT_EQUAL(U32x4(1, 2, 3, 4) + U32x4(4), U32x4(5, 6, 7, 8));
  173. ASSERT_EQUAL(U32x4(10) + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
  174. ASSERT_EQUAL(U32x4(3, 6, 9, 12) - U32x4(1, 2, 3, 4), U32x4(2, 4, 6, 8));
  175. ASSERT_EQUAL(U32x4(3, 6, 9, 12) * U32x4(1, 2, 3, 4), U32x4(3, 12, 27, 48));
  176. // U16x8 operations
  177. ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8(2, 4, 6, 8, 10, 12, 14, 16), U16x8(3, 6, 9, 12, 15, 18, 21, 24));
  178. ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + 8, U16x8(9, 10, 11, 12, 13, 14, 15, 16));
  179. ASSERT_EQUAL(10 + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
  180. ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8((uint16_t)8), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
  181. ASSERT_EQUAL(U16x8((uint16_t)10) + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
  182. ASSERT_EQUAL(U16x8(3, 6, 9, 12, 15, 18, 21, 24) - U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(2, 4, 6, 8, 10, 12, 14, 16));
  183. // U8x16 operations
  184. ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 2, U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18));
  185. ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
  186. ASSERT_EQUAL(
  187. saturatedAddition(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 255), U8x16((uint8_t)250)),
  188. U8x16(251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255)
  189. );
  190. ASSERT_EQUAL(
  191. saturatedSubtraction(
  192. U8x16(128, 128, 128, 0, 255, 255, 0, 200, 123, 80, 46, 46, 46, 255, 255, 255),
  193. U8x16( 0, 128, 255, 0, 255, 0, 255, 100, 23, 81, 45, 46, 47, 128, 127, 200)),
  194. U8x16(128, 0, 0, 0, 0, 255, 0, 100, 100, 0, 1, 0, 0, 127, 128, 55)
  195. );
  196. // Saturated unsigned integer packing
  197. ASSERT_EQUAL(saturateToU8(U16x8(1, 2, 3, 4, 65535, 6, 7, 8), U16x8(9, 10, 11, 12, 1000, 14, 15, 16)), U8x16(1, 2, 3, 4, 255, 6, 7, 8, 9, 10, 11, 12, 255, 14, 15, 16));
  198. // Unsigned integer unpacking
  199. ASSERT_EQUAL(lowerToU32(U16x8(1,2,3,4,5,6,7,8)), U32x4(1, 2, 3, 4));
  200. ASSERT_EQUAL(higherToU32(U16x8(1,2,3,4,5,6,7,8)), U32x4(5, 6, 7, 8));
  201. ASSERT_EQUAL(lowerToU16(U8x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U16x8(1,2,3,4,5,6,7,8));
  202. ASSERT_EQUAL(higherToU16(U8x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U16x8(9,10,11,12,13,14,15,16));
  203. // Reinterpretation
  204. ASSERT_EQUAL(
  205. reinterpret_U8FromU32(U32x4(ENDIAN32_BYTE_0, ENDIAN32_BYTE_1, ENDIAN32_BYTE_2, ENDIAN32_BYTE_3)),
  206. U8x16(
  207. 255, 0, 0, 0,
  208. 0, 255, 0, 0,
  209. 0, 0, 255, 0,
  210. 0, 0, 0, 255
  211. )
  212. );
  213. ASSERT_EQUAL(
  214. reinterpret_U8FromU32(U32x4(
  215. ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
  216. ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
  217. ENDIAN32_BYTE_1,
  218. ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
  219. )),
  220. U8x16(
  221. 255, 0, 255, 0,
  222. 255, 0, 0, 255,
  223. 0, 255, 0, 0,
  224. 0, 255, 0, 255
  225. )
  226. );
  227. ASSERT_EQUAL(
  228. reinterpret_U32FromU8(U8x16(
  229. 255, 0, 255, 0,
  230. 255, 0, 0, 255,
  231. 0, 255, 0, 0,
  232. 0, 255, 0, 255
  233. )),
  234. U32x4(
  235. ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
  236. ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
  237. ENDIAN32_BYTE_1,
  238. ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
  239. )
  240. );
  241. // Bit mask
  242. ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0x0000FFFF, U32x4(0x0000FFFF, 0x00005678, 0x0000F0F0, 0x00000000));
  243. ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0xFFFF0000, U32x4(0xFFFF0000, 0x12340000, 0xF0F00000, 0x00000000));
  244. ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0x0000FFFF, U32x4(0xFFFFFFFF, 0x1234FFFF, 0xF0F0FFFF, 0x0000FFFF));
  245. ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0xFFFF0000, U32x4(0xFFFFFFFF, 0xFFFF5678, 0xFFFFF0F0, 0xFFFF0000));
  246. ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678) & U32x4(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0), U32x4(0xFF00FF00, 0xFFF00000, 0x0000F000, 0x02300670));
  247. ASSERT_EQUAL(U32x4(0xF00F000F, 0xFFF000FF, 0x10010011, 0xABC00000) | U32x4(0x0000FF00, 0xFFFF0000, 0x000FF000, 0x000DEF00), U32x4(0xF00FFF0F, 0xFFFF00FF, 0x100FF011, 0xABCDEF00));
  248. // Exclusive or
  249. ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x01234567, 0xF0F0F0F0, 0x00000000) ^ 0x0000FFFF, U32x4(0xFFFF0000, 0x0123BA98, 0xF0F00F0F, 0x0000FFFF));
  250. // Bit shift
  251. ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 1, U32x4(2, 4, 6, 8));
  252. ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 2, U32x4(4, 8, 12, 16));
  253. ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 3, U32x4(8, 16, 24, 32));
  254. ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 4, U32x4(16, 32, 48, 64));
  255. ASSERT_EQUAL(U32x4(1, 2, 3, 4) >> 1, U32x4(0, 1, 1, 2));
  256. ASSERT_EQUAL(U32x4(2, 4, 6, 8) >> 1, U32x4(1, 2, 3, 4));
  257. ASSERT_EQUAL(U32x4(2, 4, 6, 8) >> 2, U32x4(0, 1, 1, 2));
  258. ASSERT_EQUAL(U32x4(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0xF0000000) << 4, U32x4(0xAB12CD00, 0xFFFFFFF0, 0x23456780, 0x00000000));
  259. ASSERT_EQUAL(U32x4(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0x0000000F) >> 4, U32x4(0x00AB12CD, 0x0FFFFFFF, 0x01234567, 0x00000000));
  260. // Element shift with insert
  261. ASSERT_EQUAL(vectorExtract_0(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(1, 2, 3, 4));
  262. ASSERT_EQUAL(vectorExtract_1(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(2, 3, 4, 5));
  263. ASSERT_EQUAL(vectorExtract_2(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(3, 4, 5, 6));
  264. ASSERT_EQUAL(vectorExtract_3(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(4, 5, 6, 7));
  265. ASSERT_EQUAL(vectorExtract_4(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(5, 6, 7, 8));
  266. ASSERT_EQUAL(vectorExtract_0(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(123, 4294967295, 712, 45));
  267. ASSERT_EQUAL(vectorExtract_1(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(4294967295, 712, 45, 850514));
  268. ASSERT_EQUAL(vectorExtract_2(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(712, 45, 850514, 27));
  269. ASSERT_EQUAL(vectorExtract_3(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(45, 850514, 27, 0));
  270. ASSERT_EQUAL(vectorExtract_4(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(850514, 27, 0, 174));
  271. ASSERT_EQUAL(vectorExtract_0(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(1, 2, 3, 4));
  272. ASSERT_EQUAL(vectorExtract_1(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(2, 3, 4, 5));
  273. ASSERT_EQUAL(vectorExtract_2(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(3, 4, 5, 6));
  274. ASSERT_EQUAL(vectorExtract_3(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(4, 5, 6, 7));
  275. ASSERT_EQUAL(vectorExtract_4(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(5, 6, 7, 8));
  276. ASSERT_EQUAL(vectorExtract_0(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(123, 8462784, -712, 45));
  277. ASSERT_EQUAL(vectorExtract_1(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(8462784, -712, 45, -37562));
  278. ASSERT_EQUAL(vectorExtract_2(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(-712, 45, -37562, 27));
  279. ASSERT_EQUAL(vectorExtract_3(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(45, -37562, 27, 0));
  280. ASSERT_EQUAL(vectorExtract_4(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(-37562, 27, 0, 174));
  281. ASSERT_EQUAL(vectorExtract_0(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(1.0f, -2.0f, 3.0f, -4.0f));
  282. ASSERT_EQUAL(vectorExtract_1(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(-2.0f, 3.0f, -4.0f, 5.0f));
  283. ASSERT_EQUAL(vectorExtract_2(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(3.0f, -4.0f, 5.0f, 6.0f));
  284. ASSERT_EQUAL(vectorExtract_3(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(-4.0f, 5.0f, 6.0f, -7.0f));
  285. ASSERT_EQUAL(vectorExtract_4(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(5.0f, 6.0f, -7.0f, 8.0f));
  286. ASSERT_EQUAL(vectorExtract_0(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
  287. ASSERT_EQUAL(vectorExtract_1(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(2, 3, 4, 5, 6, 7, 8, 9));
  288. ASSERT_EQUAL(vectorExtract_2(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(3, 4, 5, 6, 7, 8, 9, 10));
  289. ASSERT_EQUAL(vectorExtract_3(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(4, 5, 6, 7, 8, 9, 10, 11));
  290. ASSERT_EQUAL(vectorExtract_4(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(5, 6, 7, 8, 9, 10, 11, 12));
  291. ASSERT_EQUAL(vectorExtract_5(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(6, 7, 8, 9, 10, 11, 12, 13));
  292. ASSERT_EQUAL(vectorExtract_6(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(7, 8, 9, 10, 11, 12, 13, 14));
  293. ASSERT_EQUAL(vectorExtract_7(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(8, 9, 10, 11, 12, 13, 14, 15));
  294. ASSERT_EQUAL(vectorExtract_8(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
  295. ASSERT_EQUAL(vectorExtract_0(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  296. ASSERT_EQUAL(vectorExtract_1(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17));
  297. ASSERT_EQUAL(vectorExtract_2(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18));
  298. ASSERT_EQUAL(vectorExtract_3(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
  299. ASSERT_EQUAL(vectorExtract_4(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20));
  300. ASSERT_EQUAL(vectorExtract_5(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21));
  301. ASSERT_EQUAL(vectorExtract_6(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22));
  302. ASSERT_EQUAL(vectorExtract_7(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23));
  303. ASSERT_EQUAL(vectorExtract_8(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
  304. ASSERT_EQUAL(vectorExtract_9(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25));
  305. ASSERT_EQUAL(vectorExtract_10(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26));
  306. ASSERT_EQUAL(vectorExtract_11(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27));
  307. ASSERT_EQUAL(vectorExtract_12(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28));
  308. ASSERT_EQUAL(vectorExtract_13(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29));
  309. ASSERT_EQUAL(vectorExtract_14(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30));
  310. ASSERT_EQUAL(vectorExtract_15(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
  311. ASSERT_EQUAL(vectorExtract_16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
  312. // TODO: Move SIMD extra into simd.h and implement emulation.
  313. #ifdef USE_SIMD_EXTRA
  314. SIMD_U32x4 a = U32x4(1, 2, 3, 4).v;
  315. SIMD_U32x4 b = U32x4(5, 6, 7, 8).v;
  316. SIMD_U32x4x2 c = ZIP_U32_SIMD(a, b);
  317. ASSERT_EQUAL(U32x4(c.val[0]), U32x4(1, 5, 2, 6));
  318. ASSERT_EQUAL(U32x4(c.val[1]), U32x4(3, 7, 4, 8));
  319. SIMD_U32x4 d = ZIP_LOW_U32_SIMD(a, b);
  320. SIMD_U32x4 e = ZIP_HIGH_U32_SIMD(a, b);
  321. ASSERT_EQUAL(U32x4(d), U32x4(1, 5, 2, 6));
  322. ASSERT_EQUAL(U32x4(e), U32x4(3, 7, 4, 8));
  323. #endif
  324. // 256-bit SIMD tests (emulated using scalar operations if the test is not compiled with AVX2 enabled)
  325. // F32x8 Comparisons
  326. ASSERT_EQUAL(F32x8(1.5f), F32x8(1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f));
  327. ASSERT_EQUAL(F32x8(-1.5f), F32x8(-1.5f, -1.5f, -1.5f, -1.5f, -1.5f, -1.5f, -1.5f, -1.5f));
  328. ASSERT_EQUAL(F32x8(1.2f, 3.4f, 5.6f, 7.8f, -2.4f, 452.351f, 1000000.0f, -1000.0f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, -2.4f, 452.351f, 1000000.0f, -1000.0f));
  329. ASSERT_NOT_EQUAL(F32x8(1.3f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f));
  330. ASSERT_NOT_EQUAL(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, -1.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f));
  331. ASSERT_NOT_EQUAL(F32x8(1.2f, 3.4f, 5.5f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f));
  332. ASSERT_NOT_EQUAL(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, -7.8f, 5.3f, 6.7f, 1.4f, -5.2f));
  333. ASSERT_NOT_EQUAL(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 0.0f, 6.7f, 1.4f, -5.2f));
  334. ASSERT_NOT_EQUAL(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.69f, 1.4f, -5.2f));
  335. ASSERT_NOT_EQUAL(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.3f, -5.2f));
  336. ASSERT_NOT_EQUAL(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, 5.2f));
  337. // I32x8 Comparisons
  338. ASSERT_EQUAL(I32x8(4), I32x8(4, 4, 4, 4, 4, 4, 4, 4));
  339. ASSERT_EQUAL(I32x8(-4), I32x8(-4, -4, -4, -4, -4, -4, -4, -4));
  340. ASSERT_EQUAL(I32x8(-1, 2, -3, 4, -5, 6, -7, 8), I32x8(-1, 2, -3, 4, -5, 6, -7, 8));
  341. ASSERT_NOT_EQUAL(I32x8(-1, 2, 7, 4, 8, 3, 5, 45), I32x8(-1, 2, -3, 4, 8, 3, 5, 45));
  342. // U32x8 Comparisons
  343. ASSERT_EQUAL(U32x8(4), U32x8(4, 4, 4, 4, 4, 4, 4, 4));
  344. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(1, 2, 3, 4, 5, 6, 7, 8));
  345. ASSERT_NOT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 12, 8), U32x8(1, 2, 3, 4, 5, 6, 7, 8));
  346. // U16x16 Comparisons
  347. ASSERT_EQUAL(U16x16((uint16_t)8), U16x16(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8));
  348. ASSERT_EQUAL(U16x16((uint32_t)8), U16x16(8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0));
  349. ASSERT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  350. ASSERT_NOT_EQUAL(U16x16(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  351. ASSERT_NOT_EQUAL(U16x16(1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  352. ASSERT_NOT_EQUAL(U16x16(1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  353. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  354. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  355. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  356. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 0, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  357. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  358. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  359. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  360. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  361. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  362. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  363. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  364. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  365. ASSERT_NOT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  366. ASSERT_NOT_EQUAL(U16x16(1, 2, 0, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  367. ASSERT_NOT_EQUAL(U16x16(1, 0, 3, 4, 5, 6, 0, 0, 9, 10, 11, 12, 13, 0, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  368. ASSERT_NOT_EQUAL(U16x16(0, 2, 3, 4, 0, 6, 7, 8, 9, 10, 11, 0, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  369. ASSERT_NOT_EQUAL(U16x16(0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 11, 0, 13, 14, 0, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
  370. // U8x32 Comparisons
  371. ASSERT_EQUAL(U8x32((uint8_t)250), U8x32(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250));
  372. ASSERT_NOT_EQUAL(U8x32((uint8_t)250), U8x32(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 100, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250));
  373. ASSERT_NOT_EQUAL(U8x32((uint8_t)250), U8x32(0, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250));
  374. ASSERT_NOT_EQUAL(U8x32((uint8_t)250), U8x32(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 0));
  375. // Reinterpret (Depends on endianness!)
  376. ASSERT_EQUAL(U16x16(U32x8(12, 34, 56, 78, 11, 22, 33, 44)), U16x16(12, 0, 34, 0, 56, 0, 78, 0, 11, 0, 22, 0, 33, 0, 44, 0));
  377. ASSERT_EQUAL(U16x16(U32x8(12, 34, 56, 78, 11, 22, 33, 131116)), U16x16(12, 0, 34, 0, 56, 0, 78, 0, 11, 0, 22, 0, 33, 0, 44, 2));
  378. ASSERT_EQUAL(U16x16(12, 0, 34, 0, 56, 0, 78, 0, 11, 0, 22, 0, 33, 0, 44, 2).get_U32(), U32x8(12, 34, 56, 78, 11, 22, 33, 131116));
  379. // Reciprocal: 1 / x
  380. ASSERT_EQUAL(F32x8(0.5f, 1.0f, 2.0f, 4.0f, 8.0f, 10.0f, 100.0f, 1000.0f).reciprocal(), F32x8(2.0f, 1.0f, 0.5f, 0.25f, 0.125f, 0.1f, 0.01f, 0.001f));
  381. // Square root: sqrt(x)
  382. ASSERT_EQUAL(F32x8(1.0f, 4.0f, 9.0f, 100.0f, 64.0f, 256.0f, 1024.0f, 4096.0f).squareRoot(), F32x8(1.0f, 2.0f, 3.0f, 10.0f, 8.0f, 16.0f, 32.0f, 64.0f));
  383. // Reciprocal square root: 1 / sqrt(x)
  384. ASSERT_EQUAL(F32x8(1.0f, 4.0f, 16.0f, 100.0f, 400.0f, 64.0f, 25.0f, 100.0f).reciprocalSquareRoot(), F32x8(1.0f, 0.5f, 0.25f, 0.1f, 0.05f, 0.125f, 0.2f, 0.1f));
  385. // Minimum
  386. ASSERT_EQUAL(min(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(1.1f, 2.2f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 8.8f));
  387. // Maximum
  388. ASSERT_EQUAL(max(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(5.0f, 3.0f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 10.0f));
  389. // Clamp
  390. ASSERT_EQUAL(F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f).clamp(-1.5f, 1.5f), F32x8(-1.5f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.5f));
  391. // F32x8 operations
  392. ASSERT_EQUAL(F32x8(1.1f, -2.2f, 3.3f, 4.0f, 1.4f, 2.3f, 3.2f, 4.1f) + F32x8(2.2f, -4.4f, 6.6f, 8.0f, 4.11f, 3.22f, 2.33f, 1.44f), F32x8(3.3f, -6.6f, 9.9f, 12.0f, 5.51f, 5.52f, 5.53f, 5.54f));
  393. ASSERT_EQUAL(F32x8(-1.5f, -0.5f, 0.5f, 1.5f, 1000.0f, 2000.0f, -4000.0f, -1500.0f) + 1.0f, F32x8(-0.5f, 0.5f, 1.5f, 2.5f, 1001.0f, 2001.0f, -3999.0f, -1499.0f));
  394. ASSERT_EQUAL(1.0f + F32x8(-1.5f, -0.5f, 0.5f, 1.5f, 1000.0f, 2000.0f, -4000.0f, -1500.0f), F32x8(-0.5f, 0.5f, 1.5f, 2.5f, 1001.0f, 2001.0f, -3999.0f, -1499.0f));
  395. ASSERT_EQUAL(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f) - F32x8(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f), F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
  396. ASSERT_EQUAL(F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f) - 0.5f, F32x8(0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f));
  397. ASSERT_EQUAL(0.5f - F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f), F32x8(-0.5f, -1.5f, -2.5f, -3.5f, -4.5f, -5.5f, -6.5f, -7.5f));
  398. ASSERT_EQUAL(2.0f * F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f), F32x8(2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, 16.0f));
  399. ASSERT_EQUAL(F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f) * -2.0f, F32x8(-2.0f, 4.0f, -6.0f, 8.0f, -10.0f, 12.0f, -14.0f, 16.0f));
  400. ASSERT_EQUAL(F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f) * F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f), F32x8(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f));
  401. ASSERT_EQUAL(-F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f), F32x8(-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, 6.0f, -7.0f, 8.0f));
  402. // I32x8 operations
  403. ASSERT_EQUAL(I32x8(1, 2, 3, 4, 5, 6, 7, 8) - 1, I32x8(0, 1, 2, 3, 4, 5, 6, 7));
  404. ASSERT_EQUAL(1 - I32x8(1, 2, 3, 4, 5, 6, 7, 8), I32x8(0, -1, -2, -3, -4, -5, -6, -7));
  405. ASSERT_EQUAL(2 * I32x8(1, 2, 3, 4, 5, 6, 7, 8), I32x8(2, 4, 6, 8, 10, 12, 14, 16));
  406. ASSERT_EQUAL(I32x8(1, -2, 3, -4, 5, -6, 7, -8) * -2, I32x8(-2, 4, -6, 8, -10, 12, -14, 16));
  407. ASSERT_EQUAL(I32x8(1, -2, 3, -4, 5, -6, 7, -8) * I32x8(1, -2, 3, -4, 5, -6, 7, -8), I32x8(1, 4, 9, 16, 25, 36, 49, 64));
  408. ASSERT_EQUAL(-I32x8(1, -2, 3, -4, 5, -6, 7, -8), I32x8(-1, 2, -3, 4, -5, 6, -7, 8));
  409. // U32x8 operations
  410. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8) - 1, U32x8(0, 1, 2, 3, 4, 5, 6, 7));
  411. ASSERT_EQUAL(10 - U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(9, 8, 7, 6, 5, 4, 3, 2));
  412. ASSERT_EQUAL(2 * U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(2, 4, 6, 8, 10, 12, 14, 16));
  413. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8) * 2, U32x8(2, 4, 6, 8, 10, 12, 14, 16));
  414. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8) * U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(1, 4, 9, 16, 25, 36, 49, 64));
  415. // U16x16 operations
  416. ASSERT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32), U16x16(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48));
  417. ASSERT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 8, U16x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
  418. ASSERT_EQUAL(8 + U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
  419. ASSERT_EQUAL(U16x16(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48) - U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
  420. ASSERT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U16x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
  421. ASSERT_EQUAL(16 - U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
  422. ASSERT_EQUAL(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) * 2, U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
  423. ASSERT_EQUAL(2 * U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32));
  424. // U8x32 operations
  425. ASSERT_EQUAL(
  426. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)
  427. + U8x32( 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64),
  428. U8x32( 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96));
  429. ASSERT_EQUAL(
  430. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32) + 5,
  431. U8x32( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37));
  432. ASSERT_EQUAL(
  433. 5 + U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
  434. U8x32( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37));
  435. ASSERT_EQUAL(
  436. U8x32( 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96)
  437. - U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
  438. U8x32( 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64));
  439. ASSERT_EQUAL(
  440. U8x32( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37) - 5,
  441. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
  442. ASSERT_EQUAL(
  443. 33 - U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
  444. U8x32(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1));
  445. ASSERT_EQUAL(
  446. saturatedAddition(
  447. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255),
  448. U8x32((uint8_t)240)),
  449. U8x32(241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255)
  450. );
  451. ASSERT_EQUAL(
  452. saturatedSubtraction(
  453. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255),
  454. U8x32((uint8_t)16)),
  455. U8x32( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,239)
  456. );
  457. // Saturated unsigned integer packing
  458. ASSERT_EQUAL(saturateToU8(
  459. U16x16(1, 2, 3, 4, 65535, 6, 7, 8, 9, 10, 11, 12, 1000, 14, 15, 16), U16x16(17, 18, 19, 20, 21, 22, 23, 65535, 25, 26, 27, 28, 29, 30, 31, 32)),
  460. U8x32( 1, 2, 3, 4, 255, 6, 7, 8, 9, 10, 11, 12, 255, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 255, 25, 26, 27, 28, 29, 30, 31, 32));
  461. // Unsigned integer unpacking
  462. ASSERT_EQUAL(lowerToU32(U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U32x8(1,2,3,4,5,6,7,8));
  463. ASSERT_EQUAL(higherToU32(U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U32x8(9,10,11,12,13,14,15,16));
  464. ASSERT_EQUAL(lowerToU32(U16x16(1,2,3,4,5,6,65535,8,9,10,11,12,13,1000,15,16)), U32x8(1,2,3,4,5,6,65535,8));
  465. ASSERT_EQUAL(higherToU32(U16x16(1,2,3,4,5,6,65535,8,9,10,11,12,13,1000,15,16)), U32x8(9,10,11,12,13,1000,15,16));
  466. ASSERT_EQUAL(lowerToU16(U8x32(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)), U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255));
  467. ASSERT_EQUAL(higherToU16(U8x32(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255));
  468. // Bitwise operations
  469. ASSERT_EQUAL(
  470. U32x8(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  471. & 0x0000FFFF,
  472. U32x8(0x0000FFFF, 0x00005678, 0x0000F0F0, 0x00000000, 0x0000EEEE, 0x00004321, 0x00000F0F, 0x00000001));
  473. ASSERT_EQUAL(
  474. U32x8(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  475. & 0xFFFF0000,
  476. U32x8(0xFFFF0000, 0x12340000, 0xF0F00000, 0x00000000, 0xEEEE0000, 0x87650000, 0x0F0F0000, 0x00010000));
  477. ASSERT_EQUAL(
  478. U32x8(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  479. | 0x0000FFFF,
  480. U32x8(0xFFFFFFFF, 0x1234FFFF, 0xF0F0FFFF, 0x0000FFFF, 0xEEEEFFFF, 0x8765FFFF, 0x0F0FFFFF, 0x0001FFFF));
  481. ASSERT_EQUAL(
  482. U32x8(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  483. | 0xFFFF0000,
  484. U32x8(0xFFFFFFFF, 0xFFFF5678, 0xFFFFF0F0, 0xFFFF0000, 0xFFFFEEEE, 0xFFFF4321, 0xFFFF0F0F, 0xFFFF0001));
  485. ASSERT_EQUAL(
  486. U32x8(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  487. & U32x8(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0, 0xF00FF00F, 0x00FFFF00, 0xF0F0F0F0, 0x0000FFFF),
  488. U32x8(0xFF00FF00, 0xFFF00000, 0x0000F000, 0x02300670, 0xE00EE00E, 0x00654300, 0x00000000, 0x00000001));
  489. ASSERT_EQUAL(
  490. U32x8(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  491. | U32x8(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0, 0xF00FF00F, 0x00FFFF00, 0xF0F0F0F0, 0x0000FFFF),
  492. U32x8(0xFFFFFFFF, 0xFFFF00FF, 0xF0FFF0F0, 0x1FF45FF8, 0xFEEFFEEF, 0x87FFFF21, 0xFFFFFFFF, 0x0001FFFF));
  493. ASSERT_EQUAL(
  494. U32x8(0b11001100110000110101010010110011, 0b00101011001011101010001101111001, 0b11001010000110111010010100101100, 0b01010111010001010010101110010110, 0b10101110100110100010101011011001, 0b00101110100111010001101010110000, 0b11101010001011100010101110001111, 0b00101010111100010110010110001000)
  495. ^ U32x8(0b00101101001110100011010010100001, 0b10101110100101000011101001010011, 0b00101011100101001011000010100100, 0b11010011101001000110010110110111, 0b00111100101000101010001101001010, 0b00101110100110000111110011010101, 0b11001010010101010010110010101000, 0b11110000111100001111000011110000),
  496. U32x8(0b11100001111110010110000000010010, 0b10000101101110101001100100101010, 0b11100001100011110001010110001000, 0b10000100111000010100111000100001, 0b10010010001110001000100110010011, 0b00000000000001010110011001100101, 0b00100000011110110000011100100111, 0b11011010000000011001010101111000));
  497. // Bit shift
  498. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8) << 1, U32x8( 2, 4, 6, 8,10,12,14,16));
  499. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8) << 2, U32x8( 4, 8,12,16,20,24,28,32));
  500. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8) << 3, U32x8( 8,16,24,32,40,48,56,64));
  501. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8) << 4, U32x8(16,32,48,64,80,96,112,128));
  502. ASSERT_EQUAL(U32x8(1, 2, 3, 4, 5, 6, 7, 8) >> 1, U32x8( 0, 1, 1, 2, 2, 3, 3, 4));
  503. ASSERT_EQUAL(U32x8(2, 4, 6, 8, 10, 12, 14, 16) >> 1, U32x8(1, 2, 3, 4, 5, 6, 7, 8));
  504. ASSERT_EQUAL(U32x8(2, 4, 6, 8, 10, 12, 14, 16) >> 2, U32x8(0, 1, 1, 2, 2, 3, 3, 4));
  505. ASSERT_EQUAL(
  506. U32x8(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0xF0000000, 0x87654321, 0x48484848, 0x76437643, 0x11111111)
  507. << 4,
  508. U32x8(0xAB12CD00, 0xFFFFFFF0, 0x23456780, 0x00000000, 0x76543210, 0x84848480, 0x64376430, 0x11111110));
  509. ASSERT_EQUAL(
  510. U32x8(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0x0000000F, 0x87654321, 0x48484848, 0x76437643, 0x11111111)
  511. >> 4,
  512. U32x8(0x00AB12CD, 0x0FFFFFFF, 0x01234567, 0x00000000, 0x08765432, 0x04848484, 0x07643764, 0x01111111));
  513. // Element shift with insert
  514. ASSERT_EQUAL(vectorExtract_0(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  515. U32x8( 1, 2, 3, 4, 5, 6, 7, 8));
  516. ASSERT_EQUAL(vectorExtract_1(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  517. U32x8( 2, 3, 4, 5, 6, 7, 8, 9));
  518. ASSERT_EQUAL(vectorExtract_2(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  519. U32x8( 3, 4, 5, 6, 7, 8, 9,10));
  520. ASSERT_EQUAL(vectorExtract_3(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  521. U32x8( 4, 5, 6, 7, 8, 9,10,11));
  522. ASSERT_EQUAL(vectorExtract_4(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  523. U32x8( 5, 6, 7, 8, 9,10,11,12));
  524. ASSERT_EQUAL(vectorExtract_5(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  525. U32x8( 6, 7, 8, 9,10,11,12,13));
  526. ASSERT_EQUAL(vectorExtract_6(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  527. U32x8( 7, 8, 9,10,11,12,13,14));
  528. ASSERT_EQUAL(vectorExtract_7(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  529. U32x8( 8, 9,10,11,12,13,14,15));
  530. ASSERT_EQUAL(vectorExtract_8(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  531. U32x8( 9,10,11,12,13,14,15,16));
  532. ASSERT_EQUAL(vectorExtract_5(U32x8( 1, 2, 3, 4, 5, 6, 7, 4294967295), U32x8( 9,10,11,1000,13,14,15,16)),
  533. U32x8( 6, 7, 4294967295, 9,10,11,1000,13));
  534. ASSERT_EQUAL(vectorExtract_0(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  535. I32x8( 1,-2, 3, 4,-5, 6, 7, 8));
  536. ASSERT_EQUAL(vectorExtract_1(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  537. I32x8(-2, 3, 4,-5, 6, 7, 8, 9));
  538. ASSERT_EQUAL(vectorExtract_2(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  539. I32x8( 3, 4,-5, 6, 7, 8, 9,10));
  540. ASSERT_EQUAL(vectorExtract_3(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  541. I32x8( 4,-5, 6, 7, 8, 9,10,11));
  542. ASSERT_EQUAL(vectorExtract_4(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  543. I32x8(-5, 6, 7, 8, 9,10,11,-12));
  544. ASSERT_EQUAL(vectorExtract_5(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  545. I32x8( 6, 7, 8, 9,10,11,-12,13));
  546. ASSERT_EQUAL(vectorExtract_6(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  547. I32x8( 7, 8, 9,10,11,-12,13,14));
  548. ASSERT_EQUAL(vectorExtract_7(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  549. I32x8( 8, 9,10,11,-12,13,14,15));
  550. ASSERT_EQUAL(vectorExtract_8(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  551. I32x8( 9,10,11,-12,13,14,15,-16));
  552. ASSERT_EQUAL(vectorExtract_0(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  553. F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f));
  554. ASSERT_EQUAL(vectorExtract_1(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  555. F32x8( -2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f, 9.0f));
  556. ASSERT_EQUAL(vectorExtract_2(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  557. F32x8( 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f));
  558. ASSERT_EQUAL(vectorExtract_3(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  559. F32x8( 4.0f,-5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f));
  560. ASSERT_EQUAL(vectorExtract_4(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  561. F32x8(-5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,-12.0f));
  562. ASSERT_EQUAL(vectorExtract_5(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  563. F32x8( 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,-12.0f, 13.0f));
  564. ASSERT_EQUAL(vectorExtract_6(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  565. F32x8( 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f));
  566. ASSERT_EQUAL(vectorExtract_7(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  567. F32x8( 8.0f, 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f));
  568. ASSERT_EQUAL(vectorExtract_8(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  569. F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f));
  570. ASSERT_EQUAL(vectorExtract_0 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  571. U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16));
  572. ASSERT_EQUAL(vectorExtract_1 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  573. U16x16( 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 17));
  574. ASSERT_EQUAL(vectorExtract_2 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  575. U16x16( 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 17,18));
  576. ASSERT_EQUAL(vectorExtract_3 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  577. U16x16( 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 17,18,19));
  578. ASSERT_EQUAL(vectorExtract_4 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  579. U16x16( 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 17,18,19,20));
  580. ASSERT_EQUAL(vectorExtract_5 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  581. U16x16( 6, 7, 8, 9,10,11,12,13,14,15,16, 17,18,19,20,21));
  582. ASSERT_EQUAL(vectorExtract_6 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  583. U16x16( 7, 8, 9,10,11,12,13,14,15,16, 17,18,19,20,21,22));
  584. ASSERT_EQUAL(vectorExtract_7 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  585. U16x16( 8, 9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23));
  586. ASSERT_EQUAL(vectorExtract_8 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  587. U16x16( 9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24));
  588. ASSERT_EQUAL(vectorExtract_9 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  589. U16x16(10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25));
  590. ASSERT_EQUAL(vectorExtract_10(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  591. U16x16(11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26));
  592. ASSERT_EQUAL(vectorExtract_11(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  593. U16x16(12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26,27));
  594. ASSERT_EQUAL(vectorExtract_12(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  595. U16x16(13,14,15,16, 17,18,19,20,21,22,23,24,25,26,27,28));
  596. ASSERT_EQUAL(vectorExtract_13(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  597. U16x16(14,15,16, 17,18,19,20,21,22,23,24,25,26,27,28,29));
  598. ASSERT_EQUAL(vectorExtract_14(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  599. U16x16(15,16, 17,18,19,20,21,22,23,24,25,26,27,28,29,30));
  600. ASSERT_EQUAL(vectorExtract_15(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  601. U16x16(16, 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31));
  602. ASSERT_EQUAL(vectorExtract_16(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  603. U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32));
  604. ASSERT_EQUAL(vectorExtract_0(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  605. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32));
  606. ASSERT_EQUAL(vectorExtract_1 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  607. U8x32( 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33));
  608. ASSERT_EQUAL(vectorExtract_2 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  609. U8x32( 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34));
  610. ASSERT_EQUAL(vectorExtract_3 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  611. U8x32( 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35));
  612. ASSERT_EQUAL(vectorExtract_4 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  613. U8x32( 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36));
  614. ASSERT_EQUAL(vectorExtract_5 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  615. U8x32( 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37));
  616. ASSERT_EQUAL(vectorExtract_6 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  617. U8x32( 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38));
  618. ASSERT_EQUAL(vectorExtract_7 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  619. U8x32( 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39));
  620. ASSERT_EQUAL(vectorExtract_8 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  621. U8x32( 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40));
  622. ASSERT_EQUAL(vectorExtract_9 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  623. U8x32(10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41));
  624. ASSERT_EQUAL(vectorExtract_10(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  625. U8x32(11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42));
  626. ASSERT_EQUAL(vectorExtract_11(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  627. U8x32(12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43));
  628. ASSERT_EQUAL(vectorExtract_12(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  629. U8x32(13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44));
  630. ASSERT_EQUAL(vectorExtract_13(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  631. U8x32(14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45));
  632. ASSERT_EQUAL(vectorExtract_14(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  633. U8x32(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46));
  634. ASSERT_EQUAL(vectorExtract_15(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  635. U8x32(16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47));
  636. ASSERT_EQUAL(vectorExtract_16(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  637. U8x32(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48));
  638. ASSERT_EQUAL(vectorExtract_17(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  639. U8x32(18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49));
  640. ASSERT_EQUAL(vectorExtract_18(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  641. U8x32(19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50));
  642. ASSERT_EQUAL(vectorExtract_19(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  643. U8x32(20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51));
  644. ASSERT_EQUAL(vectorExtract_20(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  645. U8x32(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52));
  646. ASSERT_EQUAL(vectorExtract_21(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  647. U8x32(22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53));
  648. ASSERT_EQUAL(vectorExtract_22(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  649. U8x32(23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54));
  650. ASSERT_EQUAL(vectorExtract_23(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  651. U8x32(24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55));
  652. ASSERT_EQUAL(vectorExtract_24(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  653. U8x32(25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56));
  654. ASSERT_EQUAL(vectorExtract_25(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  655. U8x32(26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57));
  656. ASSERT_EQUAL(vectorExtract_26(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  657. U8x32(27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58));
  658. ASSERT_EQUAL(vectorExtract_27(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  659. U8x32(28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59));
  660. ASSERT_EQUAL(vectorExtract_28(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  661. U8x32(29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60));
  662. ASSERT_EQUAL(vectorExtract_29(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  663. U8x32(30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61));
  664. ASSERT_EQUAL(vectorExtract_30(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  665. U8x32(31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62));
  666. ASSERT_EQUAL(vectorExtract_31(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  667. U8x32(32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63));
  668. ASSERT_EQUAL(vectorExtract_32(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  669. U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64));
  670. { // Gather test
  671. // The Buffer must be kept alive during the pointer's lifetime to prevent freeing the memory too early with reference counting.
  672. // Because SafePointer exists only to be faster than Buffer but safer than a raw pointer.
  673. Buffer gatherTestBuffer = buffer_create(sizeof(int32_t) * 32);
  674. {
  675. // 32-bit floating-point gather
  676. SafePointer<float> pointerF = buffer_getSafeData<float>(gatherTestBuffer, "float gather test data");
  677. for (int i = 0; i < 32; i++) { // -32.0f, -30.0f, -28.0f, -26.0f ... 24.0f, 26.0f, 28.0f, 30.0f
  678. pointerF[i] = i * 2.0f - 32.0f;
  679. }
  680. ASSERT_EQUAL(gather(pointerF , U32x4(2, 1, 30, 31)), F32x4(-28.0f, -30.0f, 28.0f, 30.0f));
  681. ASSERT_EQUAL(gather(pointerF + 10, U32x4(0, 1, 2, 3)), F32x4(-12.0f, -10.0f, -8.0f, -6.0f));
  682. ASSERT_EQUAL(gather(pointerF , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), F32x8(-28.0f, -30.0f, 24.0f, 26.0f, -26.0f, -32.0f, 28.0f, 30.0f));
  683. ASSERT_EQUAL(gather(pointerF + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), F32x8(-12.0f, -10.0f, -8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 2.0f));
  684. }
  685. {
  686. // Signed 32-bit integer gather
  687. SafePointer<int32_t> pointerU = buffer_getSafeData<int32_t>(gatherTestBuffer, "int32_t gather test data");
  688. for (int i = 0; i < 32; i++) { // -32, -30, -28, -26 ... 24, 26, 28, 30
  689. pointerU[i] = i * 2 - 32;
  690. }
  691. ASSERT_EQUAL(gather(pointerU , U32x4(2, 1, 30, 31)), I32x4(-28, -30, 28, 30));
  692. ASSERT_EQUAL(gather(pointerU + 10, U32x4(0, 1, 2, 3)), I32x4(-12, -10, -8, -6));
  693. ASSERT_EQUAL(gather(pointerU , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), I32x8(-28, -30, 24, 26, -26, -32, 28, 30));
  694. ASSERT_EQUAL(gather(pointerU + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), I32x8(-12, -10, -8, -6, -4, -2, 0, 2));
  695. }
  696. {
  697. // Unsigned 32-bit integer gather
  698. SafePointer<uint32_t> pointerI = buffer_getSafeData<uint32_t>(gatherTestBuffer, "uint32_t gather test data");
  699. for (int i = 0; i < 32; i++) { // 100, 102, 104, 106 ... 156, 158, 160, 162
  700. pointerI[i] = 100 + i * 2;
  701. }
  702. // Signed 32-bit integer gather
  703. ASSERT_EQUAL(gather(pointerI , U32x4(2, 1, 30, 31)), U32x4(104, 102, 160, 162));
  704. ASSERT_EQUAL(gather(pointerI + 10, U32x4(0, 1, 2, 3)), U32x4(120, 122, 124, 126));
  705. ASSERT_EQUAL(gather(pointerI , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), U32x8(104, 102, 156, 158, 106, 100, 160, 162));
  706. ASSERT_EQUAL(gather(pointerI + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), U32x8(120, 122, 124, 126, 128, 130, 132, 134));
  707. }
  708. }
  709. END_TEST