SimdTest.cpp

#include "../testTools.h"
#include "../../DFPSR/base/simdExtra.h"
START_TEST(Simd)
printText("\nSIMD test is compiled using:\n");
#ifdef USE_SSE2
printText(" * SSE2\n");
#endif
#ifdef USE_SSSE3
printText(" * SSSE3\n");
#endif
#ifdef USE_AVX2
printText(" * AVX2\n");
#endif
#ifdef USE_NEON
printText(" * NEON\n");
#endif
// F32x4 Comparisons
ASSERT_EQUAL(F32x4(1.5f), F32x4(1.5f, 1.5f, 1.5f, 1.5f));
ASSERT_EQUAL(F32x4(-1.5f), F32x4(-1.5f, -1.5f, -1.5f, -1.5f));
ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().x, 1.2f);
ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().y, 3.4f);
ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().z, 5.6f);
ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().w, 7.8f);
ASSERT_NOT_EQUAL(F32x4(1.3f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, -1.4f, 5.6f, 7.8f));
ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.5f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, -7.8f));
// I32x4 Comparisons
ASSERT_EQUAL(I32x4(4), I32x4(4, 4, 4, 4));
ASSERT_EQUAL(I32x4(-4), I32x4(-4, -4, -4, -4));
ASSERT_EQUAL(I32x4(-1, 2, -3, 4), I32x4(-1, 2, -3, 4));
ASSERT_NOT_EQUAL(I32x4(-1, 2, 7, 4), I32x4(-1, 2, -3, 4));
// U32x4 Comparisons
ASSERT_EQUAL(U32x4(4), U32x4(4, 4, 4, 4));
ASSERT_EQUAL(U32x4(1, 2, 3, 4), U32x4(1, 2, 3, 4));
ASSERT_NOT_EQUAL(U32x4(1, 2, 7, 4), U32x4(1, 2, 3, 4));
// U16x8 Comparisons
ASSERT_EQUAL(U16x8((uint16_t)8), U16x8(8, 8, 8, 8, 8, 8, 8, 8));
ASSERT_EQUAL(U16x8((uint32_t)8), U16x8(8, 0, 8, 0, 8, 0, 8, 0));
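// Reading of the expected values above: the uint32_t constructor appears to store one copy of
// the 32-bit value in each pair of 16-bit lanes, which is why the result alternates 8 and 0.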
ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(0, 2, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 0, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 2, 0, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 0, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 0, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 0, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 0, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 2, 0, 4, 5, 0, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(1, 0, 3, 4, 5, 6, 0, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(0, 2, 3, 4, 0, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_NOT_EQUAL(U16x8(0, 0, 0, 0, 0, 0, 0, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
// U8x16 Comparisons
ASSERT_EQUAL(U8x16((uint8_t)250), U8x16(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250));
ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 0, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 0, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 0, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 0, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 0, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 0), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 251, 252, 6, 254, 255), U8x16(1, 2, 3, 4, 5, 9, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 0, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 4, 8, 9, 10, 250, 251, 252, 253, 254, 255));
// Macros
#ifdef USE_BASIC_SIMD
{ // Truncate float to int
	SIMD_F32x4 f = LOAD_VECTOR_F32_SIMD(-1.01f, -0.99f, 0.99f, 1.01f);
	SIMD_I32x4 i = LOAD_VECTOR_I32_SIMD(-1, 0, 0, 1);
	ASSERT_EQUAL(I32x4(F32_TO_I32_SIMD(f)), I32x4(i));
}
{ // Int to float
	SIMD_I32x4 n = LOAD_VECTOR_I32_SIMD(123   , 456   , 789   , -1000  );
	SIMD_F32x4 r = LOAD_VECTOR_F32_SIMD(123.0f, 456.0f, 789.0f, -1000.0f);
	ASSERT_EQUAL(F32x4(I32_TO_F32_SIMD(n)), F32x4(r));
}
{ // Signed-unsigned cast
	ASSERT_EQUAL(I32x4(REINTERPRET_U32_TO_I32_SIMD(U32x4(1, 2, 3, 4).v)), I32x4(1, 2, 3, 4));
	ASSERT_EQUAL(U32x4(REINTERPRET_I32_TO_U32_SIMD(I32x4(1, 2, 3, 4).v)), U32x4(1, 2, 3, 4));
}
{ // F32x4
	SIMD_F32x4 a = LOAD_VECTOR_F32_SIMD(-1.3f, 2.5f, -3.4f, 4.7f);
	SIMD_F32x4 b = LOAD_VECTOR_F32_SIMD(5.2f, -2.0f, 0.1f, 1.9f);
	SIMD_F32x4 c = LOAD_SCALAR_F32_SIMD(0.5f);
	ASSERT_EQUAL(F32x4(ADD_F32_SIMD(a, b)), F32x4(-1.3f + 5.2f, 2.5f + -2.0f, -3.4f + 0.1f, 4.7f + 1.9f));
	ASSERT_EQUAL(F32x4(SUB_F32_SIMD(a, b)), F32x4(-1.3f - 5.2f, 2.5f - -2.0f, -3.4f - 0.1f, 4.7f - 1.9f));
	ASSERT_EQUAL(F32x4(ADD_F32_SIMD(a, c)), F32x4(-1.3f + 0.5f, 2.5f + 0.5f, -3.4f + 0.5f, 4.7f + 0.5f));
	ASSERT_EQUAL(F32x4(SUB_F32_SIMD(a, c)), F32x4(-1.3f - 0.5f, 2.5f - 0.5f, -3.4f - 0.5f, 4.7f - 0.5f));
	ASSERT_EQUAL(F32x4(MUL_F32_SIMD(a, c)), F32x4(-1.3f * 0.5f, 2.5f * 0.5f, -3.4f * 0.5f, 4.7f * 0.5f));
	ASSERT_EQUAL(F32x4(MIN_F32_SIMD(a, b)), F32x4(-1.3f, -2.0f, -3.4f, 1.9f));
	ASSERT_EQUAL(F32x4(MAX_F32_SIMD(a, b)), F32x4(5.2f, 2.5f, 0.1f, 4.7f));
}
{ // I32x4
	SIMD_I32x4 a = LOAD_VECTOR_I32_SIMD(-1, 2, -3, 4);
	SIMD_I32x4 b = LOAD_VECTOR_I32_SIMD(5, -2, 0, 1);
	SIMD_I32x4 c = LOAD_SCALAR_I32_SIMD(4);
	ASSERT_EQUAL(I32x4(ADD_I32_SIMD(a, b)), I32x4(4, 0, -3, 5));
	ASSERT_EQUAL(I32x4(SUB_I32_SIMD(a, b)), I32x4(-6, 4, -3, 3));
	ASSERT_EQUAL(I32x4(ADD_I32_SIMD(a, c)), I32x4(3, 6, 1, 8));
	ASSERT_EQUAL(I32x4(SUB_I32_SIMD(a, c)), I32x4(-5, -2, -7, 0));
}
{ // U32x4
	SIMD_U32x4 a = LOAD_VECTOR_U32_SIMD(4, 5, 6, 7);
	SIMD_U32x4 b = LOAD_VECTOR_U32_SIMD(6, 5, 4, 3);
	SIMD_U32x4 c = LOAD_SCALAR_U32_SIMD(10);
	ASSERT_EQUAL(U32x4(ADD_U32_SIMD(a, b)), U32x4(c));
	ASSERT_EQUAL(U32x4(ADD_U32_SIMD(a, c)), U32x4(14, 15, 16, 17));
	ASSERT_EQUAL(U32x4(SUB_U32_SIMD(c, b)), U32x4(a));
}
{ // U16x8
	SIMD_U16x8 a = LOAD_VECTOR_U16_SIMD(1, 2, 3, 4, 5, 6, 7, 8);
	SIMD_U16x8 b = LOAD_VECTOR_U16_SIMD(9, 8, 7, 6, 5, 4, 3, 2);
	SIMD_U16x8 c = LOAD_SCALAR_U16_SIMD(10);
	ASSERT_EQUAL(U16x8(ADD_U16_SIMD(a, b)), U16x8(c));
	ASSERT_EQUAL(U16x8(ADD_U16_SIMD(a, c)), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
	ASSERT_EQUAL(U16x8(SUB_U16_SIMD(c, b)), U16x8(a));
	ASSERT_EQUAL(U16x8(MUL_U16_SIMD(a, b)), U16x8(9, 16, 21, 24, 25, 24, 21, 16));
}
#endif
// Reinterpret
ASSERT_EQUAL(U16x8(U32x4(12, 34, 56, 78)), U16x8(12, 0, 34, 0, 56, 0, 78, 0));
ASSERT_EQUAL(U16x8(12, 0, 34, 0, 56, 0, 78, 0).get_U32(), U32x4(12, 34, 56, 78));
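// Round-trip sketch built only from the two conversions asserted above: reinterpreting the
// 32-bit lanes as 16-bit lanes and back should reproduce the original values.
ASSERT_EQUAL(U16x8(U32x4(12, 34, 56, 78)).get_U32(), U32x4(12, 34, 56, 78));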
// Reciprocal: 1 / x
ASSERT_EQUAL(F32x4(0.5f, 1.0f, 2.0f, 4.0f).reciprocal(), F32x4(2.0f, 1.0f, 0.5f, 0.25f));
// Square root: sqrt(x)
ASSERT_EQUAL(F32x4(1.0f, 4.0f, 9.0f, 100.0f).squareRoot(), F32x4(1.0f, 2.0f, 3.0f, 10.0f));
// Reciprocal square root: 1 / sqrt(x)
ASSERT_EQUAL(F32x4(1.0f, 4.0f, 16.0f, 100.0f).reciprocalSquareRoot(), F32x4(1.0f, 0.5f, 0.25f, 0.1f));
// Minimum
ASSERT_EQUAL(min(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(1.1f, 2.2f, 1.0f, -1.0f));
// Maximum
ASSERT_EQUAL(max(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(5.0f, 3.0f, 3.3f, 4.4f));
// Clamp
ASSERT_EQUAL(F32x4(-35.1f, 1.0f, 2.0f, 45.7f).clamp(-1.5f, 1.5f), F32x4(-1.5f, 1.0f, 1.5f, 1.5f));
// F32x4 operations
ASSERT_EQUAL(F32x4(1.1f, -2.2f, 3.3f, 4.0f) + F32x4(2.2f, -4.4f, 6.6f, 8.0f), F32x4(3.3f, -6.6f, 9.9f, 12.0f));
ASSERT_EQUAL(F32x4(-1.5f, -0.5f, 0.5f, 1.5f) + 1.0f, F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
ASSERT_EQUAL(1.0f + F32x4(-1.5f, -0.5f, 0.5f, 1.5f), F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
ASSERT_EQUAL(F32x4(1.1f, 2.2f, 3.3f, 4.4f) - F32x4(0.1f, 0.2f, 0.3f, 0.4f), F32x4(1.0f, 2.0f, 3.0f, 4.0f));
ASSERT_EQUAL(F32x4(1.0f, 2.0f, 3.0f, 4.0f) - 0.5f, F32x4(0.5f, 1.5f, 2.5f, 3.5f));
ASSERT_EQUAL(0.5f - F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(-0.5f, -1.5f, -2.5f, -3.5f));
ASSERT_EQUAL(2.0f * F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(2.0f, 4.0f, 6.0f, 8.0f));
ASSERT_EQUAL(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * -2.0f, F32x4(-2.0f, 4.0f, -6.0f, 8.0f));
ASSERT_EQUAL(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(1.0f, 4.0f, 9.0f, 16.0f));
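// Combined sketch using only the F32x4 overloads exercised above: vector addition followed
// by a scalar multiply, which averages two vectors exactly for these representable values.
ASSERT_EQUAL((F32x4(1.0f, 2.0f, 3.0f, 4.0f) + F32x4(3.0f, 2.0f, 1.0f, 0.0f)) * 0.5f, F32x4(2.0f, 2.0f, 2.0f, 2.0f));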
// I32x4 operations
ASSERT_EQUAL(I32x4(1, 2, -3, 4) + I32x4(-2, 4, 6, 8), I32x4(-1, 6, 3, 12));
ASSERT_EQUAL(I32x4(1, -2, 3, 4) - 4, I32x4(-3, -6, -1, 0));
ASSERT_EQUAL(10 + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
ASSERT_EQUAL(I32x4(1, 2, 3, 4) + I32x4(4), I32x4(5, 6, 7, 8));
ASSERT_EQUAL(I32x4(10) + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
ASSERT_EQUAL(I32x4(-3, 6, -9, 12) * I32x4(1, 2, -3, -4), I32x4(-3, 12, 27, -48));
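// Combined sketch using only the I32x4 overloads exercised above: broadcast addition
// followed by an element-wise multiply.
ASSERT_EQUAL((I32x4(1, 2, 3, 4) + I32x4(4)) * I32x4(2), I32x4(10, 12, 14, 16));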
// U32x4 operations
ASSERT_EQUAL(U32x4(1, 2, 3, 4) + U32x4(2, 4, 6, 8), U32x4(3, 6, 9, 12));
ASSERT_EQUAL(U32x4(1, 2, 3, 4) + 4, U32x4(5, 6, 7, 8));
ASSERT_EQUAL(10 + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
ASSERT_EQUAL(U32x4(1, 2, 3, 4) + U32x4(4), U32x4(5, 6, 7, 8));
ASSERT_EQUAL(U32x4(10) + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
ASSERT_EQUAL(U32x4(3, 6, 9, 12) - U32x4(1, 2, 3, 4), U32x4(2, 4, 6, 8));
ASSERT_EQUAL(U32x4(3, 6, 9, 12) * U32x4(1, 2, 3, 4), U32x4(3, 12, 27, 48));
// U16x8 operations
ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8(2, 4, 6, 8, 10, 12, 14, 16), U16x8(3, 6, 9, 12, 15, 18, 21, 24));
ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + 8, U16x8(9, 10, 11, 12, 13, 14, 15, 16));
ASSERT_EQUAL(10 + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8((uint16_t)8), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
ASSERT_EQUAL(U16x8((uint16_t)10) + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
ASSERT_EQUAL(U16x8(3, 6, 9, 12, 15, 18, 21, 24) - U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(2, 4, 6, 8, 10, 12, 14, 16));
// U8x16 operations
ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 2, U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18));
ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
ASSERT_EQUAL(
	saturatedAddition(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 255), U8x16((uint8_t)250)),
	U8x16(251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255)
);
// Saturated unsigned integer packing
ASSERT_EQUAL(saturateToU8(U16x8(1, 2, 3, 4, 65535, 6, 7, 8), U16x8(9, 10, 11, 12, 1000, 14, 15, 16)), U8x16(1, 2, 3, 4, 255, 6, 7, 8, 9, 10, 11, 12, 255, 14, 15, 16));
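// Hedged extra case, assuming the same packing order as the assertion above: the first
// argument fills lanes 0 to 7, the second fills lanes 8 to 15, and values above 255 saturate.
ASSERT_EQUAL(saturateToU8(U16x8(0, 1, 2, 3, 4, 5, 6, 7), U16x8(256, 300, 400, 500, 600, 700, 800, 65535)), U8x16(0, 1, 2, 3, 4, 5, 6, 7, 255, 255, 255, 255, 255, 255, 255, 255));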
// Unsigned integer unpacking
ASSERT_EQUAL(lowerToU32(U16x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x4(1, 2, 3, 4));
ASSERT_EQUAL(higherToU32(U16x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x4(5, 6, 7, 8));
ASSERT_EQUAL(lowerToU16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_EQUAL(higherToU16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
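// Round-trip sketch combining the unpacking above with the saturated packing asserted earlier:
// splitting a U8x16 into two U16x8 halves and packing them back should be lossless.
ASSERT_EQUAL(saturateToU8(lowerToU16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)), higherToU16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16))), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));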
// Reinterpretation
ASSERT_EQUAL(
	reinterpret_U8FromU32(U32x4(ENDIAN32_BYTE_0, ENDIAN32_BYTE_1, ENDIAN32_BYTE_2, ENDIAN32_BYTE_3)),
	U8x16(
		255, 0, 0, 0,
		0, 255, 0, 0,
		0, 0, 255, 0,
		0, 0, 0, 255
	)
);
ASSERT_EQUAL(
	reinterpret_U8FromU32(U32x4(
		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
		ENDIAN32_BYTE_1,
		ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
	)),
	U8x16(
		255, 0, 255, 0,
		255, 0, 0, 255,
		0, 255, 0, 0,
		0, 255, 0, 255
	)
);
ASSERT_EQUAL(
	reinterpret_U32FromU8(U8x16(
		255, 0, 255, 0,
		255, 0, 0, 255,
		0, 255, 0, 0,
		0, 255, 0, 255
	)),
	U32x4(
		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
		ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
		ENDIAN32_BYTE_1,
		ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
	)
);
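// Reading of the expected byte patterns above: ENDIAN32_BYTE_n appears to be the 32-bit mask
// whose n:th byte in memory order is 255, which keeps these byte-level checks independent of
// the machine's endianness.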
// Bit mask
ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0x0000FFFF, U32x4(0x0000FFFF, 0x00005678, 0x0000F0F0, 0x00000000));
ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0xFFFF0000, U32x4(0xFFFF0000, 0x12340000, 0xF0F00000, 0x00000000));
ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0x0000FFFF, U32x4(0xFFFFFFFF, 0x1234FFFF, 0xF0F0FFFF, 0x0000FFFF));
ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0xFFFF0000, U32x4(0xFFFFFFFF, 0xFFFF5678, 0xFFFFF0F0, 0xFFFF0000));
ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678) & U32x4(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0), U32x4(0xFF00FF00, 0xFFF00000, 0x0000F000, 0x02300670));
ASSERT_EQUAL(U32x4(0xF00F000F, 0xFFF000FF, 0x10010011, 0xABC00000) | U32x4(0x0000FF00, 0xFFFF0000, 0x000FF000, 0x000DEF00), U32x4(0xF00FFF0F, 0xFFFF00FF, 0x100FF011, 0xABCDEF00));
// Bit shift
ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 1, U32x4(2, 4, 6, 8));
ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 2, U32x4(4, 8, 12, 16));
ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 3, U32x4(8, 16, 24, 32));
ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 4, U32x4(16, 32, 48, 64));
ASSERT_EQUAL(U32x4(1, 2, 3, 4) >> 1, U32x4(0, 1, 1, 2));
ASSERT_EQUAL(U32x4(2, 4, 6, 8) >> 1, U32x4(1, 2, 3, 4));
ASSERT_EQUAL(U32x4(2, 4, 6, 8) >> 2, U32x4(0, 1, 1, 2));
ASSERT_EQUAL(U32x4(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0xF0000000) << 4, U32x4(0xAB12CD00, 0xFFFFFFF0, 0x23456780, 0x00000000));
ASSERT_EQUAL(U32x4(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0x0000000F) >> 4, U32x4(0x00AB12CD, 0x0FFFFFFF, 0x01234567, 0x00000000));
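// Combined sketch using the shift and scalar mask operators exercised above:
// extracting the second byte of each 32-bit lane.
ASSERT_EQUAL((U32x4(0x12345678, 0xAABBCCDD, 0x00FF00FF, 0xFFFFFFFF) >> 8) & 0x000000FF, U32x4(0x00000056, 0x000000CC, 0x00000000, 0x000000FF));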
// Element shift with insert
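// As the assertions below demonstrate, vectorExtract_N behaves as if the two inputs were
// concatenated and one full vector were read starting at lane offset N, so offset 0 returns
// the first input and the maximum offset returns the second input.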
ASSERT_EQUAL(vectorExtract_0(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(1, 2, 3, 4));
ASSERT_EQUAL(vectorExtract_1(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(2, 3, 4, 5));
ASSERT_EQUAL(vectorExtract_2(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(3, 4, 5, 6));
ASSERT_EQUAL(vectorExtract_3(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(4, 5, 6, 7));
ASSERT_EQUAL(vectorExtract_4(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(5, 6, 7, 8));
ASSERT_EQUAL(vectorExtract_0(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(123, 4294967295, 712, 45));
ASSERT_EQUAL(vectorExtract_1(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(4294967295, 712, 45, 850514));
ASSERT_EQUAL(vectorExtract_2(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(712, 45, 850514, 27));
ASSERT_EQUAL(vectorExtract_3(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(45, 850514, 27, 0));
ASSERT_EQUAL(vectorExtract_4(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(850514, 27, 0, 174));
ASSERT_EQUAL(vectorExtract_0(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(1, 2, 3, 4));
ASSERT_EQUAL(vectorExtract_1(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(2, 3, 4, 5));
ASSERT_EQUAL(vectorExtract_2(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(3, 4, 5, 6));
ASSERT_EQUAL(vectorExtract_3(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(4, 5, 6, 7));
ASSERT_EQUAL(vectorExtract_4(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(5, 6, 7, 8));
ASSERT_EQUAL(vectorExtract_0(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(123, 8462784, -712, 45));
ASSERT_EQUAL(vectorExtract_1(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(8462784, -712, 45, -37562));
ASSERT_EQUAL(vectorExtract_2(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(-712, 45, -37562, 27));
ASSERT_EQUAL(vectorExtract_3(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(45, -37562, 27, 0));
ASSERT_EQUAL(vectorExtract_4(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(-37562, 27, 0, 174));
ASSERT_EQUAL(vectorExtract_0(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(1.0f, -2.0f, 3.0f, -4.0f));
ASSERT_EQUAL(vectorExtract_1(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(-2.0f, 3.0f, -4.0f, 5.0f));
ASSERT_EQUAL(vectorExtract_2(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(3.0f, -4.0f, 5.0f, 6.0f));
ASSERT_EQUAL(vectorExtract_3(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(-4.0f, 5.0f, 6.0f, -7.0f));
ASSERT_EQUAL(vectorExtract_4(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(5.0f, 6.0f, -7.0f, 8.0f));
ASSERT_EQUAL(vectorExtract_0(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
ASSERT_EQUAL(vectorExtract_1(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(2, 3, 4, 5, 6, 7, 8, 9));
ASSERT_EQUAL(vectorExtract_2(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(3, 4, 5, 6, 7, 8, 9, 10));
ASSERT_EQUAL(vectorExtract_3(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(4, 5, 6, 7, 8, 9, 10, 11));
ASSERT_EQUAL(vectorExtract_4(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(5, 6, 7, 8, 9, 10, 11, 12));
ASSERT_EQUAL(vectorExtract_5(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(6, 7, 8, 9, 10, 11, 12, 13));
ASSERT_EQUAL(vectorExtract_6(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(7, 8, 9, 10, 11, 12, 13, 14));
ASSERT_EQUAL(vectorExtract_7(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(8, 9, 10, 11, 12, 13, 14, 15));
ASSERT_EQUAL(vectorExtract_8(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
ASSERT_EQUAL(vectorExtract_0(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
ASSERT_EQUAL(vectorExtract_1(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17));
ASSERT_EQUAL(vectorExtract_2(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18));
ASSERT_EQUAL(vectorExtract_3(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
ASSERT_EQUAL(vectorExtract_4(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20));
ASSERT_EQUAL(vectorExtract_5(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21));
ASSERT_EQUAL(vectorExtract_6(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22));
ASSERT_EQUAL(vectorExtract_7(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23));
ASSERT_EQUAL(vectorExtract_8(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24));
ASSERT_EQUAL(vectorExtract_9(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25));
ASSERT_EQUAL(vectorExtract_10(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26));
ASSERT_EQUAL(vectorExtract_11(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27));
ASSERT_EQUAL(vectorExtract_12(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28));
ASSERT_EQUAL(vectorExtract_13(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29));
ASSERT_EQUAL(vectorExtract_14(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30));
ASSERT_EQUAL(vectorExtract_15(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
ASSERT_EQUAL(vectorExtract_16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
#ifdef USE_SIMD_EXTRA
SIMD_U32x4 a = U32x4(1, 2, 3, 4).v;
SIMD_U32x4 b = U32x4(5, 6, 7, 8).v;
SIMD_U32x4x2 c = ZIP_U32_SIMD(a, b);
ASSERT_EQUAL(U32x4(c.val[0]), U32x4(1, 5, 2, 6));
ASSERT_EQUAL(U32x4(c.val[1]), U32x4(3, 7, 4, 8));
SIMD_U32x4 d = ZIP_LOW_U32_SIMD(a, b);
SIMD_U32x4 e = ZIP_HIGH_U32_SIMD(a, b);
ASSERT_EQUAL(U32x4(d), U32x4(1, 5, 2, 6));
ASSERT_EQUAL(U32x4(e), U32x4(3, 7, 4, 8));
#endif
END_TEST