SimdTest.cpp

#include "../testTools.h"
#include "../../DFPSR/base/simdExtra.h"
START_TEST(Simd)
	// F32x4 Comparisons
	ASSERT_EQUAL(F32x4(1.5f), F32x4(1.5f, 1.5f, 1.5f, 1.5f));
	ASSERT_EQUAL(F32x4(-1.5f), F32x4(-1.5f, -1.5f, -1.5f, -1.5f));
	ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
	ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().x, 1.2f);
	ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().y, 3.4f);
	ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().z, 5.6f);
	ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().w, 7.8f);
	ASSERT_NOT_EQUAL(F32x4(1.3f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
	ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, -1.4f, 5.6f, 7.8f));
	ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.5f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f));
	ASSERT_NOT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, -7.8f));
	// I32x4 Comparisons
	ASSERT_EQUAL(I32x4(4), I32x4(4, 4, 4, 4));
	ASSERT_EQUAL(I32x4(-4), I32x4(-4, -4, -4, -4));
	ASSERT_EQUAL(I32x4(-1, 2, -3, 4), I32x4(-1, 2, -3, 4));
	ASSERT_NOT_EQUAL(I32x4(-1, 2, 7, 4), I32x4(-1, 2, -3, 4));
	// U32x4 Comparisons
	ASSERT_EQUAL(U32x4(4), U32x4(4, 4, 4, 4));
	ASSERT_EQUAL(U32x4(1, 2, 3, 4), U32x4(1, 2, 3, 4));
	ASSERT_NOT_EQUAL(U32x4(1, 2, 7, 4), U32x4(1, 2, 3, 4));
	// U16x8 Comparisons
	ASSERT_EQUAL(U16x8((uint16_t)8), U16x8(8, 8, 8, 8, 8, 8, 8, 8));
	ASSERT_EQUAL(U16x8((uint32_t)8), U16x8(8, 0, 8, 0, 8, 0, 8, 0));
	ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(0, 2, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 0, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 2, 0, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 0, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 0, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 0, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 0, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 2, 0, 4, 5, 0, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(1, 0, 3, 4, 5, 6, 0, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(0, 2, 3, 4, 0, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_NOT_EQUAL(U16x8(0, 0, 0, 0, 0, 0, 0, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	// U8x16 Comparisons
	ASSERT_EQUAL(U8x16((uint8_t)250), U8x16(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250));
	ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 0, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 0, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 0, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 0, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 0, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 0), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 251, 252, 6, 254, 255), U8x16(1, 2, 3, 4, 5, 9, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	ASSERT_NOT_EQUAL(U8x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 0, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 4, 8, 9, 10, 250, 251, 252, 253, 254, 255));
	// Macros
	#ifdef USE_BASIC_SIMD
	{ // Truncate float to int
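		// The expected values below show that the conversion truncates toward zero:
		// -1.01f and -0.99f become -1 and 0 rather than rounding to the nearest integer.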
		SIMD_F32x4 f = LOAD_VECTOR_F32_SIMD(-1.01f, -0.99f, 0.99f, 1.01f);
		SIMD_I32x4 i = LOAD_VECTOR_I32_SIMD(-1, 0, 0, 1);
		ASSERT_EQUAL(I32x4(F32_TO_I32_SIMD(f)), I32x4(i));
	}
	{ // Int to float
		SIMD_I32x4 n = LOAD_VECTOR_I32_SIMD(123, 456, 789, -1000);
		SIMD_F32x4 r = LOAD_VECTOR_F32_SIMD(123.0f, 456.0f, 789.0f, -1000.0f);
		ASSERT_EQUAL(F32x4(I32_TO_F32_SIMD(n)), F32x4(r));
	}
	{ // Signed-unsigned cast
		ASSERT_EQUAL(I32x4(REINTERPRET_U32_TO_I32_SIMD(U32x4(1, 2, 3, 4).v)), I32x4(1, 2, 3, 4));
		ASSERT_EQUAL(U32x4(REINTERPRET_I32_TO_U32_SIMD(I32x4(1, 2, 3, 4).v)), U32x4(1, 2, 3, 4));
	}
	{ // F32x4
		SIMD_F32x4 a = LOAD_VECTOR_F32_SIMD(-1.3f, 2.5f, -3.4f, 4.7f);
		SIMD_F32x4 b = LOAD_VECTOR_F32_SIMD(5.2f, -2.0f, 0.1f, 1.9f);
		SIMD_F32x4 c = LOAD_SCALAR_F32_SIMD(0.5f);
		ASSERT_EQUAL(F32x4(ADD_F32_SIMD(a, b)), F32x4(-1.3f + 5.2f, 2.5f + -2.0f, -3.4f + 0.1f, 4.7f + 1.9f));
		ASSERT_EQUAL(F32x4(SUB_F32_SIMD(a, b)), F32x4(-1.3f - 5.2f, 2.5f - -2.0f, -3.4f - 0.1f, 4.7f - 1.9f));
		ASSERT_EQUAL(F32x4(ADD_F32_SIMD(a, c)), F32x4(-1.3f + 0.5f, 2.5f + 0.5f, -3.4f + 0.5f, 4.7f + 0.5f));
		ASSERT_EQUAL(F32x4(SUB_F32_SIMD(a, c)), F32x4(-1.3f - 0.5f, 2.5f - 0.5f, -3.4f - 0.5f, 4.7f - 0.5f));
		ASSERT_EQUAL(F32x4(MUL_F32_SIMD(a, c)), F32x4(-1.3f * 0.5f, 2.5f * 0.5f, -3.4f * 0.5f, 4.7f * 0.5f));
		ASSERT_EQUAL(F32x4(MIN_F32_SIMD(a, b)), F32x4(-1.3f, -2.0f, -3.4f, 1.9f));
		ASSERT_EQUAL(F32x4(MAX_F32_SIMD(a, b)), F32x4(5.2f, 2.5f, 0.1f, 4.7f));
	}
	{ // I32x4
		SIMD_I32x4 a = LOAD_VECTOR_I32_SIMD(-1, 2, -3, 4);
		SIMD_I32x4 b = LOAD_VECTOR_I32_SIMD(5, -2, 0, 1);
		SIMD_I32x4 c = LOAD_SCALAR_I32_SIMD(4);
		ASSERT_EQUAL(I32x4(ADD_I32_SIMD(a, b)), I32x4(4, 0, -3, 5));
		ASSERT_EQUAL(I32x4(SUB_I32_SIMD(a, b)), I32x4(-6, 4, -3, 3));
		ASSERT_EQUAL(I32x4(ADD_I32_SIMD(a, c)), I32x4(3, 6, 1, 8));
		ASSERT_EQUAL(I32x4(SUB_I32_SIMD(a, c)), I32x4(-5, -2, -7, 0));
	}
	{ // U32x4
		SIMD_U32x4 a = LOAD_VECTOR_U32_SIMD(4, 5, 6, 7);
		SIMD_U32x4 b = LOAD_VECTOR_U32_SIMD(6, 5, 4, 3);
		SIMD_U32x4 c = LOAD_SCALAR_U32_SIMD(10);
		ASSERT_EQUAL(U32x4(ADD_U32_SIMD(a, b)), U32x4(c));
		ASSERT_EQUAL(U32x4(ADD_U32_SIMD(a, c)), U32x4(14, 15, 16, 17));
		ASSERT_EQUAL(U32x4(SUB_U32_SIMD(c, b)), U32x4(a));
	}
	{ // U16x8
		SIMD_U16x8 a = LOAD_VECTOR_U16_SIMD(1, 2, 3, 4, 5, 6, 7, 8);
		SIMD_U16x8 b = LOAD_VECTOR_U16_SIMD(9, 8, 7, 6, 5, 4, 3, 2);
		SIMD_U16x8 c = LOAD_SCALAR_U16_SIMD(10);
		ASSERT_EQUAL(U16x8(ADD_U16_SIMD(a, b)), U16x8(c));
		ASSERT_EQUAL(U16x8(ADD_U16_SIMD(a, c)), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
		ASSERT_EQUAL(U16x8(SUB_U16_SIMD(c, b)), U16x8(a));
		ASSERT_EQUAL(U16x8(MUL_U16_SIMD(a, b)), U16x8(9, 16, 21, 24, 25, 24, 21, 16));
	}
	#endif
	// Reinterpret
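	// Note: constructing U16x8 from a U32x4 appears to keep the same 128 bits and read them as
	// eight 16-bit lanes, which is why each small 32-bit value expands into a value lane and a zero lane.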
	ASSERT_EQUAL(U16x8(U32x4(12, 34, 56, 78)), U16x8(12, 0, 34, 0, 56, 0, 78, 0));
	ASSERT_EQUAL(U16x8(12, 0, 34, 0, 56, 0, 78, 0).get_U32(), U32x4(12, 34, 56, 78));
	// Reciprocal: 1 / x
	ASSERT_EQUAL(F32x4(0.5f, 1.0f, 2.0f, 4.0f).reciprocal(), F32x4(2.0f, 1.0f, 0.5f, 0.25f));
	// Square root: sqrt(x)
	ASSERT_EQUAL(F32x4(1.0f, 4.0f, 9.0f, 100.0f).squareRoot(), F32x4(1.0f, 2.0f, 3.0f, 10.0f));
	// Reciprocal square root: 1 / sqrt(x)
	ASSERT_EQUAL(F32x4(1.0f, 4.0f, 16.0f, 100.0f).reciprocalSquareRoot(), F32x4(1.0f, 0.5f, 0.25f, 0.1f));
	// Minimum
	ASSERT_EQUAL(min(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(1.1f, 2.2f, 1.0f, -1.0f));
	// Maximum
	ASSERT_EQUAL(max(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(5.0f, 3.0f, 3.3f, 4.4f));
	// Clamp
	ASSERT_EQUAL(F32x4(-35.1f, 1.0f, 2.0f, 45.7f).clamp(-1.5f, 1.5f), F32x4(-1.5f, 1.0f, 1.5f, 1.5f));
	// F32x4 operations
	ASSERT_EQUAL(F32x4(1.1f, -2.2f, 3.3f, 4.0f) + F32x4(2.2f, -4.4f, 6.6f, 8.0f), F32x4(3.3f, -6.6f, 9.9f, 12.0f));
	ASSERT_EQUAL(F32x4(-1.5f, -0.5f, 0.5f, 1.5f) + 1.0f, F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
	ASSERT_EQUAL(1.0f + F32x4(-1.5f, -0.5f, 0.5f, 1.5f), F32x4(-0.5f, 0.5f, 1.5f, 2.5f));
	ASSERT_EQUAL(F32x4(1.1f, 2.2f, 3.3f, 4.4f) - F32x4(0.1f, 0.2f, 0.3f, 0.4f), F32x4(1.0f, 2.0f, 3.0f, 4.0f));
	ASSERT_EQUAL(F32x4(1.0f, 2.0f, 3.0f, 4.0f) - 0.5f, F32x4(0.5f, 1.5f, 2.5f, 3.5f));
	ASSERT_EQUAL(0.5f - F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(-0.5f, -1.5f, -2.5f, -3.5f));
	ASSERT_EQUAL(2.0f * F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(2.0f, 4.0f, 6.0f, 8.0f));
	ASSERT_EQUAL(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * -2.0f, F32x4(-2.0f, 4.0f, -6.0f, 8.0f));
	ASSERT_EQUAL(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(1.0f, 4.0f, 9.0f, 16.0f));
	// I32x4 operations
	ASSERT_EQUAL(I32x4(1, 2, -3, 4) + I32x4(-2, 4, 6, 8), I32x4(-1, 6, 3, 12));
	ASSERT_EQUAL(I32x4(1, -2, 3, 4) - 4, I32x4(-3, -6, -1, 0));
	ASSERT_EQUAL(10 + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
	ASSERT_EQUAL(I32x4(1, 2, 3, 4) + I32x4(4), I32x4(5, 6, 7, 8));
	ASSERT_EQUAL(I32x4(10) + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14));
	ASSERT_EQUAL(I32x4(-3, 6, -9, 12) * I32x4(1, 2, -3, -4), I32x4(-3, 12, 27, -48));
	// U32x4 operations
	ASSERT_EQUAL(U32x4(1, 2, 3, 4) + U32x4(2, 4, 6, 8), U32x4(3, 6, 9, 12));
	ASSERT_EQUAL(U32x4(1, 2, 3, 4) + 4, U32x4(5, 6, 7, 8));
	ASSERT_EQUAL(10 + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
	ASSERT_EQUAL(U32x4(1, 2, 3, 4) + U32x4(4), U32x4(5, 6, 7, 8));
	ASSERT_EQUAL(U32x4(10) + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14));
	ASSERT_EQUAL(U32x4(3, 6, 9, 12) - U32x4(1, 2, 3, 4), U32x4(2, 4, 6, 8));
	ASSERT_EQUAL(U32x4(3, 6, 9, 12) * U32x4(1, 2, 3, 4), U32x4(3, 12, 27, 48));
	// U16x8 operations
	ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8(2, 4, 6, 8, 10, 12, 14, 16), U16x8(3, 6, 9, 12, 15, 18, 21, 24));
	ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + 8, U16x8(9, 10, 11, 12, 13, 14, 15, 16));
	ASSERT_EQUAL(10 + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
	ASSERT_EQUAL(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8((uint16_t)8), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
	ASSERT_EQUAL(U16x8((uint16_t)10) + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18));
	ASSERT_EQUAL(U16x8(3, 6, 9, 12, 15, 18, 21, 24) - U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(2, 4, 6, 8, 10, 12, 14, 16));
	// U8x16 operations
	ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 2, U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18));
	ASSERT_EQUAL(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
	ASSERT_EQUAL(
		saturatedAddition(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 255), U8x16((uint8_t)250)),
		U8x16(251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255)
	);
	// Saturated unsigned integer packing
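	// saturateToU8 packs two U16x8 vectors into one U8x16; values above 255 (65535 and 1000 below) clamp to 255.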
	ASSERT_EQUAL(saturateToU8(U16x8(1, 2, 3, 4, 65535, 6, 7, 8), U16x8(9, 10, 11, 12, 1000, 14, 15, 16)), U8x16(1, 2, 3, 4, 255, 6, 7, 8, 9, 10, 11, 12, 255, 14, 15, 16));
	// Unsigned integer unpacking
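	// lowerToU32/higherToU32 and lowerToU16/higherToU16 widen the lower or upper half of a vector into larger lanes.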
	ASSERT_EQUAL(lowerToU32(U16x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x4(1, 2, 3, 4));
	ASSERT_EQUAL(higherToU32(U16x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x4(5, 6, 7, 8));
	ASSERT_EQUAL(lowerToU16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)), U16x8(1, 2, 3, 4, 5, 6, 7, 8));
	ASSERT_EQUAL(higherToU16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)), U16x8(9, 10, 11, 12, 13, 14, 15, 16));
	// Reinterpretation
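	// Assumption: the ENDIAN32_BYTE_0..3 constants are masks selecting the byte stored first..last in memory,
	// so these checks describe the byte layout without depending on the host's endianness.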
	ASSERT_EQUAL(
		reinterpret_U8FromU32(U32x4(ENDIAN32_BYTE_0, ENDIAN32_BYTE_1, ENDIAN32_BYTE_2, ENDIAN32_BYTE_3)),
		U8x16(
			255, 0, 0, 0,
			0, 255, 0, 0,
			0, 0, 255, 0,
			0, 0, 0, 255
		)
	);
	ASSERT_EQUAL(
		reinterpret_U8FromU32(U32x4(
			ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
			ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
			ENDIAN32_BYTE_1,
			ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
		)),
		U8x16(
			255, 0, 255, 0,
			255, 0, 0, 255,
			0, 255, 0, 0,
			0, 255, 0, 255
		)
	);
	ASSERT_EQUAL(
		reinterpret_U32FromU8(U8x16(
			255, 0, 255, 0,
			255, 0, 0, 255,
			0, 255, 0, 0,
			0, 255, 0, 255
		)),
		U32x4(
			ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
			ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
			ENDIAN32_BYTE_1,
			ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
		)
	);
	// Bit mask
	ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0x0000FFFF, U32x4(0x0000FFFF, 0x00005678, 0x0000F0F0, 0x00000000));
	ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0xFFFF0000, U32x4(0xFFFF0000, 0x12340000, 0xF0F00000, 0x00000000));
	ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0x0000FFFF, U32x4(0xFFFFFFFF, 0x1234FFFF, 0xF0F0FFFF, 0x0000FFFF));
	ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0xFFFF0000, U32x4(0xFFFFFFFF, 0xFFFF5678, 0xFFFFF0F0, 0xFFFF0000));
	ASSERT_EQUAL(U32x4(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678) & U32x4(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0), U32x4(0xFF00FF00, 0xFFF00000, 0x0000F000, 0x02300670));
	ASSERT_EQUAL(U32x4(0xF00F000F, 0xFFF000FF, 0x10010011, 0xABC00000) | U32x4(0x0000FF00, 0xFFFF0000, 0x000FF000, 0x000DEF00), U32x4(0xF00FFF0F, 0xFFFF00FF, 0x100FF011, 0xABCDEF00));
	// Bit shift
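	// Shifts operate per 32-bit lane; >> on U32x4 is a logical shift, so vacated high bits are filled with zeroes.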
	ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 1, U32x4(2, 4, 6, 8));
	ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 2, U32x4(4, 8, 12, 16));
	ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 3, U32x4(8, 16, 24, 32));
	ASSERT_EQUAL(U32x4(1, 2, 3, 4) << 4, U32x4(16, 32, 48, 64));
	ASSERT_EQUAL(U32x4(1, 2, 3, 4) >> 1, U32x4(0, 1, 1, 2));
	ASSERT_EQUAL(U32x4(2, 4, 6, 8) >> 1, U32x4(1, 2, 3, 4));
	ASSERT_EQUAL(U32x4(2, 4, 6, 8) >> 2, U32x4(0, 1, 1, 2));
	ASSERT_EQUAL(U32x4(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0xF0000000) << 4, U32x4(0xAB12CD00, 0xFFFFFFF0, 0x23456780, 0x00000000));
	ASSERT_EQUAL(U32x4(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0x0000000F) >> 4, U32x4(0x00AB12CD, 0x0FFFFFFF, 0x01234567, 0x00000000));
	#ifdef USE_SIMD_EXTRA
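	// ZIP_U32_SIMD interleaves two vectors element by element: val[0] holds the interleaved lower halves
	// and val[1] the interleaved upper halves, matching ZIP_LOW_U32_SIMD and ZIP_HIGH_U32_SIMD below.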
	SIMD_U32x4 a = U32x4(1, 2, 3, 4).v;
	SIMD_U32x4 b = U32x4(5, 6, 7, 8).v;
	SIMD_U32x4x2 c = ZIP_U32_SIMD(a, b);
	ASSERT_EQUAL(U32x4(c.val[0]), U32x4(1, 5, 2, 6));
	ASSERT_EQUAL(U32x4(c.val[1]), U32x4(3, 7, 4, 8));
	SIMD_U32x4 d = ZIP_LOW_U32_SIMD(a, b);
	SIMD_U32x4 e = ZIP_HIGH_U32_SIMD(a, b);
	ASSERT_EQUAL(U32x4(d), U32x4(1, 5, 2, 6));
	ASSERT_EQUAL(U32x4(e), U32x4(3, 7, 4, 8));
	#endif
END_TEST