SimdTest.cpp 77 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
  1. 
  2. #include "../testTools.h"
  3. #include "../../DFPSR/base/simd.h"
  4. // TODO: Test: allLanesNotEqual, allLanesLesser, allLanesGreater, allLanesLesserOrEqual, allLanesGreaterOrEqual, reinterpret_U16FromU32, reinterpret_U32FromU16, operand ~, smaller bit shifts.
  5. // TODO: Test that truncateToU32 saturates to minimum and maximum values.
  6. // TODO: Test that truncateToI32 saturates to minimum and maximum values.
  7. START_TEST(Simd)
  8. printText("\nSIMD test is compiled using:\n");
  9. #ifdef USE_SSE2
  10. printText(" * SSE2\n");
  11. #endif
  12. #ifdef USE_SSSE3
  13. printText(" * SSSE3\n");
  14. #endif
  15. #ifdef USE_AVX
  16. printText(" * AVX\n");
  17. #endif
  18. #ifdef USE_AVX2
  19. printText(" * AVX2\n");
  20. #endif
  21. #ifdef USE_NEON
  22. printText(" * NEON\n");
  23. #endif
  24. // F32x4 Comparisons
  25. ASSERT(allLanesEqual(F32x4(1.5f), F32x4(1.5f, 1.5f, 1.5f, 1.5f)));
  26. ASSERT(allLanesEqual(F32x4(-1.5f), F32x4(-1.5f, -1.5f, -1.5f, -1.5f)));
  27. ASSERT(allLanesEqual(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f)));
  28. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().x, 1.2f);
  29. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().y, 3.4f);
  30. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().z, 5.6f);
  31. ASSERT_EQUAL(F32x4(1.2f, 3.4f, 5.6f, 7.8f).get().w, 7.8f);
  32. ASSERT(!allLanesEqual(F32x4(1.3f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f)));
  33. ASSERT(!allLanesEqual(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, -1.4f, 5.6f, 7.8f)));
  34. ASSERT(!allLanesEqual(F32x4(1.2f, 3.4f, 5.5f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, 7.8f)));
  35. ASSERT(!allLanesEqual(F32x4(1.2f, 3.4f, 5.6f, 7.8f), F32x4(1.2f, 3.4f, 5.6f, -7.8f)));
  36. // I32x4 Comparisons
  37. ASSERT(allLanesEqual(I32x4(4), I32x4(4, 4, 4, 4)));
  38. ASSERT(allLanesEqual(I32x4(-4), I32x4(-4, -4, -4, -4)));
  39. ASSERT(allLanesEqual(I32x4(-1, 2, -3, 4), I32x4(-1, 2, -3, 4)));
  40. ASSERT(!allLanesEqual(I32x4(-1, 2, 7, 4), I32x4(-1, 2, -3, 4)));
  41. // U32x4 Comparisons
  42. ASSERT(allLanesEqual(U32x4(4), U32x4(4, 4, 4, 4)));
  43. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4), U32x4(1, 2, 3, 4)));
  44. ASSERT(!allLanesEqual(U32x4(1, 2, 7, 4), U32x4(1, 2, 3, 4)));
  45. // U16x8 Comparisons
  46. ASSERT(allLanesEqual(U16x8((uint16_t)8), U16x8(8, 8, 8, 8, 8, 8, 8, 8)));
  47. ASSERT(allLanesEqual(U16x8((uint32_t)8), U16x8(8, 0, 8, 0, 8, 0, 8, 0)));
  48. ASSERT(allLanesEqual(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  49. ASSERT(!allLanesEqual(U16x8(0, 2, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  50. ASSERT(!allLanesEqual(U16x8(1, 0, 3, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  51. ASSERT(!allLanesEqual(U16x8(1, 2, 0, 4, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  52. ASSERT(!allLanesEqual(U16x8(1, 2, 3, 0, 5, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  53. ASSERT(!allLanesEqual(U16x8(1, 2, 3, 4, 0, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  54. ASSERT(!allLanesEqual(U16x8(1, 2, 3, 4, 5, 0, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  55. ASSERT(!allLanesEqual(U16x8(1, 2, 3, 4, 5, 6, 0, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  56. ASSERT(!allLanesEqual(U16x8(1, 2, 3, 4, 5, 6, 7, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  57. ASSERT(!allLanesEqual(U16x8(1, 2, 0, 4, 5, 0, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  58. ASSERT(!allLanesEqual(U16x8(1, 0, 3, 4, 5, 6, 0, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  59. ASSERT(!allLanesEqual(U16x8(0, 2, 3, 4, 0, 6, 7, 8), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  60. ASSERT(!allLanesEqual(U16x8(0, 0, 0, 0, 0, 0, 0, 0), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  61. // U8x16 Comparisons
  62. ASSERT(allLanesEqual(U8x16((uint8_t)250), U8x16(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250)));
  63. ASSERT(allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  64. ASSERT(!allLanesEqual(U8x16(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  65. ASSERT(!allLanesEqual(U8x16(1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  66. ASSERT(!allLanesEqual(U8x16(1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  67. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  68. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  69. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  70. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 0, 8, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  71. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  72. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  73. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  74. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  75. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 0, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  76. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 0, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  77. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 0, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  78. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 0, 255), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  79. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 0), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  80. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 251, 252, 6, 254, 255), U8x16(1, 2, 3, 4, 5, 9, 7, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  81. ASSERT(!allLanesEqual(U8x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 0, 250, 251, 252, 253, 254, 255), U8x16(1, 2, 3, 4, 5, 6, 4, 8, 9, 10, 250, 251, 252, 253, 254, 255)));
  82. // Macros
  83. #ifdef USE_BASIC_SIMD
  84. { // Truncate float to int
  85. SIMD_F32x4 f = LOAD_VECTOR_F32_SIMD(-1.01f, -0.99f, 0.99f, 1.01f);
  86. SIMD_I32x4 i = LOAD_VECTOR_I32_SIMD(-1, 0, 0, 1);
  87. ASSERT(allLanesEqual(I32x4(F32_TO_I32_SIMD(f)), I32x4(i)));
  88. }
  89. { // Int to float
  90. SIMD_I32x4 n = LOAD_VECTOR_I32_SIMD(123 , 456 , 789 , -1000 );
  91. SIMD_F32x4 r = LOAD_VECTOR_F32_SIMD(123.0f, 456.0f, 789.0f, -1000.0f);
  92. ASSERT(allLanesEqual(F32x4(I32_TO_F32_SIMD(n)), F32x4(r)));
  93. }
  94. { // Signed-unsigned cast
  95. ASSERT(allLanesEqual(I32x4(REINTERPRET_U32_TO_I32_SIMD(U32x4(1, 2, 3, 4).v)), I32x4(1, 2, 3, 4)));
  96. ASSERT(allLanesEqual(U32x4(REINTERPRET_I32_TO_U32_SIMD(I32x4(1, 2, 3, 4).v)), U32x4(1, 2, 3, 4)));
  97. }
  98. { // F32x4
  99. SIMD_F32x4 a = LOAD_VECTOR_F32_SIMD(-1.3f, 2.5f, -3.4f, 4.7f);
  100. SIMD_F32x4 b = LOAD_VECTOR_F32_SIMD(5.2f, -2.0f, 0.1f, 1.9f);
  101. SIMD_F32x4 c = LOAD_SCALAR_F32_SIMD(0.5f);
  102. ASSERT(allLanesEqual(F32x4(ADD_F32_SIMD(a, b)), F32x4(-1.3f + 5.2f, 2.5f + -2.0f, -3.4f + 0.1f, 4.7f + 1.9f)));
  103. ASSERT(allLanesEqual(F32x4(SUB_F32_SIMD(a, b)), F32x4(-1.3f - 5.2f, 2.5f - -2.0f, -3.4f - 0.1f, 4.7f - 1.9f)));
  104. ASSERT(allLanesEqual(F32x4(ADD_F32_SIMD(a, c)), F32x4(-1.3f + 0.5f, 2.5f + 0.5f, -3.4f + 0.5f, 4.7f + 0.5f)));
  105. ASSERT(allLanesEqual(F32x4(SUB_F32_SIMD(a, c)), F32x4(-1.3f - 0.5f, 2.5f - 0.5f, -3.4f - 0.5f, 4.7f - 0.5f)));
  106. ASSERT(allLanesEqual(F32x4(MUL_F32_SIMD(a, c)), F32x4(-1.3f * 0.5f, 2.5f * 0.5f, -3.4f * 0.5f, 4.7f * 0.5f)));
  107. ASSERT(allLanesEqual(F32x4(MIN_F32_SIMD(a, b)), F32x4(-1.3f, -2.0f, -3.4f, 1.9f)));
  108. ASSERT(allLanesEqual(F32x4(MAX_F32_SIMD(a, b)), F32x4(5.2f, 2.5f, 0.1f, 4.7f)));
  109. }
  110. { // I32x4
  111. SIMD_I32x4 a = LOAD_VECTOR_I32_SIMD(-1, 2, -3, 4);
  112. SIMD_I32x4 b = LOAD_VECTOR_I32_SIMD(5, -2, 0, 1);
  113. SIMD_I32x4 c = LOAD_SCALAR_I32_SIMD(4);
  114. ASSERT(allLanesEqual(I32x4(ADD_I32_SIMD(a, b)), I32x4(4, 0, -3, 5)));
  115. ASSERT(allLanesEqual(I32x4(SUB_I32_SIMD(a, b)), I32x4(-6, 4, -3, 3)));
  116. ASSERT(allLanesEqual(I32x4(ADD_I32_SIMD(a, c)), I32x4(3, 6, 1, 8)));
  117. ASSERT(allLanesEqual(I32x4(SUB_I32_SIMD(a, c)), I32x4(-5, -2, -7, 0)));
  118. }
  119. { // U32x4
  120. SIMD_U32x4 a = LOAD_VECTOR_U32_SIMD(4, 5, 6, 7);
  121. SIMD_U32x4 b = LOAD_VECTOR_U32_SIMD(6, 5, 4, 3);
  122. SIMD_U32x4 c = LOAD_SCALAR_U32_SIMD(10);
  123. ASSERT(allLanesEqual(U32x4(ADD_U32_SIMD(a, b)), U32x4(c)));
  124. ASSERT(allLanesEqual(U32x4(ADD_U32_SIMD(a, c)), U32x4(14, 15, 16, 17)));
  125. ASSERT(allLanesEqual(U32x4(SUB_U32_SIMD(c, b)), U32x4(a)));
  126. }
  127. { // U16x8
  128. SIMD_U16x8 a = LOAD_VECTOR_U16_SIMD(1, 2, 3, 4, 5, 6, 7, 8);
  129. SIMD_U16x8 b = LOAD_VECTOR_U16_SIMD(9, 8, 7, 6, 5, 4, 3, 2);
  130. SIMD_U16x8 c = LOAD_SCALAR_U16_SIMD(10);
  131. ASSERT(allLanesEqual(U16x8(ADD_U16_SIMD(a, b)), U16x8(c)));
  132. ASSERT(allLanesEqual(U16x8(ADD_U16_SIMD(a, c)), U16x8(11, 12, 13, 14, 15, 16, 17, 18)));
  133. ASSERT(allLanesEqual(U16x8(SUB_U16_SIMD(c, b)), U16x8(a)));
  134. ASSERT(allLanesEqual(U16x8(MUL_U16_SIMD(a, b)), U16x8(9, 16, 21, 24, 25, 24, 21, 16)));
  135. }
  136. #endif
  137. // Reinterpret (Depends on endianness!)
  138. ASSERT(allLanesEqual(U16x8(U32x4(12, 34, 56, 78)), U16x8(12, 0, 34, 0, 56, 0, 78, 0)));
  139. ASSERT(allLanesEqual(U16x8(12, 0, 34, 0, 56, 0, 78, 0).get_U32(), U32x4(12, 34, 56, 78)));
  140. // Reciprocal: 1 / x
  141. ASSERT(allLanesEqual(reciprocal(F32x4(0.5f, 1.0f, 2.0f, 4.0f)), F32x4(2.0f, 1.0f, 0.5f, 0.25f)));
  142. // Square root: sqrt(x)
  143. ASSERT(allLanesEqual(squareRoot(F32x4(1.0f, 4.0f, 9.0f, 100.0f)), F32x4(1.0f, 2.0f, 3.0f, 10.0f)));
  144. // Reciprocal square root: 1 / sqrt(x)
  145. ASSERT(allLanesEqual(reciprocalSquareRoot(F32x4(1.0f, 4.0f, 16.0f, 100.0f)), F32x4(1.0f, 0.5f, 0.25f, 0.1f)));
  146. // Minimum
  147. ASSERT(allLanesEqual(min(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(1.1f, 2.2f, 1.0f, -1.0f)));
  148. // Maximum
  149. ASSERT(allLanesEqual(max(F32x4(1.1f, 2.2f, 3.3f, 4.4f), F32x4(5.0f, 3.0f, 1.0f, -1.0f)), F32x4(5.0f, 3.0f, 3.3f, 4.4f)));
  150. // Clamp
  151. ASSERT(allLanesEqual(clamp(F32x4(-1.5f), F32x4(-35.1f, 1.0f, 2.0f, 45.7f), F32x4(1.5f)), F32x4(-1.5f, 1.0f, 1.5f, 1.5f)));
  152. ASSERT(allLanesEqual(clampUpper(F32x4(-35.1f, 1.0f, 2.0f, 45.7f), F32x4(1.5f)), F32x4(-35.1f, 1.0f, 1.5f, 1.5f)));
  153. ASSERT(allLanesEqual(clampLower(F32x4(-1.5f), F32x4(-35.1f, 1.0f, 2.0f, 45.7f)), F32x4(-1.5f, 1.0f, 2.0f, 45.7f)));
  154. // F32x4 operations
  155. ASSERT(allLanesEqual(F32x4(1.1f, -2.2f, 3.3f, 4.0f) + F32x4(2.2f, -4.4f, 6.6f, 8.0f), F32x4(3.3f, -6.6f, 9.9f, 12.0f)));
  156. ASSERT(allLanesEqual(F32x4(-1.5f, -0.5f, 0.5f, 1.5f) + 1.0f, F32x4(-0.5f, 0.5f, 1.5f, 2.5f)));
  157. ASSERT(allLanesEqual(1.0f + F32x4(-1.5f, -0.5f, 0.5f, 1.5f), F32x4(-0.5f, 0.5f, 1.5f, 2.5f)));
  158. ASSERT(allLanesEqual(F32x4(1.1f, 2.2f, 3.3f, 4.4f) - F32x4(0.1f, 0.2f, 0.3f, 0.4f), F32x4(1.0f, 2.0f, 3.0f, 4.0f)));
  159. ASSERT(allLanesEqual(F32x4(1.0f, 2.0f, 3.0f, 4.0f) - 0.5f, F32x4(0.5f, 1.5f, 2.5f, 3.5f)));
  160. ASSERT(allLanesEqual(0.5f - F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(-0.5f, -1.5f, -2.5f, -3.5f)));
  161. ASSERT(allLanesEqual(2.0f * F32x4(1.0f, 2.0f, 3.0f, 4.0f), F32x4(2.0f, 4.0f, 6.0f, 8.0f)));
  162. ASSERT(allLanesEqual(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * -2.0f, F32x4(-2.0f, 4.0f, -6.0f, 8.0f)));
  163. ASSERT(allLanesEqual(F32x4(1.0f, -2.0f, 3.0f, -4.0f) * F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(1.0f, 4.0f, 9.0f, 16.0f)));
  164. ASSERT(allLanesEqual(-F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(-1.0f, 2.0f, -3.0f, 4.0f)));
  165. // I32x4 operations
  166. ASSERT(allLanesEqual(I32x4(1, 2, -3, 4) + I32x4(-2, 4, 6, 8), I32x4(-1, 6, 3, 12)));
  167. ASSERT(allLanesEqual(I32x4(1, -2, 3, 4) - 4, I32x4(-3, -6, -1, 0)));
  168. ASSERT(allLanesEqual(10 + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14)));
  169. ASSERT(allLanesEqual(I32x4(1, 2, 3, 4) + I32x4(4), I32x4(5, 6, 7, 8)));
  170. ASSERT(allLanesEqual(I32x4(10) + I32x4(1, 2, 3, 4), I32x4(11, 12, 13, 14)));
  171. ASSERT(allLanesEqual(I32x4(-3, 6, -9, 12) * I32x4(1, 2, -3, -4), I32x4(-3, 12, 27, -48)));
  172. ASSERT(allLanesEqual(-I32x4(1, -2, 3, -4), I32x4(-1, 2, -3, 4)));
  173. // U32x4 operations
  174. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4) + U32x4(2, 4, 6, 8), U32x4(3, 6, 9, 12)));
  175. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4) + 4, U32x4(5, 6, 7, 8)));
  176. ASSERT(allLanesEqual(10 + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14)));
  177. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4) + U32x4(4), U32x4(5, 6, 7, 8)));
  178. ASSERT(allLanesEqual(U32x4(10) + U32x4(1, 2, 3, 4), U32x4(11, 12, 13, 14)));
  179. ASSERT(allLanesEqual(U32x4(3, 6, 9, 12) - U32x4(1, 2, 3, 4), U32x4(2, 4, 6, 8)));
  180. ASSERT(allLanesEqual(U32x4(3, 6, 9, 12) * U32x4(1, 2, 3, 4), U32x4(3, 12, 27, 48)));
  181. // U16x8 operations
  182. ASSERT(allLanesEqual(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8(2, 4, 6, 8, 10, 12, 14, 16), U16x8(3, 6, 9, 12, 15, 18, 21, 24)));
  183. ASSERT(allLanesEqual(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + 8, U16x8(9, 10, 11, 12, 13, 14, 15, 16)));
  184. ASSERT(allLanesEqual(10 + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18)));
  185. ASSERT(allLanesEqual(U16x8(1, 2, 3, 4, 5, 6, 7, 8) + U16x8((uint16_t)8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)));
  186. ASSERT(allLanesEqual(U16x8((uint16_t)10) + U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(11, 12, 13, 14, 15, 16, 17, 18)));
  187. ASSERT(allLanesEqual(U16x8(3, 6, 9, 12, 15, 18, 21, 24) - U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(2, 4, 6, 8, 10, 12, 14, 16)));
  188. // U8x16 operations
  189. ASSERT(allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 2, U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)));
  190. ASSERT(allLanesEqual(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)));
  191. ASSERT(allLanesEqual(
  192. saturatedAddition(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 255), U8x16((uint8_t)250)),
  193. U8x16(251, 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255)
  194. ));
  195. ASSERT(allLanesEqual(
  196. saturatedSubtraction(
  197. U8x16(128, 128, 128, 0, 255, 255, 0, 200, 123, 80, 46, 46, 46, 255, 255, 255),
  198. U8x16( 0, 128, 255, 0, 255, 0, 255, 100, 23, 81, 45, 46, 47, 128, 127, 200)),
  199. U8x16(128, 0, 0, 0, 0, 255, 0, 100, 100, 0, 1, 0, 0, 127, 128, 55)
  200. ));
  201. // Saturated unsigned integer packing
  202. ASSERT(allLanesEqual(saturateToU8(U16x8(1, 2, 3, 4, 65535, 6, 7, 8), U16x8(9, 10, 11, 12, 1000, 14, 15, 16)), U8x16(1, 2, 3, 4, 255, 6, 7, 8, 9, 10, 11, 12, 255, 14, 15, 16)));
  203. // Unsigned integer unpacking
  204. ASSERT(allLanesEqual(lowerToU32(U16x8(1,2,3,4,5,6,7,8)), U32x4(1, 2, 3, 4)));
  205. ASSERT(allLanesEqual(higherToU32(U16x8(1,2,3,4,5,6,7,8)), U32x4(5, 6, 7, 8)));
  206. ASSERT(allLanesEqual(lowerToU16(U8x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U16x8(1,2,3,4,5,6,7,8)));
  207. ASSERT(allLanesEqual(higherToU16(U8x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U16x8(9,10,11,12,13,14,15,16)));
  208. // Reinterpretation
  209. ASSERT(allLanesEqual(
  210. reinterpret_U8FromU32(U32x4(ENDIAN32_BYTE_0, ENDIAN32_BYTE_1, ENDIAN32_BYTE_2, ENDIAN32_BYTE_3)),
  211. U8x16(
  212. 255, 0, 0, 0,
  213. 0, 255, 0, 0,
  214. 0, 0, 255, 0,
  215. 0, 0, 0, 255
  216. )
  217. ));
  218. ASSERT(allLanesEqual(
  219. reinterpret_U8FromU32(U32x4(
  220. ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
  221. ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
  222. ENDIAN32_BYTE_1,
  223. ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
  224. )),
  225. U8x16(
  226. 255, 0, 255, 0,
  227. 255, 0, 0, 255,
  228. 0, 255, 0, 0,
  229. 0, 255, 0, 255
  230. )
  231. ));
  232. ASSERT(allLanesEqual(
  233. reinterpret_U32FromU8(U8x16(
  234. 255, 0, 255, 0,
  235. 255, 0, 0, 255,
  236. 0, 255, 0, 0,
  237. 0, 255, 0, 255
  238. )),
  239. U32x4(
  240. ENDIAN32_BYTE_0 | ENDIAN32_BYTE_2,
  241. ENDIAN32_BYTE_0 | ENDIAN32_BYTE_3,
  242. ENDIAN32_BYTE_1,
  243. ENDIAN32_BYTE_1 | ENDIAN32_BYTE_3
  244. )
  245. ));
  246. // Bit mask
  247. ASSERT(allLanesEqual(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0x0000FFFF, U32x4(0x0000FFFF, 0x00005678, 0x0000F0F0, 0x00000000)));
  248. ASSERT(allLanesEqual(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) & 0xFFFF0000, U32x4(0xFFFF0000, 0x12340000, 0xF0F00000, 0x00000000)));
  249. ASSERT(allLanesEqual(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0x0000FFFF, U32x4(0xFFFFFFFF, 0x1234FFFF, 0xF0F0FFFF, 0x0000FFFF)));
  250. ASSERT(allLanesEqual(U32x4(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000) | 0xFFFF0000, U32x4(0xFFFFFFFF, 0xFFFF5678, 0xFFFFF0F0, 0xFFFF0000)));
  251. ASSERT(allLanesEqual(U32x4(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678) & U32x4(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0), U32x4(0xFF00FF00, 0xFFF00000, 0x0000F000, 0x02300670)));
  252. ASSERT(allLanesEqual(U32x4(0xF00F000F, 0xFFF000FF, 0x10010011, 0xABC00000) | U32x4(0x0000FF00, 0xFFFF0000, 0x000FF000, 0x000DEF00), U32x4(0xF00FFF0F, 0xFFFF00FF, 0x100FF011, 0xABCDEF00)));
  253. // Exclusive or
  254. ASSERT(allLanesEqual(U32x4(0xFFFFFFFF, 0x01234567, 0xF0F0F0F0, 0x00000000) ^ 0x0000FFFF, U32x4(0xFFFF0000, 0x0123BA98, 0xF0F00F0F, 0x0000FFFF)));
  255. // Bit shift with dynamic offset. (volatile to prevent constant propagation)
  256. volatile uint32_t offset = 1;
  257. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4) << U32x4(offset), U32x4(2, 4, 6, 8)));
  258. offset = 2;
  259. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4) << U32x4(offset), U32x4(4, 8, 12, 16)));
  260. offset = 3;
  261. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4) << U32x4(offset), U32x4(8, 16, 24, 32)));
  262. offset = 4;
  263. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4) << U32x4(offset), U32x4(16, 32, 48, 64)));
  264. offset = 1;
  265. ASSERT(allLanesEqual(U32x4(1, 2, 3, 4) >> U32x4(offset), U32x4(0, 1, 1, 2)));
  266. ASSERT(allLanesEqual(U32x4(2, 4, 6, 8) >> U32x4(offset), U32x4(1, 2, 3, 4)));
  267. offset = 2;
  268. ASSERT(allLanesEqual(U32x4(2, 4, 6, 8) >> U32x4(offset), U32x4(0, 1, 1, 2)));
  269. ASSERT(allLanesEqual(bitShiftLeftImmediate<1>(U32x4(1, 2, 3, 4)), U32x4(2, 4, 6, 8)));
  270. ASSERT(allLanesEqual(bitShiftLeftImmediate<2>(U32x4(1, 2, 3, 4)), U32x4(4, 8, 12, 16)));
  271. ASSERT(allLanesEqual(bitShiftLeftImmediate<3>(U32x4(1, 2, 3, 4)), U32x4(8, 16, 24, 32)));
  272. ASSERT(allLanesEqual(bitShiftLeftImmediate<4>(U32x4(1, 2, 3, 4)), U32x4(16, 32, 48, 64)));
  273. ASSERT(allLanesEqual(bitShiftRightImmediate<1>(U32x4(1, 2, 3, 4)), U32x4(0, 1, 1, 2)));
  274. ASSERT(allLanesEqual(bitShiftRightImmediate<1>(U32x4(2, 4, 6, 8)), U32x4(1, 2, 3, 4)));
  275. ASSERT(allLanesEqual(bitShiftRightImmediate<2>(U32x4(2, 4, 6, 8)), U32x4(0, 1, 1, 2)));
  276. ASSERT(allLanesEqual(bitShiftLeftImmediate<4>(U32x4(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0xF0000000)), U32x4(0xAB12CD00, 0xFFFFFFF0, 0x23456780, 0x00000000)));
  277. ASSERT(allLanesEqual(bitShiftRightImmediate<4>(U32x4(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0x0000000F)), U32x4(0x00AB12CD, 0x0FFFFFFF, 0x01234567, 0x00000000)));
  278. // Element shift with insert
  279. ASSERT(allLanesEqual(vectorExtract_0(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(1, 2, 3, 4)));
  280. ASSERT(allLanesEqual(vectorExtract_1(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(2, 3, 4, 5)));
  281. ASSERT(allLanesEqual(vectorExtract_2(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(3, 4, 5, 6)));
  282. ASSERT(allLanesEqual(vectorExtract_3(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(4, 5, 6, 7)));
  283. ASSERT(allLanesEqual(vectorExtract_4(U32x4(1, 2, 3, 4), U32x4(5, 6, 7, 8)), U32x4(5, 6, 7, 8)));
  284. ASSERT(allLanesEqual(vectorExtract_0(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(123, 4294967295, 712, 45)));
  285. ASSERT(allLanesEqual(vectorExtract_1(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(4294967295, 712, 45, 850514)));
  286. ASSERT(allLanesEqual(vectorExtract_2(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(712, 45, 850514, 27)));
  287. ASSERT(allLanesEqual(vectorExtract_3(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(45, 850514, 27, 0)));
  288. ASSERT(allLanesEqual(vectorExtract_4(U32x4(123, 4294967295, 712, 45), U32x4(850514, 27, 0, 174)), U32x4(850514, 27, 0, 174)));
  289. ASSERT(allLanesEqual(vectorExtract_0(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(1, 2, 3, 4)));
  290. ASSERT(allLanesEqual(vectorExtract_1(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(2, 3, 4, 5)));
  291. ASSERT(allLanesEqual(vectorExtract_2(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(3, 4, 5, 6)));
  292. ASSERT(allLanesEqual(vectorExtract_3(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(4, 5, 6, 7)));
  293. ASSERT(allLanesEqual(vectorExtract_4(I32x4(1, 2, 3, 4), I32x4(5, 6, 7, 8)), I32x4(5, 6, 7, 8)));
  294. ASSERT(allLanesEqual(vectorExtract_0(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(123, 8462784, -712, 45)));
  295. ASSERT(allLanesEqual(vectorExtract_1(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(8462784, -712, 45, -37562)));
  296. ASSERT(allLanesEqual(vectorExtract_2(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(-712, 45, -37562, 27)));
  297. ASSERT(allLanesEqual(vectorExtract_3(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(45, -37562, 27, 0)));
  298. ASSERT(allLanesEqual(vectorExtract_4(I32x4(123, 8462784, -712, 45), I32x4(-37562, 27, 0, 174)), I32x4(-37562, 27, 0, 174)));
  299. ASSERT(allLanesEqual(vectorExtract_0(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(1.0f, -2.0f, 3.0f, -4.0f)));
  300. ASSERT(allLanesEqual(vectorExtract_1(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(-2.0f, 3.0f, -4.0f, 5.0f)));
  301. ASSERT(allLanesEqual(vectorExtract_2(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(3.0f, -4.0f, 5.0f, 6.0f)));
  302. ASSERT(allLanesEqual(vectorExtract_3(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(-4.0f, 5.0f, 6.0f, -7.0f)));
  303. ASSERT(allLanesEqual(vectorExtract_4(F32x4(1.0f, -2.0f, 3.0f, -4.0f), F32x4(5.0f, 6.0f, -7.0f, 8.0f)), F32x4(5.0f, 6.0f, -7.0f, 8.0f)));
  304. ASSERT(allLanesEqual(vectorExtract_0(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(1, 2, 3, 4, 5, 6, 7, 8)));
  305. ASSERT(allLanesEqual(vectorExtract_1(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(2, 3, 4, 5, 6, 7, 8, 9)));
  306. ASSERT(allLanesEqual(vectorExtract_2(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(3, 4, 5, 6, 7, 8, 9, 10)));
  307. ASSERT(allLanesEqual(vectorExtract_3(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(4, 5, 6, 7, 8, 9, 10, 11)));
  308. ASSERT(allLanesEqual(vectorExtract_4(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(5, 6, 7, 8, 9, 10, 11, 12)));
  309. ASSERT(allLanesEqual(vectorExtract_5(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(6, 7, 8, 9, 10, 11, 12, 13)));
  310. ASSERT(allLanesEqual(vectorExtract_6(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(7, 8, 9, 10, 11, 12, 13, 14)));
  311. ASSERT(allLanesEqual(vectorExtract_7(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(8, 9, 10, 11, 12, 13, 14, 15)));
  312. ASSERT(allLanesEqual(vectorExtract_8(U16x8(1, 2, 3, 4, 5, 6, 7, 8), U16x8(9, 10, 11, 12, 13, 14, 15, 16)), U16x8(9, 10, 11, 12, 13, 14, 15, 16)));
  313. ASSERT(allLanesEqual(vectorExtract_0(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  314. ASSERT(allLanesEqual(vectorExtract_1(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)));
  315. ASSERT(allLanesEqual(vectorExtract_2(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)));
  316. ASSERT(allLanesEqual(vectorExtract_3(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)));
  317. ASSERT(allLanesEqual(vectorExtract_4(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)));
  318. ASSERT(allLanesEqual(vectorExtract_5(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)));
  319. ASSERT(allLanesEqual(vectorExtract_6(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)));
  320. ASSERT(allLanesEqual(vectorExtract_7(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)));
  321. ASSERT(allLanesEqual(vectorExtract_8(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)));
  322. ASSERT(allLanesEqual(vectorExtract_9(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)));
  323. ASSERT(allLanesEqual(vectorExtract_10(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)));
  324. ASSERT(allLanesEqual(vectorExtract_11(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)));
  325. ASSERT(allLanesEqual(vectorExtract_12(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)));
  326. ASSERT(allLanesEqual(vectorExtract_13(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)));
  327. ASSERT(allLanesEqual(vectorExtract_14(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)));
  328. ASSERT(allLanesEqual(vectorExtract_15(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)));
  329. ASSERT(allLanesEqual(vectorExtract_16(U8x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)), U8x16(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)));
  330. // TODO: Move SIMD extra into simd.h and implement emulation.
  331. #ifdef USE_SIMD_EXTRA
  332. SIMD_U32x4 a = U32x4(1, 2, 3, 4).v;
  333. SIMD_U32x4 b = U32x4(5, 6, 7, 8).v;
  334. SIMD_U32x4x2 c = ZIP_U32_SIMD(a, b);
  335. ASSERT(allLanesEqual(U32x4(c.val[0]), U32x4(1, 5, 2, 6)));
  336. ASSERT(allLanesEqual(U32x4(c.val[1]), U32x4(3, 7, 4, 8)));
  337. SIMD_U32x4 d = ZIP_LOW_U32_SIMD(a, b);
  338. SIMD_U32x4 e = ZIP_HIGH_U32_SIMD(a, b);
  339. ASSERT(allLanesEqual(U32x4(d), U32x4(1, 5, 2, 6)));
  340. ASSERT(allLanesEqual(U32x4(e), U32x4(3, 7, 4, 8)));
  341. #endif
  342. // 256-bit SIMD tests (emulated using scalar operations if the test is not compiled with AVX2 enabled)
  343. // F32x8 Comparisons
  344. ASSERT(allLanesEqual(F32x8(1.5f), F32x8(1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f, 1.5f)));
  345. ASSERT(allLanesEqual(F32x8(-1.5f), F32x8(-1.5f, -1.5f, -1.5f, -1.5f, -1.5f, -1.5f, -1.5f, -1.5f)));
  346. ASSERT(allLanesEqual(F32x8(1.2f, 3.4f, 5.6f, 7.8f, -2.4f, 452.351f, 1000000.0f, -1000.0f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, -2.4f, 452.351f, 1000000.0f, -1000.0f)));
  347. ASSERT(!allLanesEqual(F32x8(1.3f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f)));
  348. ASSERT(!allLanesEqual(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, -1.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f)));
  349. ASSERT(!allLanesEqual(F32x8(1.2f, 3.4f, 5.5f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f)));
  350. ASSERT(!allLanesEqual(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, -7.8f, 5.3f, 6.7f, 1.4f, -5.2f)));
  351. ASSERT(!allLanesEqual(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 0.0f, 6.7f, 1.4f, -5.2f)));
  352. ASSERT(!allLanesEqual(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.69f, 1.4f, -5.2f)));
  353. ASSERT(!allLanesEqual(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.3f, -5.2f)));
  354. ASSERT(!allLanesEqual(F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, -5.2f), F32x8(1.2f, 3.4f, 5.6f, 7.8f, 5.3f, 6.7f, 1.4f, 5.2f)));
  355. // I32x8 Comparisons
  356. ASSERT(allLanesEqual(I32x8(4), I32x8(4, 4, 4, 4, 4, 4, 4, 4)));
  357. ASSERT(allLanesEqual(I32x8(-4), I32x8(-4, -4, -4, -4, -4, -4, -4, -4)));
  358. ASSERT(allLanesEqual(I32x8(-1, 2, -3, 4, -5, 6, -7, 8), I32x8(-1, 2, -3, 4, -5, 6, -7, 8)));
  359. ASSERT(!allLanesEqual(I32x8(-1, 2, 7, 4, 8, 3, 5, 45), I32x8(-1, 2, -3, 4, 8, 3, 5, 45)));
  360. // U32x8 Comparisons
  361. ASSERT(allLanesEqual(U32x8(4), U32x8(4, 4, 4, 4, 4, 4, 4, 4)));
  362. ASSERT(allLanesEqual(U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(1, 2, 3, 4, 5, 6, 7, 8)));
  363. ASSERT(!allLanesEqual(U32x8(1, 2, 3, 4, 5, 6, 12, 8), U32x8(1, 2, 3, 4, 5, 6, 7, 8)));
  364. // U16x16 Comparisons
  365. ASSERT(allLanesEqual(U16x16((uint16_t)8), U16x16(8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8)));
  366. ASSERT(allLanesEqual(U16x16((uint32_t)8), U16x16(8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0, 8, 0)));
  367. ASSERT(allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  368. ASSERT(!allLanesEqual(U16x16(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  369. ASSERT(!allLanesEqual(U16x16(1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  370. ASSERT(!allLanesEqual(U16x16(1, 2, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  371. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  372. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  373. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  374. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 0, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  375. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  376. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  377. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  378. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  379. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  380. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  381. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  382. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  383. ASSERT(!allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  384. ASSERT(!allLanesEqual(U16x16(1, 2, 0, 4, 5, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  385. ASSERT(!allLanesEqual(U16x16(1, 0, 3, 4, 5, 6, 0, 0, 9, 10, 11, 12, 13, 0, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  386. ASSERT(!allLanesEqual(U16x16(0, 2, 3, 4, 0, 6, 7, 8, 9, 10, 11, 0, 13, 14, 15, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  387. ASSERT(!allLanesEqual(U16x16(0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 11, 0, 13, 14, 0, 16), U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)));
  388. // U8x32 Comparisons
  389. ASSERT(allLanesEqual(U8x32((uint8_t)250), U8x32(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250)));
  390. ASSERT(!allLanesEqual(U8x32((uint8_t)250), U8x32(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 100, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250)));
  391. ASSERT(!allLanesEqual(U8x32((uint8_t)250), U8x32(0, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250)));
  392. ASSERT(!allLanesEqual(U8x32((uint8_t)250), U8x32(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 0)));
  393. // Reinterpret (Depends on endianness!)
  394. ASSERT(allLanesEqual(U16x16(U32x8(12, 34, 56, 78, 11, 22, 33, 44)), U16x16(12, 0, 34, 0, 56, 0, 78, 0, 11, 0, 22, 0, 33, 0, 44, 0)));
  395. ASSERT(allLanesEqual(U16x16(U32x8(12, 34, 56, 78, 11, 22, 33, 131116)), U16x16(12, 0, 34, 0, 56, 0, 78, 0, 11, 0, 22, 0, 33, 0, 44, 2)));
  396. ASSERT(allLanesEqual(U16x16(12, 0, 34, 0, 56, 0, 78, 0, 11, 0, 22, 0, 33, 0, 44, 2).get_U32(), U32x8(12, 34, 56, 78, 11, 22, 33, 131116)));
  397. // Reciprocal: 1 / x
  398. ASSERT(allLanesEqual(reciprocal(F32x8(0.5f, 1.0f, 2.0f, 4.0f, 8.0f, 10.0f, 100.0f, 1000.0f)), F32x8(2.0f, 1.0f, 0.5f, 0.25f, 0.125f, 0.1f, 0.01f, 0.001f)));
  399. // Square root: sqrt(x)
  400. ASSERT(allLanesEqual(squareRoot(F32x8(1.0f, 4.0f, 9.0f, 100.0f, 64.0f, 256.0f, 1024.0f, 4096.0f)), F32x8(1.0f, 2.0f, 3.0f, 10.0f, 8.0f, 16.0f, 32.0f, 64.0f)));
  401. // Reciprocal square root: 1 / sqrt(x)
  402. ASSERT(allLanesEqual(reciprocalSquareRoot(F32x8(1.0f, 4.0f, 16.0f, 100.0f, 400.0f, 64.0f, 25.0f, 100.0f)), F32x8(1.0f, 0.5f, 0.25f, 0.1f, 0.05f, 0.125f, 0.2f, 0.1f)));
  403. // Minimum
  404. ASSERT(allLanesEqual(min(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(1.1f, 2.2f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 8.8f)));
  405. // Maximum
  406. ASSERT(allLanesEqual(max(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f), F32x8(5.0f, 3.0f, 1.0f, -1.0f, 4.0f, 5.0f, -2.5f, 10.0f)), F32x8(5.0f, 3.0f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 10.0f)));
  407. // Clamp
  408. ASSERT(allLanesEqual(clamp(F32x8(-1.5f), F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f), F32x8(1.5f)), F32x8(-1.5f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.5f)));
  409. ASSERT(allLanesEqual(clampUpper(F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f), F32x8(1.5f)), F32x8(-35.1f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.9f)));
  410. ASSERT(allLanesEqual(clampLower(F32x8(-1.5f), F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f)), F32x8(-1.5f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.5f)));
  411. // F32x8 operations
  412. ASSERT(allLanesEqual(F32x8(1.1f, -2.2f, 3.3f, 4.0f, 1.4f, 2.3f, 3.2f, 4.1f) + F32x8(2.2f, -4.4f, 6.6f, 8.0f, 4.11f, 3.22f, 2.33f, 1.44f), F32x8(3.3f, -6.6f, 9.9f, 12.0f, 5.51f, 5.52f, 5.53f, 5.54f)));
  413. ASSERT(allLanesEqual(F32x8(-1.5f, -0.5f, 0.5f, 1.5f, 1000.0f, 2000.0f, -4000.0f, -1500.0f) + 1.0f, F32x8(-0.5f, 0.5f, 1.5f, 2.5f, 1001.0f, 2001.0f, -3999.0f, -1499.0f)));
  414. ASSERT(allLanesEqual(1.0f + F32x8(-1.5f, -0.5f, 0.5f, 1.5f, 1000.0f, 2000.0f, -4000.0f, -1500.0f), F32x8(-0.5f, 0.5f, 1.5f, 2.5f, 1001.0f, 2001.0f, -3999.0f, -1499.0f)));
  415. ASSERT(allLanesEqual(F32x8(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f) - F32x8(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f), F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f)));
  416. ASSERT(allLanesEqual(F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f) - 0.5f, F32x8(0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f)));
  417. ASSERT(allLanesEqual(0.5f - F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f), F32x8(-0.5f, -1.5f, -2.5f, -3.5f, -4.5f, -5.5f, -6.5f, -7.5f)));
  418. ASSERT(allLanesEqual(2.0f * F32x8(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f), F32x8(2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, 16.0f)));
  419. ASSERT(allLanesEqual(F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f) * -2.0f, F32x8(-2.0f, 4.0f, -6.0f, 8.0f, -10.0f, 12.0f, -14.0f, 16.0f)));
  420. ASSERT(allLanesEqual(F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f) * F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f), F32x8(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f)));
  421. ASSERT(allLanesEqual(-F32x8(1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f), F32x8(-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, 6.0f, -7.0f, 8.0f)));
  422. // I32x8 operations
  423. ASSERT(allLanesEqual(I32x8(1, 2, 3, 4, 5, 6, 7, 8) - 1, I32x8(0, 1, 2, 3, 4, 5, 6, 7)));
  424. ASSERT(allLanesEqual(1 - I32x8(1, 2, 3, 4, 5, 6, 7, 8), I32x8(0, -1, -2, -3, -4, -5, -6, -7)));
  425. ASSERT(allLanesEqual(2 * I32x8(1, 2, 3, 4, 5, 6, 7, 8), I32x8(2, 4, 6, 8, 10, 12, 14, 16)));
  426. ASSERT(allLanesEqual(I32x8(1, -2, 3, -4, 5, -6, 7, -8) * -2, I32x8(-2, 4, -6, 8, -10, 12, -14, 16)));
  427. ASSERT(allLanesEqual(I32x8(1, -2, 3, -4, 5, -6, 7, -8) * I32x8(1, -2, 3, -4, 5, -6, 7, -8), I32x8(1, 4, 9, 16, 25, 36, 49, 64)));
  428. ASSERT(allLanesEqual(-I32x8(1, -2, 3, -4, 5, -6, 7, -8), I32x8(-1, 2, -3, 4, -5, 6, -7, 8)));
  429. // U32x8 operations
  430. ASSERT(allLanesEqual(U32x8(1, 2, 3, 4, 5, 6, 7, 8) - 1, U32x8(0, 1, 2, 3, 4, 5, 6, 7)));
  431. ASSERT(allLanesEqual(10 - U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(9, 8, 7, 6, 5, 4, 3, 2)));
  432. ASSERT(allLanesEqual(2 * U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(2, 4, 6, 8, 10, 12, 14, 16)));
  433. ASSERT(allLanesEqual(U32x8(1, 2, 3, 4, 5, 6, 7, 8) * 2, U32x8(2, 4, 6, 8, 10, 12, 14, 16)));
  434. ASSERT(allLanesEqual(U32x8(1, 2, 3, 4, 5, 6, 7, 8) * U32x8(1, 2, 3, 4, 5, 6, 7, 8), U32x8(1, 4, 9, 16, 25, 36, 49, 64)));
  435. // U16x16 operations
  436. ASSERT(allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32), U16x16(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48)));
  437. ASSERT(allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + 8, U16x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)));
  438. ASSERT(allLanesEqual(8 + U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)));
  439. ASSERT(allLanesEqual(U16x16(3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48) - U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32)));
  440. ASSERT(allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) - 1, U16x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)));
  441. ASSERT(allLanesEqual(16 - U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)));
  442. ASSERT(allLanesEqual(U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) * 2, U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32)));
  443. ASSERT(allLanesEqual(2 * U16x16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), U16x16(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32)));
  444. // U8x32 operations
  445. ASSERT(allLanesEqual(
  446. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)
  447. + U8x32( 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64),
  448. U8x32( 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96)));
  449. ASSERT(allLanesEqual(
  450. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32) + 5,
  451. U8x32( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)));
  452. ASSERT(allLanesEqual(
  453. 5 + U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
  454. U8x32( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)));
  455. ASSERT(allLanesEqual(
  456. U8x32( 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96)
  457. - U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
  458. U8x32( 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64)));
  459. ASSERT(allLanesEqual(
  460. U8x32( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37) - 5,
  461. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)));
  462. ASSERT(allLanesEqual(
  463. 33 - U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32),
  464. U8x32(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)));
  465. ASSERT(allLanesEqual(
  466. saturatedAddition(
  467. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255),
  468. U8x32((uint8_t)240)),
  469. U8x32(241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255)
  470. ));
  471. ASSERT(allLanesEqual(
  472. saturatedSubtraction(
  473. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255),
  474. U8x32((uint8_t)16)),
  475. U8x32( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,239)
  476. ));
  477. // Saturated unsigned integer packing
  478. ASSERT(allLanesEqual(saturateToU8(
  479. U16x16(1, 2, 3, 4, 65535, 6, 7, 8, 9, 10, 11, 12, 1000, 14, 15, 16), U16x16(17, 18, 19, 20, 21, 22, 23, 65535, 25, 26, 27, 28, 29, 30, 31, 32)),
  480. U8x32( 1, 2, 3, 4, 255, 6, 7, 8, 9, 10, 11, 12, 255, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 255, 25, 26, 27, 28, 29, 30, 31, 32)));
  481. // Unsigned integer unpacking
  482. ASSERT(allLanesEqual(lowerToU32(U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U32x8(1,2,3,4,5,6,7,8)));
  483. ASSERT(allLanesEqual(higherToU32(U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)), U32x8(9,10,11,12,13,14,15,16)));
  484. ASSERT(allLanesEqual(lowerToU32(U16x16(1,2,3,4,5,6,65535,8,9,10,11,12,13,1000,15,16)), U32x8(1,2,3,4,5,6,65535,8)));
  485. ASSERT(allLanesEqual(higherToU32(U16x16(1,2,3,4,5,6,65535,8,9,10,11,12,13,1000,15,16)), U32x8(9,10,11,12,13,1000,15,16)));
  486. ASSERT(allLanesEqual(lowerToU16(U8x32(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)), U16x16(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255)));
  487. ASSERT(allLanesEqual(higherToU16(U8x32(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,255,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,255)));
  488. // Bitwise operations
  489. ASSERT(allLanesEqual(
  490. U32x8(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  491. & 0x0000FFFF,
  492. U32x8(0x0000FFFF, 0x00005678, 0x0000F0F0, 0x00000000, 0x0000EEEE, 0x00004321, 0x00000F0F, 0x00000001)));
  493. ASSERT(allLanesEqual(
  494. U32x8(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  495. & 0xFFFF0000,
  496. U32x8(0xFFFF0000, 0x12340000, 0xF0F00000, 0x00000000, 0xEEEE0000, 0x87650000, 0x0F0F0000, 0x00010000)));
  497. ASSERT(allLanesEqual(
  498. U32x8(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  499. | 0x0000FFFF,
  500. U32x8(0xFFFFFFFF, 0x1234FFFF, 0xF0F0FFFF, 0x0000FFFF, 0xEEEEFFFF, 0x8765FFFF, 0x0F0FFFFF, 0x0001FFFF)));
  501. ASSERT(allLanesEqual(
  502. U32x8(0xFFFFFFFF, 0x12345678, 0xF0F0F0F0, 0x00000000, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  503. | 0xFFFF0000,
  504. U32x8(0xFFFFFFFF, 0xFFFF5678, 0xFFFFF0F0, 0xFFFF0000, 0xFFFFEEEE, 0xFFFF4321, 0xFFFF0F0F, 0xFFFF0001)));
  505. ASSERT(allLanesEqual(
  506. U32x8(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  507. & U32x8(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0, 0xF00FF00F, 0x00FFFF00, 0xF0F0F0F0, 0x0000FFFF),
  508. U32x8(0xFF00FF00, 0xFFF00000, 0x0000F000, 0x02300670, 0xE00EE00E, 0x00654300, 0x00000000, 0x00000001)));
  509. ASSERT(allLanesEqual(
  510. U32x8(0xFFFFFFFF, 0xFFF000FF, 0xF0F0F0F0, 0x12345678, 0xEEEEEEEE, 0x87654321, 0x0F0F0F0F, 0x00010001)
  511. | U32x8(0xFF00FF00, 0xFFFF0000, 0x000FF000, 0x0FF00FF0, 0xF00FF00F, 0x00FFFF00, 0xF0F0F0F0, 0x0000FFFF),
  512. U32x8(0xFFFFFFFF, 0xFFFF00FF, 0xF0FFF0F0, 0x1FF45FF8, 0xFEEFFEEF, 0x87FFFF21, 0xFFFFFFFF, 0x0001FFFF)));
  513. ASSERT(allLanesEqual(
  514. U32x8(0b11001100110000110101010010110011, 0b00101011001011101010001101111001, 0b11001010000110111010010100101100, 0b01010111010001010010101110010110, 0b10101110100110100010101011011001, 0b00101110100111010001101010110000, 0b11101010001011100010101110001111, 0b00101010111100010110010110001000)
  515. ^ U32x8(0b00101101001110100011010010100001, 0b10101110100101000011101001010011, 0b00101011100101001011000010100100, 0b11010011101001000110010110110111, 0b00111100101000101010001101001010, 0b00101110100110000111110011010101, 0b11001010010101010010110010101000, 0b11110000111100001111000011110000),
  516. U32x8(0b11100001111110010110000000010010, 0b10000101101110101001100100101010, 0b11100001100011110001010110001000, 0b10000100111000010100111000100001, 0b10010010001110001000100110010011, 0b00000000000001010110011001100101, 0b00100000011110110000011100100111, 0b11011010000000011001010101111000)));
  517. // Bit shift with immediate scalar
  518. ASSERT(allLanesEqual(bitShiftLeftImmediate <1>(U32x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x8( 2, 4, 6, 8, 10, 12, 14, 16)));
  519. ASSERT(allLanesEqual(bitShiftLeftImmediate <2>(U32x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x8( 4, 8, 12, 16, 20, 24, 28, 32)));
  520. ASSERT(allLanesEqual(bitShiftLeftImmediate <3>(U32x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x8( 8, 16, 24, 32, 40, 48, 56, 64)));
  521. ASSERT(allLanesEqual(bitShiftLeftImmediate <4>(U32x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x8(16, 32, 48, 64, 80, 96,112,128)));
  522. ASSERT(allLanesEqual(bitShiftRightImmediate<1>(U32x8(1, 2, 3, 4, 5, 6, 7, 8)), U32x8( 0, 1, 1, 2, 2, 3, 3, 4)));
  523. ASSERT(allLanesEqual(bitShiftRightImmediate<1>(U32x8(2, 4, 6, 8, 10, 12, 14, 16)), U32x8( 1, 2, 3, 4, 5, 6, 7, 8)));
  524. ASSERT(allLanesEqual(bitShiftRightImmediate<2>(U32x8(2, 4, 6, 8, 10, 12, 14, 16)), U32x8( 0, 1, 1, 2, 2, 3, 3, 4)));
  525. // Bit shift with variable offsets
  526. ASSERT(allLanesEqual(U32x4(1u, 2u, 3u, 4u) << U32x4(2u, 4u, 3u, 1u), U32x4(4u, 32u, 24u, 8u)));
  527. ASSERT(allLanesEqual(U32x4(64u, 32u, 5u, 8u) >> U32x4(2u, 1u, 2u, 0u), U32x4(16u, 16u, 1u, 8u)));
  528. ASSERT(allLanesEqual(U32x8(1u, 2u, 3u, 4u, 5u, 6u, 7u, 8u) << U32x8(2u, 4u, 3u, 1u, 0u, 1u, 2u, 1u), U32x8(4u, 32u, 24u, 8u, 5u, 12u, 28u, 16u)));
  529. ASSERT(allLanesEqual(U32x8(64u, 32u, 5u, 8u, 128u, 64u, 128u, 256u) >> U32x8(2u, 4u, 3u, 1u, 3u, 1u, 2u, 1u), U32x8(16u, 2u, 0u, 4u, 16u, 32u, 32u, 128u)));
  530. ASSERT(allLanesEqual(
  531. bitShiftLeftImmediate<4>(U32x8(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0xF0000000, 0x87654321, 0x48484848, 0x76437643, 0x11111111)),
  532. U32x8(0xAB12CD00, 0xFFFFFFF0, 0x23456780, 0x00000000, 0x76543210, 0x84848480, 0x64376430, 0x11111110)));
  533. ASSERT(allLanesEqual(
  534. bitShiftRightImmediate<4>(U32x8(0x0AB12CD0, 0xFFFFFFFF, 0x12345678, 0x0000000F, 0x87654321, 0x48484848, 0x76437643, 0x11111111)),
  535. U32x8(0x00AB12CD, 0x0FFFFFFF, 0x01234567, 0x00000000, 0x08765432, 0x04848484, 0x07643764, 0x01111111)));
  536. // Element shift with insert
  537. ASSERT(allLanesEqual(vectorExtract_0(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  538. U32x8( 1, 2, 3, 4, 5, 6, 7, 8)));
  539. ASSERT(allLanesEqual(vectorExtract_1(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  540. U32x8( 2, 3, 4, 5, 6, 7, 8, 9)));
  541. ASSERT(allLanesEqual(vectorExtract_2(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  542. U32x8( 3, 4, 5, 6, 7, 8, 9,10)));
  543. ASSERT(allLanesEqual(vectorExtract_3(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  544. U32x8( 4, 5, 6, 7, 8, 9,10,11)));
  545. ASSERT(allLanesEqual(vectorExtract_4(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  546. U32x8( 5, 6, 7, 8, 9,10,11,12)));
  547. ASSERT(allLanesEqual(vectorExtract_5(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  548. U32x8( 6, 7, 8, 9,10,11,12,13)));
  549. ASSERT(allLanesEqual(vectorExtract_6(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  550. U32x8( 7, 8, 9,10,11,12,13,14)));
  551. ASSERT(allLanesEqual(vectorExtract_7(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  552. U32x8( 8, 9,10,11,12,13,14,15)));
  553. ASSERT(allLanesEqual(vectorExtract_8(U32x8( 1, 2, 3, 4, 5, 6, 7, 8), U32x8( 9,10,11,12,13,14,15,16)),
  554. U32x8( 9,10,11,12,13,14,15,16)));
  555. ASSERT(allLanesEqual(vectorExtract_5(U32x8( 1, 2, 3, 4, 5, 6, 7, 4294967295), U32x8( 9,10,11,1000,13,14,15,16)),
  556. U32x8( 6, 7, 4294967295, 9,10,11,1000,13)));
  557. ASSERT(allLanesEqual(vectorExtract_0(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  558. I32x8( 1,-2, 3, 4,-5, 6, 7, 8)));
  559. ASSERT(allLanesEqual(vectorExtract_1(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  560. I32x8(-2, 3, 4,-5, 6, 7, 8, 9)));
  561. ASSERT(allLanesEqual(vectorExtract_2(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  562. I32x8( 3, 4,-5, 6, 7, 8, 9,10)));
  563. ASSERT(allLanesEqual(vectorExtract_3(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  564. I32x8( 4,-5, 6, 7, 8, 9,10,11)));
  565. ASSERT(allLanesEqual(vectorExtract_4(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  566. I32x8(-5, 6, 7, 8, 9,10,11,-12)));
  567. ASSERT(allLanesEqual(vectorExtract_5(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  568. I32x8( 6, 7, 8, 9,10,11,-12,13)));
  569. ASSERT(allLanesEqual(vectorExtract_6(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  570. I32x8( 7, 8, 9,10,11,-12,13,14)));
  571. ASSERT(allLanesEqual(vectorExtract_7(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  572. I32x8( 8, 9,10,11,-12,13,14,15)));
  573. ASSERT(allLanesEqual(vectorExtract_8(I32x8( 1,-2, 3, 4,-5, 6, 7, 8), I32x8( 9,10,11,-12,13,14,15,-16)),
  574. I32x8( 9,10,11,-12,13,14,15,-16)));
  575. ASSERT(allLanesEqual(vectorExtract_0(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  576. F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f)));
  577. ASSERT(allLanesEqual(vectorExtract_1(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  578. F32x8( -2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f, 9.0f)));
  579. ASSERT(allLanesEqual(vectorExtract_2(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  580. F32x8( 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f)));
  581. ASSERT(allLanesEqual(vectorExtract_3(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  582. F32x8( 4.0f,-5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f)));
  583. ASSERT(allLanesEqual(vectorExtract_4(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  584. F32x8(-5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,-12.0f)));
  585. ASSERT(allLanesEqual(vectorExtract_5(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  586. F32x8( 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,-12.0f, 13.0f)));
  587. ASSERT(allLanesEqual(vectorExtract_6(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  588. F32x8( 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f)));
  589. ASSERT(allLanesEqual(vectorExtract_7(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  590. F32x8( 8.0f, 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f)));
  591. ASSERT(allLanesEqual(vectorExtract_8(F32x8( 1.1f,-2.2f, 3.0f, 4.0f,-5.0f, 6.0f, 7.0f, 8.0f), F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)),
  592. F32x8( 9.0f, 10.0f, 11.0f,-12.0f, 13.0f, 14.0f, 15.0f,-16.0f)));
  593. ASSERT(allLanesEqual(vectorExtract_0 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  594. U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16)));
  595. ASSERT(allLanesEqual(vectorExtract_1 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  596. U16x16( 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 17)));
  597. ASSERT(allLanesEqual(vectorExtract_2 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  598. U16x16( 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 17,18)));
  599. ASSERT(allLanesEqual(vectorExtract_3 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  600. U16x16( 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 17,18,19)));
  601. ASSERT(allLanesEqual(vectorExtract_4 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  602. U16x16( 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 17,18,19,20)));
  603. ASSERT(allLanesEqual(vectorExtract_5 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  604. U16x16( 6, 7, 8, 9,10,11,12,13,14,15,16, 17,18,19,20,21)));
  605. ASSERT(allLanesEqual(vectorExtract_6 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  606. U16x16( 7, 8, 9,10,11,12,13,14,15,16, 17,18,19,20,21,22)));
  607. ASSERT(allLanesEqual(vectorExtract_7 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  608. U16x16( 8, 9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23)));
  609. ASSERT(allLanesEqual(vectorExtract_8 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  610. U16x16( 9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24)));
  611. ASSERT(allLanesEqual(vectorExtract_9 (U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  612. U16x16(10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25)));
  613. ASSERT(allLanesEqual(vectorExtract_10(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  614. U16x16(11,12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26)));
  615. ASSERT(allLanesEqual(vectorExtract_11(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  616. U16x16(12,13,14,15,16, 17,18,19,20,21,22,23,24,25,26,27)));
  617. ASSERT(allLanesEqual(vectorExtract_12(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  618. U16x16(13,14,15,16, 17,18,19,20,21,22,23,24,25,26,27,28)));
  619. ASSERT(allLanesEqual(vectorExtract_13(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  620. U16x16(14,15,16, 17,18,19,20,21,22,23,24,25,26,27,28,29)));
  621. ASSERT(allLanesEqual(vectorExtract_14(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  622. U16x16(15,16, 17,18,19,20,21,22,23,24,25,26,27,28,29,30)));
  623. ASSERT(allLanesEqual(vectorExtract_15(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  624. U16x16(16, 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31)));
  625. ASSERT(allLanesEqual(vectorExtract_16(U16x16( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16), U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)),
  626. U16x16(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)));
  627. ASSERT(allLanesEqual(vectorExtract_0 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  628. U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32)));
  629. ASSERT(allLanesEqual(vectorExtract_1 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  630. U8x32( 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33)));
  631. ASSERT(allLanesEqual(vectorExtract_2 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  632. U8x32( 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34)));
  633. ASSERT(allLanesEqual(vectorExtract_3 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  634. U8x32( 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35)));
  635. ASSERT(allLanesEqual(vectorExtract_4 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  636. U8x32( 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36)));
  637. ASSERT(allLanesEqual(vectorExtract_5 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  638. U8x32( 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37)));
  639. ASSERT(allLanesEqual(vectorExtract_6 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  640. U8x32( 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38)));
  641. ASSERT(allLanesEqual(vectorExtract_7 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  642. U8x32( 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39)));
  643. ASSERT(allLanesEqual(vectorExtract_8 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  644. U8x32( 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40)));
  645. ASSERT(allLanesEqual(vectorExtract_9 (U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  646. U8x32(10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41)));
  647. ASSERT(allLanesEqual(vectorExtract_10(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  648. U8x32(11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42)));
  649. ASSERT(allLanesEqual(vectorExtract_11(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  650. U8x32(12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43)));
  651. ASSERT(allLanesEqual(vectorExtract_12(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  652. U8x32(13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44)));
  653. ASSERT(allLanesEqual(vectorExtract_13(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  654. U8x32(14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45)));
  655. ASSERT(allLanesEqual(vectorExtract_14(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  656. U8x32(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46)));
  657. ASSERT(allLanesEqual(vectorExtract_15(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  658. U8x32(16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47)));
  659. ASSERT(allLanesEqual(vectorExtract_16(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  660. U8x32(17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48)));
  661. ASSERT(allLanesEqual(vectorExtract_17(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  662. U8x32(18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49)));
  663. ASSERT(allLanesEqual(vectorExtract_18(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  664. U8x32(19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50)));
  665. ASSERT(allLanesEqual(vectorExtract_19(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  666. U8x32(20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51)));
  667. ASSERT(allLanesEqual(vectorExtract_20(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  668. U8x32(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52)));
  669. ASSERT(allLanesEqual(vectorExtract_21(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  670. U8x32(22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53)));
  671. ASSERT(allLanesEqual(vectorExtract_22(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  672. U8x32(23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54)));
  673. ASSERT(allLanesEqual(vectorExtract_23(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  674. U8x32(24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55)));
  675. ASSERT(allLanesEqual(vectorExtract_24(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  676. U8x32(25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56)));
  677. ASSERT(allLanesEqual(vectorExtract_25(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  678. U8x32(26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57)));
  679. ASSERT(allLanesEqual(vectorExtract_26(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  680. U8x32(27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58)));
  681. ASSERT(allLanesEqual(vectorExtract_27(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  682. U8x32(28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59)));
  683. ASSERT(allLanesEqual(vectorExtract_28(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  684. U8x32(29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60)));
  685. ASSERT(allLanesEqual(vectorExtract_29(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  686. U8x32(30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61)));
  687. ASSERT(allLanesEqual(vectorExtract_30(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  688. U8x32(31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62)));
  689. ASSERT(allLanesEqual(vectorExtract_31(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  690. U8x32(32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63)));
  691. ASSERT(allLanesEqual(vectorExtract_32(U8x32( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32), U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)),
  692. U8x32(33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64)));
  693. { // Gather test
  694. // The Buffer must be kept alive during the pointer's lifetime to prevent freeing the memory too early with reference counting.
  695. // Because SafePointer exists only to be faster than Buffer but safer than a raw pointer.
  696. Buffer gatherTestBuffer = buffer_create(sizeof(int32_t) * 32);
  697. {
  698. // 32-bit floating-point gather
  699. SafePointer<float> pointerF = buffer_getSafeData<float>(gatherTestBuffer, "float gather test data");
  700. for (int i = 0; i < 32; i++) { // -32.0f, -30.0f, -28.0f, -26.0f ... 24.0f, 26.0f, 28.0f, 30.0f
  701. pointerF[i] = i * 2.0f - 32.0f;
  702. }
  703. ASSERT(allLanesEqual(gather_F32(pointerF , U32x4(2, 1, 30, 31)), F32x4(-28.0f, -30.0f, 28.0f, 30.0f)));
  704. ASSERT(allLanesEqual(gather_F32(pointerF + 10, U32x4(0, 1, 2, 3)), F32x4(-12.0f, -10.0f, -8.0f, -6.0f)));
  705. ASSERT(allLanesEqual(gather_F32(pointerF , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), F32x8(-28.0f, -30.0f, 24.0f, 26.0f, -26.0f, -32.0f, 28.0f, 30.0f)));
  706. ASSERT(allLanesEqual(gather_F32(pointerF + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), F32x8(-12.0f, -10.0f, -8.0f, -6.0f, -4.0f, -2.0f, 0.0f, 2.0f)));
  707. }
  708. {
  709. // Signed 32-bit integer gather
  710. SafePointer<int32_t> pointerU = buffer_getSafeData<int32_t>(gatherTestBuffer, "int32_t gather test data");
  711. for (int i = 0; i < 32; i++) { // -32, -30, -28, -26 ... 24, 26, 28, 30
  712. pointerU[i] = i * 2 - 32;
  713. }
  714. ASSERT(allLanesEqual(gather_I32(pointerU , U32x4(2, 1, 30, 31)), I32x4(-28, -30, 28, 30)));
  715. ASSERT(allLanesEqual(gather_I32(pointerU + 10, U32x4(0, 1, 2, 3)), I32x4(-12, -10, -8, -6)));
  716. ASSERT(allLanesEqual(gather_I32(pointerU , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), I32x8(-28, -30, 24, 26, -26, -32, 28, 30)));
  717. ASSERT(allLanesEqual(gather_I32(pointerU + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), I32x8(-12, -10, -8, -6, -4, -2, 0, 2)));
  718. }
  719. {
  720. // Unsigned 32-bit integer gather
  721. SafePointer<uint32_t> pointerI = buffer_getSafeData<uint32_t>(gatherTestBuffer, "uint32_t gather test data");
  722. for (int i = 0; i < 32; i++) { // 100, 102, 104, 106 ... 156, 158, 160, 162
  723. pointerI[i] = 100 + i * 2;
  724. }
  725. // Signed 32-bit integer gather
  726. ASSERT(allLanesEqual(gather_U32(pointerI , U32x4(2, 1, 30, 31)), U32x4(104, 102, 160, 162)));
  727. ASSERT(allLanesEqual(gather_U32(pointerI + 10, U32x4(0, 1, 2, 3)), U32x4(120, 122, 124, 126)));
  728. ASSERT(allLanesEqual(gather_U32(pointerI , U32x8(2, 1, 28, 29, 3, 0, 30, 31)), U32x8(104, 102, 156, 158, 106, 100, 160, 162)));
  729. ASSERT(allLanesEqual(gather_U32(pointerI + 10, U32x8(0, 1, 2, 3, 4, 5, 6, 7)), U32x8(120, 122, 124, 126, 128, 130, 132, 134)));
  730. }
  731. }
  732. END_TEST