hadamard_test.cc 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. /*
  2. * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <algorithm>
  11. #include "third_party/googletest/src/include/gtest/gtest.h"
  12. #include "./vpx_dsp_rtcd.h"
  13. #include "vpx_ports/vpx_timer.h"
  14. #include "test/acm_random.h"
  15. #include "test/register_state_check.h"
  16. namespace {
  17. using ::libvpx_test::ACMRandom;
  18. typedef void (*HadamardFunc)(const int16_t *a, int a_stride, tran_low_t *b);
  // One-dimensional 8-point Hadamard pass used by the reference transforms.
  //
  // Reads the eight samples a[0], a[a_stride], ..., a[7 * a_stride] and writes
  // eight results to out[0..7], in the permuted order spelled out in stage 3.
  // All intermediate values are held in int16_t.
  void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
    // Stage 1: sum/difference of each adjacent pair of samples.
    int16_t stage1[8];
    for (int pair = 0; pair < 4; ++pair) {
      const int16_t first = a[(2 * pair) * a_stride];
      const int16_t second = a[(2 * pair + 1) * a_stride];
      stage1[2 * pair] = first + second;
      stage1[2 * pair + 1] = first - second;
    }

    // Stage 2: butterflies at distance two, independently within each half.
    int16_t stage2[8];
    for (int half = 0; half < 2; ++half) {
      const int16_t *const in = stage1 + 4 * half;
      int16_t *const res = stage2 + 4 * half;
      res[0] = in[0] + in[2];
      res[1] = in[1] + in[3];
      res[2] = in[0] - in[2];
      res[3] = in[1] - in[3];
    }

    // Stage 3: combine the two halves and scatter into the output order.
    const int16_t *const lo = stage2;
    const int16_t *const hi = stage2 + 4;
    out[0] = lo[0] + hi[0];
    out[2] = lo[0] - hi[0];
    out[7] = lo[1] + hi[1];
    out[6] = lo[1] - hi[1];
    out[3] = lo[2] + hi[2];
    out[1] = lo[2] - hi[2];
    out[4] = lo[3] + hi[3];
    out[5] = lo[3] - hi[3];
  }
  41. void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
  42. int16_t buf[64];
  43. int16_t buf2[64];
  44. for (int i = 0; i < 8; ++i) hadamard_loop(a + i, a_stride, buf + i * 8);
  45. for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, 8, buf2 + i * 8);
  46. for (int i = 0; i < 64; ++i) b[i] = (tran_low_t)buf2[i];
  47. }
  48. void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
  49. /* The source is a 16x16 block. The destination is rearranged to 8x32.
  50. * Input is 9 bit. */
  51. reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
  52. reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
  53. reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
  54. reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
  55. /* Overlay the 8x8 blocks and combine. */
  56. for (int i = 0; i < 64; ++i) {
  57. /* 8x8 steps the range up to 15 bits. */
  58. const tran_low_t a0 = b[0];
  59. const tran_low_t a1 = b[64];
  60. const tran_low_t a2 = b[128];
  61. const tran_low_t a3 = b[192];
  62. /* Prevent the result from escaping int16_t. */
  63. const tran_low_t b0 = (a0 + a1) >> 1;
  64. const tran_low_t b1 = (a0 - a1) >> 1;
  65. const tran_low_t b2 = (a2 + a3) >> 1;
  66. const tran_low_t b3 = (a2 - a3) >> 1;
  67. /* Store a 16 bit value. */
  68. b[0] = b0 + b2;
  69. b[64] = b1 + b3;
  70. b[128] = b0 - b2;
  71. b[192] = b1 - b3;
  72. ++b;
  73. }
  74. }
  75. class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
  76. public:
  77. virtual void SetUp() {
  78. h_func_ = GetParam();
  79. rnd_.Reset(ACMRandom::DeterministicSeed());
  80. }
  81. protected:
  82. HadamardFunc h_func_;
  83. ACMRandom rnd_;
  84. };
  85. void HadamardSpeedTest(const char *name, HadamardFunc const func,
  86. const int16_t *input, int stride, tran_low_t *output,
  87. int times) {
  88. int i;
  89. vpx_usec_timer timer;
  90. vpx_usec_timer_start(&timer);
  91. for (i = 0; i < times; ++i) {
  92. func(input, stride, output);
  93. }
  94. vpx_usec_timer_mark(&timer);
  95. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  96. printf("%s[%12d runs]: %d us\n", name, times, elapsed_time);
  97. }
  98. class Hadamard8x8Test : public HadamardTestBase {};
  99. void HadamardSpeedTest8x8(HadamardFunc const func, int times) {
  100. DECLARE_ALIGNED(16, int16_t, input[64]);
  101. DECLARE_ALIGNED(16, tran_low_t, output[64]);
  102. memset(input, 1, sizeof(input));
  103. HadamardSpeedTest("Hadamard8x8", func, input, 8, output, times);
  104. }
  105. TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
  106. DECLARE_ALIGNED(16, int16_t, a[64]);
  107. DECLARE_ALIGNED(16, tran_low_t, b[64]);
  108. tran_low_t b_ref[64];
  109. for (int i = 0; i < 64; ++i) {
  110. a[i] = rnd_.Rand9Signed();
  111. }
  112. memset(b, 0, sizeof(b));
  113. memset(b_ref, 0, sizeof(b_ref));
  114. reference_hadamard8x8(a, 8, b_ref);
  115. ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
  116. // The order of the output is not important. Sort before checking.
  117. std::sort(b, b + 64);
  118. std::sort(b_ref, b_ref + 64);
  119. EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
  120. }
  121. TEST_P(Hadamard8x8Test, VaryStride) {
  122. DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
  123. DECLARE_ALIGNED(16, tran_low_t, b[64]);
  124. tran_low_t b_ref[64];
  125. for (int i = 0; i < 64 * 8; ++i) {
  126. a[i] = rnd_.Rand9Signed();
  127. }
  128. for (int i = 8; i < 64; i += 8) {
  129. memset(b, 0, sizeof(b));
  130. memset(b_ref, 0, sizeof(b_ref));
  131. reference_hadamard8x8(a, i, b_ref);
  132. ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
  133. // The order of the output is not important. Sort before checking.
  134. std::sort(b, b + 64);
  135. std::sort(b_ref, b_ref + 64);
  136. EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
  137. }
  138. }
  139. TEST_P(Hadamard8x8Test, DISABLED_Speed) {
  140. HadamardSpeedTest8x8(h_func_, 10);
  141. HadamardSpeedTest8x8(h_func_, 10000);
  142. HadamardSpeedTest8x8(h_func_, 10000000);
  143. }
  144. INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
  145. ::testing::Values(&vpx_hadamard_8x8_c));
  146. #if HAVE_SSE2
  147. INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
  148. ::testing::Values(&vpx_hadamard_8x8_sse2));
  149. #endif // HAVE_SSE2
  150. #if HAVE_SSSE3 && ARCH_X86_64
  151. INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
  152. ::testing::Values(&vpx_hadamard_8x8_ssse3));
  153. #endif // HAVE_SSSE3 && ARCH_X86_64
  154. #if HAVE_NEON
  155. INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
  156. ::testing::Values(&vpx_hadamard_8x8_neon));
  157. #endif // HAVE_NEON
  158. // TODO(jingning): Remove highbitdepth flag when the SIMD functions are
  159. // in place and turn on the unit test.
  160. #if !CONFIG_VP9_HIGHBITDEPTH
  161. #if HAVE_MSA
  162. INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test,
  163. ::testing::Values(&vpx_hadamard_8x8_msa));
  164. #endif // HAVE_MSA
  165. #endif // !CONFIG_VP9_HIGHBITDEPTH
  166. #if HAVE_VSX
  167. INSTANTIATE_TEST_CASE_P(VSX, Hadamard8x8Test,
  168. ::testing::Values(&vpx_hadamard_8x8_vsx));
  169. #endif // HAVE_VSX
  170. class Hadamard16x16Test : public HadamardTestBase {};
  171. void HadamardSpeedTest16x16(HadamardFunc const func, int times) {
  172. DECLARE_ALIGNED(16, int16_t, input[256]);
  173. DECLARE_ALIGNED(16, tran_low_t, output[256]);
  174. memset(input, 1, sizeof(input));
  175. HadamardSpeedTest("Hadamard16x16", func, input, 16, output, times);
  176. }
  177. TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
  178. DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
  179. DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
  180. tran_low_t b_ref[16 * 16];
  181. for (int i = 0; i < 16 * 16; ++i) {
  182. a[i] = rnd_.Rand9Signed();
  183. }
  184. memset(b, 0, sizeof(b));
  185. memset(b_ref, 0, sizeof(b_ref));
  186. reference_hadamard16x16(a, 16, b_ref);
  187. ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
  188. // The order of the output is not important. Sort before checking.
  189. std::sort(b, b + 16 * 16);
  190. std::sort(b_ref, b_ref + 16 * 16);
  191. EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
  192. }
  193. TEST_P(Hadamard16x16Test, VaryStride) {
  194. DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
  195. DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
  196. tran_low_t b_ref[16 * 16];
  197. for (int i = 0; i < 16 * 16 * 8; ++i) {
  198. a[i] = rnd_.Rand9Signed();
  199. }
  200. for (int i = 8; i < 64; i += 8) {
  201. memset(b, 0, sizeof(b));
  202. memset(b_ref, 0, sizeof(b_ref));
  203. reference_hadamard16x16(a, i, b_ref);
  204. ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
  205. // The order of the output is not important. Sort before checking.
  206. std::sort(b, b + 16 * 16);
  207. std::sort(b_ref, b_ref + 16 * 16);
  208. EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
  209. }
  210. }
  211. TEST_P(Hadamard16x16Test, DISABLED_Speed) {
  212. HadamardSpeedTest16x16(h_func_, 10);
  213. HadamardSpeedTest16x16(h_func_, 10000);
  214. HadamardSpeedTest16x16(h_func_, 10000000);
  215. }
  216. INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
  217. ::testing::Values(&vpx_hadamard_16x16_c));
  218. #if HAVE_SSE2
  219. INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
  220. ::testing::Values(&vpx_hadamard_16x16_sse2));
  221. #endif // HAVE_SSE2
  222. #if HAVE_VSX
  223. INSTANTIATE_TEST_CASE_P(VSX, Hadamard16x16Test,
  224. ::testing::Values(&vpx_hadamard_16x16_vsx));
  225. #endif // HAVE_VSX
  226. #if HAVE_NEON
  227. INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
  228. ::testing::Values(&vpx_hadamard_16x16_neon));
  229. #endif // HAVE_NEON
  230. #if !CONFIG_VP9_HIGHBITDEPTH
  231. #if HAVE_MSA
  232. INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test,
  233. ::testing::Values(&vpx_hadamard_16x16_msa));
  234. #endif // HAVE_MSA
  235. #endif // !CONFIG_VP9_HIGHBITDEPTH
  236. } // namespace