dct_test.cc 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737
  1. /*
  2. * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include "third_party/googletest/src/include/gtest/gtest.h"
  14. #include "./vp9_rtcd.h"
  15. #include "./vpx_dsp_rtcd.h"
  16. #include "test/acm_random.h"
  17. #include "test/buffer.h"
  18. #include "test/clear_system_state.h"
  19. #include "test/register_state_check.h"
  20. #include "test/util.h"
  21. #include "vp9/common/vp9_entropy.h"
  22. #include "vpx/vpx_codec.h"
  23. #include "vpx/vpx_integer.h"
  24. #include "vpx_ports/mem.h"
  25. using libvpx_test::ACMRandom;
  26. using libvpx_test::Buffer;
  27. using std::tr1::tuple;
  28. using std::tr1::make_tuple;
  29. namespace {
  30. typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
  31. typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
  32. typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
  33. int tx_type);
  34. typedef void (*FhtFuncRef)(const Buffer<int16_t> &in, Buffer<tran_low_t> *out,
  35. int size, int tx_type);
  36. typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
  37. int tx_type);
  38. /* forward transform, inverse transform, size, transform type, bit depth */
  39. typedef tuple<FdctFunc, IdctFunc, int, int, vpx_bit_depth_t> DctParam;
  40. typedef tuple<FhtFunc, IhtFunc, int, int, vpx_bit_depth_t> HtParam;
  41. void fdct_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
  42. int /*tx_type*/) {
  43. const int16_t *i = in.TopLeftPixel();
  44. const int i_stride = in.stride();
  45. tran_low_t *o = out->TopLeftPixel();
  46. if (size == 4) {
  47. vpx_fdct4x4_c(i, o, i_stride);
  48. } else if (size == 8) {
  49. vpx_fdct8x8_c(i, o, i_stride);
  50. } else if (size == 16) {
  51. vpx_fdct16x16_c(i, o, i_stride);
  52. } else if (size == 32) {
  53. vpx_fdct32x32_c(i, o, i_stride);
  54. }
  55. }
  56. void fht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
  57. int tx_type) {
  58. const int16_t *i = in.TopLeftPixel();
  59. const int i_stride = in.stride();
  60. tran_low_t *o = out->TopLeftPixel();
  61. if (size == 4) {
  62. vp9_fht4x4_c(i, o, i_stride, tx_type);
  63. } else if (size == 8) {
  64. vp9_fht8x8_c(i, o, i_stride, tx_type);
  65. } else if (size == 16) {
  66. vp9_fht16x16_c(i, o, i_stride, tx_type);
  67. }
  68. }
  69. void fwht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
  70. int /*tx_type*/) {
  71. ASSERT_EQ(size, 4);
  72. vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
  73. }
  74. #if CONFIG_VP9_HIGHBITDEPTH
  75. #define idctNxN(n, coeffs, bitdepth) \
  76. void idct##n##x##n##_##bitdepth(const tran_low_t *in, uint8_t *out, \
  77. int stride) { \
  78. vpx_highbd_idct##n##x##n##_##coeffs##_add_c(in, CAST_TO_SHORTPTR(out), \
  79. stride, bitdepth); \
  80. }
  81. idctNxN(4, 16, 10);
  82. idctNxN(4, 16, 12);
  83. idctNxN(8, 64, 10);
  84. idctNxN(8, 64, 12);
  85. idctNxN(16, 256, 10);
  86. idctNxN(16, 256, 12);
  87. idctNxN(32, 1024, 10);
  88. idctNxN(32, 1024, 12);
  89. #define ihtNxN(n, coeffs, bitdepth) \
  90. void iht##n##x##n##_##bitdepth(const tran_low_t *in, uint8_t *out, \
  91. int stride, int tx_type) { \
  92. vp9_highbd_iht##n##x##n##_##coeffs##_add_c(in, CAST_TO_SHORTPTR(out), \
  93. stride, tx_type, bitdepth); \
  94. }
  95. ihtNxN(4, 16, 10);
  96. ihtNxN(4, 16, 12);
  97. ihtNxN(8, 64, 10);
  98. ihtNxN(8, 64, 12);
  99. ihtNxN(16, 256, 10);
  100. // ihtNxN(16, 256, 12);
  101. void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
  102. vpx_highbd_iwht4x4_16_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
  103. }
  104. void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
  105. vpx_highbd_iwht4x4_16_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
  106. }
  107. #endif // CONFIG_VP9_HIGHBITDEPTH
  108. class TransTestBase {
  109. public:
  110. virtual void TearDown() { libvpx_test::ClearSystemState(); }
  111. protected:
  112. virtual void RunFwdTxfm(const Buffer<int16_t> &in,
  113. Buffer<tran_low_t> *out) = 0;
  114. virtual void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) = 0;
  115. void RunAccuracyCheck(int limit) {
  116. ACMRandom rnd(ACMRandom::DeterministicSeed());
  117. Buffer<int16_t> test_input_block =
  118. Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
  119. ASSERT_TRUE(test_input_block.Init());
  120. Buffer<tran_low_t> test_temp_block =
  121. Buffer<tran_low_t>(size_, size_, 0, 16);
  122. ASSERT_TRUE(test_temp_block.Init());
  123. Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
  124. ASSERT_TRUE(dst.Init());
  125. Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0, 16);
  126. ASSERT_TRUE(src.Init());
  127. #if CONFIG_VP9_HIGHBITDEPTH
  128. Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
  129. ASSERT_TRUE(dst16.Init());
  130. Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0, 16);
  131. ASSERT_TRUE(src16.Init());
  132. #endif // CONFIG_VP9_HIGHBITDEPTH
  133. uint32_t max_error = 0;
  134. int64_t total_error = 0;
  135. const int count_test_block = 10000;
  136. for (int i = 0; i < count_test_block; ++i) {
  137. if (bit_depth_ == 8) {
  138. src.Set(&rnd, &ACMRandom::Rand8);
  139. dst.Set(&rnd, &ACMRandom::Rand8);
  140. // Initialize a test block with input range [-255, 255].
  141. for (int h = 0; h < size_; ++h) {
  142. for (int w = 0; w < size_; ++w) {
  143. test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
  144. src.TopLeftPixel()[h * src.stride() + w] -
  145. dst.TopLeftPixel()[h * dst.stride() + w];
  146. }
  147. }
  148. #if CONFIG_VP9_HIGHBITDEPTH
  149. } else {
  150. src16.Set(&rnd, 0, max_pixel_value_);
  151. dst16.Set(&rnd, 0, max_pixel_value_);
  152. for (int h = 0; h < size_; ++h) {
  153. for (int w = 0; w < size_; ++w) {
  154. test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
  155. src16.TopLeftPixel()[h * src16.stride() + w] -
  156. dst16.TopLeftPixel()[h * dst16.stride() + w];
  157. }
  158. }
  159. #endif // CONFIG_VP9_HIGHBITDEPTH
  160. }
  161. ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block));
  162. if (bit_depth_ == VPX_BITS_8) {
  163. ASM_REGISTER_STATE_CHECK(
  164. RunInvTxfm(test_temp_block, dst.TopLeftPixel()));
  165. #if CONFIG_VP9_HIGHBITDEPTH
  166. } else {
  167. ASM_REGISTER_STATE_CHECK(
  168. RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16.TopLeftPixel())));
  169. #endif // CONFIG_VP9_HIGHBITDEPTH
  170. }
  171. for (int h = 0; h < size_; ++h) {
  172. for (int w = 0; w < size_; ++w) {
  173. int diff;
  174. #if CONFIG_VP9_HIGHBITDEPTH
  175. if (bit_depth_ != 8) {
  176. diff = dst16.TopLeftPixel()[h * dst16.stride() + w] -
  177. src16.TopLeftPixel()[h * src16.stride() + w];
  178. } else {
  179. #endif // CONFIG_VP9_HIGHBITDEPTH
  180. diff = dst.TopLeftPixel()[h * dst.stride() + w] -
  181. src.TopLeftPixel()[h * src.stride() + w];
  182. #if CONFIG_VP9_HIGHBITDEPTH
  183. }
  184. #endif // CONFIG_VP9_HIGHBITDEPTH
  185. const uint32_t error = diff * diff;
  186. if (max_error < error) max_error = error;
  187. total_error += error;
  188. }
  189. }
  190. }
  191. EXPECT_GE(static_cast<uint32_t>(limit), max_error)
  192. << "Error: 4x4 FHT/IHT has an individual round trip error > " << limit;
  193. EXPECT_GE(count_test_block * limit, total_error)
  194. << "Error: 4x4 FHT/IHT has average round trip error > " << limit
  195. << " per block";
  196. }
  197. void RunCoeffCheck() {
  198. ACMRandom rnd(ACMRandom::DeterministicSeed());
  199. const int count_test_block = 5000;
  200. Buffer<int16_t> input_block =
  201. Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
  202. ASSERT_TRUE(input_block.Init());
  203. Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
  204. ASSERT_TRUE(output_ref_block.Init());
  205. Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
  206. ASSERT_TRUE(output_block.Init());
  207. for (int i = 0; i < count_test_block; ++i) {
  208. // Initialize a test block with input range [-max_pixel_value_,
  209. // max_pixel_value_].
  210. input_block.Set(&rnd, -max_pixel_value_, max_pixel_value_);
  211. fwd_txfm_ref(input_block, &output_ref_block, size_, tx_type_);
  212. ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, &output_block));
  213. // The minimum quant value is 4.
  214. EXPECT_TRUE(output_block.CheckValues(output_ref_block));
  215. if (::testing::Test::HasFailure()) {
  216. printf("Size: %d Transform type: %d\n", size_, tx_type_);
  217. output_block.PrintDifference(output_ref_block);
  218. return;
  219. }
  220. }
  221. }
  222. void RunMemCheck() {
  223. ACMRandom rnd(ACMRandom::DeterministicSeed());
  224. const int count_test_block = 5000;
  225. Buffer<int16_t> input_extreme_block =
  226. Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
  227. ASSERT_TRUE(input_extreme_block.Init());
  228. Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
  229. ASSERT_TRUE(output_ref_block.Init());
  230. Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
  231. ASSERT_TRUE(output_block.Init());
  232. for (int i = 0; i < count_test_block; ++i) {
  233. // Initialize a test block with -max_pixel_value_ or max_pixel_value_.
  234. if (i == 0) {
  235. input_extreme_block.Set(max_pixel_value_);
  236. } else if (i == 1) {
  237. input_extreme_block.Set(-max_pixel_value_);
  238. } else {
  239. for (int h = 0; h < size_; ++h) {
  240. for (int w = 0; w < size_; ++w) {
  241. input_extreme_block
  242. .TopLeftPixel()[h * input_extreme_block.stride() + w] =
  243. rnd.Rand8() % 2 ? max_pixel_value_ : -max_pixel_value_;
  244. }
  245. }
  246. }
  247. fwd_txfm_ref(input_extreme_block, &output_ref_block, size_, tx_type_);
  248. ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, &output_block));
  249. // The minimum quant value is 4.
  250. EXPECT_TRUE(output_block.CheckValues(output_ref_block));
  251. for (int h = 0; h < size_; ++h) {
  252. for (int w = 0; w < size_; ++w) {
  253. EXPECT_GE(
  254. 4 * DCT_MAX_VALUE << (bit_depth_ - 8),
  255. abs(output_block.TopLeftPixel()[h * output_block.stride() + w]))
  256. << "Error: 4x4 FDCT has coefficient larger than "
  257. "4*DCT_MAX_VALUE"
  258. << " at " << w << "," << h;
  259. if (::testing::Test::HasFailure()) {
  260. printf("Size: %d Transform type: %d\n", size_, tx_type_);
  261. output_block.DumpBuffer();
  262. return;
  263. }
  264. }
  265. }
  266. }
  267. }
  268. void RunInvAccuracyCheck(int limit) {
  269. ACMRandom rnd(ACMRandom::DeterministicSeed());
  270. const int count_test_block = 1000;
  271. Buffer<int16_t> in = Buffer<int16_t>(size_, size_, 4);
  272. ASSERT_TRUE(in.Init());
  273. Buffer<tran_low_t> coeff = Buffer<tran_low_t>(size_, size_, 0, 16);
  274. ASSERT_TRUE(coeff.Init());
  275. Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
  276. ASSERT_TRUE(dst.Init());
  277. Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0);
  278. ASSERT_TRUE(src.Init());
  279. Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
  280. ASSERT_TRUE(dst16.Init());
  281. Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0);
  282. ASSERT_TRUE(src16.Init());
  283. for (int i = 0; i < count_test_block; ++i) {
  284. // Initialize a test block with input range [-max_pixel_value_,
  285. // max_pixel_value_].
  286. if (bit_depth_ == VPX_BITS_8) {
  287. src.Set(&rnd, &ACMRandom::Rand8);
  288. dst.Set(&rnd, &ACMRandom::Rand8);
  289. for (int h = 0; h < size_; ++h) {
  290. for (int w = 0; w < size_; ++w) {
  291. in.TopLeftPixel()[h * in.stride() + w] =
  292. src.TopLeftPixel()[h * src.stride() + w] -
  293. dst.TopLeftPixel()[h * dst.stride() + w];
  294. }
  295. }
  296. #if CONFIG_VP9_HIGHBITDEPTH
  297. } else {
  298. src16.Set(&rnd, 0, max_pixel_value_);
  299. dst16.Set(&rnd, 0, max_pixel_value_);
  300. for (int h = 0; h < size_; ++h) {
  301. for (int w = 0; w < size_; ++w) {
  302. in.TopLeftPixel()[h * in.stride() + w] =
  303. src16.TopLeftPixel()[h * src16.stride() + w] -
  304. dst16.TopLeftPixel()[h * dst16.stride() + w];
  305. }
  306. }
  307. #endif // CONFIG_VP9_HIGHBITDEPTH
  308. }
  309. fwd_txfm_ref(in, &coeff, size_, tx_type_);
  310. if (bit_depth_ == VPX_BITS_8) {
  311. ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst.TopLeftPixel()));
  312. #if CONFIG_VP9_HIGHBITDEPTH
  313. } else {
  314. ASM_REGISTER_STATE_CHECK(
  315. RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16.TopLeftPixel())));
  316. #endif // CONFIG_VP9_HIGHBITDEPTH
  317. }
  318. for (int h = 0; h < size_; ++h) {
  319. for (int w = 0; w < size_; ++w) {
  320. int diff;
  321. #if CONFIG_VP9_HIGHBITDEPTH
  322. if (bit_depth_ != 8) {
  323. diff = dst16.TopLeftPixel()[h * dst16.stride() + w] -
  324. src16.TopLeftPixel()[h * src16.stride() + w];
  325. } else {
  326. #endif // CONFIG_VP9_HIGHBITDEPTH
  327. diff = dst.TopLeftPixel()[h * dst.stride() + w] -
  328. src.TopLeftPixel()[h * src.stride() + w];
  329. #if CONFIG_VP9_HIGHBITDEPTH
  330. }
  331. #endif // CONFIG_VP9_HIGHBITDEPTH
  332. const uint32_t error = diff * diff;
  333. EXPECT_GE(static_cast<uint32_t>(limit), error)
  334. << "Error: " << size_ << "x" << size_ << " IDCT has error "
  335. << error << " at " << w << "," << h;
  336. }
  337. }
  338. }
  339. }
  340. FhtFuncRef fwd_txfm_ref;
  341. vpx_bit_depth_t bit_depth_;
  342. int tx_type_;
  343. int max_pixel_value_;
  344. int size_;
  345. };
  346. class TransDCT : public TransTestBase,
  347. public ::testing::TestWithParam<DctParam> {
  348. public:
  349. TransDCT() {
  350. fwd_txfm_ref = fdct_ref;
  351. fwd_txfm_ = GET_PARAM(0);
  352. inv_txfm_ = GET_PARAM(1);
  353. size_ = GET_PARAM(2);
  354. tx_type_ = GET_PARAM(3);
  355. bit_depth_ = GET_PARAM(4);
  356. max_pixel_value_ = (1 << bit_depth_) - 1;
  357. }
  358. protected:
  359. void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
  360. fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
  361. }
  362. void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
  363. inv_txfm_(in.TopLeftPixel(), out, in.stride());
  364. }
  365. FdctFunc fwd_txfm_;
  366. IdctFunc inv_txfm_;
  367. };
  368. TEST_P(TransDCT, AccuracyCheck) { RunAccuracyCheck(1); }
  369. TEST_P(TransDCT, CoeffCheck) { RunCoeffCheck(); }
  370. TEST_P(TransDCT, MemCheck) { RunMemCheck(); }
  371. TEST_P(TransDCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
  372. #if CONFIG_VP9_HIGHBITDEPTH
  373. INSTANTIATE_TEST_CASE_P(
  374. C, TransDCT,
  375. ::testing::Values(
  376. make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 32, 0, VPX_BITS_10),
  377. make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 32, 0, VPX_BITS_10),
  378. make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 32, 0,
  379. VPX_BITS_8),
  380. make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 16, 0, VPX_BITS_10),
  381. make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 16, 0, VPX_BITS_10),
  382. make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 16, 0,
  383. VPX_BITS_8),
  384. make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 8, 0, VPX_BITS_10),
  385. make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 8, 0, VPX_BITS_10),
  386. make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 8, 0, VPX_BITS_8),
  387. make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 4, 0, VPX_BITS_10),
  388. make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 4, 0, VPX_BITS_12),
  389. make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 4, 0, VPX_BITS_8)));
  390. #else
  391. INSTANTIATE_TEST_CASE_P(
  392. C, TransDCT,
  393. ::testing::Values(
  394. make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 32, 0,
  395. VPX_BITS_8),
  396. make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 16, 0,
  397. VPX_BITS_8),
  398. make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 8, 0, VPX_BITS_8),
  399. make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 4, 0, VPX_BITS_8)));
  400. #endif // CONFIG_VP9_HIGHBITDEPTH
  401. #if HAVE_SSE2
  402. #if !CONFIG_EMULATE_HARDWARE
  403. #if CONFIG_VP9_HIGHBITDEPTH
  404. /* TODO:(johannkoenig) Determine why these fail AccuracyCheck
  405. make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 32, 0, VPX_BITS_12),
  406. make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 16, 0, VPX_BITS_12),
  407. */
  408. INSTANTIATE_TEST_CASE_P(
  409. SSE2, TransDCT,
  410. ::testing::Values(
  411. make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 32, 0,
  412. VPX_BITS_10),
  413. make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 32, 0,
  414. VPX_BITS_8),
  415. make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 16, 0,
  416. VPX_BITS_10),
  417. make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_sse2, 16, 0,
  418. VPX_BITS_8),
  419. make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_10, 8, 0, VPX_BITS_10),
  420. make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_12, 8, 0, VPX_BITS_12),
  421. make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 8, 0,
  422. VPX_BITS_8),
  423. make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10, 4, 0, VPX_BITS_10),
  424. make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12, 4, 0, VPX_BITS_12),
  425. make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_sse2, 4, 0,
  426. VPX_BITS_8)));
  427. #else
  428. INSTANTIATE_TEST_CASE_P(
  429. SSE2, TransDCT,
  430. ::testing::Values(make_tuple(&vpx_fdct32x32_sse2,
  431. &vpx_idct32x32_1024_add_sse2, 32, 0,
  432. VPX_BITS_8),
  433. make_tuple(&vpx_fdct16x16_sse2,
  434. &vpx_idct16x16_256_add_sse2, 16, 0,
  435. VPX_BITS_8),
  436. make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 8,
  437. 0, VPX_BITS_8),
  438. make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_sse2, 4,
  439. 0, VPX_BITS_8)));
  440. #endif // CONFIG_VP9_HIGHBITDEPTH
  441. #endif // !CONFIG_EMULATE_HARDWARE
  442. #endif // HAVE_SSE2
  443. #if !CONFIG_VP9_HIGHBITDEPTH
  444. #if HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE
  445. #if !ARCH_X86_64
  446. // TODO(johannkoenig): high bit depth fdct8x8.
  447. INSTANTIATE_TEST_CASE_P(
  448. SSSE3, TransDCT,
  449. ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_sse2,
  450. 32, 0, VPX_BITS_8),
  451. make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_sse2, 8, 0,
  452. VPX_BITS_8)));
  453. #else
  454. // vpx_fdct8x8_ssse3 is only available in 64 bit builds.
  455. INSTANTIATE_TEST_CASE_P(
  456. SSSE3, TransDCT,
  457. ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_sse2,
  458. 32, 0, VPX_BITS_8),
  459. make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_sse2,
  460. 8, 0, VPX_BITS_8)));
  461. #endif // !ARCH_X86_64
  462. #endif // HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE
  463. #endif // !CONFIG_VP9_HIGHBITDEPTH
  464. #if !CONFIG_VP9_HIGHBITDEPTH && HAVE_AVX2 && !CONFIG_EMULATE_HARDWARE
  465. // TODO(johannkoenig): high bit depth fdct32x32.
  466. INSTANTIATE_TEST_CASE_P(
  467. AVX2, TransDCT, ::testing::Values(make_tuple(&vpx_fdct32x32_avx2,
  468. &vpx_idct32x32_1024_add_sse2,
  469. 32, 0, VPX_BITS_8)));
  470. #endif // !CONFIG_VP9_HIGHBITDEPTH && HAVE_AVX2 && !CONFIG_EMULATE_HARDWARE
  471. #if HAVE_NEON
  472. #if !CONFIG_EMULATE_HARDWARE
  473. INSTANTIATE_TEST_CASE_P(
  474. NEON, TransDCT,
  475. ::testing::Values(make_tuple(&vpx_fdct32x32_neon,
  476. &vpx_idct32x32_1024_add_neon, 32, 0,
  477. VPX_BITS_8),
  478. make_tuple(&vpx_fdct16x16_neon,
  479. &vpx_idct16x16_256_add_neon, 16, 0,
  480. VPX_BITS_8),
  481. make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 8,
  482. 0, VPX_BITS_8),
  483. make_tuple(&vpx_fdct4x4_neon, &vpx_idct4x4_16_add_neon, 4,
  484. 0, VPX_BITS_8)));
  485. #endif // !CONFIG_EMULATE_HARDWARE
  486. #endif // HAVE_NEON
  487. #if HAVE_MSA
  488. #if !CONFIG_VP9_HIGHBITDEPTH
  489. #if !CONFIG_EMULATE_HARDWARE
  490. INSTANTIATE_TEST_CASE_P(
  491. MSA, TransDCT,
  492. ::testing::Values(
  493. make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 32, 0,
  494. VPX_BITS_8),
  495. make_tuple(&vpx_fdct16x16_msa, &vpx_idct16x16_256_add_msa, 16, 0,
  496. VPX_BITS_8),
  497. make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 8, 0, VPX_BITS_8),
  498. make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 4, 0,
  499. VPX_BITS_8)));
  500. #endif // !CONFIG_EMULATE_HARDWARE
  501. #endif // !CONFIG_VP9_HIGHBITDEPTH
  502. #endif // HAVE_MSA
  503. #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  504. INSTANTIATE_TEST_CASE_P(VSX, TransDCT,
  505. ::testing::Values(make_tuple(&vpx_fdct4x4_c,
  506. &vpx_idct4x4_16_add_vsx, 4,
  507. 0, VPX_BITS_8)));
  508. #endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  509. class TransHT : public TransTestBase, public ::testing::TestWithParam<HtParam> {
  510. public:
  511. TransHT() {
  512. fwd_txfm_ref = fht_ref;
  513. fwd_txfm_ = GET_PARAM(0);
  514. inv_txfm_ = GET_PARAM(1);
  515. size_ = GET_PARAM(2);
  516. tx_type_ = GET_PARAM(3);
  517. bit_depth_ = GET_PARAM(4);
  518. max_pixel_value_ = (1 << bit_depth_) - 1;
  519. }
  520. protected:
  521. void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
  522. fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_);
  523. }
  524. void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
  525. inv_txfm_(in.TopLeftPixel(), out, in.stride(), tx_type_);
  526. }
  527. FhtFunc fwd_txfm_;
  528. IhtFunc inv_txfm_;
  529. };
  530. TEST_P(TransHT, AccuracyCheck) { RunAccuracyCheck(1); }
  531. TEST_P(TransHT, CoeffCheck) { RunCoeffCheck(); }
  532. TEST_P(TransHT, MemCheck) { RunMemCheck(); }
  533. TEST_P(TransHT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
  534. /* TODO:(johannkoenig) Determine why these fail AccuracyCheck
  535. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 0, VPX_BITS_12),
  536. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 1, VPX_BITS_12),
  537. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 2, VPX_BITS_12),
  538. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 3, VPX_BITS_12),
  539. */
  540. #if CONFIG_VP9_HIGHBITDEPTH
  541. INSTANTIATE_TEST_CASE_P(
  542. C, TransHT,
  543. ::testing::Values(
  544. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 0, VPX_BITS_10),
  545. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 1, VPX_BITS_10),
  546. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 2, VPX_BITS_10),
  547. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 3, VPX_BITS_10),
  548. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 0, VPX_BITS_8),
  549. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 1, VPX_BITS_8),
  550. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 2, VPX_BITS_8),
  551. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 3, VPX_BITS_8),
  552. make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 0, VPX_BITS_10),
  553. make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 1, VPX_BITS_10),
  554. make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 2, VPX_BITS_10),
  555. make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 3, VPX_BITS_10),
  556. make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 0, VPX_BITS_12),
  557. make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 1, VPX_BITS_12),
  558. make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 2, VPX_BITS_12),
  559. make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 3, VPX_BITS_12),
  560. make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 0, VPX_BITS_8),
  561. make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 1, VPX_BITS_8),
  562. make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 2, VPX_BITS_8),
  563. make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 3, VPX_BITS_8),
  564. make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 0, VPX_BITS_10),
  565. make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 1, VPX_BITS_10),
  566. make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 2, VPX_BITS_10),
  567. make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 3, VPX_BITS_10),
  568. make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 0, VPX_BITS_12),
  569. make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 1, VPX_BITS_12),
  570. make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 2, VPX_BITS_12),
  571. make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 3, VPX_BITS_12),
  572. make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 0, VPX_BITS_8),
  573. make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 1, VPX_BITS_8),
  574. make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 2, VPX_BITS_8),
  575. make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 3, VPX_BITS_8)));
  576. #else
  577. INSTANTIATE_TEST_CASE_P(
  578. C, TransHT,
  579. ::testing::Values(
  580. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 0, VPX_BITS_8),
  581. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 1, VPX_BITS_8),
  582. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 2, VPX_BITS_8),
  583. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 3, VPX_BITS_8),
  584. make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 0, VPX_BITS_8),
  585. make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 1, VPX_BITS_8),
  586. make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 2, VPX_BITS_8),
  587. make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 3, VPX_BITS_8),
  588. make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 0, VPX_BITS_8),
  589. make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 1, VPX_BITS_8),
  590. make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 2, VPX_BITS_8),
  591. make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 3, VPX_BITS_8)));
  592. #endif // CONFIG_VP9_HIGHBITDEPTH
  593. #if HAVE_SSE2
  594. INSTANTIATE_TEST_CASE_P(
  595. SSE2, TransHT,
  596. ::testing::Values(
  597. make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 0,
  598. VPX_BITS_8),
  599. make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 1,
  600. VPX_BITS_8),
  601. make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 2,
  602. VPX_BITS_8),
  603. make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 3,
  604. VPX_BITS_8),
  605. make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 0, VPX_BITS_8),
  606. make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 1, VPX_BITS_8),
  607. make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 2, VPX_BITS_8),
  608. make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 3, VPX_BITS_8),
  609. make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 0, VPX_BITS_8),
  610. make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 1, VPX_BITS_8),
  611. make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 2, VPX_BITS_8),
  612. make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 3,
  613. VPX_BITS_8)));
  614. #endif // HAVE_SSE2
  615. class TransWHT : public TransTestBase,
  616. public ::testing::TestWithParam<DctParam> {
  617. public:
  618. TransWHT() {
  619. fwd_txfm_ref = fwht_ref;
  620. fwd_txfm_ = GET_PARAM(0);
  621. inv_txfm_ = GET_PARAM(1);
  622. size_ = GET_PARAM(2);
  623. tx_type_ = GET_PARAM(3);
  624. bit_depth_ = GET_PARAM(4);
  625. max_pixel_value_ = (1 << bit_depth_) - 1;
  626. }
  627. protected:
  628. void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
  629. fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
  630. }
  631. void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
  632. inv_txfm_(in.TopLeftPixel(), out, in.stride());
  633. }
  634. FdctFunc fwd_txfm_;
  635. IdctFunc inv_txfm_;
  636. };
  637. TEST_P(TransWHT, AccuracyCheck) { RunAccuracyCheck(0); }
  638. TEST_P(TransWHT, CoeffCheck) { RunCoeffCheck(); }
  639. TEST_P(TransWHT, MemCheck) { RunMemCheck(); }
  640. TEST_P(TransWHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
  641. #if CONFIG_VP9_HIGHBITDEPTH
  642. INSTANTIATE_TEST_CASE_P(
  643. C, TransWHT,
  644. ::testing::Values(
  645. make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 4, 0, VPX_BITS_10),
  646. make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 4, 0, VPX_BITS_12),
  647. make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 4, 0, VPX_BITS_8)));
  648. #else
  649. INSTANTIATE_TEST_CASE_P(C, TransWHT,
  650. ::testing::Values(make_tuple(&vp9_fwht4x4_c,
  651. &vpx_iwht4x4_16_add_c, 4,
  652. 0, VPX_BITS_8)));
  653. #endif // CONFIG_VP9_HIGHBITDEPTH
  654. #if HAVE_SSE2
  655. INSTANTIATE_TEST_CASE_P(SSE2, TransWHT,
  656. ::testing::Values(make_tuple(&vp9_fwht4x4_sse2,
  657. &vpx_iwht4x4_16_add_sse2,
  658. 4, 0, VPX_BITS_8)));
  659. #endif // HAVE_SSE2
  660. } // namespace