fdct8x8_test.cc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. /*
  2. * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include "third_party/googletest/src/include/gtest/gtest.h"
  14. #include "./vp9_rtcd.h"
  15. #include "./vpx_dsp_rtcd.h"
  16. #include "test/acm_random.h"
  17. #include "test/clear_system_state.h"
  18. #include "test/register_state_check.h"
  19. #include "test/util.h"
  20. #include "vp9/common/vp9_entropy.h"
  21. #include "vp9/common/vp9_scan.h"
  22. #include "vpx/vpx_codec.h"
  23. #include "vpx/vpx_integer.h"
  24. #include "vpx_ports/mem.h"
  25. using libvpx_test::ACMRandom;
  26. namespace {
  27. const int kNumCoeffs = 64;
  28. const double kPi = 3.141592653589793238462643383279502884;
  29. const int kSignBiasMaxDiff255 = 1500;
  30. const int kSignBiasMaxDiff15 = 10000;
  31. typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
  32. typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
  33. typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
  34. int tx_type);
  35. typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
  36. int tx_type);
  37. typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
  38. typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
  39. typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
  40. void reference_8x8_dct_1d(const double in[8], double out[8]) {
  41. const double kInvSqrt2 = 0.707106781186547524400844362104;
  42. for (int k = 0; k < 8; k++) {
  43. out[k] = 0.0;
  44. for (int n = 0; n < 8; n++) {
  45. out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
  46. }
  47. if (k == 0) out[k] = out[k] * kInvSqrt2;
  48. }
  49. }
  50. void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
  51. double output[kNumCoeffs]) {
  52. // First transform columns
  53. for (int i = 0; i < 8; ++i) {
  54. double temp_in[8], temp_out[8];
  55. for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i];
  56. reference_8x8_dct_1d(temp_in, temp_out);
  57. for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j];
  58. }
  59. // Then transform rows
  60. for (int i = 0; i < 8; ++i) {
  61. double temp_in[8], temp_out[8];
  62. for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8];
  63. reference_8x8_dct_1d(temp_in, temp_out);
  64. // Scale by some magic number
  65. for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2;
  66. }
  67. }
// FhtFunc-shaped adapter around the C forward DCT so DCT fixtures can share
// fwd_txfm_ref with the hybrid-transform fixtures; tx_type is ignored.
void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
                 int /*tx_type*/) {
  vpx_fdct8x8_c(in, out, stride);
}
// Reference forward hybrid transform: forwards directly to the C FHT.
void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht8x8_c(in, out, stride, tx_type);
}
#if CONFIG_VP9_HIGHBITDEPTH
// Bit-depth-binding adapters: the high-bitdepth inverse transforms take an
// extra bd argument and a uint16_t destination, so these wrappers pin the
// bit depth and cast the pointer to fit the 8-bit IdctFunc/IhtFunc typedefs.
void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
}

void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
}

#if HAVE_SSE2
// C vs. SSE2 pairs for the partial (12-coefficient) and full (64-coefficient)
// inverse transforms, used by the InvTrans8x8DCT comparison tests below.
void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
#endif  // HAVE_SSE2
#endif  // CONFIG_VP9_HIGHBITDEPTH
// Shared machinery for the 8x8 transform tests. Concrete fixtures supply the
// transforms under test via RunFwdTxfm()/RunInvTxfm() and must initialize
// pitch_, tx_type_, fwd_txfm_ref, bit_depth_ and mask_ in their SetUp().
class FwdTrans8x8TestBase {
 public:
  virtual ~FwdTrans8x8TestBase() {}

 protected:
  // Forward transform under test: 8x8 residual block -> coefficients.
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
  // Inverse transform under test: coefficients added back into dst pixels.
  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;

  // Runs many random zero-mean blocks through the forward transform and
  // checks that no output coefficient is systematically biased toward
  // positive or negative values, for both a full-range and a small-range
  // input distribution.
  void RunSignBiasCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
    // Per-coefficient tallies: [j][0] = negative outputs, [j][1] = positive.
    int count_sign_block[64][2];
    const int count_test_block = 100000;

    memset(count_sign_block, 0, sizeof(count_sign_block));
    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < 64; ++j) {
        test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
                              ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
      }
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_output_block, pitch_));
      for (int j = 0; j < 64; ++j) {
        if (test_output_block[j] < 0) {
          ++count_sign_block[j][0];
        } else if (test_output_block[j] > 0) {
          ++count_sign_block[j][1];
        }
      }
    }
    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
      const int max_diff = kSignBiasMaxDiff255;
      // Tolerance is widened for higher bit depths.
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-255, 255] at index " << j
          << " count0: " << count_sign_block[j][0]
          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
    }

    memset(count_sign_block, 0, sizeof(count_sign_block));
    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
      for (int j = 0; j < 64; ++j) {
        test_input_block[j] =
            ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
      }
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_output_block, pitch_));
      for (int j = 0; j < 64; ++j) {
        if (test_output_block[j] < 0) {
          ++count_sign_block[j][0];
        } else if (test_output_block[j] > 0) {
          ++count_sign_block[j][1];
        }
      }
    }
    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
      const int max_diff = kSignBiasMaxDiff15;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-15, 15] at index " << j
          << " count0: " << count_sign_block[j][0]
          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
    }
  }

  // Forward-transforms random residuals, coarsely quantizes the
  // coefficients, inverse-transforms, and bounds the per-pixel and
  // average reconstruction error of the round trip.
  void RunRoundTripErrorCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    int max_error = 0;
    int total_error = 0;
    const int count_test_block = 100000;
    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
    DECLARE_ALIGNED(16, uint8_t, dst[64]);
    DECLARE_ALIGNED(16, uint8_t, src[64]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
    DECLARE_ALIGNED(16, uint16_t, src16[64]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < 64; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          test_input_block[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
      }

      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      // Round each coefficient away from zero to a multiple of 4,
      // simulating coarse quantization before the inverse transform.
      for (int j = 0; j < 64; ++j) {
        if (test_temp_block[j] > 0) {
          test_temp_block[j] += 2;
          test_temp_block[j] /= 4;
          test_temp_block[j] *= 4;
        } else {
          test_temp_block[j] -= 2;
          test_temp_block[j] /= 4;
          test_temp_block[j] *= 4;
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < 64; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int diff = dst[j] - src[j];
#endif
        const int error = diff * diff;
        if (max_error < error) max_error = error;
        total_error += error;
      }
    }

    // Error bounds scale with bit depth (2 bits of slack per extra depth bit).
    EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
        << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
        << " roundtrip error > 1";

    EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
        << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
        << "error > 1/5 per block";
  }

  // Stresses the transforms with extremal (all-saturated) residual blocks:
  // checks round-trip error bounds and that the forward transform under test
  // matches the C reference bit-exactly (no intermediate overflow).
  void RunExtremalCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    int max_error = 0;
    int total_error = 0;
    int total_coeff_error = 0;
    const int count_test_block = 100000;
    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
    DECLARE_ALIGNED(16, uint8_t, dst[64]);
    DECLARE_ALIGNED(16, uint8_t, src[64]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
    DECLARE_ALIGNED(16, uint16_t, src16[64]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      // First two iterations pin every pixel to the extremes; the rest
      // randomize each pixel between the two extremes.
      for (int j = 0; j < 64; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          if (i == 0) {
            src[j] = 255;
            dst[j] = 0;
          } else if (i == 1) {
            src[j] = 0;
            dst[j] = 255;
          } else {
            src[j] = rnd.Rand8() % 2 ? 255 : 0;
            dst[j] = rnd.Rand8() % 2 ? 255 : 0;
          }
          test_input_block[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          if (i == 0) {
            src16[j] = mask_;
            dst16[j] = 0;
          } else if (i == 1) {
            src16[j] = 0;
            dst16[j] = mask_;
          } else {
            src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
            dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
          }
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
      }

      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      ASM_REGISTER_STATE_CHECK(
          fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < 64; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int diff = dst[j] - src[j];
#endif
        const int error = diff * diff;
        if (max_error < error) max_error = error;
        total_error += error;

        const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
        total_coeff_error += abs(coeff_diff);
      }

      // NOTE: these expectations run every iteration, failing fast on the
      // first block whose accumulated error exceeds the bound.
      EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
          << "an individual roundtrip error > 1";

      EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
          << " roundtrip error > 1/5 per block";

      EXPECT_EQ(0, total_coeff_error)
          << "Error: Extremal 8x8 FDCT/FHT has"
          << "overflow issues in the intermediate steps > 1";
    }
  }

  // Compares the inverse transform under test against the double-precision
  // reference forward DCT: extreme +/- residuals are forward-transformed in
  // floating point, rounded, inverse-transformed, and the reconstruction
  // error per pixel is bounded.
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];

      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8() % 2 ? 255 : 0;
          dst[j] = src[j] > 0 ? 0 : 255;
          in[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
          dst16[j] = src16[j] > 0 ? 0 : mask_;
          in[j] = src16[j] - dst16[j];
#endif
        }
      }

      reference_8x8_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
      }

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int diff = dst[j] - src[j];
#endif
        const uint32_t error = diff * diff;
        EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
            << "Error: 8x8 IDCT has error " << error << " at index " << j;
      }
    }
  }

  // Compares the forward transform under test against the double-precision
  // reference DCT on extremal +/-mask_ inputs; coefficients must agree
  // within a small squared-error tolerance.
  void RunFwdAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];

      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
        in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
      }

      RunFwdTxfm(in, coeff, pitch_);
      reference_8x8_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
        const int32_t diff = coeff[j] - coeff_r[j];
        const uint32_t error = diff * diff;
        EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
            << "Error: 8x8 DCT has error " << error << " at index " << j;
      }
    }
  }

  // Feeds sparse (eob = 12) random coefficient blocks, bounded by thresh,
  // through both a reference and the optimized inverse transform and
  // requires bit-exact agreement.
  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 12;
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif
    // Coefficients are placed in zig-zag scan order so the nonzero ones
    // occupy the positions a real eob of 12 would produce.
    const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
        if (bit_depth_ == VPX_BITS_8) {
          dst[j] = 0;
          ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
#endif
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ref_txfm(coeff, ref, pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
        const int diff = dst[j] - ref[j];
#endif
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 8x8 IDCT has error " << error << " at index " << j;
      }
    }
  }

  int pitch_;            // row stride passed to the transforms (always 8 here)
  int tx_type_;          // hybrid transform type (DCT/ADST combinations)
  FhtFunc fwd_txfm_ref;  // C reference forward transform for coeff comparison
  vpx_bit_depth_t bit_depth_;
  int mask_;             // (1 << bit_depth_) - 1, max pixel value
};
// Parameterized fixture for the plain 8x8 DCT.
// Param tuple: (forward transform, inverse transform, tx_type, bit depth).
class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
                       public ::testing::TestWithParam<Dct8x8Param> {
 public:
  virtual ~FwdTrans8x8DCT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    // tx_type is carried for parity with the FHT params; the DCT functions
    // themselves take no tx_type argument.
    tx_type_ = GET_PARAM(2);
    pitch_ = 8;
    fwd_txfm_ref = fdct8x8_ref;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    fwd_txfm_(in, out, stride);
  }
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
};
// DCT test entry points; each simply dispatches to the shared base-class
// check of the same name.
TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }

TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }

TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
// Parameterized fixture for the 8x8 hybrid transform (FHT/IHT).
// Param tuple: (forward transform, inverse transform, tx_type, bit depth).
class FwdTrans8x8HT : public FwdTrans8x8TestBase,
                      public ::testing::TestWithParam<Ht8x8Param> {
 public:
  virtual ~FwdTrans8x8HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_ = GET_PARAM(2);
    pitch_ = 8;
    fwd_txfm_ref = fht8x8_ref;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  // Hybrid transforms take the tx_type selecting the DCT/ADST combination.
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    fwd_txfm_(in, out, stride, tx_type_);
  }
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride, tx_type_);
  }

  FhtFunc fwd_txfm_;
  IhtFunc inv_txfm_;
};
// Hybrid-transform test entry points (no accuracy checks: the float DCT
// reference does not model the ADST variants).
TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
// Fixture comparing an optimized inverse DCT against a reference inverse.
// Param tuple: (reference idct, idct under test, coeff threshold, bit depth).
class InvTrans8x8DCT : public FwdTrans8x8TestBase,
                       public ::testing::TestWithParam<Idct8x8Param> {
 public:
  virtual ~InvTrans8x8DCT() {}

  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    pitch_ = 8;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }
  // Unused by this fixture; the base class requires an implementation.
  void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}

  IdctFunc ref_txfm_;
  IdctFunc inv_txfm_;
  int thresh_;  // upper bound on the magnitude of generated coefficients
};
// Requires bit-exact agreement between the reference and optimized idct.
TEST_P(InvTrans8x8DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}
using std::tr1::make_tuple;

// ---- C reference instantiations ----
// Each tuple is (fwd, inv, tx_type, bit depth); the high-bitdepth build adds
// 10- and 12-bit variants through the wrappers defined above.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
#else
INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                     &vpx_idct8x8_64_add_c, 0,
                                                     VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Hybrid transforms are exercised over all four tx_types per bit depth.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

// ---- Optimized (SIMD) instantiations, gated on hardware support and on
// not emulating hardware (emulation would make the asm checks meaningless).
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                     &vpx_idct8x8_64_add_neon,
                                                     0, VPX_BITS_8)));

#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_sse2,
                                                     &vpx_idct8x8_64_add_sse2,
                                                     0, VPX_BITS_8)));

INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
                                 VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
                                 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
                                 12, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));

// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
                                                     &vpx_idct8x8_64_add_sse2,
                                                     0, VPX_BITS_8)));
#endif

#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_msa,
                                                     &vpx_idct8x8_64_add_msa, 0,
                                                     VPX_BITS_8)));

INSTANTIATE_TEST_CASE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(VSX, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                     &vpx_idct8x8_64_add_vsx, 0,
                                                     VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  668. } // namespace