vp9_idct.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include "./vp9_rtcd.h"
  12. #include "./vpx_dsp_rtcd.h"
  13. #include "vp9/common/vp9_blockd.h"
  14. #include "vp9/common/vp9_idct.h"
  15. #include "vpx_dsp/inv_txfm.h"
  16. #include "vpx_ports/mem.h"
  17. void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
  18. int tx_type) {
  19. const transform_2d IHT_4[] = {
  20. { idct4_c, idct4_c }, // DCT_DCT = 0
  21. { iadst4_c, idct4_c }, // ADST_DCT = 1
  22. { idct4_c, iadst4_c }, // DCT_ADST = 2
  23. { iadst4_c, iadst4_c } // ADST_ADST = 3
  24. };
  25. int i, j;
  26. tran_low_t out[4 * 4];
  27. tran_low_t *outptr = out;
  28. tran_low_t temp_in[4], temp_out[4];
  29. // inverse transform row vectors
  30. for (i = 0; i < 4; ++i) {
  31. IHT_4[tx_type].rows(input, outptr);
  32. input += 4;
  33. outptr += 4;
  34. }
  35. // inverse transform column vectors
  36. for (i = 0; i < 4; ++i) {
  37. for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
  38. IHT_4[tx_type].cols(temp_in, temp_out);
  39. for (j = 0; j < 4; ++j) {
  40. dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
  41. ROUND_POWER_OF_TWO(temp_out[j], 4));
  42. }
  43. }
  44. }
  45. static const transform_2d IHT_8[] = {
  46. { idct8_c, idct8_c }, // DCT_DCT = 0
  47. { iadst8_c, idct8_c }, // ADST_DCT = 1
  48. { idct8_c, iadst8_c }, // DCT_ADST = 2
  49. { iadst8_c, iadst8_c } // ADST_ADST = 3
  50. };
  51. void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
  52. int tx_type) {
  53. int i, j;
  54. tran_low_t out[8 * 8];
  55. tran_low_t *outptr = out;
  56. tran_low_t temp_in[8], temp_out[8];
  57. const transform_2d ht = IHT_8[tx_type];
  58. // inverse transform row vectors
  59. for (i = 0; i < 8; ++i) {
  60. ht.rows(input, outptr);
  61. input += 8;
  62. outptr += 8;
  63. }
  64. // inverse transform column vectors
  65. for (i = 0; i < 8; ++i) {
  66. for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
  67. ht.cols(temp_in, temp_out);
  68. for (j = 0; j < 8; ++j) {
  69. dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
  70. ROUND_POWER_OF_TWO(temp_out[j], 5));
  71. }
  72. }
  73. }
  74. static const transform_2d IHT_16[] = {
  75. { idct16_c, idct16_c }, // DCT_DCT = 0
  76. { iadst16_c, idct16_c }, // ADST_DCT = 1
  77. { idct16_c, iadst16_c }, // DCT_ADST = 2
  78. { iadst16_c, iadst16_c } // ADST_ADST = 3
  79. };
  80. void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
  81. int tx_type) {
  82. int i, j;
  83. tran_low_t out[16 * 16];
  84. tran_low_t *outptr = out;
  85. tran_low_t temp_in[16], temp_out[16];
  86. const transform_2d ht = IHT_16[tx_type];
  87. // Rows
  88. for (i = 0; i < 16; ++i) {
  89. ht.rows(input, outptr);
  90. input += 16;
  91. outptr += 16;
  92. }
  93. // Columns
  94. for (i = 0; i < 16; ++i) {
  95. for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
  96. ht.cols(temp_in, temp_out);
  97. for (j = 0; j < 16; ++j) {
  98. dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
  99. ROUND_POWER_OF_TWO(temp_out[j], 6));
  100. }
  101. }
  102. }
  103. // idct
  104. void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
  105. int eob) {
  106. if (eob > 1)
  107. vpx_idct4x4_16_add(input, dest, stride);
  108. else
  109. vpx_idct4x4_1_add(input, dest, stride);
  110. }
  111. void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
  112. int eob) {
  113. if (eob > 1)
  114. vpx_iwht4x4_16_add(input, dest, stride);
  115. else
  116. vpx_iwht4x4_1_add(input, dest, stride);
  117. }
  118. void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
  119. int eob) {
  120. // If dc is 1, then input[0] is the reconstructed value, do not need
  121. // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
  122. // The calculation can be simplified if there are not many non-zero dct
  123. // coefficients. Use eobs to decide what to do.
  124. if (eob == 1)
  125. // DC only DCT coefficient
  126. vpx_idct8x8_1_add(input, dest, stride);
  127. else if (eob <= 12)
  128. vpx_idct8x8_12_add(input, dest, stride);
  129. else
  130. vpx_idct8x8_64_add(input, dest, stride);
  131. }
  132. void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
  133. int eob) {
  134. /* The calculation can be simplified if there are not many non-zero dct
  135. * coefficients. Use eobs to separate different cases. */
  136. if (eob == 1) /* DC only DCT coefficient. */
  137. vpx_idct16x16_1_add(input, dest, stride);
  138. else if (eob <= 10)
  139. vpx_idct16x16_10_add(input, dest, stride);
  140. else if (eob <= 38)
  141. vpx_idct16x16_38_add(input, dest, stride);
  142. else
  143. vpx_idct16x16_256_add(input, dest, stride);
  144. }
  145. void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
  146. int eob) {
  147. if (eob == 1)
  148. vpx_idct32x32_1_add(input, dest, stride);
  149. else if (eob <= 34)
  150. // non-zero coeff only in upper-left 8x8
  151. vpx_idct32x32_34_add(input, dest, stride);
  152. else if (eob <= 135)
  153. // non-zero coeff only in upper-left 16x16
  154. vpx_idct32x32_135_add(input, dest, stride);
  155. else
  156. vpx_idct32x32_1024_add(input, dest, stride);
  157. }
  158. // iht
  159. void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
  160. int stride, int eob) {
  161. if (tx_type == DCT_DCT)
  162. vp9_idct4x4_add(input, dest, stride, eob);
  163. else
  164. vp9_iht4x4_16_add(input, dest, stride, tx_type);
  165. }
  166. void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
  167. int stride, int eob) {
  168. if (tx_type == DCT_DCT) {
  169. vp9_idct8x8_add(input, dest, stride, eob);
  170. } else {
  171. vp9_iht8x8_64_add(input, dest, stride, tx_type);
  172. }
  173. }
  174. void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
  175. int stride, int eob) {
  176. if (tx_type == DCT_DCT) {
  177. vp9_idct16x16_add(input, dest, stride, eob);
  178. } else {
  179. vp9_iht16x16_256_add(input, dest, stride, tx_type);
  180. }
  181. }
  182. #if CONFIG_VP9_HIGHBITDEPTH
  183. void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest,
  184. int stride, int tx_type, int bd) {
  185. const highbd_transform_2d IHT_4[] = {
  186. { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
  187. { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
  188. { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
  189. { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
  190. };
  191. int i, j;
  192. tran_low_t out[4 * 4];
  193. tran_low_t *outptr = out;
  194. tran_low_t temp_in[4], temp_out[4];
  195. // Inverse transform row vectors.
  196. for (i = 0; i < 4; ++i) {
  197. IHT_4[tx_type].rows(input, outptr, bd);
  198. input += 4;
  199. outptr += 4;
  200. }
  201. // Inverse transform column vectors.
  202. for (i = 0; i < 4; ++i) {
  203. for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
  204. IHT_4[tx_type].cols(temp_in, temp_out, bd);
  205. for (j = 0; j < 4; ++j) {
  206. dest[j * stride + i] = highbd_clip_pixel_add(
  207. dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
  208. }
  209. }
  210. }
  211. static const highbd_transform_2d HIGH_IHT_8[] = {
  212. { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
  213. { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
  214. { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
  215. { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
  216. };
  217. void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest,
  218. int stride, int tx_type, int bd) {
  219. int i, j;
  220. tran_low_t out[8 * 8];
  221. tran_low_t *outptr = out;
  222. tran_low_t temp_in[8], temp_out[8];
  223. const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
  224. // Inverse transform row vectors.
  225. for (i = 0; i < 8; ++i) {
  226. ht.rows(input, outptr, bd);
  227. input += 8;
  228. outptr += 8;
  229. }
  230. // Inverse transform column vectors.
  231. for (i = 0; i < 8; ++i) {
  232. for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
  233. ht.cols(temp_in, temp_out, bd);
  234. for (j = 0; j < 8; ++j) {
  235. dest[j * stride + i] = highbd_clip_pixel_add(
  236. dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
  237. }
  238. }
  239. }
  240. static const highbd_transform_2d HIGH_IHT_16[] = {
  241. { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
  242. { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
  243. { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
  244. { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
  245. };
  246. void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest,
  247. int stride, int tx_type, int bd) {
  248. int i, j;
  249. tran_low_t out[16 * 16];
  250. tran_low_t *outptr = out;
  251. tran_low_t temp_in[16], temp_out[16];
  252. const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
  253. // Rows
  254. for (i = 0; i < 16; ++i) {
  255. ht.rows(input, outptr, bd);
  256. input += 16;
  257. outptr += 16;
  258. }
  259. // Columns
  260. for (i = 0; i < 16; ++i) {
  261. for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
  262. ht.cols(temp_in, temp_out, bd);
  263. for (j = 0; j < 16; ++j) {
  264. dest[j * stride + i] = highbd_clip_pixel_add(
  265. dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
  266. }
  267. }
  268. }
  269. // idct
  270. void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
  271. int eob, int bd) {
  272. if (eob > 1)
  273. vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
  274. else
  275. vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
  276. }
  277. void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
  278. int eob, int bd) {
  279. if (eob > 1)
  280. vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
  281. else
  282. vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
  283. }
  284. void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride,
  285. int eob, int bd) {
  286. // If dc is 1, then input[0] is the reconstructed value, do not need
  287. // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
  288. // The calculation can be simplified if there are not many non-zero dct
  289. // coefficients. Use eobs to decide what to do.
  290. // DC only DCT coefficient
  291. if (eob == 1) {
  292. vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
  293. } else if (eob <= 12) {
  294. vpx_highbd_idct8x8_12_add(input, dest, stride, bd);
  295. } else {
  296. vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
  297. }
  298. }
  299. void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest,
  300. int stride, int eob, int bd) {
  301. // The calculation can be simplified if there are not many non-zero dct
  302. // coefficients. Use eobs to separate different cases.
  303. // DC only DCT coefficient.
  304. if (eob == 1) {
  305. vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
  306. } else if (eob <= 10) {
  307. vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
  308. } else if (eob <= 38) {
  309. vpx_highbd_idct16x16_38_add(input, dest, stride, bd);
  310. } else {
  311. vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
  312. }
  313. }
  314. void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest,
  315. int stride, int eob, int bd) {
  316. // Non-zero coeff only in upper-left 8x8
  317. if (eob == 1) {
  318. vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
  319. } else if (eob <= 34) {
  320. vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
  321. } else if (eob <= 135) {
  322. vpx_highbd_idct32x32_135_add(input, dest, stride, bd);
  323. } else {
  324. vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
  325. }
  326. }
  327. // iht
  328. void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
  329. uint16_t *dest, int stride, int eob, int bd) {
  330. if (tx_type == DCT_DCT)
  331. vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
  332. else
  333. vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
  334. }
  335. void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
  336. uint16_t *dest, int stride, int eob, int bd) {
  337. if (tx_type == DCT_DCT) {
  338. vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
  339. } else {
  340. vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
  341. }
  342. }
  343. void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
  344. uint16_t *dest, int stride, int eob, int bd) {
  345. if (tx_type == DCT_DCT) {
  346. vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
  347. } else {
  348. vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
  349. }
  350. }
  351. #endif // CONFIG_VP9_HIGHBITDEPTH