2
0

vp9_idct.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include "./vp9_rtcd.h"
  12. #include "./vpx_dsp_rtcd.h"
  13. #include "vp9/common/vp9_blockd.h"
  14. #include "vp9/common/vp9_idct.h"
  15. #include "vpx_dsp/inv_txfm.h"
  16. #include "vpx_ports/mem.h"
  17. void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
  18. int tx_type) {
  19. const transform_2d IHT_4[] = {
  20. { idct4_c, idct4_c }, // DCT_DCT = 0
  21. { iadst4_c, idct4_c }, // ADST_DCT = 1
  22. { idct4_c, iadst4_c }, // DCT_ADST = 2
  23. { iadst4_c, iadst4_c } // ADST_ADST = 3
  24. };
  25. int i, j;
  26. tran_low_t out[4 * 4];
  27. tran_low_t *outptr = out;
  28. tran_low_t temp_in[4], temp_out[4];
  29. // inverse transform row vectors
  30. for (i = 0; i < 4; ++i) {
  31. IHT_4[tx_type].rows(input, outptr);
  32. input += 4;
  33. outptr += 4;
  34. }
  35. // inverse transform column vectors
  36. for (i = 0; i < 4; ++i) {
  37. for (j = 0; j < 4; ++j)
  38. temp_in[j] = out[j * 4 + i];
  39. IHT_4[tx_type].cols(temp_in, temp_out);
  40. for (j = 0; j < 4; ++j) {
  41. dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
  42. ROUND_POWER_OF_TWO(temp_out[j], 4));
  43. }
  44. }
  45. }
  46. static const transform_2d IHT_8[] = {
  47. { idct8_c, idct8_c }, // DCT_DCT = 0
  48. { iadst8_c, idct8_c }, // ADST_DCT = 1
  49. { idct8_c, iadst8_c }, // DCT_ADST = 2
  50. { iadst8_c, iadst8_c } // ADST_ADST = 3
  51. };
  52. void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
  53. int tx_type) {
  54. int i, j;
  55. tran_low_t out[8 * 8];
  56. tran_low_t *outptr = out;
  57. tran_low_t temp_in[8], temp_out[8];
  58. const transform_2d ht = IHT_8[tx_type];
  59. // inverse transform row vectors
  60. for (i = 0; i < 8; ++i) {
  61. ht.rows(input, outptr);
  62. input += 8;
  63. outptr += 8;
  64. }
  65. // inverse transform column vectors
  66. for (i = 0; i < 8; ++i) {
  67. for (j = 0; j < 8; ++j)
  68. temp_in[j] = out[j * 8 + i];
  69. ht.cols(temp_in, temp_out);
  70. for (j = 0; j < 8; ++j) {
  71. dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
  72. ROUND_POWER_OF_TWO(temp_out[j], 5));
  73. }
  74. }
  75. }
  76. static const transform_2d IHT_16[] = {
  77. { idct16_c, idct16_c }, // DCT_DCT = 0
  78. { iadst16_c, idct16_c }, // ADST_DCT = 1
  79. { idct16_c, iadst16_c }, // DCT_ADST = 2
  80. { iadst16_c, iadst16_c } // ADST_ADST = 3
  81. };
  82. void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
  83. int tx_type) {
  84. int i, j;
  85. tran_low_t out[16 * 16];
  86. tran_low_t *outptr = out;
  87. tran_low_t temp_in[16], temp_out[16];
  88. const transform_2d ht = IHT_16[tx_type];
  89. // Rows
  90. for (i = 0; i < 16; ++i) {
  91. ht.rows(input, outptr);
  92. input += 16;
  93. outptr += 16;
  94. }
  95. // Columns
  96. for (i = 0; i < 16; ++i) {
  97. for (j = 0; j < 16; ++j)
  98. temp_in[j] = out[j * 16 + i];
  99. ht.cols(temp_in, temp_out);
  100. for (j = 0; j < 16; ++j) {
  101. dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
  102. ROUND_POWER_OF_TWO(temp_out[j], 6));
  103. }
  104. }
  105. }
  106. // idct
  107. void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
  108. int eob) {
  109. if (eob > 1)
  110. vpx_idct4x4_16_add(input, dest, stride);
  111. else
  112. vpx_idct4x4_1_add(input, dest, stride);
  113. }
  114. void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
  115. int eob) {
  116. if (eob > 1)
  117. vpx_iwht4x4_16_add(input, dest, stride);
  118. else
  119. vpx_iwht4x4_1_add(input, dest, stride);
  120. }
  121. void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
  122. int eob) {
  123. // If dc is 1, then input[0] is the reconstructed value, do not need
  124. // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
  125. // The calculation can be simplified if there are not many non-zero dct
  126. // coefficients. Use eobs to decide what to do.
  127. // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
  128. // Combine that with code here.
  129. if (eob == 1)
  130. // DC only DCT coefficient
  131. vpx_idct8x8_1_add(input, dest, stride);
  132. else if (eob <= 12)
  133. vpx_idct8x8_12_add(input, dest, stride);
  134. else
  135. vpx_idct8x8_64_add(input, dest, stride);
  136. }
  137. void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
  138. int eob) {
  139. /* The calculation can be simplified if there are not many non-zero dct
  140. * coefficients. Use eobs to separate different cases. */
  141. if (eob == 1)
  142. /* DC only DCT coefficient. */
  143. vpx_idct16x16_1_add(input, dest, stride);
  144. else if (eob <= 10)
  145. vpx_idct16x16_10_add(input, dest, stride);
  146. else
  147. vpx_idct16x16_256_add(input, dest, stride);
  148. }
  149. void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
  150. int eob) {
  151. if (eob == 1)
  152. vpx_idct32x32_1_add(input, dest, stride);
  153. else if (eob <= 34)
  154. // non-zero coeff only in upper-left 8x8
  155. vpx_idct32x32_34_add(input, dest, stride);
  156. else if (eob <= 135)
  157. // non-zero coeff only in upper-left 16x16
  158. vpx_idct32x32_135_add(input, dest, stride);
  159. else
  160. vpx_idct32x32_1024_add(input, dest, stride);
  161. }
  162. // iht
  163. void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
  164. int stride, int eob) {
  165. if (tx_type == DCT_DCT)
  166. vp9_idct4x4_add(input, dest, stride, eob);
  167. else
  168. vp9_iht4x4_16_add(input, dest, stride, tx_type);
  169. }
  170. void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
  171. int stride, int eob) {
  172. if (tx_type == DCT_DCT) {
  173. vp9_idct8x8_add(input, dest, stride, eob);
  174. } else {
  175. vp9_iht8x8_64_add(input, dest, stride, tx_type);
  176. }
  177. }
  178. void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
  179. int stride, int eob) {
  180. if (tx_type == DCT_DCT) {
  181. vp9_idct16x16_add(input, dest, stride, eob);
  182. } else {
  183. vp9_iht16x16_256_add(input, dest, stride, tx_type);
  184. }
  185. }
  186. #if CONFIG_VP9_HIGHBITDEPTH
  187. void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
  188. int stride, int tx_type, int bd) {
  189. const highbd_transform_2d IHT_4[] = {
  190. { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0
  191. { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1
  192. { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2
  193. { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3
  194. };
  195. uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  196. int i, j;
  197. tran_low_t out[4 * 4];
  198. tran_low_t *outptr = out;
  199. tran_low_t temp_in[4], temp_out[4];
  200. // Inverse transform row vectors.
  201. for (i = 0; i < 4; ++i) {
  202. IHT_4[tx_type].rows(input, outptr, bd);
  203. input += 4;
  204. outptr += 4;
  205. }
  206. // Inverse transform column vectors.
  207. for (i = 0; i < 4; ++i) {
  208. for (j = 0; j < 4; ++j)
  209. temp_in[j] = out[j * 4 + i];
  210. IHT_4[tx_type].cols(temp_in, temp_out, bd);
  211. for (j = 0; j < 4; ++j) {
  212. dest[j * stride + i] = highbd_clip_pixel_add(
  213. dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
  214. }
  215. }
  216. }
  217. static const highbd_transform_2d HIGH_IHT_8[] = {
  218. { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0
  219. { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1
  220. { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2
  221. { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3
  222. };
  223. void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
  224. int stride, int tx_type, int bd) {
  225. int i, j;
  226. tran_low_t out[8 * 8];
  227. tran_low_t *outptr = out;
  228. tran_low_t temp_in[8], temp_out[8];
  229. const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
  230. uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  231. // Inverse transform row vectors.
  232. for (i = 0; i < 8; ++i) {
  233. ht.rows(input, outptr, bd);
  234. input += 8;
  235. outptr += 8;
  236. }
  237. // Inverse transform column vectors.
  238. for (i = 0; i < 8; ++i) {
  239. for (j = 0; j < 8; ++j)
  240. temp_in[j] = out[j * 8 + i];
  241. ht.cols(temp_in, temp_out, bd);
  242. for (j = 0; j < 8; ++j) {
  243. dest[j * stride + i] = highbd_clip_pixel_add(
  244. dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
  245. }
  246. }
  247. }
  248. static const highbd_transform_2d HIGH_IHT_16[] = {
  249. { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0
  250. { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1
  251. { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2
  252. { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3
  253. };
  254. void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
  255. int stride, int tx_type, int bd) {
  256. int i, j;
  257. tran_low_t out[16 * 16];
  258. tran_low_t *outptr = out;
  259. tran_low_t temp_in[16], temp_out[16];
  260. const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
  261. uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  262. // Rows
  263. for (i = 0; i < 16; ++i) {
  264. ht.rows(input, outptr, bd);
  265. input += 16;
  266. outptr += 16;
  267. }
  268. // Columns
  269. for (i = 0; i < 16; ++i) {
  270. for (j = 0; j < 16; ++j)
  271. temp_in[j] = out[j * 16 + i];
  272. ht.cols(temp_in, temp_out, bd);
  273. for (j = 0; j < 16; ++j) {
  274. dest[j * stride + i] = highbd_clip_pixel_add(
  275. dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
  276. }
  277. }
  278. }
  279. // idct
  280. void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
  281. int eob, int bd) {
  282. if (eob > 1)
  283. vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
  284. else
  285. vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
  286. }
  287. void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
  288. int eob, int bd) {
  289. if (eob > 1)
  290. vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
  291. else
  292. vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
  293. }
  294. void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
  295. int eob, int bd) {
  296. // If dc is 1, then input[0] is the reconstructed value, do not need
  297. // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
  298. // The calculation can be simplified if there are not many non-zero dct
  299. // coefficients. Use eobs to decide what to do.
  300. // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
  301. // Combine that with code here.
  302. // DC only DCT coefficient
  303. if (eob == 1) {
  304. vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
  305. } else if (eob <= 10) {
  306. vpx_highbd_idct8x8_10_add(input, dest, stride, bd);
  307. } else {
  308. vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
  309. }
  310. }
  311. void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
  312. int stride, int eob, int bd) {
  313. // The calculation can be simplified if there are not many non-zero dct
  314. // coefficients. Use eobs to separate different cases.
  315. // DC only DCT coefficient.
  316. if (eob == 1) {
  317. vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
  318. } else if (eob <= 10) {
  319. vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
  320. } else {
  321. vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
  322. }
  323. }
  324. void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
  325. int stride, int eob, int bd) {
  326. // Non-zero coeff only in upper-left 8x8
  327. if (eob == 1) {
  328. vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
  329. } else if (eob <= 34) {
  330. vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
  331. } else {
  332. vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
  333. }
  334. }
  335. // iht
  336. void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
  337. uint8_t *dest, int stride, int eob, int bd) {
  338. if (tx_type == DCT_DCT)
  339. vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
  340. else
  341. vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
  342. }
  343. void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
  344. uint8_t *dest, int stride, int eob, int bd) {
  345. if (tx_type == DCT_DCT) {
  346. vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
  347. } else {
  348. vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
  349. }
  350. }
  351. void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
  352. uint8_t *dest, int stride, int eob, int bd) {
  353. if (tx_type == DCT_DCT) {
  354. vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
  355. } else {
  356. vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
  357. }
  358. }
  359. #endif // CONFIG_VP9_HIGHBITDEPTH