vp9_detokenize.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "vpx_mem/vpx_mem.h"
  11. #include "vpx_ports/mem.h"
  12. #include "vp9/common/vp9_blockd.h"
  13. #include "vp9/common/vp9_common.h"
  14. #include "vp9/common/vp9_entropy.h"
  15. #if CONFIG_COEFFICIENT_RANGE_CHECKING
  16. #include "vp9/common/vp9_idct.h"
  17. #endif
  18. #include "vp9/decoder/vp9_detokenize.h"
  19. #define EOB_CONTEXT_NODE 0
  20. #define ZERO_CONTEXT_NODE 1
  21. #define ONE_CONTEXT_NODE 2
  22. #define INCREMENT_COUNT(token) \
  23. do { \
  24. if (counts) ++coef_counts[band][ctx][token]; \
  25. } while (0)
  26. static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value,
  27. int *count, unsigned int *range) {
  28. const unsigned int split = (*range * prob + (256 - prob)) >> CHAR_BIT;
  29. const BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT);
  30. if (*count < 0) {
  31. r->value = *value;
  32. r->count = *count;
  33. vpx_reader_fill(r);
  34. *value = r->value;
  35. *count = r->count;
  36. }
  37. if (*value >= bigsplit) {
  38. *range = *range - split;
  39. *value = *value - bigsplit;
  40. {
  41. const int shift = vpx_norm[*range];
  42. *range <<= shift;
  43. *value <<= shift;
  44. *count -= shift;
  45. }
  46. return 1;
  47. }
  48. *range = split;
  49. {
  50. const int shift = vpx_norm[*range];
  51. *range <<= shift;
  52. *value <<= shift;
  53. *count -= shift;
  54. }
  55. return 0;
  56. }
  57. static INLINE int read_coeff(vpx_reader *r, const vpx_prob *probs, int n,
  58. BD_VALUE *value, int *count, unsigned int *range) {
  59. int i, val = 0;
  60. for (i = 0; i < n; ++i)
  61. val = (val << 1) | read_bool(r, probs[i], value, count, range);
  62. return val;
  63. }
  64. static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
  65. tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
  66. int ctx, const int16_t *scan, const int16_t *nb,
  67. vpx_reader *r) {
  68. FRAME_COUNTS *counts = xd->counts;
  69. const int max_eob = 16 << (tx_size << 1);
  70. const FRAME_CONTEXT *const fc = xd->fc;
  71. const int ref = is_inter_block(xd->mi[0]);
  72. int band, c = 0;
  73. const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
  74. fc->coef_probs[tx_size][type][ref];
  75. const vpx_prob *prob;
  76. unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
  77. unsigned int(*eob_branch_count)[COEFF_CONTEXTS];
  78. uint8_t token_cache[32 * 32];
  79. const uint8_t *band_translate = get_band_translate(tx_size);
  80. const int dq_shift = (tx_size == TX_32X32);
  81. int v;
  82. int16_t dqv = dq[0];
  83. const uint8_t *const cat6_prob =
  84. #if CONFIG_VP9_HIGHBITDEPTH
  85. (xd->bd == VPX_BITS_12)
  86. ? vp9_cat6_prob_high12
  87. : (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 :
  88. #endif // CONFIG_VP9_HIGHBITDEPTH
  89. vp9_cat6_prob;
  90. const int cat6_bits =
  91. #if CONFIG_VP9_HIGHBITDEPTH
  92. (xd->bd == VPX_BITS_12) ? 18
  93. : (xd->bd == VPX_BITS_10) ? 16 :
  94. #endif // CONFIG_VP9_HIGHBITDEPTH
  95. 14;
  96. // Keep value, range, and count as locals. The compiler produces better
  97. // results with the locals than using r directly.
  98. BD_VALUE value = r->value;
  99. unsigned int range = r->range;
  100. int count = r->count;
  101. if (counts) {
  102. coef_counts = counts->coef[tx_size][type][ref];
  103. eob_branch_count = counts->eob_branch[tx_size][type][ref];
  104. }
  105. while (c < max_eob) {
  106. int val = -1;
  107. band = *band_translate++;
  108. prob = coef_probs[band][ctx];
  109. if (counts) ++eob_branch_count[band][ctx];
  110. if (!read_bool(r, prob[EOB_CONTEXT_NODE], &value, &count, &range)) {
  111. INCREMENT_COUNT(EOB_MODEL_TOKEN);
  112. break;
  113. }
  114. while (!read_bool(r, prob[ZERO_CONTEXT_NODE], &value, &count, &range)) {
  115. INCREMENT_COUNT(ZERO_TOKEN);
  116. dqv = dq[1];
  117. token_cache[scan[c]] = 0;
  118. ++c;
  119. if (c >= max_eob) {
  120. r->value = value;
  121. r->range = range;
  122. r->count = count;
  123. return c; // zero tokens at the end (no eob token)
  124. }
  125. ctx = get_coef_context(nb, token_cache, c);
  126. band = *band_translate++;
  127. prob = coef_probs[band][ctx];
  128. }
  129. if (read_bool(r, prob[ONE_CONTEXT_NODE], &value, &count, &range)) {
  130. const vpx_prob *p = vp9_pareto8_full[prob[PIVOT_NODE] - 1];
  131. INCREMENT_COUNT(TWO_TOKEN);
  132. if (read_bool(r, p[0], &value, &count, &range)) {
  133. if (read_bool(r, p[3], &value, &count, &range)) {
  134. token_cache[scan[c]] = 5;
  135. if (read_bool(r, p[5], &value, &count, &range)) {
  136. if (read_bool(r, p[7], &value, &count, &range)) {
  137. val = CAT6_MIN_VAL +
  138. read_coeff(r, cat6_prob, cat6_bits, &value, &count, &range);
  139. } else {
  140. val = CAT5_MIN_VAL +
  141. read_coeff(r, vp9_cat5_prob, 5, &value, &count, &range);
  142. }
  143. } else if (read_bool(r, p[6], &value, &count, &range)) {
  144. val = CAT4_MIN_VAL +
  145. read_coeff(r, vp9_cat4_prob, 4, &value, &count, &range);
  146. } else {
  147. val = CAT3_MIN_VAL +
  148. read_coeff(r, vp9_cat3_prob, 3, &value, &count, &range);
  149. }
  150. } else {
  151. token_cache[scan[c]] = 4;
  152. if (read_bool(r, p[4], &value, &count, &range)) {
  153. val = CAT2_MIN_VAL +
  154. read_coeff(r, vp9_cat2_prob, 2, &value, &count, &range);
  155. } else {
  156. val = CAT1_MIN_VAL +
  157. read_coeff(r, vp9_cat1_prob, 1, &value, &count, &range);
  158. }
  159. }
  160. #if CONFIG_VP9_HIGHBITDEPTH
  161. // val may use 18-bits
  162. v = (int)(((int64_t)val * dqv) >> dq_shift);
  163. #else
  164. v = (val * dqv) >> dq_shift;
  165. #endif
  166. } else {
  167. if (read_bool(r, p[1], &value, &count, &range)) {
  168. token_cache[scan[c]] = 3;
  169. v = ((3 + read_bool(r, p[2], &value, &count, &range)) * dqv) >>
  170. dq_shift;
  171. } else {
  172. token_cache[scan[c]] = 2;
  173. v = (2 * dqv) >> dq_shift;
  174. }
  175. }
  176. } else {
  177. INCREMENT_COUNT(ONE_TOKEN);
  178. token_cache[scan[c]] = 1;
  179. v = dqv >> dq_shift;
  180. }
  181. #if CONFIG_COEFFICIENT_RANGE_CHECKING
  182. #if CONFIG_VP9_HIGHBITDEPTH
  183. dqcoeff[scan[c]] = highbd_check_range(
  184. read_bool(r, 128, &value, &count, &range) ? -v : v, xd->bd);
  185. #else
  186. dqcoeff[scan[c]] =
  187. check_range(read_bool(r, 128, &value, &count, &range) ? -v : v);
  188. #endif // CONFIG_VP9_HIGHBITDEPTH
  189. #else
  190. if (read_bool(r, 128, &value, &count, &range)) {
  191. dqcoeff[scan[c]] = -v;
  192. } else {
  193. dqcoeff[scan[c]] = v;
  194. }
  195. #endif // CONFIG_COEFFICIENT_RANGE_CHECKING
  196. ++c;
  197. ctx = get_coef_context(nb, token_cache, c);
  198. dqv = dq[1];
  199. }
  200. r->value = value;
  201. r->range = range;
  202. r->count = count;
  203. return c;
  204. }
  205. static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l,
  206. int x, int y, unsigned int tx_size_in_blocks) {
  207. if (xd->max_blocks_wide) {
  208. if (tx_size_in_blocks + x > xd->max_blocks_wide)
  209. *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8;
  210. }
  211. if (xd->max_blocks_high) {
  212. if (tx_size_in_blocks + y > xd->max_blocks_high)
  213. *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8;
  214. }
  215. }
  216. int vp9_decode_block_tokens(TileWorkerData *twd, int plane,
  217. const scan_order *sc, int x, int y, TX_SIZE tx_size,
  218. int seg_id) {
  219. vpx_reader *r = &twd->bit_reader;
  220. MACROBLOCKD *xd = &twd->xd;
  221. struct macroblockd_plane *const pd = &xd->plane[plane];
  222. const int16_t *const dequant = pd->seg_dequant[seg_id];
  223. int eob;
  224. ENTROPY_CONTEXT *a = pd->above_context + x;
  225. ENTROPY_CONTEXT *l = pd->left_context + y;
  226. int ctx;
  227. int ctx_shift_a = 0;
  228. int ctx_shift_l = 0;
  229. switch (tx_size) {
  230. case TX_4X4:
  231. ctx = a[0] != 0;
  232. ctx += l[0] != 0;
  233. eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
  234. dequant, ctx, sc->scan, sc->neighbors, r);
  235. a[0] = l[0] = (eob > 0);
  236. break;
  237. case TX_8X8:
  238. get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8);
  239. ctx = !!*(const uint16_t *)a;
  240. ctx += !!*(const uint16_t *)l;
  241. eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
  242. dequant, ctx, sc->scan, sc->neighbors, r);
  243. *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a;
  244. *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l;
  245. break;
  246. case TX_16X16:
  247. get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16);
  248. ctx = !!*(const uint32_t *)a;
  249. ctx += !!*(const uint32_t *)l;
  250. eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
  251. dequant, ctx, sc->scan, sc->neighbors, r);
  252. *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a;
  253. *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l;
  254. break;
  255. case TX_32X32:
  256. get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32);
  257. // NOTE: casting to uint64_t here is safe because the default memory
  258. // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte
  259. // boundaries.
  260. ctx = !!*(const uint64_t *)a;
  261. ctx += !!*(const uint64_t *)l;
  262. eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
  263. dequant, ctx, sc->scan, sc->neighbors, r);
  264. *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a;
  265. *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l;
  266. break;
  267. default:
  268. assert(0 && "Invalid transform size.");
  269. eob = 0;
  270. break;
  271. }
  272. return eob;
  273. }