quantize_msa.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. /*
  2. * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "./vp8_rtcd.h"
  11. #include "vp8/common/mips/msa/vp8_macros_msa.h"
  12. #include "vp8/encoder/block.h"
/* Fast (non-exact) quantization of one 4x4 block of 16 DCT coefficients
 * using MIPS MSA vector intrinsics.
 *
 * coeff_ptr : input coefficients, 16 x int16 in raster order
 * round     : per-coefficient rounding offsets (raster order)
 * quant     : per-coefficient quantizer multipliers; the product is taken
 *             >> 16 below, so these are presumably Q16 fixed point —
 *             TODO confirm against the scalar reference quantizer
 * de_quant  : per-coefficient dequantizer multipliers (raster order)
 * q_coeff   : output quantized coefficients (raster order)
 * dq_coeff  : output dequantized coefficients, q_coeff * de_quant
 *
 * Returns eob + 1: one past the zigzag index of the last nonzero quantized
 * coefficient, or 0 when the whole block quantizes to zero.
 */
static int8_t fast_quantize_b_msa(int16_t *coeff_ptr, int16_t *round,
                                  int16_t *quant, int16_t *de_quant,
                                  int16_t *q_coeff, int16_t *dq_coeff) {
  int32_t cnt, eob;
  /* Byte shuffle control that scatters zigzag-ordered values back to
   * raster order (inverse of the zigzag_mask gather below). */
  v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15 };
  v8i16 round0, round1;
  v8i16 sign_z0, sign_z1;
  v8i16 q_coeff0, q_coeff1;
  v8i16 x0, x1, de_quant0, de_quant1;
  v8i16 coeff0, coeff1, z0, z1;
  v8i16 quant0, quant1, quant2, quant3;
  v8i16 zero = { 0 };
  v8i16 inv_zig_zag0, inv_zig_zag1;
  /* Halfword shuffle controls gathering raster-order input into zigzag
   * scan order; mask0 produces zigzag positions 0..7, mask1 positions
   * 8..15. */
  v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
  v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
  v8i16 temp0_h, temp1_h, temp2_h, temp3_h;
  v4i32 temp0_w, temp1_w, temp2_w, temp3_w;
  /* Widen the byte inverse-zigzag table into two halfword shuffle masks
   * (macros come from vp8_macros_msa.h). */
  ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1);
  eob = -1;
  /* Load coefficients, round values and quant values, each reordered
   * into zigzag order via the gather masks. */
  LD_SH2(coeff_ptr, 8, coeff0, coeff1);
  VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, z0,
             z1);
  LD_SH2(round, 8, coeff0, coeff1);
  VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, round0,
             round1);
  LD_SH2(quant, 8, coeff0, coeff1);
  VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, quant0,
             quant2);
  /* Arithmetic shift by 15 yields an all-ones mask (-1) for negative
   * coefficients, 0 otherwise; add_a(z, 0) is |z|. */
  sign_z0 = z0 >> 15;
  sign_z1 = z1 >> 15;
  x0 = __msa_add_a_h(z0, zero);
  x1 = __msa_add_a_h(z1, zero);
  /* Interleave {x, round} pairs against {quant, quant} pairs so each
   * 32-bit dot-product lane computes x*quant + round*quant, i.e.
   * (|z| + round) * quant; >> 16 keeps the high half, then the even
   * halfwords are packed back into 16-bit lanes. */
  ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
  ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
  ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h);
  ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h);
  DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
              quant3, temp0_w, temp1_w, temp2_w, temp3_w);
  SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
  PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1);
  /* Restore the sign: (x ^ sign) - sign negates lanes where sign == -1
   * (two's-complement identity) and leaves the rest unchanged. */
  x0 = x0 ^ sign_z0;
  x1 = x1 ^ sign_z1;
  SUB2(x0, sign_z0, x1, sign_z1, x0, x1);
  /* Scatter the zigzag-ordered results back to raster order and store. */
  VSHF_H2_SH(x0, x1, x0, x1, inv_zig_zag0, inv_zig_zag1, q_coeff0, q_coeff1);
  ST_SH2(q_coeff0, q_coeff1, q_coeff, 8);
  /* Dequantize: element-wise multiply by the dequant table (raster order). */
  LD_SH2(de_quant, 8, de_quant0, de_quant1);
  q_coeff0 *= de_quant0;
  q_coeff1 *= de_quant1;
  ST_SH2(q_coeff0, q_coeff1, dq_coeff, 8);
  /* Scan from the highest zigzag position (x1[7] == position 15) down to
   * position 0 (x0[0]); the first nonzero lane gives eob. */
  for (cnt = 0; cnt < 16; ++cnt) {
    if ((cnt <= 7) && (x1[7 - cnt] != 0)) {
      eob = (15 - cnt);
      break;
    }
    if ((cnt > 7) && (x0[7 - (cnt - 8)] != 0)) {
      eob = (7 - (cnt - 8));
      break;
    }
  }
  return (int8_t)(eob + 1);
}
/* Exact (bit-matching the scalar reference) regular quantization of one
 * 4x4 block using MIPS MSA intrinsics, including the zbin dead zone and
 * the zero-run zbin boost.
 *
 * zbin_boost  : zero-run boost table; the cursor advances for each zero
 *               and resets after each coded (nonzero) coefficient
 * coeff_ptr   : input coefficients, 16 x int16 in raster order
 * zbin        : per-coefficient dead-zone thresholds
 * round       : per-coefficient rounding offsets
 * quant       : first-stage quantizer multipliers
 * quant_shift : second-stage quantizer multipliers
 * de_quant    : dequantizer multipliers
 * zbin_oq_in  : extra dead-zone offset applied to every coefficient
 * q_coeff     : output quantized coefficients (raster order)
 * dq_coeff    : output dequantized coefficients
 *
 * Returns eob + 1: one past the zigzag index of the last coded
 * coefficient, or 0 when nothing is coded.
 */
static int8_t exact_regular_quantize_b_msa(
    int16_t *zbin_boost, int16_t *coeff_ptr, int16_t *zbin, int16_t *round,
    int16_t *quant, int16_t *quant_shift, int16_t *de_quant, int16_t zbin_oq_in,
    int16_t *q_coeff, int16_t *dq_coeff) {
  int32_t cnt, eob;
  int16_t *boost_temp = zbin_boost;  /* cursor into the zero-run boost table */
  /* Scatter control: zigzag order -> raster order (inverse of the gather
   * masks below). */
  v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15 };
  v8i16 round0, round1;
  v8i16 sign_z0, sign_z1;
  v8i16 q_coeff0, q_coeff1;
  v8i16 z_bin0, z_bin1, zbin_o_q;
  v8i16 x0, x1, sign_x0, sign_x1, de_quant0, de_quant1;
  v8i16 coeff0, coeff1, z0, z1;
  v8i16 quant0, quant1, quant2, quant3;
  v8i16 zero = { 0 };
  v8i16 inv_zig_zag0, inv_zig_zag1;
  /* Gather controls: raster order -> zigzag order (low/high 8 positions). */
  v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
  v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
  v8i16 temp0_h, temp1_h, temp2_h, temp3_h;
  v4i32 temp0_w, temp1_w, temp2_w, temp3_w;
  ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1);
  /* Broadcast the extra dead-zone offset to all lanes. */
  zbin_o_q = __msa_fill_h(zbin_oq_in);
  eob = -1;
  /* Load coeff/round/quant/zbin, each reordered into zigzag order. */
  LD_SH2(coeff_ptr, 8, coeff0, coeff1);
  VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, z0,
             z1);
  LD_SH2(round, 8, coeff0, coeff1);
  VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, round0,
             round1);
  LD_SH2(quant, 8, coeff0, coeff1);
  VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, quant0,
             quant2);
  LD_SH2(zbin, 8, coeff0, coeff1);
  VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, z_bin0,
             z_bin1);
  /* sign mask (0 or -1 per lane) and absolute value of each coefficient. */
  sign_z0 = z0 >> 15;
  sign_z1 = z1 >> 15;
  x0 = __msa_add_a_h(z0, zero);
  x1 = __msa_add_a_h(z1, zero);
  /* z_bin becomes |z| - zbin - zbin_oq, so the scalar loop below only has
   * to compare it against the current boost value. */
  SUB2(x0, z_bin0, x1, z_bin1, z_bin0, z_bin1);
  SUB2(z_bin0, zbin_o_q, z_bin1, zbin_o_q, z_bin0, z_bin1);
  /* Stage 1: temp = ((|z| + round_interleaved) dot quant) >> 16 via
   * interleave + 16x16->32 dot product, packed back to 16-bit lanes. */
  ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
  ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
  ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h);
  ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h);
  DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
              quant3, temp0_w, temp1_w, temp2_w, temp3_w);
  SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
  PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, temp0_h, temp2_h);
  /* Stage 2: x = (((|z| + round) + temp) * quant_shift) >> 16, again via
   * interleaved dot products against the zigzag-ordered quant_shift. */
  LD_SH2(quant_shift, 8, coeff0, coeff1);
  VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, quant0,
             quant2);
  ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
  ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
  ADD2(x0, round0, x1, round1, x0, x1);
  ILVL_H2_SH(temp0_h, x0, temp2_h, x1, temp1_h, temp3_h);
  ILVR_H2_SH(temp0_h, x0, temp2_h, x1, temp0_h, temp2_h);
  DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
              quant3, temp0_w, temp1_w, temp2_w, temp3_w);
  SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
  PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1);
  /* Restore signs: (x ^ sign) - sign negates negative-coefficient lanes. */
  sign_x0 = x0 ^ sign_z0;
  sign_x1 = x1 ^ sign_z1;
  SUB2(sign_x0, sign_z0, sign_x1, sign_z1, sign_x0, sign_x1);
  /* Scalar dead-zone pass in zigzag order (inherently sequential because
   * the boost cursor depends on the previous lanes):
   *   - coefficient coded  (|z| - zbin - oq >= boost and x != 0):
   *     record eob, reset the boost cursor;
   *   - below the dead zone: zero the lane and advance the boost cursor;
   *   - quantized to zero but above the dead zone: advance the cursor. */
  for (cnt = 0; cnt < 16; ++cnt) {
    if (cnt <= 7) {
      if (boost_temp[0] <= z_bin0[cnt]) {
        if (x0[cnt]) {
          eob = cnt;
          boost_temp = zbin_boost;
        } else {
          boost_temp++;
        }
      } else {
        sign_x0[cnt] = 0;
        boost_temp++;
      }
    } else {
      if (boost_temp[0] <= z_bin1[cnt - 8]) {
        if (x1[cnt - 8]) {
          eob = cnt;
          boost_temp = zbin_boost;
        } else {
          boost_temp++;
        }
      } else {
        sign_x1[cnt - 8] = 0;
        boost_temp++;
      }
    }
  }
  /* Scatter back to raster order, store, then dequantize by element-wise
   * multiply with the dequant table. */
  VSHF_H2_SH(sign_x0, sign_x1, sign_x0, sign_x1, inv_zig_zag0, inv_zig_zag1,
             q_coeff0, q_coeff1);
  ST_SH2(q_coeff0, q_coeff1, q_coeff, 8);
  LD_SH2(de_quant, 8, de_quant0, de_quant1);
  MUL2(de_quant0, q_coeff0, de_quant1, q_coeff1, de_quant0, de_quant1);
  ST_SH2(de_quant0, de_quant1, dq_coeff, 8);
  return (int8_t)(eob + 1);
}
  173. void vp8_fast_quantize_b_msa(BLOCK *b, BLOCKD *d) {
  174. int16_t *coeff_ptr = b->coeff;
  175. int16_t *round_ptr = b->round;
  176. int16_t *quant_ptr = b->quant_fast;
  177. int16_t *qcoeff_ptr = d->qcoeff;
  178. int16_t *dqcoeff_ptr = d->dqcoeff;
  179. int16_t *dequant_ptr = d->dequant;
  180. *d->eob = fast_quantize_b_msa(coeff_ptr, round_ptr, quant_ptr, dequant_ptr,
  181. qcoeff_ptr, dqcoeff_ptr);
  182. }
  183. void vp8_regular_quantize_b_msa(BLOCK *b, BLOCKD *d) {
  184. int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
  185. int16_t *coeff_ptr = b->coeff;
  186. int16_t *zbin_ptr = b->zbin;
  187. int16_t *round_ptr = b->round;
  188. int16_t *quant_ptr = b->quant;
  189. int16_t *quant_shift_ptr = b->quant_shift;
  190. int16_t *qcoeff_ptr = d->qcoeff;
  191. int16_t *dqcoeff_ptr = d->dqcoeff;
  192. int16_t *dequant_ptr = d->dequant;
  193. int16_t zbin_oq_value = b->zbin_extra;
  194. *d->eob = exact_regular_quantize_b_msa(
  195. zbin_boost_ptr, coeff_ptr, zbin_ptr, round_ptr, quant_ptr,
  196. quant_shift_ptr, dequant_ptr, zbin_oq_value, qcoeff_ptr, dqcoeff_ptr);
  197. }