convolve.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*
  2. * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #ifndef VPX_DSP_X86_CONVOLVE_H_
  11. #define VPX_DSP_X86_CONVOLVE_H_
  12. #include <assert.h>
  13. #include "./vpx_config.h"
  14. #include "vpx/vpx_integer.h"
  15. #include "vpx_ports/mem.h"
  /*
   * Function type shared by the 8-bit one-dimensional filter kernels.
   *
   * The argument order (source pointer, source pitch, output pointer, output
   * pitch, row count, filter taps) is the same order FUN_CONV_1D uses when it
   * calls the vpx_filter_block1d* kernels.
   */
  typedef void filter8_1dfunction(
      const uint8_t *src_ptr,
      ptrdiff_t src_pitch,
      uint8_t *output_ptr,
      ptrdiff_t out_pitch,
      uint32_t output_height,
      const int16_t *filter);
  /*
   * FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)
   *
   * Expands to the definition of vpx_convolve8_<name>_<opt>(), a wrapper that
   * forwards to the vpx_filter_block1d{16,8,4}_<dir>{8,2}_<avg><opt> kernels.
   *
   *   offset     index into filter_kernel selecting the taps to use
   *   step_q4    expression asserted to equal 16
   *   src_start  source-pointer expression handed to the "8" kernels; it is
   *              re-evaluated for every 16-column strip, so it may be written
   *              in terms of `src`
   *
   * The "2" kernels are chosen when taps 0..2 are all zero and are always fed
   * `src` itself. The width is consumed in 16-column strips; a remainder of
   * exactly 8 or exactly 4 columns goes to the narrower kernel, and any other
   * remainder is left untouched.
   */
  #define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \
    void vpx_convolve8_##name##_##opt( \
        const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
        ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \
        int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \
      const int16_t *filter = filter_kernel[offset]; \
      /* Silence unused-parameter warnings for whichever of these the */ \
      /* `offset` / `step_q4` arguments do not reference. */ \
      (void)x0_q4; \
      (void)x_step_q4; \
      (void)y0_q4; \
      (void)y_step_q4; \
      assert(filter[3] != 128); \
      assert(step_q4 == 16); \
      if ((filter[0] | filter[1] | filter[2]) == 0) { \
        /* Taps 0..2 are zero: use the "2" kernels, reading from src. */ \
        for (; w >= 16; w -= 16, src += 16, dst += 16) { \
          vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \
                                                   dst_stride, h, filter); \
        } \
        switch (w) { \
          case 8: \
            vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \
                                                    dst_stride, h, filter); \
            break; \
          case 4: \
            vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \
                                                    dst_stride, h, filter); \
            break; \
          default: \
            break; \
        } \
      } else { \
        /* General case: use the "8" kernels, reading from src_start. */ \
        for (; w >= 16; w -= 16, src += 16, dst += 16) { \
          vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                   dst_stride, h, filter); \
        } \
        switch (w) { \
          case 8: \
            vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, \
                                                    dst, dst_stride, h, \
                                                    filter); \
            break; \
          case 4: \
            vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, \
                                                    dst, dst_stride, h, \
                                                    filter); \
            break; \
          default: \
            break; \
        } \
      } \
    }
  /*
   * FUN_CONV_2D(avg, opt)
   *
   * Expands to the definition of vpx_convolve8_<avg><opt>(), a two-pass
   * convolution: vpx_convolve8_horiz_<opt>() writes into a 64-column-wide
   * aligned scratch buffer on the stack, then vpx_convolve8_<avg>vert_<opt>()
   * filters that buffer into dst.
   *
   * Both steps are asserted to be 16 and w/h are asserted to be at most 64,
   * which bounds the scratch buffer at 64 columns by 64 + 7 rows.
   *
   * When taps 0..2 of the horizontal kernel are non-zero, the horizontal pass
   * starts 3 rows above src and produces h + 7 rows, and the vertical pass
   * starts 3 rows into the scratch buffer. Otherwise only h + 1 rows are
   * produced, starting at src.
   */
  #define FUN_CONV_2D(avg, opt) \
    void vpx_convolve8_##avg##opt( \
        const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
        ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \
        int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \
      const int16_t *kernel_x = filter[x0_q4]; \
      const int16_t *kernel_y = filter[y0_q4]; \
      /* kernel_y is only read by an assert below. */ \
      (void)kernel_y; \
      assert(kernel_x[3] != 128); \
      assert(kernel_y[3] != 128); \
      assert(w <= 64); \
      assert(h <= 64); \
      assert(x_step_q4 == 16); \
      assert(y_step_q4 == 16); \
      if ((kernel_x[0] | kernel_x[1] | kernel_x[2]) == 0) { \
        /* One extra row beyond the h output rows. */ \
        DECLARE_ALIGNED(16, uint8_t, scratch[64 * 65]); \
        vpx_convolve8_horiz_##opt(src, src_stride, scratch, 64, filter, \
                                  x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
                                  h + 1); \
        vpx_convolve8_##avg##vert_##opt(scratch, 64, dst, dst_stride, filter, \
                                        x0_q4, x_step_q4, y0_q4, y_step_q4, \
                                        w, h); \
      } else { \
        /* Seven extra rows: three above src and four below the block. */ \
        DECLARE_ALIGNED(16, uint8_t, scratch[64 * 71]); \
        vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, scratch, \
                                  64, filter, x0_q4, x_step_q4, y0_q4, \
                                  y_step_q4, w, h + 7); \
        vpx_convolve8_##avg##vert_##opt(scratch + 3 * 64, 64, dst, \
                                        dst_stride, filter, x0_q4, x_step_q4, \
                                        y0_q4, y_step_q4, w, h); \
      } \
    }
  94. #if CONFIG_VP9_HIGHBITDEPTH
  /*
   * Function type shared by the high-bit-depth one-dimensional filter
   * kernels. It has the same shape as the 8-bit variant, but operates on
   * uint16_t samples and takes a trailing `bd` argument.
   */
  typedef void highbd_filter8_1dfunction(
      const uint16_t *src_ptr,
      const ptrdiff_t src_pitch,
      uint16_t *output_ptr,
      ptrdiff_t out_pitch,
      unsigned int output_height,
      const int16_t *filter,
      int bd);
  /*
   * HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)
   *
   * Expands to the definition of vpx_highbd_convolve8_<name>_<opt>(), a
   * wrapper that forwards to the
   * vpx_highbd_filter_block1d{16,8,4}_<dir>{8,2}_<avg><opt> kernels.
   *
   *   offset     index into filter_kernel selecting the taps to use
   *   step_q4    expression that must equal 16 for the kernels to be used
   *   src_start  source-pointer expression handed to the "8" kernels; it is
   *              re-evaluated for every strip, so it may be written in terms
   *              of `src`
   *
   * The kernels are used only when step_q4 == 16 and filter[3] != 128. In
   * that case the width is consumed in strips of 16, then 8, then 4 columns,
   * using the "2" kernels when taps 0..2 are all zero and the "8" kernels
   * otherwise. Whatever width is still left (all of it when the kernels were
   * skipped) is passed to vpx_highbd_convolve8_<name>_c().
   */
  #define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \
    void vpx_highbd_convolve8_##name##_##opt( \
        const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
        ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \
        int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \
      const int16_t *filter = filter_kernel[offset]; \
      if (step_q4 == 16 && filter[3] != 128) { \
        if ((filter[0] | filter[1] | filter[2]) == 0) { \
          /* Taps 0..2 are zero: use the "2" kernels, reading from src. */ \
          for (; w >= 16; w -= 16, src += 16, dst += 16) { \
            vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \
                src, src_stride, dst, dst_stride, h, filter, bd); \
          } \
          for (; w >= 8; w -= 8, src += 8, dst += 8) { \
            vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \
                src, src_stride, dst, dst_stride, h, filter, bd); \
          } \
          for (; w >= 4; w -= 4, src += 4, dst += 4) { \
            vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \
                src, src_stride, dst, dst_stride, h, filter, bd); \
          } \
        } else { \
          /* General case: use the "8" kernels, reading from src_start. */ \
          for (; w >= 16; w -= 16, src += 16, dst += 16) { \
            vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
                src_start, src_stride, dst, dst_stride, h, filter, bd); \
          } \
          for (; w >= 8; w -= 8, src += 8, dst += 8) { \
            vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \
                src_start, src_stride, dst, dst_stride, h, filter, bd); \
          } \
          for (; w >= 4; w -= 4, src += 4, dst += 4) { \
            vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \
                src_start, src_stride, dst, dst_stride, h, filter, bd); \
          } \
        } \
      } \
      /* Remaining columns go to the C implementation. */ \
      if (w != 0) { \
        vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
                                        filter_kernel, x0_q4, x_step_q4, \
                                        y0_q4, y_step_q4, w, h, bd); \
      } \
    }
  /*
   * HIGH_FUN_CONV_2D(avg, opt)
   *
   * Expands to the definition of vpx_highbd_convolve8_<avg><opt>(), a
   * two-pass convolution on uint16_t samples.
   *
   * If either step is not 16 the whole call is forwarded to
   * vpx_highbd_convolve8_<avg>c(). Otherwise
   * vpx_highbd_convolve8_horiz_<opt>() writes into a 64-column-wide aligned
   * scratch buffer on the stack and vpx_highbd_convolve8_<avg>vert_<opt>()
   * filters that buffer into dst.
   *
   * When taps 0..2 of the horizontal kernel are non-zero, or tap 3 is 128,
   * the horizontal pass starts 3 rows above src and produces h + 7 rows, and
   * the vertical pass starts 3 rows into the scratch buffer. Otherwise only
   * h + 1 rows are produced, starting at src. w and h are asserted to be at
   * most 64.
   */
  #define HIGH_FUN_CONV_2D(avg, opt) \
    void vpx_highbd_convolve8_##avg##opt( \
        const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
        ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \
        int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \
      const int16_t *kernel_x = filter[x0_q4]; \
      assert(w <= 64); \
      assert(h <= 64); \
      if (x_step_q4 != 16 || y_step_q4 != 16) { \
        /* Steps other than 16 are handled by the C implementation. */ \
        vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
                                      filter, x0_q4, x_step_q4, y0_q4, \
                                      y_step_q4, w, h, bd); \
        return; \
      } \
      if ((kernel_x[0] | kernel_x[1] | kernel_x[2]) || kernel_x[3] == 128) { \
        /* Seven extra rows: three above src and four below the block. */ \
        DECLARE_ALIGNED(16, uint16_t, scratch[64 * 71]); \
        vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
                                         scratch, 64, filter, x0_q4, \
                                         x_step_q4, y0_q4, y_step_q4, w, \
                                         h + 7, bd); \
        vpx_highbd_convolve8_##avg##vert_##opt( \
            scratch + 3 * 64, 64, dst, dst_stride, filter, x0_q4, x_step_q4, \
            y0_q4, y_step_q4, w, h, bd); \
      } else { \
        /* One extra row beyond the h output rows. */ \
        DECLARE_ALIGNED(16, uint16_t, scratch[64 * 65]); \
        vpx_highbd_convolve8_horiz_##opt(src, src_stride, scratch, 64, \
                                         filter, x0_q4, x_step_q4, y0_q4, \
                                         y_step_q4, w, h + 1, bd); \
        vpx_highbd_convolve8_##avg##vert_##opt(scratch, 64, dst, dst_stride, \
                                               filter, x0_q4, x_step_q4, \
                                               y0_q4, y_step_q4, w, h, bd); \
      } \
    }
  192. #endif // CONFIG_VP9_HIGHBITDEPTH
  193. #endif // VPX_DSP_X86_CONVOLVE_H_