variance.c 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "./vpx_config.h"
  11. #include "./vpx_dsp_rtcd.h"
  12. #include "vpx_ports/mem.h"
  13. #include "vpx/vpx_integer.h"
  14. #include "vpx_dsp/variance.h"
  // 2-tap bilinear filter kernels, selected by the xoffset / yoffset arguments
  // of the sub-pixel variance functions. Row k holds { 128 - 16 * k, 16 * k },
  // so the two taps of every row sum to 128.
  static const uint8_t bilinear_filters[8][2] = {
    { 128, 0 },   // offset 0: first tap only
    { 112, 16 },  // offset 1
    { 96, 32 },   // offset 2
    { 80, 48 },   // offset 3
    { 64, 64 },   // offset 4: equal weights
    { 48, 80 },   // offset 5
    { 32, 96 },   // offset 6
    { 16, 112 },  // offset 7
  };
  // Returns the sum of squared differences between two 4x4 blocks of 8-bit
  // samples. a_stride and b_stride are the distances, in samples, between the
  // starts of consecutive rows of a and b.
  uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                              int b_stride) {
    int sse = 0;
    int row;

    for (row = 0; row < 4; ++row, a += a_stride, b += b_stride) {
      int col;
      for (col = 0; col < 4; ++col) {
        const int delta = a[col] - b[col];
        sse += delta * delta;
      }
    }
    return sse;
  }
  // Returns the sum of the squares of the 256 consecutive int16_t values
  // starting at a.
  uint32_t vpx_get_mb_ss_c(const int16_t *a) {
    const int16_t *const end = a + 256;
    unsigned int total = 0;

    while (a < end) {
      const int value = *a++;
      total += value * value;
    }
    return total;
  }
  // Accumulates the per-sample differences a - b over a w x h block of 8-bit
  // samples.
  //
  //   a, a_stride  first block and the distance between its rows
  //   b, b_stride  second block and the distance between its rows
  //   w, h         block width and height in samples
  //   sse          receives the sum of squared differences
  //   sum          receives the (signed) sum of differences
  static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                       int b_stride, int w, int h, uint32_t *sse, int *sum) {
    uint32_t sq_total = 0;
    int diff_total = 0;
    int row;

    for (row = 0; row < h; ++row) {
      int col;
      for (col = 0; col < w; ++col) {
        const int delta = a[col] - b[col];
        diff_total += delta;
        sq_total += delta * delta;
      }
      a += a_stride;
      b += b_stride;
    }

    *sum = diff_total;
    *sse = sq_total;
  }
  55. // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
  56. // or vertical direction to produce the filtered output block. Used to implement
  57. // the first-pass of 2-D separable filter.
  58. //
  59. // Produces int16_t output to retain precision for the next pass. Two filter
  60. // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
  61. // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
  62. // It defines the offset required to move from one input to the next.
  63. static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
  64. unsigned int src_pixels_per_line,
  65. int pixel_step,
  66. unsigned int output_height,
  67. unsigned int output_width,
  68. const uint8_t *filter) {
  69. unsigned int i, j;
  70. for (i = 0; i < output_height; ++i) {
  71. for (j = 0; j < output_width; ++j) {
  72. b[j] = ROUND_POWER_OF_TWO(
  73. (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
  74. ++a;
  75. }
  76. a += src_pixels_per_line - output_width;
  77. b += output_width;
  78. }
  79. }
  80. // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
  81. // or vertical direction to produce the filtered output block. Used to implement
  82. // the second-pass of 2-D separable filter.
  83. //
  84. // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
  85. // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
  86. // filter is applied horizontally (pixel_step = 1) or vertically
  87. // (pixel_step = stride). It defines the offset required to move from one input
  88. // to the next. Output is 8-bit.
  89. static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
  90. unsigned int src_pixels_per_line,
  91. unsigned int pixel_step,
  92. unsigned int output_height,
  93. unsigned int output_width,
  94. const uint8_t *filter) {
  95. unsigned int i, j;
  96. for (i = 0; i < output_height; ++i) {
  97. for (j = 0; j < output_width; ++j) {
  98. b[j] = ROUND_POWER_OF_TWO(
  99. (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
  100. ++a;
  101. }
  102. a += src_pixels_per_line - output_width;
  103. b += output_width;
  104. }
  105. }
  /* Defines vpx_variance<W>x<H>_c(): stores the sum of squared differences of
   * the W x H blocks a and b in *sse and returns
   * *sse - (sum of differences)^2 / (W * H).
   */
  #define VAR(W, H) \
    uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                       const uint8_t *b, int b_stride, \
                                       uint32_t *sse) { \
      int diff_sum; \
      int64_t sum_sq; \
      variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum); \
      sum_sq = (int64_t)diff_sum * diff_sum; \
      return *sse - (uint32_t)(sum_sq / (W * H)); \
    }
  /* Defines vpx_sub_pixel_variance<W>x<H>_c(): bilinearly interpolates the
   * block at a using the kernels bilinear_filters[xoffset] (horizontal pass,
   * H + 1 rows) and bilinear_filters[yoffset] (vertical pass, H rows), then
   * returns the variance of the interpolated W x H block against b.
   */
  #define SUBPIX_VAR(W, H) \
    uint32_t vpx_sub_pixel_variance##W##x##H##_c( \
        const uint8_t *a, int a_stride, int xoffset, int yoffset, \
        const uint8_t *b, int b_stride, uint32_t *sse) { \
      uint16_t horiz[(H + 1) * W]; \
      uint8_t interp[H * W]; \
      \
      var_filter_block2d_bil_first_pass(a, horiz, a_stride, 1, H + 1, W, \
                                        bilinear_filters[xoffset]); \
      var_filter_block2d_bil_second_pass(horiz, interp, W, W, H, W, \
                                         bilinear_filters[yoffset]); \
      \
      return vpx_variance##W##x##H##_c(interp, W, b, b_stride, sse); \
    }
  /* Defines vpx_sub_pixel_avg_variance<W>x<H>_c(): bilinearly interpolates the
   * block at a (kernels bilinear_filters[xoffset] / bilinear_filters[yoffset]),
   * averages the result with second_pred via vpx_comp_avg_pred_c(), and returns
   * the variance of that averaged W x H block against b.
   */
  #define SUBPIX_AVG_VAR(W, H) \
    uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \
        const uint8_t *a, int a_stride, int xoffset, int yoffset, \
        const uint8_t *b, int b_stride, uint32_t *sse, \
        const uint8_t *second_pred) { \
      uint16_t horiz[(H + 1) * W]; \
      uint8_t interp[H * W]; \
      DECLARE_ALIGNED(16, uint8_t, averaged[H * W]); \
      \
      var_filter_block2d_bil_first_pass(a, horiz, a_stride, 1, H + 1, W, \
                                        bilinear_filters[xoffset]); \
      var_filter_block2d_bil_second_pass(horiz, interp, W, W, H, W, \
                                         bilinear_filters[yoffset]); \
      \
      vpx_comp_avg_pred_c(averaged, second_pred, W, H, interp, W); \
      \
      return vpx_variance##W##x##H##_c(averaged, W, b, b_stride, sse); \
    }
  /* Defines vpx_get<W>x<H>var_c(): performs the same accumulation as the
   * variance functions but hands both raw statistics back through pointers
   * (*sse and *sum) instead of returning sse - sum^2 / (w * h).
   */
  #define GET_VAR(W, H) \
    void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \
                                 const uint8_t *b, int b_stride, \
                                 uint32_t *sse, int *sum) { \
      variance(a, a_stride, b, b_stride, W, H, sse, sum); \
    }
  /* Defines vpx_mse<W>x<H>_c(): performs the same accumulation as the variance
   * functions but skips the sse - sum^2 / (w * h) step; the sum of squared
   * differences is both stored in *sse and returned.
   */
  #define MSE(W, H) \
    uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                  const uint8_t *b, int b_stride, \
                                  uint32_t *sse) { \
      int unused_sum; \
      variance(a, a_stride, b, b_stride, W, H, sse, &unused_sum); \
      return *sse; \
    }
  168. /* All three forms of the variance are available in the same sizes. */
  169. #define VARIANCES(W, H) \
  170. VAR(W, H) \
  171. SUBPIX_VAR(W, H) \
  172. SUBPIX_AVG_VAR(W, H)
  173. VARIANCES(64, 64)
  174. VARIANCES(64, 32)
  175. VARIANCES(32, 64)
  176. VARIANCES(32, 32)
  177. VARIANCES(32, 16)
  178. VARIANCES(16, 32)
  179. VARIANCES(16, 16)
  180. VARIANCES(16, 8)
  181. VARIANCES(8, 16)
  182. VARIANCES(8, 8)
  183. VARIANCES(8, 4)
  184. VARIANCES(4, 8)
  185. VARIANCES(4, 4)
  186. GET_VAR(16, 16)
  187. GET_VAR(8, 8)
  188. MSE(16, 16)
  189. MSE(16, 8)
  190. MSE(8, 16)
  191. MSE(8, 8)
  // Writes the rounded average of two 8-bit predictions into comp_pred.
  //
  //   comp_pred   output, advanced by `width` samples per row
  //   pred        first prediction, advanced by `width` samples per row
  //   width       samples per row
  //   height      number of rows
  //   ref         second prediction, advanced by ref_stride samples per row
  //   ref_stride  distance between the starts of consecutive rows of ref
  //
  // Each output is ROUND_POWER_OF_TWO(pred[j] + ref[j], 1).
  void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                           int height, const uint8_t *ref, int ref_stride) {
    int row;

    for (row = 0; row < height; ++row) {
      int col;
      for (col = 0; col < width; ++col) {
        const int pair_sum = pred[col] + ref[col];
        comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
      }
      comp_pred += width;
      pred += width;
      ref += ref_stride;
    }
  }
  205. #if CONFIG_VP9_HIGHBITDEPTH
  // High-bit-depth counterpart of variance(): accumulates the per-sample
  // differences over a w x h block into 64-bit totals.
  //
  //   a8, b8              block pointers; they are passed through
  //                       CONVERT_TO_SHORTPTR before being read as uint16_t
  //                       samples
  //   a_stride, b_stride  distance between rows, in uint16_t samples
  //   w, h                block width and height in samples
  //   sse                 receives the sum of squared differences
  //   sum                 receives the (signed) sum of differences
  static void highbd_variance64(const uint8_t *a8, int a_stride,
                                const uint8_t *b8, int b_stride, int w, int h,
                                uint64_t *sse, int64_t *sum) {
    uint16_t *src = CONVERT_TO_SHORTPTR(a8);
    uint16_t *ref = CONVERT_TO_SHORTPTR(b8);
    int row;

    *sum = 0;
    *sse = 0;

    for (row = 0; row < h; ++row, src += a_stride, ref += b_stride) {
      int col;
      for (col = 0; col < w; ++col) {
        const int delta = src[col] - ref[col];
        *sum += delta;
        *sse += delta * delta;
      }
    }
  }
  // Runs highbd_variance64() and narrows its 64-bit totals to the 32-bit
  // outputs without any scaling: *sse = (uint32_t)sse, *sum = (int)sum.
  static void highbd_8_variance(const uint8_t *a8, int a_stride,
                                const uint8_t *b8, int b_stride, int w, int h,
                                uint32_t *sse, int *sum) {
    uint64_t sse64 = 0;
    int64_t sum64 = 0;

    highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);

    *sse = (uint32_t)sse64;
    *sum = (int)sum64;
  }
  // Runs highbd_variance64() and scales its totals down before narrowing:
  // sse is rounded-shifted right by 4 bits and sum by 2 bits.
  // NOTE(review): presumably this normalizes 10-bit statistics to the 8-bit
  // scale (2 extra bits per sample, 4 per squared sample) -- confirm.
  static void highbd_10_variance(const uint8_t *a8, int a_stride,
                                 const uint8_t *b8, int b_stride, int w, int h,
                                 uint32_t *sse, int *sum) {
    uint64_t sse64 = 0;
    int64_t sum64 = 0;

    highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);

    *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 4);
    *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  }
  // Runs highbd_variance64() and scales its totals down before narrowing:
  // sse is rounded-shifted right by 8 bits and sum by 4 bits.
  // NOTE(review): presumably this normalizes 12-bit statistics to the 8-bit
  // scale (4 extra bits per sample, 8 per squared sample) -- confirm.
  static void highbd_12_variance(const uint8_t *a8, int a_stride,
                                 const uint8_t *b8, int b_stride, int w, int h,
                                 uint32_t *sse, int *sum) {
    uint64_t sse64 = 0;
    int64_t sum64 = 0;

    highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse64, &sum64);

    *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 8);
    *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  }
  /* Body shared by the 10- and 12-bit variance functions. BD selects both the
   * generated name (vpx_highbd_<BD>_variance<W>x<H>_c) and the statistics
   * helper (highbd_<BD>_variance). The result sse - sum^2 / (W * H) is computed
   * in 64 bits and clamped to zero when it would be negative.
   */
  #define HIGHBD_VAR_CLAMPED(BD, W, H) \
    uint32_t vpx_highbd_##BD##_variance##W##x##H##_c( \
        const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
        uint32_t *sse) { \
      int diff_sum; \
      int64_t var; \
      highbd_##BD##_variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum); \
      var = (int64_t)(*sse) - (((int64_t)diff_sum * diff_sum) / (W * H)); \
      if (var < 0) return 0; \
      return (uint32_t)var; \
    }

  /* Defines the 8-, 10- and 12-bit high-bit-depth variance functions for one
   * W x H block size. The 8-bit version returns *sse - sum^2 / (W * H) in
   * unsigned 32-bit arithmetic, without the clamp used by the other two.
   */
  #define HIGHBD_VAR(W, H) \
    uint32_t vpx_highbd_8_variance##W##x##H##_c( \
        const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, \
        uint32_t *sse) { \
      int diff_sum; \
      highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &diff_sum); \
      return *sse - (uint32_t)(((int64_t)diff_sum * diff_sum) / (W * H)); \
    } \
    \
    HIGHBD_VAR_CLAMPED(10, W, H) \
    \
    HIGHBD_VAR_CLAMPED(12, W, H)
  /* Defines vpx_highbd_<BD>_get<S>x<S>var_c(), a thin wrapper that forwards to
   * highbd_<BD>_variance() for an S x S block and returns both statistics
   * through *sse and *sum.
   */
  #define HIGHBD_GET_VAR_BD(BD, S) \
    void vpx_highbd_##BD##_get##S##x##S##var_c( \
        const uint8_t *src, int src_stride, const uint8_t *ref, \
        int ref_stride, uint32_t *sse, int *sum) { \
      highbd_##BD##_variance(src, src_stride, ref, ref_stride, S, S, sse, \
                             sum); \
    }

  /* Defines the 8-, 10- and 12-bit getters for one square block size S. */
  #define HIGHBD_GET_VAR(S) \
    HIGHBD_GET_VAR_BD(8, S) \
    \
    HIGHBD_GET_VAR_BD(10, S) \
    \
    HIGHBD_GET_VAR_BD(12, S)
  /* Defines vpx_highbd_<BD>_mse<W>x<H>_c(): runs highbd_<BD>_variance() on the
   * W x H block, discards the sum, and returns the value it stored in *sse.
   */
  #define HIGHBD_MSE_BD(BD, W, H) \
    uint32_t vpx_highbd_##BD##_mse##W##x##H##_c( \
        const uint8_t *src, int src_stride, const uint8_t *ref, \
        int ref_stride, uint32_t *sse) { \
      int unused_sum; \
      highbd_##BD##_variance(src, src_stride, ref, ref_stride, W, H, sse, \
                             &unused_sum); \
      return *sse; \
    }

  /* Defines the 8-, 10- and 12-bit MSE functions for one W x H block size. */
  #define HIGHBD_MSE(W, H) \
    HIGHBD_MSE_BD(8, W, H) \
    \
    HIGHBD_MSE_BD(10, W, H) \
    \
    HIGHBD_MSE_BD(12, W, H)
  321. static void highbd_var_filter_block2d_bil_first_pass(
  322. const uint8_t *src_ptr8, uint16_t *output_ptr,
  323. unsigned int src_pixels_per_line, int pixel_step,
  324. unsigned int output_height, unsigned int output_width,
  325. const uint8_t *filter) {
  326. unsigned int i, j;
  327. uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  328. for (i = 0; i < output_height; ++i) {
  329. for (j = 0; j < output_width; ++j) {
  330. output_ptr[j] = ROUND_POWER_OF_TWO(
  331. (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
  332. FILTER_BITS);
  333. ++src_ptr;
  334. }
  335. // Next row...
  336. src_ptr += src_pixels_per_line - output_width;
  337. output_ptr += output_width;
  338. }
  339. }
  340. static void highbd_var_filter_block2d_bil_second_pass(
  341. const uint16_t *src_ptr, uint16_t *output_ptr,
  342. unsigned int src_pixels_per_line, unsigned int pixel_step,
  343. unsigned int output_height, unsigned int output_width,
  344. const uint8_t *filter) {
  345. unsigned int i, j;
  346. for (i = 0; i < output_height; ++i) {
  347. for (j = 0; j < output_width; ++j) {
  348. output_ptr[j] = ROUND_POWER_OF_TWO(
  349. (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
  350. FILTER_BITS);
  351. ++src_ptr;
  352. }
  353. src_ptr += src_pixels_per_line - output_width;
  354. output_ptr += output_width;
  355. }
  356. }
  /* Defines vpx_highbd_<BD>_sub_pixel_variance<W>x<H>_c(): bilinearly
   * interpolates the block at src (horizontal pass over H + 1 rows with
   * bilinear_filters[xoffset], vertical pass over H rows with
   * bilinear_filters[yoffset]) and returns the <BD>-bit variance of the
   * interpolated W x H block against dst.
   */
  #define HIGHBD_SUBPIX_VAR_BD(BD, W, H) \
    uint32_t vpx_highbd_##BD##_sub_pixel_variance##W##x##H##_c( \
        const uint8_t *src, int src_stride, int xoffset, int yoffset, \
        const uint8_t *dst, int dst_stride, uint32_t *sse) { \
      uint16_t horiz[(H + 1) * W]; \
      uint16_t interp[H * W]; \
      \
      highbd_var_filter_block2d_bil_first_pass( \
          src, horiz, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \
      highbd_var_filter_block2d_bil_second_pass( \
          horiz, interp, W, W, H, W, bilinear_filters[yoffset]); \
      \
      return vpx_highbd_##BD##_variance##W##x##H##_c( \
          CONVERT_TO_BYTEPTR(interp), W, dst, dst_stride, sse); \
    }

  /* Defines the 8-, 10- and 12-bit sub-pixel variance functions for one W x H
   * block size.
   */
  #define HIGHBD_SUBPIX_VAR(W, H) \
    HIGHBD_SUBPIX_VAR_BD(8, W, H) \
    \
    HIGHBD_SUBPIX_VAR_BD(10, W, H) \
    \
    HIGHBD_SUBPIX_VAR_BD(12, W, H)
  /* Defines vpx_highbd_<BD>_sub_pixel_avg_variance<W>x<H>_c(): bilinearly
   * interpolates the block at src (kernels bilinear_filters[xoffset] and
   * bilinear_filters[yoffset]), averages the result with second_pred via
   * vpx_highbd_comp_avg_pred_c(), and returns the <BD>-bit variance of the
   * averaged W x H block against dst.
   */
  #define HIGHBD_SUBPIX_AVG_VAR_BD(BD, W, H) \
    uint32_t vpx_highbd_##BD##_sub_pixel_avg_variance##W##x##H##_c( \
        const uint8_t *src, int src_stride, int xoffset, int yoffset, \
        const uint8_t *dst, int dst_stride, uint32_t *sse, \
        const uint8_t *second_pred) { \
      uint16_t horiz[(H + 1) * W]; \
      uint16_t interp[H * W]; \
      DECLARE_ALIGNED(16, uint16_t, averaged[H * W]); \
      \
      highbd_var_filter_block2d_bil_first_pass( \
          src, horiz, src_stride, 1, H + 1, W, bilinear_filters[xoffset]); \
      highbd_var_filter_block2d_bil_second_pass( \
          horiz, interp, W, W, H, W, bilinear_filters[yoffset]); \
      \
      vpx_highbd_comp_avg_pred_c(averaged, second_pred, W, H, \
                                 CONVERT_TO_BYTEPTR(interp), W); \
      \
      return vpx_highbd_##BD##_variance##W##x##H##_c( \
          CONVERT_TO_BYTEPTR(averaged), W, dst, dst_stride, sse); \
    }

  /* Defines the 8-, 10- and 12-bit sub-pixel averaging variance functions for
   * one W x H block size.
   */
  #define HIGHBD_SUBPIX_AVG_VAR(W, H) \
    HIGHBD_SUBPIX_AVG_VAR_BD(8, W, H) \
    \
    HIGHBD_SUBPIX_AVG_VAR_BD(10, W, H) \
    \
    HIGHBD_SUBPIX_AVG_VAR_BD(12, W, H)
  462. /* All three forms of the variance are available in the same sizes. */
  463. #define HIGHBD_VARIANCES(W, H) \
  464. HIGHBD_VAR(W, H) \
  465. HIGHBD_SUBPIX_VAR(W, H) \
  466. HIGHBD_SUBPIX_AVG_VAR(W, H)
  467. HIGHBD_VARIANCES(64, 64)
  468. HIGHBD_VARIANCES(64, 32)
  469. HIGHBD_VARIANCES(32, 64)
  470. HIGHBD_VARIANCES(32, 32)
  471. HIGHBD_VARIANCES(32, 16)
  472. HIGHBD_VARIANCES(16, 32)
  473. HIGHBD_VARIANCES(16, 16)
  474. HIGHBD_VARIANCES(16, 8)
  475. HIGHBD_VARIANCES(8, 16)
  476. HIGHBD_VARIANCES(8, 8)
  477. HIGHBD_VARIANCES(8, 4)
  478. HIGHBD_VARIANCES(4, 8)
  479. HIGHBD_VARIANCES(4, 4)
  480. HIGHBD_GET_VAR(8)
  481. HIGHBD_GET_VAR(16)
  482. HIGHBD_MSE(16, 16)
  483. HIGHBD_MSE(16, 8)
  484. HIGHBD_MSE(8, 16)
  485. HIGHBD_MSE(8, 8)
  // Writes the rounded average of two high-bit-depth predictions into
  // comp_pred.
  //
  //   comp_pred   16-bit output, advanced by `width` samples per row
  //   pred8       first prediction; passed through CONVERT_TO_SHORTPTR and
  //               read as uint16_t, advanced by `width` samples per row
  //   width       samples per row
  //   height      number of rows
  //   ref8        second prediction; passed through CONVERT_TO_SHORTPTR and
  //               read as uint16_t, advanced by ref_stride samples per row
  //   ref_stride  distance between the starts of consecutive rows of ref8
  //
  // Each output is ROUND_POWER_OF_TWO(pred[j] + ref[j], 1).
  //
  // NOTE(review): the sub-pixel averaging macros in this file call
  // vpx_highbd_comp_avg_pred_c; presumably a header maps this name onto that
  // one -- confirm.
  void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
    uint16_t *first = CONVERT_TO_SHORTPTR(pred8);
    uint16_t *second = CONVERT_TO_SHORTPTR(ref8);
    int row;

    for (row = 0; row < height; ++row) {
      int col;
      for (col = 0; col < width; ++col) {
        const int pair_sum = first[col] + second[col];
        comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
      }
      comp_pred += width;
      first += width;
      second += ref_stride;
    }
  }
  502. #endif // CONFIG_VP9_HIGHBITDEPTH