variance_msa.c

/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"
#define CALC_MSE_B(src, ref, var) \
  { \
    v16u8 src_l0_m, src_l1_m; \
    v8i16 res_l0_m, res_l1_m; \
 \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
  }

#define CALC_MSE_AVG_B(src, ref, var, sub) \
  { \
    v16u8 src_l0_m, src_l1_m; \
    v8i16 res_l0_m, res_l1_m; \
 \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
 \
    sub += res_l0_m + res_l1_m; \
  }

#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift)

#define VARIANCE_LARGE_WxH(sse, diff, shift) \
  sse - (((int64_t)diff * diff) >> shift)
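
/* For reference: for a w x h block, shift == log2(w * h), so both macros
 * compute variance = sse - (sum * sum) / (w * h). The _LARGE_ variant widens
 * the product to 64 bits, presumably because for blocks of 512 or more pixels
 * (16x32 and up) the squared sum can exceed uint32_t range: |sum| can reach
 * 255 * 512 = 130560, whose square is about 1.7e10. */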
static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    int32_t height, int32_t *diff) {
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  int32_t ht_cnt;
  v16u8 src = { 0 };
  v16u8 ref = { 0 };
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}
static uint32_t sse_diff_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}
static uint32_t sse_diff_16width_msa(const uint8_t *src_ptr,
                                     int32_t src_stride,
                                     const uint8_t *ref_ptr,
                                     int32_t ref_stride, int32_t height,
                                     int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src, ref;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}
static uint32_t sse_diff_32width_msa(const uint8_t *src_ptr,
                                     int32_t src_stride,
                                     const uint8_t *ref_ptr,
                                     int32_t ref_stride, int32_t height,
                                     int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}
static uint32_t sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 16; ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}
static uint32_t sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 16; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src2, ref2, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src3, ref3, var, avg1);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src2, ref2, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src3, ref3, var, avg1);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}
static uint32_t sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v8i16 avg2 = { 0 };
  v8i16 avg3 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 32; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src2, ref2, var, avg2);
    CALC_MSE_AVG_B(src3, ref3, var, avg3);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src2, ref2, var, avg2);
    CALC_MSE_AVG_B(src3, ref3, var, avg3);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  vec += __msa_hadd_s_w(avg2, avg2);
  vec += __msa_hadd_s_w(avg3, avg3);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}
static uint32_t get_mb_ss_msa(const int16_t *src) {
  uint32_t sum, cnt;
  v8i16 src0, src1, src2, src3;
  v4i32 src0_l, src1_l, src2_l, src3_l;
  v4i32 src0_r, src1_r, src2_r, src3_r;
  v2i64 sq_src_l = { 0 };
  v2i64 sq_src_r = { 0 };

  for (cnt = 8; cnt--;) {
    LD_SH4(src, 8, src0, src1, src2, src3);
    src += 4 * 8;

    UNPCK_SH_SW(src0, src0_l, src0_r);
    UNPCK_SH_SW(src1, src1_l, src1_r);
    UNPCK_SH_SW(src2, src2_l, src2_r);
    UNPCK_SH_SW(src3, src3_l, src3_r);

    DPADD_SD2_SD(src0_l, src0_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src1_l, src1_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src2_l, src2_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src3_l, src3_r, sq_src_l, sq_src_r);
  }

  sq_src_l += __msa_splati_d(sq_src_l, 1);
  sq_src_r += __msa_splati_d(sq_src_r, 1);

  sum = __msa_copy_s_d(sq_src_l, 0);
  sum += __msa_copy_s_d(sq_src_r, 0);

  return sum;
}
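
/* Editorial note: get_mb_ss_msa above accumulates the squares of 256 int16
 * values (8 loop iterations x 4 rows x 8 halfwords), i.e. one 16x16 block of
 * residuals. The sse_*width_msa helpers below mirror the sse_diff_* versions
 * but accumulate only the sum of squared differences, with no block sum,
 * which is all the vpx_mseNxM_msa entry points need. */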
static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride,
                               int32_t height) {
  int32_t ht_cnt;
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16u8 src = { 0 };
  v16u8 ref = { 0 };
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    CALC_MSE_B(src, ref, var);
  }

  return HADD_SW_S32(var);
}
static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride,
                               int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);
  }

  return HADD_SW_S32(var);
}
static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src, ref;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);
  }

  return HADD_SW_S32(var);
}
static uint32_t sse_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);
  }

  return HADD_SW_S32(var);
}
static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v4i32 var = { 0 };

  for (ht_cnt = height >> 1; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src2, ref2, var);
    CALC_MSE_B(src1, ref1, var);
    CALC_MSE_B(src3, ref3, var);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src2, ref2, var);
    CALC_MSE_B(src1, ref1, var);
    CALC_MSE_B(src3, ref3, var);
  }

  return HADD_SW_S32(var);
}
uint32_t vpx_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
                              const uint8_t *ref_ptr, int32_t ref_stride) {
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16i8 src = { 0 };
  v16i8 ref = { 0 };
  v4i32 err0 = { 0 };

  LW4(src_ptr, src_stride, src0, src1, src2, src3);
  LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
  INSERT_W4_SB(src0, src1, src2, src3, src);
  INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
  CALC_MSE_B(src, ref, err0);

  return HADD_SW_S32(err0);
}
#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5);
#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5);
#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6);
#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7);
#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7);
#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8);

#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10);
#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12);
#define VPX_VARIANCE_WDXHT_MSA(wd, ht) \
  uint32_t vpx_variance##wd##x##ht##_msa( \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
      int32_t ref_stride, uint32_t *sse) { \
    int32_t diff; \
 \
    *sse = \
        sse_diff_##wd##width_msa(src, src_stride, ref, ref_stride, ht, &diff); \
 \
    return VARIANCE_##wd##Wx##ht##H(*sse, diff); \
  }
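
/* For illustration only (not part of the original source):
 * VPX_VARIANCE_WDXHT_MSA(8, 8) expands to roughly
 *
 *   uint32_t vpx_variance8x8_msa(const uint8_t *src, int32_t src_stride,
 *                                const uint8_t *ref, int32_t ref_stride,
 *                                uint32_t *sse) {
 *     int32_t diff;
 *     *sse = sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, &diff);
 *     return *sse - (((uint32_t)diff * diff) >> 6);  // 6 == log2(8 * 8)
 *   }
 *
 * i.e. one thin wrapper per block size around the shared SSE/sum kernels. */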
VPX_VARIANCE_WDXHT_MSA(4, 4)
VPX_VARIANCE_WDXHT_MSA(4, 8)

VPX_VARIANCE_WDXHT_MSA(8, 4)
VPX_VARIANCE_WDXHT_MSA(8, 8)
VPX_VARIANCE_WDXHT_MSA(8, 16)

VPX_VARIANCE_WDXHT_MSA(16, 8)
VPX_VARIANCE_WDXHT_MSA(16, 16)
VPX_VARIANCE_WDXHT_MSA(16, 32)

VPX_VARIANCE_WDXHT_MSA(32, 16)
VPX_VARIANCE_WDXHT_MSA(32, 32)
uint32_t vpx_variance32x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_32x64_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_32Wx64H(*sse, diff);
}

uint32_t vpx_variance64x32_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_64x32_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_64Wx32H(*sse, diff);
}

uint32_t vpx_variance64x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_64x64_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_64Wx64H(*sse, diff);
}

uint32_t vpx_mse8x8_msa(const uint8_t *src, int32_t src_stride,
                        const uint8_t *ref, int32_t ref_stride,
                        uint32_t *sse) {
  *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 8);

  return *sse;
}

uint32_t vpx_mse8x16_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 16);

  return *sse;
}

uint32_t vpx_mse16x8_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 8);

  return *sse;
}

uint32_t vpx_mse16x16_msa(const uint8_t *src, int32_t src_stride,
                          const uint8_t *ref, int32_t ref_stride,
                          uint32_t *sse) {
  *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 16);

  return *sse;
}

void vpx_get8x8var_msa(const uint8_t *src, int32_t src_stride,
                       const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                       int32_t *sum) {
  *sse = sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, sum);
}

void vpx_get16x16var_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                         int32_t *sum) {
  *sse = sse_diff_16width_msa(src, src_stride, ref, ref_stride, 16, sum);
}

uint32_t vpx_get_mb_ss_msa(const int16_t *src) { return get_mb_ss_msa(src); }
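
/* Usage sketch (illustrative only, not part of the library): a caller that
 * wants the variance of a 16x16 block between two frames would typically do
 *
 *   uint32_t sse;
 *   uint32_t var = vpx_variance16x16_msa(src, src_stride, ref, ref_stride,
 *                                        &sse);
 *
 * where src and ref point to the top-left pixel of each block. In libvpx
 * these functions are normally reached through the vpx_variance16x16 RTCD
 * dispatch declared in vpx_dsp_rtcd.h rather than called directly. */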