vp9_mcomp.c 87 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <assert.h>
  11. #include <limits.h>
  12. #include <math.h>
  13. #include <stdio.h>
  14. #include "./vpx_config.h"
  15. #include "./vpx_dsp_rtcd.h"
  16. #include "vpx_dsp/vpx_dsp_common.h"
  17. #include "vpx_mem/vpx_mem.h"
  18. #include "vpx_ports/mem.h"
  19. #include "vp9/common/vp9_common.h"
  20. #include "vp9/common/vp9_mvref_common.h"
  21. #include "vp9/common/vp9_reconinter.h"
  22. #include "vp9/encoder/vp9_encoder.h"
  23. #include "vp9/encoder/vp9_mcomp.h"
  24. // #define NEW_DIAMOND_SEARCH
  25. static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
  26. const MV *mv) {
  27. return &buf->buf[mv->row * buf->stride + mv->col];
  28. }
  29. void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  30. int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  31. int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  32. int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  33. int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
  34. col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
  35. row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
  36. col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
  37. row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
  38. // Get intersection of UMV window and valid MV window to reduce # of checks
  39. // in diamond search.
  40. if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
  41. if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
  42. if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
  43. if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
  44. }
  45. void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
  46. const MvLimits *umv_window_limits,
  47. const MV *ref_mv) {
  48. subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8,
  49. ref_mv->col - MAX_FULL_PEL_VAL * 8);
  50. subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8,
  51. ref_mv->col + MAX_FULL_PEL_VAL * 8);
  52. subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8,
  53. ref_mv->row - MAX_FULL_PEL_VAL * 8);
  54. subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8,
  55. ref_mv->row + MAX_FULL_PEL_VAL * 8);
  56. subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min);
  57. subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max);
  58. subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min);
  59. subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max);
  60. }
  61. int vp9_init_search_range(int size) {
  62. int sr = 0;
  63. // Minimum search size no matter what the passed in value.
  64. size = VPXMAX(16, size);
  65. while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
  66. sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
  67. return sr;
  68. }
  69. static INLINE int mv_cost(const MV *mv, const int *joint_cost,
  70. int *const comp_cost[2]) {
  71. assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
  72. assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
  73. return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] +
  74. comp_cost[1][mv->col];
  75. }
  76. int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
  77. int *mvcost[2], int weight) {
  78. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  79. return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
  80. }
  81. #define PIXEL_TRANSFORM_ERROR_SCALE 4
  82. static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
  83. int *mvcost[2], int error_per_bit) {
  84. if (mvcost) {
  85. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  86. return (int)ROUND64_POWER_OF_TWO(
  87. (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
  88. RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
  89. PIXEL_TRANSFORM_ERROR_SCALE);
  90. }
  91. return 0;
  92. }
  93. static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
  94. int sad_per_bit) {
  95. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  96. return ROUND_POWER_OF_TWO(
  97. (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
  98. VP9_PROB_COST_SHIFT);
  99. }
  100. void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  101. int len;
  102. int ss_count = 0;
  103. for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
  104. // Generate offsets for 4 search sites per step.
  105. const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
  106. int i;
  107. for (i = 0; i < 4; ++i, ++ss_count) {
  108. cfg->ss_mv[ss_count] = ss_mvs[i];
  109. cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
  110. }
  111. }
  112. cfg->searches_per_step = 4;
  113. cfg->total_steps = ss_count / cfg->searches_per_step;
  114. }
  115. void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
  116. int len;
  117. int ss_count = 0;
  118. for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
  119. // Generate offsets for 8 search sites per step.
  120. const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len },
  121. { 0, len }, { -len, -len }, { -len, len },
  122. { len, -len }, { len, len } };
  123. int i;
  124. for (i = 0; i < 8; ++i, ++ss_count) {
  125. cfg->ss_mv[ss_count] = ss_mvs[i];
  126. cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
  127. }
  128. }
  129. cfg->searches_per_step = 8;
  130. cfg->total_steps = ss_count / cfg->searches_per_step;
  131. }
  132. // convert motion vector component to offset for sv[a]f calc
  133. static INLINE int sp(int x) { return x & 7; }
  134. static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  135. return &buf[(r >> 3) * stride + (c >> 3)];
  136. }
  137. #if CONFIG_VP9_HIGHBITDEPTH
  138. /* checks if (r, c) has better score than previous best */
  139. #define CHECK_BETTER(v, r, c) \
  140. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  141. int64_t tmpmse; \
  142. const MV mv = { r, c }; \
  143. const MV ref_mv = { rr, rc }; \
  144. if (second_pred == NULL) { \
  145. thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  146. src_stride, &sse); \
  147. } else { \
  148. thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  149. src_stride, &sse, second_pred); \
  150. } \
  151. tmpmse = thismse; \
  152. tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \
  153. if (tmpmse >= INT_MAX) { \
  154. v = INT_MAX; \
  155. } else if ((v = (uint32_t)tmpmse) < besterr) { \
  156. besterr = v; \
  157. br = r; \
  158. bc = c; \
  159. *distortion = thismse; \
  160. *sse1 = sse; \
  161. } \
  162. } else { \
  163. v = INT_MAX; \
  164. }
  165. #else
  166. /* checks if (r, c) has better score than previous best */
  167. #define CHECK_BETTER(v, r, c) \
  168. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  169. const MV mv = { r, c }; \
  170. const MV ref_mv = { rr, rc }; \
  171. if (second_pred == NULL) \
  172. thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  173. src_stride, &sse); \
  174. else \
  175. thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  176. src_stride, &sse, second_pred); \
  177. if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \
  178. thismse) < besterr) { \
  179. besterr = v; \
  180. br = r; \
  181. bc = c; \
  182. *distortion = thismse; \
  183. *sse1 = sse; \
  184. } \
  185. } else { \
  186. v = INT_MAX; \
  187. }
  188. #endif
  189. #define FIRST_LEVEL_CHECKS \
  190. { \
  191. unsigned int left, right, up, down, diag; \
  192. CHECK_BETTER(left, tr, tc - hstep); \
  193. CHECK_BETTER(right, tr, tc + hstep); \
  194. CHECK_BETTER(up, tr - hstep, tc); \
  195. CHECK_BETTER(down, tr + hstep, tc); \
  196. whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
  197. switch (whichdir) { \
  198. case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
  199. case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
  200. case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
  201. case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
  202. } \
  203. }
  204. #define SECOND_LEVEL_CHECKS \
  205. { \
  206. int kr, kc; \
  207. unsigned int second; \
  208. if (tr != br && tc != bc) { \
  209. kr = br - tr; \
  210. kc = bc - tc; \
  211. CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
  212. CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
  213. } else if (tr == br && tc != bc) { \
  214. kc = bc - tc; \
  215. CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
  216. CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
  217. switch (whichdir) { \
  218. case 0: \
  219. case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
  220. case 2: \
  221. case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
  222. } \
  223. } else if (tr != br && tc == bc) { \
  224. kr = br - tr; \
  225. CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
  226. CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
  227. switch (whichdir) { \
  228. case 0: \
  229. case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
  230. case 1: \
  231. case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
  232. } \
  233. } \
  234. }
  235. // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
  236. // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
  237. // later in the same way.
  238. #define SECOND_LEVEL_CHECKS_BEST \
  239. { \
  240. unsigned int second; \
  241. int br0 = br; \
  242. int bc0 = bc; \
  243. assert(tr == br || tc == bc); \
  244. if (tr == br && tc != bc) { \
  245. kc = bc - tc; \
  246. } else if (tr != br && tc == bc) { \
  247. kr = br - tr; \
  248. } \
  249. CHECK_BETTER(second, br0 + kr, bc0); \
  250. CHECK_BETTER(second, br0, bc0 + kc); \
  251. if (br0 != br || bc0 != bc) { \
  252. CHECK_BETTER(second, br0 + kr, bc0 + kc); \
  253. } \
  254. }
  255. #define SETUP_SUBPEL_SEARCH \
  256. const uint8_t *const z = x->plane[0].src.buf; \
  257. const int src_stride = x->plane[0].src.stride; \
  258. const MACROBLOCKD *xd = &x->e_mbd; \
  259. unsigned int besterr = UINT_MAX; \
  260. unsigned int sse; \
  261. unsigned int whichdir; \
  262. int thismse; \
  263. const unsigned int halfiters = iters_per_step; \
  264. const unsigned int quarteriters = iters_per_step; \
  265. const unsigned int eighthiters = iters_per_step; \
  266. const int y_stride = xd->plane[0].pre[0].stride; \
  267. const int offset = bestmv->row * y_stride + bestmv->col; \
  268. const uint8_t *const y = xd->plane[0].pre[0].buf; \
  269. \
  270. int rr = ref_mv->row; \
  271. int rc = ref_mv->col; \
  272. int br = bestmv->row * 8; \
  273. int bc = bestmv->col * 8; \
  274. int hstep = 4; \
  275. int minc, maxc, minr, maxr; \
  276. int tr = br; \
  277. int tc = bc; \
  278. MvLimits subpel_mv_limits; \
  279. \
  280. vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \
  281. minc = subpel_mv_limits.col_min; \
  282. maxc = subpel_mv_limits.col_max; \
  283. minr = subpel_mv_limits.row_min; \
  284. maxr = subpel_mv_limits.row_max; \
  285. \
  286. bestmv->row *= 8; \
  287. bestmv->col *= 8;
  288. static unsigned int setup_center_error(
  289. const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
  290. int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
  291. const uint8_t *const src, const int src_stride, const uint8_t *const y,
  292. int y_stride, const uint8_t *second_pred, int w, int h, int offset,
  293. int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) {
  294. #if CONFIG_VP9_HIGHBITDEPTH
  295. uint64_t besterr;
  296. if (second_pred != NULL) {
  297. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  298. DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
  299. vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
  300. y_stride);
  301. besterr =
  302. vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
  303. } else {
  304. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  305. vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
  306. besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  307. }
  308. } else {
  309. besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  310. }
  311. *distortion = (uint32_t)besterr;
  312. besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  313. if (besterr >= UINT_MAX) return UINT_MAX;
  314. return (uint32_t)besterr;
  315. #else
  316. uint32_t besterr;
  317. (void)xd;
  318. if (second_pred != NULL) {
  319. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  320. vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
  321. besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  322. } else {
  323. besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  324. }
  325. *distortion = besterr;
  326. besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  327. return besterr;
  328. #endif // CONFIG_VP9_HIGHBITDEPTH
  329. }
  330. static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) {
  331. return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
  332. }
  333. static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  334. return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
  335. cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
  336. }
  337. // Returns surface minima estimate at given precision in 1/2^n bits.
  338. // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
  339. // For a given set of costs S0, S1, S2, S3, S4 at points
  340. // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
  341. // the solution for the location of the minima (x0, y0) is given by:
  342. // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
  343. // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
  344. // The code below is an integerized version of that.
  345. static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  346. const int64_t x0 = (int64_t)cost_list[1] - cost_list[3];
  347. const int64_t y0 = cost_list[1] - 2 * (int64_t)cost_list[0] + cost_list[3];
  348. const int64_t x1 = (int64_t)cost_list[4] - cost_list[2];
  349. const int64_t y1 = cost_list[4] - 2 * (int64_t)cost_list[0] + cost_list[2];
  350. const int b = 1 << (bits - 1);
  351. *ic = (int)divide_and_round(x0 * b, y0);
  352. *ir = (int)divide_and_round(x1 * b, y1);
  353. }
  354. uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
  355. const MV *ref_mv, int allow_hp,
  356. int error_per_bit,
  357. const vp9_variance_fn_ptr_t *vfp,
  358. int forced_stop, int iters_per_step,
  359. int *cost_list, int *mvjcost, int *mvcost[2],
  360. uint32_t *distortion, uint32_t *sse1,
  361. const uint8_t *second_pred, int w, int h) {
  362. SETUP_SUBPEL_SEARCH;
  363. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  364. src_stride, y, y_stride, second_pred, w, h,
  365. offset, mvjcost, mvcost, sse1, distortion);
  366. (void)halfiters;
  367. (void)quarteriters;
  368. (void)eighthiters;
  369. (void)whichdir;
  370. (void)allow_hp;
  371. (void)forced_stop;
  372. (void)hstep;
  373. (void)rr;
  374. (void)rc;
  375. (void)minr;
  376. (void)minc;
  377. (void)maxr;
  378. (void)maxc;
  379. (void)tr;
  380. (void)tc;
  381. (void)sse;
  382. (void)thismse;
  383. (void)cost_list;
  384. return besterr;
  385. }
  386. uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
  387. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  388. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  389. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  390. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  391. int h) {
  392. SETUP_SUBPEL_SEARCH;
  393. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  394. src_stride, y, y_stride, second_pred, w, h,
  395. offset, mvjcost, mvcost, sse1, distortion);
  396. (void)halfiters;
  397. (void)quarteriters;
  398. (void)eighthiters;
  399. (void)whichdir;
  400. (void)allow_hp;
  401. (void)forced_stop;
  402. (void)hstep;
  403. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  404. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  405. cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
  406. int ir, ic;
  407. unsigned int minpt = INT_MAX;
  408. get_cost_surf_min(cost_list, &ir, &ic, 2);
  409. if (ir != 0 || ic != 0) {
  410. CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
  411. }
  412. } else {
  413. FIRST_LEVEL_CHECKS;
  414. if (halfiters > 1) {
  415. SECOND_LEVEL_CHECKS;
  416. }
  417. tr = br;
  418. tc = bc;
  419. // Each subsequent iteration checks at least one point in common with
  420. // the last iteration could be 2 ( if diag selected) 1/4 pel
  421. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  422. if (forced_stop != 2) {
  423. hstep >>= 1;
  424. FIRST_LEVEL_CHECKS;
  425. if (quarteriters > 1) {
  426. SECOND_LEVEL_CHECKS;
  427. }
  428. }
  429. }
  430. tr = br;
  431. tc = bc;
  432. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  433. hstep >>= 1;
  434. FIRST_LEVEL_CHECKS;
  435. if (eighthiters > 1) {
  436. SECOND_LEVEL_CHECKS;
  437. }
  438. }
  439. bestmv->row = br;
  440. bestmv->col = bc;
  441. return besterr;
  442. }
  443. uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
  444. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  445. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  446. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  447. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  448. int h) {
  449. SETUP_SUBPEL_SEARCH;
  450. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  451. src_stride, y, y_stride, second_pred, w, h,
  452. offset, mvjcost, mvcost, sse1, distortion);
  453. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  454. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  455. cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
  456. unsigned int minpt;
  457. int ir, ic;
  458. get_cost_surf_min(cost_list, &ir, &ic, 1);
  459. if (ir != 0 || ic != 0) {
  460. CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
  461. }
  462. } else {
  463. FIRST_LEVEL_CHECKS;
  464. if (halfiters > 1) {
  465. SECOND_LEVEL_CHECKS;
  466. }
  467. }
  468. // Each subsequent iteration checks at least one point in common with
  469. // the last iteration could be 2 ( if diag selected) 1/4 pel
  470. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  471. if (forced_stop != 2) {
  472. tr = br;
  473. tc = bc;
  474. hstep >>= 1;
  475. FIRST_LEVEL_CHECKS;
  476. if (quarteriters > 1) {
  477. SECOND_LEVEL_CHECKS;
  478. }
  479. }
  480. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  481. tr = br;
  482. tc = bc;
  483. hstep >>= 1;
  484. FIRST_LEVEL_CHECKS;
  485. if (eighthiters > 1) {
  486. SECOND_LEVEL_CHECKS;
  487. }
  488. }
  489. // These lines insure static analysis doesn't warn that
  490. // tr and tc aren't used after the above point.
  491. (void)tr;
  492. (void)tc;
  493. bestmv->row = br;
  494. bestmv->col = bc;
  495. return besterr;
  496. }
  497. uint32_t vp9_find_best_sub_pixel_tree_pruned(
  498. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  499. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  500. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  501. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  502. int h) {
  503. SETUP_SUBPEL_SEARCH;
  504. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  505. src_stride, y, y_stride, second_pred, w, h,
  506. offset, mvjcost, mvcost, sse1, distortion);
  507. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  508. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  509. cost_list[4] != INT_MAX) {
  510. unsigned int left, right, up, down, diag;
  511. whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
  512. (cost_list[2] < cost_list[4] ? 0 : 2);
  513. switch (whichdir) {
  514. case 0:
  515. CHECK_BETTER(left, tr, tc - hstep);
  516. CHECK_BETTER(down, tr + hstep, tc);
  517. CHECK_BETTER(diag, tr + hstep, tc - hstep);
  518. break;
  519. case 1:
  520. CHECK_BETTER(right, tr, tc + hstep);
  521. CHECK_BETTER(down, tr + hstep, tc);
  522. CHECK_BETTER(diag, tr + hstep, tc + hstep);
  523. break;
  524. case 2:
  525. CHECK_BETTER(left, tr, tc - hstep);
  526. CHECK_BETTER(up, tr - hstep, tc);
  527. CHECK_BETTER(diag, tr - hstep, tc - hstep);
  528. break;
  529. case 3:
  530. CHECK_BETTER(right, tr, tc + hstep);
  531. CHECK_BETTER(up, tr - hstep, tc);
  532. CHECK_BETTER(diag, tr - hstep, tc + hstep);
  533. break;
  534. }
  535. } else {
  536. FIRST_LEVEL_CHECKS;
  537. if (halfiters > 1) {
  538. SECOND_LEVEL_CHECKS;
  539. }
  540. }
  541. tr = br;
  542. tc = bc;
  543. // Each subsequent iteration checks at least one point in common with
  544. // the last iteration could be 2 ( if diag selected) 1/4 pel
  545. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  546. if (forced_stop != 2) {
  547. hstep >>= 1;
  548. FIRST_LEVEL_CHECKS;
  549. if (quarteriters > 1) {
  550. SECOND_LEVEL_CHECKS;
  551. }
  552. tr = br;
  553. tc = bc;
  554. }
  555. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  556. hstep >>= 1;
  557. FIRST_LEVEL_CHECKS;
  558. if (eighthiters > 1) {
  559. SECOND_LEVEL_CHECKS;
  560. }
  561. tr = br;
  562. tc = bc;
  563. }
  564. // These lines insure static analysis doesn't warn that
  565. // tr and tc aren't used after the above point.
  566. (void)tr;
  567. (void)tc;
  568. bestmv->row = br;
  569. bestmv->col = bc;
  570. return besterr;
  571. }
  572. /* clang-format off */
  573. static const MV search_step_table[12] = {
  574. // left, right, up, down
  575. { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  576. { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  577. { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
  578. };
  579. /* clang-format on */
  580. uint32_t vp9_find_best_sub_pixel_tree(
  581. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  582. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  583. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  584. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  585. int h) {
  586. const uint8_t *const z = x->plane[0].src.buf;
  587. const uint8_t *const src_address = z;
  588. const int src_stride = x->plane[0].src.stride;
  589. const MACROBLOCKD *xd = &x->e_mbd;
  590. unsigned int besterr = UINT_MAX;
  591. unsigned int sse;
  592. int thismse;
  593. const int y_stride = xd->plane[0].pre[0].stride;
  594. const int offset = bestmv->row * y_stride + bestmv->col;
  595. const uint8_t *const y = xd->plane[0].pre[0].buf;
  596. int rr = ref_mv->row;
  597. int rc = ref_mv->col;
  598. int br = bestmv->row * 8;
  599. int bc = bestmv->col * 8;
  600. int hstep = 4;
  601. int iter, round = 3 - forced_stop;
  602. int minc, maxc, minr, maxr;
  603. int tr = br;
  604. int tc = bc;
  605. const MV *search_step = search_step_table;
  606. int idx, best_idx = -1;
  607. unsigned int cost_array[5];
  608. int kr, kc;
  609. MvLimits subpel_mv_limits;
  610. vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
  611. minc = subpel_mv_limits.col_min;
  612. maxc = subpel_mv_limits.col_max;
  613. minr = subpel_mv_limits.row_min;
  614. maxr = subpel_mv_limits.row_max;
  615. if (!(allow_hp && use_mv_hp(ref_mv)))
  616. if (round == 3) round = 2;
  617. bestmv->row *= 8;
  618. bestmv->col *= 8;
  619. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  620. src_stride, y, y_stride, second_pred, w, h,
  621. offset, mvjcost, mvcost, sse1, distortion);
  622. (void)cost_list; // to silence compiler warning
  623. for (iter = 0; iter < round; ++iter) {
  624. // Check vertical and horizontal sub-pixel positions.
  625. for (idx = 0; idx < 4; ++idx) {
  626. tr = br + search_step[idx].row;
  627. tc = bc + search_step[idx].col;
  628. if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
  629. const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
  630. MV this_mv;
  631. this_mv.row = tr;
  632. this_mv.col = tc;
  633. if (second_pred == NULL)
  634. thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
  635. src_stride, &sse);
  636. else
  637. thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
  638. src_address, src_stride, &sse, second_pred);
  639. cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
  640. mvcost, error_per_bit);
  641. if (cost_array[idx] < besterr) {
  642. best_idx = idx;
  643. besterr = cost_array[idx];
  644. *distortion = thismse;
  645. *sse1 = sse;
  646. }
  647. } else {
  648. cost_array[idx] = UINT_MAX;
  649. }
  650. }
  651. // Check diagonal sub-pixel position
  652. kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
  653. kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
  654. tc = bc + kc;
  655. tr = br + kr;
  656. if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
  657. const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
  658. MV this_mv = { tr, tc };
  659. if (second_pred == NULL)
  660. thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
  661. src_stride, &sse);
  662. else
  663. thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
  664. src_stride, &sse, second_pred);
  665. cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
  666. error_per_bit);
  667. if (cost_array[4] < besterr) {
  668. best_idx = 4;
  669. besterr = cost_array[4];
  670. *distortion = thismse;
  671. *sse1 = sse;
  672. }
  673. } else {
  674. cost_array[idx] = UINT_MAX;
  675. }
  676. if (best_idx < 4 && best_idx >= 0) {
  677. br += search_step[best_idx].row;
  678. bc += search_step[best_idx].col;
  679. } else if (best_idx == 4) {
  680. br = tr;
  681. bc = tc;
  682. }
  683. if (iters_per_step > 1 && best_idx != -1) SECOND_LEVEL_CHECKS_BEST;
  684. tr = br;
  685. tc = bc;
  686. search_step += 4;
  687. hstep >>= 1;
  688. best_idx = -1;
  689. }
  690. // Each subsequent iteration checks at least one point in common with
  691. // the last iteration could be 2 ( if diag selected) 1/4 pel
  692. // These lines insure static analysis doesn't warn that
  693. // tr and tc aren't used after the above point.
  694. (void)tr;
  695. (void)tc;
  696. bestmv->row = br;
  697. bestmv->col = bc;
  698. return besterr;
  699. }
  700. #undef CHECK_BETTER
  701. static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
  702. int range) {
  703. return ((row - range) >= mv_limits->row_min) &
  704. ((row + range) <= mv_limits->row_max) &
  705. ((col - range) >= mv_limits->col_min) &
  706. ((col + range) <= mv_limits->col_max);
  707. }
  708. static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
  709. return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
  710. (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
  711. }
  712. #define CHECK_BETTER \
  713. { \
  714. if (thissad < bestsad) { \
  715. if (use_mvcost) \
  716. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
  717. if (thissad < bestsad) { \
  718. bestsad = thissad; \
  719. best_site = i; \
  720. } \
  721. } \
  722. }
  723. #define MAX_PATTERN_SCALES 11
  724. #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale
  725. #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
  726. // Calculate and return a sad+mvcost list around an integer best pel.
  727. static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
  728. int sadpb,
  729. const vp9_variance_fn_ptr_t *fn_ptr,
  730. const MV *best_mv, int *cost_list) {
  731. static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  732. const struct buf_2d *const what = &x->plane[0].src;
  733. const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  734. const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  735. int br = best_mv->row;
  736. int bc = best_mv->col;
  737. MV this_mv;
  738. int i;
  739. unsigned int sse;
  740. this_mv.row = br;
  741. this_mv.col = bc;
  742. cost_list[0] =
  743. fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
  744. in_what->stride, &sse) +
  745. mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  746. if (check_bounds(&x->mv_limits, br, bc, 1)) {
  747. for (i = 0; i < 4; i++) {
  748. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  749. cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
  750. get_buf_from_mv(in_what, &this_mv),
  751. in_what->stride, &sse) +
  752. mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
  753. x->mvcost, x->errorperbit);
  754. }
  755. } else {
  756. for (i = 0; i < 4; i++) {
  757. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  758. if (!is_mv_in(&x->mv_limits, &this_mv))
  759. cost_list[i + 1] = INT_MAX;
  760. else
  761. cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
  762. get_buf_from_mv(in_what, &this_mv),
  763. in_what->stride, &sse) +
  764. mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
  765. x->mvcost, x->errorperbit);
  766. }
  767. }
  768. }
  769. // Generic pattern search function that searches over multiple scales.
  770. // Each scale can have a different number of candidates and shape of
  771. // candidates as indicated in the num_candidates and candidates arrays
  772. // passed into this function
  773. //
  774. static int vp9_pattern_search(
  775. const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
  776. int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  777. int use_mvcost, const MV *center_mv, MV *best_mv,
  778. const int num_candidates[MAX_PATTERN_SCALES],
  779. const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  780. const MACROBLOCKD *const xd = &x->e_mbd;
  781. static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
  782. 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  783. };
  784. int i, s, t;
  785. const struct buf_2d *const what = &x->plane[0].src;
  786. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  787. int br, bc;
  788. int bestsad = INT_MAX;
  789. int thissad;
  790. int k = -1;
  791. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  792. int best_init_s = search_param_to_steps[search_param];
  793. // adjust ref_mv to make sure it is within MV range
  794. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  795. x->mv_limits.row_min, x->mv_limits.row_max);
  796. br = ref_mv->row;
  797. bc = ref_mv->col;
  798. // Work out the start point for the search
  799. bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  800. in_what->stride) +
  801. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  802. // Search all possible scales upto the search param around the center point
  803. // pick the scale of the point that is best as the starting scale of
  804. // further steps around it.
  805. if (do_init_search) {
  806. s = best_init_s;
  807. best_init_s = -1;
  808. for (t = 0; t <= s; ++t) {
  809. int best_site = -1;
  810. if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
  811. for (i = 0; i < num_candidates[t]; i++) {
  812. const MV this_mv = { br + candidates[t][i].row,
  813. bc + candidates[t][i].col };
  814. thissad =
  815. vfp->sdf(what->buf, what->stride,
  816. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  817. CHECK_BETTER
  818. }
  819. } else {
  820. for (i = 0; i < num_candidates[t]; i++) {
  821. const MV this_mv = { br + candidates[t][i].row,
  822. bc + candidates[t][i].col };
  823. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  824. thissad =
  825. vfp->sdf(what->buf, what->stride,
  826. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  827. CHECK_BETTER
  828. }
  829. }
  830. if (best_site == -1) {
  831. continue;
  832. } else {
  833. best_init_s = t;
  834. k = best_site;
  835. }
  836. }
  837. if (best_init_s != -1) {
  838. br += candidates[best_init_s][k].row;
  839. bc += candidates[best_init_s][k].col;
  840. }
  841. }
  842. // If the center point is still the best, just skip this and move to
  843. // the refinement step.
  844. if (best_init_s != -1) {
  845. int best_site = -1;
  846. s = best_init_s;
  847. do {
  848. // No need to search all 6 points the 1st time if initial search was used
  849. if (!do_init_search || s != best_init_s) {
  850. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  851. for (i = 0; i < num_candidates[s]; i++) {
  852. const MV this_mv = { br + candidates[s][i].row,
  853. bc + candidates[s][i].col };
  854. thissad =
  855. vfp->sdf(what->buf, what->stride,
  856. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  857. CHECK_BETTER
  858. }
  859. } else {
  860. for (i = 0; i < num_candidates[s]; i++) {
  861. const MV this_mv = { br + candidates[s][i].row,
  862. bc + candidates[s][i].col };
  863. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  864. thissad =
  865. vfp->sdf(what->buf, what->stride,
  866. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  867. CHECK_BETTER
  868. }
  869. }
  870. if (best_site == -1) {
  871. continue;
  872. } else {
  873. br += candidates[s][best_site].row;
  874. bc += candidates[s][best_site].col;
  875. k = best_site;
  876. }
  877. }
  878. do {
  879. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  880. best_site = -1;
  881. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  882. next_chkpts_indices[1] = k;
  883. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  884. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  885. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  886. const MV this_mv = {
  887. br + candidates[s][next_chkpts_indices[i]].row,
  888. bc + candidates[s][next_chkpts_indices[i]].col
  889. };
  890. thissad =
  891. vfp->sdf(what->buf, what->stride,
  892. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  893. CHECK_BETTER
  894. }
  895. } else {
  896. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  897. const MV this_mv = {
  898. br + candidates[s][next_chkpts_indices[i]].row,
  899. bc + candidates[s][next_chkpts_indices[i]].col
  900. };
  901. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  902. thissad =
  903. vfp->sdf(what->buf, what->stride,
  904. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  905. CHECK_BETTER
  906. }
  907. }
  908. if (best_site != -1) {
  909. k = next_chkpts_indices[best_site];
  910. br += candidates[s][k].row;
  911. bc += candidates[s][k].col;
  912. }
  913. } while (best_site != -1);
  914. } while (s--);
  915. }
  916. // Returns the one-away integer pel sad values around the best as follows:
  917. // cost_list[0]: cost at the best integer pel
  918. // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel
  919. // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
  920. // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel
  921. // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel
  922. if (cost_list) {
  923. const MV best_mv = { br, bc };
  924. calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
  925. }
  926. best_mv->row = br;
  927. best_mv->col = bc;
  928. return bestsad;
  929. }
  930. // A specialized function where the smallest scale search candidates
  931. // are 4 1-away neighbors, and cost_list is non-null
  932. // TODO(debargha): Merge this function with the one above. Also remove
  933. // use_mvcost option since it is always 1, to save unnecessary branches.
  934. static int vp9_pattern_search_sad(
  935. const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
  936. int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  937. int use_mvcost, const MV *center_mv, MV *best_mv,
  938. const int num_candidates[MAX_PATTERN_SCALES],
  939. const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  940. const MACROBLOCKD *const xd = &x->e_mbd;
  941. static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
  942. 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  943. };
  944. int i, s, t;
  945. const struct buf_2d *const what = &x->plane[0].src;
  946. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  947. int br, bc;
  948. int bestsad = INT_MAX;
  949. int thissad;
  950. int k = -1;
  951. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  952. int best_init_s = search_param_to_steps[search_param];
  953. // adjust ref_mv to make sure it is within MV range
  954. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  955. x->mv_limits.row_min, x->mv_limits.row_max);
  956. br = ref_mv->row;
  957. bc = ref_mv->col;
  958. if (cost_list != NULL) {
  959. cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
  960. INT_MAX;
  961. }
  962. // Work out the start point for the search
  963. bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  964. in_what->stride) +
  965. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  966. // Search all possible scales upto the search param around the center point
  967. // pick the scale of the point that is best as the starting scale of
  968. // further steps around it.
  969. if (do_init_search) {
  970. s = best_init_s;
  971. best_init_s = -1;
  972. for (t = 0; t <= s; ++t) {
  973. int best_site = -1;
  974. if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
  975. for (i = 0; i < num_candidates[t]; i++) {
  976. const MV this_mv = { br + candidates[t][i].row,
  977. bc + candidates[t][i].col };
  978. thissad =
  979. vfp->sdf(what->buf, what->stride,
  980. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  981. CHECK_BETTER
  982. }
  983. } else {
  984. for (i = 0; i < num_candidates[t]; i++) {
  985. const MV this_mv = { br + candidates[t][i].row,
  986. bc + candidates[t][i].col };
  987. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  988. thissad =
  989. vfp->sdf(what->buf, what->stride,
  990. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  991. CHECK_BETTER
  992. }
  993. }
  994. if (best_site == -1) {
  995. continue;
  996. } else {
  997. best_init_s = t;
  998. k = best_site;
  999. }
  1000. }
  1001. if (best_init_s != -1) {
  1002. br += candidates[best_init_s][k].row;
  1003. bc += candidates[best_init_s][k].col;
  1004. }
  1005. }
  1006. // If the center point is still the best, just skip this and move to
  1007. // the refinement step.
  1008. if (best_init_s != -1) {
  1009. int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
  1010. int best_site = -1;
  1011. s = best_init_s;
  1012. for (; s >= do_sad; s--) {
  1013. if (!do_init_search || s != best_init_s) {
  1014. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1015. for (i = 0; i < num_candidates[s]; i++) {
  1016. const MV this_mv = { br + candidates[s][i].row,
  1017. bc + candidates[s][i].col };
  1018. thissad =
  1019. vfp->sdf(what->buf, what->stride,
  1020. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1021. CHECK_BETTER
  1022. }
  1023. } else {
  1024. for (i = 0; i < num_candidates[s]; i++) {
  1025. const MV this_mv = { br + candidates[s][i].row,
  1026. bc + candidates[s][i].col };
  1027. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1028. thissad =
  1029. vfp->sdf(what->buf, what->stride,
  1030. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1031. CHECK_BETTER
  1032. }
  1033. }
  1034. if (best_site == -1) {
  1035. continue;
  1036. } else {
  1037. br += candidates[s][best_site].row;
  1038. bc += candidates[s][best_site].col;
  1039. k = best_site;
  1040. }
  1041. }
  1042. do {
  1043. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1044. best_site = -1;
  1045. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1046. next_chkpts_indices[1] = k;
  1047. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  1048. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1049. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1050. const MV this_mv = {
  1051. br + candidates[s][next_chkpts_indices[i]].row,
  1052. bc + candidates[s][next_chkpts_indices[i]].col
  1053. };
  1054. thissad =
  1055. vfp->sdf(what->buf, what->stride,
  1056. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1057. CHECK_BETTER
  1058. }
  1059. } else {
  1060. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1061. const MV this_mv = {
  1062. br + candidates[s][next_chkpts_indices[i]].row,
  1063. bc + candidates[s][next_chkpts_indices[i]].col
  1064. };
  1065. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1066. thissad =
  1067. vfp->sdf(what->buf, what->stride,
  1068. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1069. CHECK_BETTER
  1070. }
  1071. }
  1072. if (best_site != -1) {
  1073. k = next_chkpts_indices[best_site];
  1074. br += candidates[s][k].row;
  1075. bc += candidates[s][k].col;
  1076. }
  1077. } while (best_site != -1);
  1078. }
  1079. // Note: If we enter the if below, then cost_list must be non-NULL.
  1080. if (s == 0) {
  1081. cost_list[0] = bestsad;
  1082. if (!do_init_search || s != best_init_s) {
  1083. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1084. for (i = 0; i < num_candidates[s]; i++) {
  1085. const MV this_mv = { br + candidates[s][i].row,
  1086. bc + candidates[s][i].col };
  1087. cost_list[i + 1] = thissad =
  1088. vfp->sdf(what->buf, what->stride,
  1089. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1090. CHECK_BETTER
  1091. }
  1092. } else {
  1093. for (i = 0; i < num_candidates[s]; i++) {
  1094. const MV this_mv = { br + candidates[s][i].row,
  1095. bc + candidates[s][i].col };
  1096. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1097. cost_list[i + 1] = thissad =
  1098. vfp->sdf(what->buf, what->stride,
  1099. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1100. CHECK_BETTER
  1101. }
  1102. }
  1103. if (best_site != -1) {
  1104. br += candidates[s][best_site].row;
  1105. bc += candidates[s][best_site].col;
  1106. k = best_site;
  1107. }
  1108. }
  1109. while (best_site != -1) {
  1110. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1111. best_site = -1;
  1112. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1113. next_chkpts_indices[1] = k;
  1114. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  1115. cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
  1116. cost_list[((k + 2) % 4) + 1] = cost_list[0];
  1117. cost_list[0] = bestsad;
  1118. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1119. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1120. const MV this_mv = {
  1121. br + candidates[s][next_chkpts_indices[i]].row,
  1122. bc + candidates[s][next_chkpts_indices[i]].col
  1123. };
  1124. cost_list[next_chkpts_indices[i] + 1] = thissad =
  1125. vfp->sdf(what->buf, what->stride,
  1126. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1127. CHECK_BETTER
  1128. }
  1129. } else {
  1130. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1131. const MV this_mv = {
  1132. br + candidates[s][next_chkpts_indices[i]].row,
  1133. bc + candidates[s][next_chkpts_indices[i]].col
  1134. };
  1135. if (!is_mv_in(&x->mv_limits, &this_mv)) {
  1136. cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
  1137. continue;
  1138. }
  1139. cost_list[next_chkpts_indices[i] + 1] = thissad =
  1140. vfp->sdf(what->buf, what->stride,
  1141. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1142. CHECK_BETTER
  1143. }
  1144. }
  1145. if (best_site != -1) {
  1146. k = next_chkpts_indices[best_site];
  1147. br += candidates[s][k].row;
  1148. bc += candidates[s][k].col;
  1149. }
  1150. }
  1151. }
  1152. }
  1153. // Returns the one-away integer pel sad values around the best as follows:
  1154. // cost_list[0]: sad at the best integer pel
  1155. // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel
  1156. // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
  1157. // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel
  1158. // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel
  1159. if (cost_list) {
  1160. static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  1161. if (cost_list[0] == INT_MAX) {
  1162. cost_list[0] = bestsad;
  1163. if (check_bounds(&x->mv_limits, br, bc, 1)) {
  1164. for (i = 0; i < 4; i++) {
  1165. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1166. cost_list[i + 1] =
  1167. vfp->sdf(what->buf, what->stride,
  1168. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1169. }
  1170. } else {
  1171. for (i = 0; i < 4; i++) {
  1172. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1173. if (!is_mv_in(&x->mv_limits, &this_mv))
  1174. cost_list[i + 1] = INT_MAX;
  1175. else
  1176. cost_list[i + 1] =
  1177. vfp->sdf(what->buf, what->stride,
  1178. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1179. }
  1180. }
  1181. } else {
  1182. if (use_mvcost) {
  1183. for (i = 0; i < 4; i++) {
  1184. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1185. if (cost_list[i + 1] != INT_MAX) {
  1186. cost_list[i + 1] +=
  1187. mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1188. }
  1189. }
  1190. }
  1191. }
  1192. }
  1193. best_mv->row = br;
  1194. best_mv->col = bc;
  1195. return bestsad;
  1196. }
  1197. int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
  1198. const MV *center_mv, const vp9_variance_fn_ptr_t *vfp,
  1199. int use_mvcost) {
  1200. const MACROBLOCKD *const xd = &x->e_mbd;
  1201. const struct buf_2d *const what = &x->plane[0].src;
  1202. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1203. const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  1204. uint32_t unused;
  1205. #if CONFIG_VP9_HIGHBITDEPTH
  1206. uint64_t err =
  1207. vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
  1208. in_what->stride, &unused);
  1209. err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1210. x->errorperbit)
  1211. : 0);
  1212. if (err >= INT_MAX) return INT_MAX;
  1213. return (int)err;
  1214. #else
  1215. return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
  1216. in_what->stride, &unused) +
  1217. (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1218. x->errorperbit)
  1219. : 0);
  1220. #endif
  1221. }
  1222. int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
  1223. const MV *center_mv, const uint8_t *second_pred,
  1224. const vp9_variance_fn_ptr_t *vfp, int use_mvcost) {
  1225. const MACROBLOCKD *const xd = &x->e_mbd;
  1226. const struct buf_2d *const what = &x->plane[0].src;
  1227. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1228. const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  1229. unsigned int unused;
  1230. return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
  1231. what->buf, what->stride, &unused, second_pred) +
  1232. (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1233. x->errorperbit)
  1234. : 0);
  1235. }
  1236. static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1237. int sad_per_bit, int do_init_search, int *cost_list,
  1238. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1239. const MV *center_mv, MV *best_mv) {
  1240. // First scale has 8-closest points, the rest have 6 points in hex shape
  1241. // at increasing scales
  1242. static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
  1243. 6, 6, 6, 6, 6 };
  1244. // Note that the largest candidate step at each scale is 2^scale
  1245. /* clang-format off */
  1246. static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1247. { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
  1248. { -1, 0 } },
  1249. { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
  1250. { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
  1251. { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
  1252. { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
  1253. { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
  1254. { -32, 0 } },
  1255. { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
  1256. { -64, 0 } },
  1257. { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
  1258. { -128, 0 } },
  1259. { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
  1260. { -256, 0 } },
  1261. { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
  1262. { -512, 0 } },
  1263. { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
  1264. { -512, 1024 }, { -1024, 0 } }
  1265. };
  1266. /* clang-format on */
  1267. return vp9_pattern_search(
  1268. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1269. use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates);
  1270. }
  1271. static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1272. int sad_per_bit, int do_init_search, int *cost_list,
  1273. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1274. const MV *center_mv, MV *best_mv) {
  1275. // First scale has 4-closest points, the rest have 8 points in diamond
  1276. // shape at increasing scales
  1277. static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
  1278. 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1279. };
  1280. // Note that the largest candidate step at each scale is 2^scale
  1281. /* clang-format off */
  1282. static const MV
  1283. bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1284. { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
  1285. { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
  1286. { -1, 1 }, { -2, 0 } },
  1287. { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
  1288. { -2, 2 }, { -4, 0 } },
  1289. { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
  1290. { -4, 4 }, { -8, 0 } },
  1291. { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
  1292. { -8, 8 }, { -16, 0 } },
  1293. { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
  1294. { 0, 32 }, { -16, 16 }, { -32, 0 } },
  1295. { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
  1296. { 0, 64 }, { -32, 32 }, { -64, 0 } },
  1297. { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
  1298. { 0, 128 }, { -64, 64 }, { -128, 0 } },
  1299. { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
  1300. { 0, 256 }, { -128, 128 }, { -256, 0 } },
  1301. { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
  1302. { 0, 512 }, { -256, 256 }, { -512, 0 } },
  1303. { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
  1304. { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } }
  1305. };
  1306. /* clang-format on */
  1307. return vp9_pattern_search_sad(
  1308. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1309. use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates);
  1310. }
  1311. static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1312. int sad_per_bit, int do_init_search, int *cost_list,
  1313. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1314. const MV *center_mv, MV *best_mv) {
  1315. // All scales have 8 closest points in square shape
  1316. static const int square_num_candidates[MAX_PATTERN_SCALES] = {
  1317. 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1318. };
  1319. // Note that the largest candidate step at each scale is 2^scale
  1320. /* clang-format off */
  1321. static const MV
  1322. square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1323. { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
  1324. { -1, 1 }, { -1, 0 } },
  1325. { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
  1326. { -2, 2 }, { -2, 0 } },
  1327. { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
  1328. { -4, 4 }, { -4, 0 } },
  1329. { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
  1330. { -8, 8 }, { -8, 0 } },
  1331. { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
  1332. { 0, 16 }, { -16, 16 }, { -16, 0 } },
  1333. { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
  1334. { 0, 32 }, { -32, 32 }, { -32, 0 } },
  1335. { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
  1336. { 0, 64 }, { -64, 64 }, { -64, 0 } },
  1337. { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
  1338. { 0, 128 }, { -128, 128 }, { -128, 0 } },
  1339. { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
  1340. { 0, 256 }, { -256, 256 }, { -256, 0 } },
  1341. { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
  1342. { 0, 512 }, { -512, 512 }, { -512, 0 } },
  1343. { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
  1344. { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } }
  1345. };
  1346. /* clang-format on */
  1347. return vp9_pattern_search(
  1348. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1349. use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates);
  1350. }
  1351. static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1352. int sad_per_bit,
  1353. int do_init_search, // must be zero for fast_hex
  1354. int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  1355. int use_mvcost, const MV *center_mv, MV *best_mv) {
  1356. return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
  1357. sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
  1358. center_mv, best_mv);
  1359. }
  1360. static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1361. int sad_per_bit, int do_init_search, int *cost_list,
  1362. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1363. const MV *center_mv, MV *best_mv) {
  1364. return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
  1365. sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
  1366. center_mv, best_mv);
  1367. }
  1368. #undef CHECK_BETTER
  1369. // Exhuastive motion search around a given centre position with a given
  1370. // step size.
  1371. static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
  1372. int range, int step, int sad_per_bit,
  1373. const vp9_variance_fn_ptr_t *fn_ptr,
  1374. const MV *center_mv) {
  1375. const MACROBLOCKD *const xd = &x->e_mbd;
  1376. const struct buf_2d *const what = &x->plane[0].src;
  1377. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1378. MV fcenter_mv = { center_mv->row, center_mv->col };
  1379. unsigned int best_sad = INT_MAX;
  1380. int r, c, i;
  1381. int start_col, end_col, start_row, end_row;
  1382. int col_step = (step > 1) ? step : 4;
  1383. assert(step >= 1);
  1384. clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1385. x->mv_limits.row_min, x->mv_limits.row_max);
  1386. *best_mv = fcenter_mv;
  1387. best_sad =
  1388. fn_ptr->sdf(what->buf, what->stride,
  1389. get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
  1390. mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
  1391. start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
  1392. start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
  1393. end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
  1394. end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
  1395. for (r = start_row; r <= end_row; r += step) {
  1396. for (c = start_col; c <= end_col; c += col_step) {
  1397. // Step > 1 means we are not checking every location in this pass.
  1398. if (step > 1) {
  1399. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
  1400. unsigned int sad =
  1401. fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
  1402. in_what->stride);
  1403. if (sad < best_sad) {
  1404. sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1405. if (sad < best_sad) {
  1406. best_sad = sad;
  1407. *best_mv = mv;
  1408. }
  1409. }
  1410. } else {
  1411. // 4 sads in a single call if we are checking every location
  1412. if (c + 3 <= end_col) {
  1413. unsigned int sads[4];
  1414. const uint8_t *addrs[4];
  1415. for (i = 0; i < 4; ++i) {
  1416. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1417. addrs[i] = get_buf_from_mv(in_what, &mv);
  1418. }
  1419. fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
  1420. for (i = 0; i < 4; ++i) {
  1421. if (sads[i] < best_sad) {
  1422. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1423. const unsigned int sad =
  1424. sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1425. if (sad < best_sad) {
  1426. best_sad = sad;
  1427. *best_mv = mv;
  1428. }
  1429. }
  1430. }
  1431. } else {
  1432. for (i = 0; i < end_col - c; ++i) {
  1433. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1434. unsigned int sad =
  1435. fn_ptr->sdf(what->buf, what->stride,
  1436. get_buf_from_mv(in_what, &mv), in_what->stride);
  1437. if (sad < best_sad) {
  1438. sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1439. if (sad < best_sad) {
  1440. best_sad = sad;
  1441. *best_mv = mv;
  1442. }
  1443. }
  1444. }
  1445. }
  1446. }
  1447. }
  1448. }
  1449. return best_sad;
  1450. }
  1451. int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
  1452. MV *ref_mv, MV *best_mv, int search_param,
  1453. int sad_per_bit, int *num00,
  1454. const vp9_variance_fn_ptr_t *fn_ptr,
  1455. const MV *center_mv) {
  1456. int i, j, step;
  1457. const MACROBLOCKD *const xd = &x->e_mbd;
  1458. uint8_t *what = x->plane[0].src.buf;
  1459. const int what_stride = x->plane[0].src.stride;
  1460. const uint8_t *in_what;
  1461. const int in_what_stride = xd->plane[0].pre[0].stride;
  1462. const uint8_t *best_address;
  1463. unsigned int bestsad = INT_MAX;
  1464. int best_site = -1;
  1465. int last_site = -1;
  1466. int ref_row;
  1467. int ref_col;
  1468. // search_param determines the length of the initial step and hence the number
  1469. // of iterations.
  1470. // 0 = initial step (MAX_FIRST_STEP) pel
  1471. // 1 = (MAX_FIRST_STEP/2) pel,
  1472. // 2 = (MAX_FIRST_STEP/4) pel...
  1473. // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
  1474. const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
  1475. const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
  1476. const int tot_steps = cfg->total_steps - search_param;
  1477. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  1478. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1479. x->mv_limits.row_min, x->mv_limits.row_max);
  1480. ref_row = ref_mv->row;
  1481. ref_col = ref_mv->col;
  1482. *num00 = 0;
  1483. best_mv->row = ref_row;
  1484. best_mv->col = ref_col;
  1485. // Work out the start point for the search
  1486. in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
  1487. best_address = in_what;
  1488. // Check the starting position
  1489. bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
  1490. mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
  1491. i = 0;
  1492. for (step = 0; step < tot_steps; step++) {
  1493. int all_in = 1, t;
  1494. // All_in is true if every one of the points we are checking are within
  1495. // the bounds of the image.
  1496. all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
  1497. all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
  1498. all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
  1499. all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
  1500. // If all the pixels are within the bounds we don't check whether the
  1501. // search point is valid in this loop, otherwise we check each point
  1502. // for validity..
  1503. if (all_in) {
  1504. unsigned int sad_array[4];
  1505. for (j = 0; j < cfg->searches_per_step; j += 4) {
  1506. unsigned char const *block_offset[4];
  1507. for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
  1508. fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  1509. sad_array);
  1510. for (t = 0; t < 4; t++, i++) {
  1511. if (sad_array[t] < bestsad) {
  1512. const MV this_mv = { best_mv->row + ss_mv[i].row,
  1513. best_mv->col + ss_mv[i].col };
  1514. sad_array[t] +=
  1515. mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1516. if (sad_array[t] < bestsad) {
  1517. bestsad = sad_array[t];
  1518. best_site = i;
  1519. }
  1520. }
  1521. }
  1522. }
  1523. } else {
  1524. for (j = 0; j < cfg->searches_per_step; j++) {
  1525. // Trap illegal vectors
  1526. const MV this_mv = { best_mv->row + ss_mv[i].row,
  1527. best_mv->col + ss_mv[i].col };
  1528. if (is_mv_in(&x->mv_limits, &this_mv)) {
  1529. const uint8_t *const check_here = ss_os[i] + best_address;
  1530. unsigned int thissad =
  1531. fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1532. if (thissad < bestsad) {
  1533. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1534. if (thissad < bestsad) {
  1535. bestsad = thissad;
  1536. best_site = i;
  1537. }
  1538. }
  1539. }
  1540. i++;
  1541. }
  1542. }
  1543. if (best_site != last_site) {
  1544. best_mv->row += ss_mv[best_site].row;
  1545. best_mv->col += ss_mv[best_site].col;
  1546. best_address += ss_os[best_site];
  1547. last_site = best_site;
  1548. #if defined(NEW_DIAMOND_SEARCH)
  1549. while (1) {
  1550. const MV this_mv = { best_mv->row + ss_mv[best_site].row,
  1551. best_mv->col + ss_mv[best_site].col };
  1552. if (is_mv_in(&x->mv_limits, &this_mv)) {
  1553. const uint8_t *const check_here = ss_os[best_site] + best_address;
  1554. unsigned int thissad =
  1555. fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1556. if (thissad < bestsad) {
  1557. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1558. if (thissad < bestsad) {
  1559. bestsad = thissad;
  1560. best_mv->row += ss_mv[best_site].row;
  1561. best_mv->col += ss_mv[best_site].col;
  1562. best_address += ss_os[best_site];
  1563. continue;
  1564. }
  1565. }
  1566. }
  1567. break;
  1568. }
  1569. #endif
  1570. } else if (best_address == in_what) {
  1571. (*num00)++;
  1572. }
  1573. }
  1574. return bestsad;
  1575. }
  1576. static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  1577. int best_sad = INT_MAX;
  1578. int this_sad;
  1579. int d;
  1580. int center, offset = 0;
  1581. int bw = 4 << bwl; // redundant variable, to be changed in the experiments.
  1582. for (d = 0; d <= bw; d += 16) {
  1583. this_sad = vpx_vector_var(&ref[d], src, bwl);
  1584. if (this_sad < best_sad) {
  1585. best_sad = this_sad;
  1586. offset = d;
  1587. }
  1588. }
  1589. center = offset;
  1590. for (d = -8; d <= 8; d += 16) {
  1591. int this_pos = offset + d;
  1592. // check limit
  1593. if (this_pos < 0 || this_pos > bw) continue;
  1594. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  1595. if (this_sad < best_sad) {
  1596. best_sad = this_sad;
  1597. center = this_pos;
  1598. }
  1599. }
  1600. offset = center;
  1601. for (d = -4; d <= 4; d += 8) {
  1602. int this_pos = offset + d;
  1603. // check limit
  1604. if (this_pos < 0 || this_pos > bw) continue;
  1605. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  1606. if (this_sad < best_sad) {
  1607. best_sad = this_sad;
  1608. center = this_pos;
  1609. }
  1610. }
  1611. offset = center;
  1612. for (d = -2; d <= 2; d += 4) {
  1613. int this_pos = offset + d;
  1614. // check limit
  1615. if (this_pos < 0 || this_pos > bw) continue;
  1616. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  1617. if (this_sad < best_sad) {
  1618. best_sad = this_sad;
  1619. center = this_pos;
  1620. }
  1621. }
  1622. offset = center;
  1623. for (d = -1; d <= 1; d += 2) {
  1624. int this_pos = offset + d;
  1625. // check limit
  1626. if (this_pos < 0 || this_pos > bw) continue;
  1627. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  1628. if (this_sad < best_sad) {
  1629. best_sad = this_sad;
  1630. center = this_pos;
  1631. }
  1632. }
  1633. return (center - (bw >> 1));
  1634. }
  1635. static const MV search_pos[4] = {
  1636. { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
  1637. };
  1638. unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
  1639. BLOCK_SIZE bsize, int mi_row,
  1640. int mi_col) {
  1641. MACROBLOCKD *xd = &x->e_mbd;
  1642. MODE_INFO *mi = xd->mi[0];
  1643. struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  1644. DECLARE_ALIGNED(16, int16_t, hbuf[128]);
  1645. DECLARE_ALIGNED(16, int16_t, vbuf[128]);
  1646. DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
  1647. DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
  1648. int idx;
  1649. const int bw = 4 << b_width_log2_lookup[bsize];
  1650. const int bh = 4 << b_height_log2_lookup[bsize];
  1651. const int search_width = bw << 1;
  1652. const int search_height = bh << 1;
  1653. const int src_stride = x->plane[0].src.stride;
  1654. const int ref_stride = xd->plane[0].pre[0].stride;
  1655. uint8_t const *ref_buf, *src_buf;
  1656. MV *tmp_mv = &xd->mi[0]->mv[0].as_mv;
  1657. unsigned int best_sad, tmp_sad, this_sad[4];
  1658. MV this_mv;
  1659. const int norm_factor = 3 + (bw >> 5);
  1660. const YV12_BUFFER_CONFIG *scaled_ref_frame =
  1661. vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
  1662. if (scaled_ref_frame) {
  1663. int i;
  1664. // Swap out the reference frame for a version that's been scaled to
  1665. // match the resolution of the current frame, allowing the existing
  1666. // motion search code to be used without additional modifications.
  1667. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
  1668. vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  1669. }
  1670. #if CONFIG_VP9_HIGHBITDEPTH
  1671. // TODO(jingning): Implement integral projection functions for high bit-depth
  1672. // setting and remove this part of code.
  1673. if (xd->bd != 8) {
  1674. unsigned int this_sad;
  1675. tmp_mv->row = 0;
  1676. tmp_mv->col = 0;
  1677. this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
  1678. xd->plane[0].pre[0].buf, ref_stride);
  1679. if (scaled_ref_frame) {
  1680. int i;
  1681. for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  1682. }
  1683. return this_sad;
  1684. }
  1685. #endif
  1686. // Set up prediction 1-D reference set
  1687. ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
  1688. for (idx = 0; idx < search_width; idx += 16) {
  1689. vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
  1690. ref_buf += 16;
  1691. }
  1692. ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
  1693. for (idx = 0; idx < search_height; ++idx) {
  1694. vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
  1695. ref_buf += ref_stride;
  1696. }
  1697. // Set up src 1-D reference set
  1698. for (idx = 0; idx < bw; idx += 16) {
  1699. src_buf = x->plane[0].src.buf + idx;
  1700. vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
  1701. }
  1702. src_buf = x->plane[0].src.buf;
  1703. for (idx = 0; idx < bh; ++idx) {
  1704. src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
  1705. src_buf += src_stride;
  1706. }
  1707. // Find the best match per 1-D search
  1708. tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
  1709. tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
  1710. this_mv = *tmp_mv;
  1711. src_buf = x->plane[0].src.buf;
  1712. ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  1713. best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
  1714. {
  1715. const uint8_t *const pos[4] = {
  1716. ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
  1717. };
  1718. cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
  1719. }
  1720. for (idx = 0; idx < 4; ++idx) {
  1721. if (this_sad[idx] < best_sad) {
  1722. best_sad = this_sad[idx];
  1723. tmp_mv->row = search_pos[idx].row + this_mv.row;
  1724. tmp_mv->col = search_pos[idx].col + this_mv.col;
  1725. }
  1726. }
  1727. if (this_sad[0] < this_sad[3])
  1728. this_mv.row -= 1;
  1729. else
  1730. this_mv.row += 1;
  1731. if (this_sad[1] < this_sad[2])
  1732. this_mv.col -= 1;
  1733. else
  1734. this_mv.col += 1;
  1735. ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  1736. tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
  1737. if (best_sad > tmp_sad) {
  1738. *tmp_mv = this_mv;
  1739. best_sad = tmp_sad;
  1740. }
  1741. tmp_mv->row *= 8;
  1742. tmp_mv->col *= 8;
  1743. if (scaled_ref_frame) {
  1744. int i;
  1745. for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  1746. }
  1747. return best_sad;
  1748. }
  1749. // Runs sequence of diamond searches in smaller steps for RD.
  1750. /* do_refine: If last step (1-away) of n-step search doesn't pick the center
  1751. point as the best match, we will do a final 1-away diamond
  1752. refining search */
  1753. static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
  1754. int step_param, int sadpb, int further_steps,
  1755. int do_refine, int *cost_list,
  1756. const vp9_variance_fn_ptr_t *fn_ptr,
  1757. const MV *ref_mv, MV *dst_mv) {
  1758. MV temp_mv;
  1759. int thissme, n, num00 = 0;
  1760. int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
  1761. step_param, sadpb, &n, fn_ptr, ref_mv);
  1762. if (bestsme < INT_MAX)
  1763. bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  1764. *dst_mv = temp_mv;
  1765. // If there won't be more n-step search, check to see if refining search is
  1766. // needed.
  1767. if (n > further_steps) do_refine = 0;
  1768. while (n < further_steps) {
  1769. ++n;
  1770. if (num00) {
  1771. num00--;
  1772. } else {
  1773. thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
  1774. step_param + n, sadpb, &num00, fn_ptr,
  1775. ref_mv);
  1776. if (thissme < INT_MAX)
  1777. thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  1778. // check to see if refining search is needed.
  1779. if (num00 > further_steps - n) do_refine = 0;
  1780. if (thissme < bestsme) {
  1781. bestsme = thissme;
  1782. *dst_mv = temp_mv;
  1783. }
  1784. }
  1785. }
  1786. // final 1-away diamond refining search
  1787. if (do_refine) {
  1788. const int search_range = 8;
  1789. MV best_mv = *dst_mv;
  1790. thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
  1791. ref_mv);
  1792. if (thissme < INT_MAX)
  1793. thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
  1794. if (thissme < bestsme) {
  1795. bestsme = thissme;
  1796. *dst_mv = best_mv;
  1797. }
  1798. }
  1799. // Return cost list.
  1800. if (cost_list) {
  1801. calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
  1802. }
  1803. return bestsme;
  1804. }
  1805. #define MIN_RANGE 7
  1806. #define MAX_RANGE 256
  1807. #define MIN_INTERVAL 1
  1808. // Runs an limited range exhaustive mesh search using a pattern set
  1809. // according to the encode speed profile.
  1810. static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
  1811. MV *centre_mv_full, int sadpb, int *cost_list,
  1812. const vp9_variance_fn_ptr_t *fn_ptr,
  1813. const MV *ref_mv, MV *dst_mv) {
  1814. const SPEED_FEATURES *const sf = &cpi->sf;
  1815. MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
  1816. MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  1817. int bestsme;
  1818. int i;
  1819. int interval = sf->mesh_patterns[0].interval;
  1820. int range = sf->mesh_patterns[0].range;
  1821. int baseline_interval_divisor;
  1822. // Trap illegal values for interval and range for this function.
  1823. if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
  1824. (interval > range))
  1825. return INT_MAX;
  1826. baseline_interval_divisor = range / interval;
  1827. // Check size of proposed first range against magnitude of the centre
  1828. // value used as a starting point.
  1829. range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
  1830. range = VPXMIN(range, MAX_RANGE);
  1831. interval = VPXMAX(interval, range / baseline_interval_divisor);
  1832. // initial search
  1833. bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
  1834. sadpb, fn_ptr, &temp_mv);
  1835. if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
  1836. // Progressive searches with range and step size decreasing each time
  1837. // till we reach a step size of 1. Then break out.
  1838. for (i = 1; i < MAX_MESH_STEP; ++i) {
  1839. // First pass with coarser step and longer range
  1840. bestsme = exhuastive_mesh_search(
  1841. x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
  1842. sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
  1843. if (sf->mesh_patterns[i].interval == 1) break;
  1844. }
  1845. }
  1846. if (bestsme < INT_MAX)
  1847. bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  1848. *dst_mv = temp_mv;
  1849. // Return cost list.
  1850. if (cost_list) {
  1851. calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
  1852. }
  1853. return bestsme;
  1854. }
  1855. int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
  1856. int search_range,
  1857. const vp9_variance_fn_ptr_t *fn_ptr,
  1858. const MV *center_mv) {
  1859. const MACROBLOCKD *const xd = &x->e_mbd;
  1860. const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  1861. const struct buf_2d *const what = &x->plane[0].src;
  1862. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1863. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  1864. const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
  1865. unsigned int best_sad =
  1866. fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
  1867. mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  1868. int i, j;
  1869. for (i = 0; i < search_range; i++) {
  1870. int best_site = -1;
  1871. const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
  1872. ((ref_mv->row + 1) < x->mv_limits.row_max) &
  1873. ((ref_mv->col - 1) > x->mv_limits.col_min) &
  1874. ((ref_mv->col + 1) < x->mv_limits.col_max);
  1875. if (all_in) {
  1876. unsigned int sads[4];
  1877. const uint8_t *const positions[4] = { best_address - in_what->stride,
  1878. best_address - 1, best_address + 1,
  1879. best_address + in_what->stride };
  1880. fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
  1881. for (j = 0; j < 4; ++j) {
  1882. if (sads[j] < best_sad) {
  1883. const MV mv = { ref_mv->row + neighbors[j].row,
  1884. ref_mv->col + neighbors[j].col };
  1885. sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  1886. if (sads[j] < best_sad) {
  1887. best_sad = sads[j];
  1888. best_site = j;
  1889. }
  1890. }
  1891. }
  1892. } else {
  1893. for (j = 0; j < 4; ++j) {
  1894. const MV mv = { ref_mv->row + neighbors[j].row,
  1895. ref_mv->col + neighbors[j].col };
  1896. if (is_mv_in(&x->mv_limits, &mv)) {
  1897. unsigned int sad =
  1898. fn_ptr->sdf(what->buf, what->stride,
  1899. get_buf_from_mv(in_what, &mv), in_what->stride);
  1900. if (sad < best_sad) {
  1901. sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  1902. if (sad < best_sad) {
  1903. best_sad = sad;
  1904. best_site = j;
  1905. }
  1906. }
  1907. }
  1908. }
  1909. }
  1910. if (best_site == -1) {
  1911. break;
  1912. } else {
  1913. ref_mv->row += neighbors[best_site].row;
  1914. ref_mv->col += neighbors[best_site].col;
  1915. best_address = get_buf_from_mv(in_what, ref_mv);
  1916. }
  1917. }
  1918. return best_sad;
  1919. }
  1920. // This function is called when we do joint motion search in comp_inter_inter
  1921. // mode.
  1922. int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
  1923. int search_range,
  1924. const vp9_variance_fn_ptr_t *fn_ptr,
  1925. const MV *center_mv, const uint8_t *second_pred) {
  1926. const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
  1927. { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
  1928. const MACROBLOCKD *const xd = &x->e_mbd;
  1929. const struct buf_2d *const what = &x->plane[0].src;
  1930. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1931. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  1932. unsigned int best_sad = INT_MAX;
  1933. int i, j;
  1934. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1935. x->mv_limits.row_min, x->mv_limits.row_max);
  1936. best_sad =
  1937. fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  1938. in_what->stride, second_pred) +
  1939. mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  1940. for (i = 0; i < search_range; ++i) {
  1941. int best_site = -1;
  1942. for (j = 0; j < 8; ++j) {
  1943. const MV mv = { ref_mv->row + neighbors[j].row,
  1944. ref_mv->col + neighbors[j].col };
  1945. if (is_mv_in(&x->mv_limits, &mv)) {
  1946. unsigned int sad =
  1947. fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
  1948. in_what->stride, second_pred);
  1949. if (sad < best_sad) {
  1950. sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  1951. if (sad < best_sad) {
  1952. best_sad = sad;
  1953. best_site = j;
  1954. }
  1955. }
  1956. }
  1957. }
  1958. if (best_site == -1) {
  1959. break;
  1960. } else {
  1961. ref_mv->row += neighbors[best_site].row;
  1962. ref_mv->col += neighbors[best_site].col;
  1963. }
  1964. }
  1965. return best_sad;
  1966. }
  1967. int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
  1968. MV *mvp_full, int step_param, int search_method,
  1969. int error_per_bit, int *cost_list, const MV *ref_mv,
  1970. MV *tmp_mv, int var_max, int rd) {
  1971. const SPEED_FEATURES *const sf = &cpi->sf;
  1972. const SEARCH_METHODS method = (SEARCH_METHODS)search_method;
  1973. vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
  1974. int var = 0;
  1975. if (cost_list) {
  1976. cost_list[0] = INT_MAX;
  1977. cost_list[1] = INT_MAX;
  1978. cost_list[2] = INT_MAX;
  1979. cost_list[3] = INT_MAX;
  1980. cost_list[4] = INT_MAX;
  1981. }
  1982. switch (method) {
  1983. case FAST_DIAMOND:
  1984. var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
  1985. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  1986. break;
  1987. case FAST_HEX:
  1988. var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
  1989. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  1990. break;
  1991. case HEX:
  1992. var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
  1993. fn_ptr, 1, ref_mv, tmp_mv);
  1994. break;
  1995. case SQUARE:
  1996. var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
  1997. fn_ptr, 1, ref_mv, tmp_mv);
  1998. break;
  1999. case BIGDIA:
  2000. var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
  2001. fn_ptr, 1, ref_mv, tmp_mv);
  2002. break;
  2003. case NSTEP:
  2004. var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
  2005. MAX_MVSEARCH_STEPS - 1 - step_param, 1,
  2006. cost_list, fn_ptr, ref_mv, tmp_mv);
  2007. // Should we allow a follow on exhaustive search?
  2008. if ((sf->exhaustive_searches_thresh < INT_MAX) &&
  2009. !cpi->rc.is_src_frame_alt_ref) {
  2010. int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
  2011. exhuastive_thr >>=
  2012. 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
  2013. // Threshold variance for an exhaustive full search.
  2014. if (var > exhuastive_thr) {
  2015. int var_ex;
  2016. MV tmp_mv_ex;
  2017. var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit,
  2018. cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
  2019. if (var_ex < var) {
  2020. var = var_ex;
  2021. *tmp_mv = tmp_mv_ex;
  2022. }
  2023. }
  2024. }
  2025. break;
  2026. default: assert(0 && "Invalid search method.");
  2027. }
  2028. if (method != NSTEP && rd && var < var_max)
  2029. var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
  2030. return var;
  2031. }
  2032. // Note(yunqingwang): The following 2 functions are only used in the motion
  2033. // vector unit test, which return extreme motion vectors allowed by the MV
  2034. // limits.
  2035. #define COMMON_MV_TEST \
  2036. SETUP_SUBPEL_SEARCH; \
  2037. \
  2038. (void)error_per_bit; \
  2039. (void)vfp; \
  2040. (void)z; \
  2041. (void)src_stride; \
  2042. (void)y; \
  2043. (void)y_stride; \
  2044. (void)second_pred; \
  2045. (void)w; \
  2046. (void)h; \
  2047. (void)offset; \
  2048. (void)mvjcost; \
  2049. (void)mvcost; \
  2050. (void)sse1; \
  2051. (void)distortion; \
  2052. \
  2053. (void)halfiters; \
  2054. (void)quarteriters; \
  2055. (void)eighthiters; \
  2056. (void)whichdir; \
  2057. (void)allow_hp; \
  2058. (void)forced_stop; \
  2059. (void)hstep; \
  2060. (void)rr; \
  2061. (void)rc; \
  2062. \
  2063. (void)tr; \
  2064. (void)tc; \
  2065. (void)sse; \
  2066. (void)thismse; \
  2067. (void)cost_list;
  2068. // Return the maximum MV.
  2069. uint32_t vp9_return_max_sub_pixel_mv(
  2070. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  2071. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  2072. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  2073. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  2074. int h) {
  2075. COMMON_MV_TEST;
  2076. (void)minr;
  2077. (void)minc;
  2078. bestmv->row = maxr;
  2079. bestmv->col = maxc;
  2080. besterr = 0;
  2081. // In the sub-pel motion search, if hp is not used, then the last bit of mv
  2082. // has to be 0.
  2083. lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
  2084. return besterr;
  2085. }
  2086. // Return the minimum MV.
  2087. uint32_t vp9_return_min_sub_pixel_mv(
  2088. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  2089. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  2090. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  2091. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  2092. int h) {
  2093. COMMON_MV_TEST;
  2094. (void)maxr;
  2095. (void)maxc;
  2096. bestmv->row = minr;
  2097. bestmv->col = minc;
  2098. besterr = 0;
  2099. // In the sub-pel motion search, if hp is not used, then the last bit of mv
  2100. // has to be 0.
  2101. lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
  2102. return besterr;
  2103. }