vp9_pickmode.c

  1. /*
  2. * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <assert.h>
  11. #include <limits.h>
  12. #include <math.h>
  13. #include <stdio.h>
  14. #include "./vp9_rtcd.h"
  15. #include "./vpx_dsp_rtcd.h"
  16. #include "vpx/vpx_codec.h"
  17. #include "vpx_dsp/vpx_dsp_common.h"
  18. #include "vpx_mem/vpx_mem.h"
  19. #include "vpx_ports/mem.h"
  20. #include "vp9/common/vp9_blockd.h"
  21. #include "vp9/common/vp9_common.h"
  22. #include "vp9/common/vp9_mvref_common.h"
  23. #include "vp9/common/vp9_pred_common.h"
  24. #include "vp9/common/vp9_reconinter.h"
  25. #include "vp9/common/vp9_reconintra.h"
  26. #include "vp9/common/vp9_scan.h"
  27. #include "vp9/encoder/vp9_cost.h"
  28. #include "vp9/encoder/vp9_encoder.h"
  29. #include "vp9/encoder/vp9_pickmode.h"
  30. #include "vp9/encoder/vp9_ratectrl.h"
  31. #include "vp9/encoder/vp9_rd.h"
  32. typedef struct {
  33. uint8_t *data;
  34. int stride;
  35. int in_use;
  36. } PRED_BUFFER;
  37. static const int pos_shift_16x16[4][4] = {
  38. { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
  39. };
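  // Real-time counterpart of vp9_find_mv_refs(): scan the spatial neighbors
  // listed in mv_ref_blocks[] for MV candidates that use ref_frame and, for
  // spatial SVC, optionally derive a base-layer MV (scaled up by 2) from the
  // previous frame's MV buffer. The return value feeds the caller's
  // const_motion[] array.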
  40. static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm, const MACROBLOCK *x,
  41. const MACROBLOCKD *xd, const TileInfo *const tile,
  42. MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
  43. int_mv *mv_ref_list, int_mv *base_mv, int mi_row,
  44. int mi_col, int use_base_mv) {
  45. const int *ref_sign_bias = cm->ref_frame_sign_bias;
  46. int i, refmv_count = 0;
  47. const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type];
  48. int different_ref_found = 0;
  49. int context_counter = 0;
  50. int const_motion = 0;
  51. // Blank the reference vector list
  52. memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
  53. // The nearest 2 blocks are treated differently:
  54. // if the size is < 8x8 we get the mv from the bmi substructure,
  55. // and we also need to keep a mode count.
  56. for (i = 0; i < 2; ++i) {
  57. const POSITION *const mv_ref = &mv_ref_search[i];
  58. if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
  59. const MODE_INFO *const candidate_mi =
  60. xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
  61. // Keep counts for entropy encoding.
  62. context_counter += mode_2_counter[candidate_mi->mode];
  63. different_ref_found = 1;
  64. if (candidate_mi->ref_frame[0] == ref_frame)
  65. ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
  66. refmv_count, mv_ref_list, Done);
  67. }
  68. }
  69. const_motion = 1;
  70. // Check the rest of the neighbors in much the same way
  71. // as before except we don't need to keep track of sub blocks or
  72. // mode counts.
  73. for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
  74. const POSITION *const mv_ref = &mv_ref_search[i];
  75. if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
  76. const MODE_INFO *const candidate_mi =
  77. xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
  78. different_ref_found = 1;
  79. if (candidate_mi->ref_frame[0] == ref_frame)
  80. ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done);
  81. }
  82. }
  83. // Since we couldn't find 2 mvs from the same reference frame
  84. // go back through the neighbors and find motion vectors from
  85. // different reference frames.
  86. if (different_ref_found && !refmv_count) {
  87. for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
  88. const POSITION *mv_ref = &mv_ref_search[i];
  89. if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
  90. const MODE_INFO *const candidate_mi =
  91. xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
  92. // If the candidate is INTRA we don't want to consider its mv.
  93. IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias,
  94. refmv_count, mv_ref_list, Done);
  95. }
  96. }
  97. }
  98. if (use_base_mv &&
  99. !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
  100. ref_frame == LAST_FRAME) {
  101. // Get base layer mv.
  102. MV_REF *candidate =
  103. &cm->prev_frame
  104. ->mvs[(mi_col >> 1) + (mi_row >> 1) * (cm->mi_cols >> 1)];
  105. if (candidate->mv[0].as_int != INVALID_MV) {
  106. base_mv->as_mv.row = (candidate->mv[0].as_mv.row * 2);
  107. base_mv->as_mv.col = (candidate->mv[0].as_mv.col * 2);
  108. clamp_mv_ref(&base_mv->as_mv, xd);
  109. } else {
  110. base_mv->as_int = INVALID_MV;
  111. }
  112. }
  113. Done:
  114. x->mbmi_ext->mode_context[ref_frame] = counter_to_context[context_counter];
  115. // Clamp vectors
  116. for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
  117. clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
  118. return const_motion;
  119. }
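  // Motion search for NEWMV: full-pixel search (vp9_full_pixel_search) around
  // mvp_full (converted from 1/8-pel to full-pel by >> 3), followed, when
  // worthwhile, by sub-pixel refinement via find_fractional_mv_step. Returns 0
  // when the MV plus mode rate alone already exceeds best_rd_sofar, so the
  // caller can drop the mode without further work.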
  120. static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
  121. BLOCK_SIZE bsize, int mi_row, int mi_col,
  122. int_mv *tmp_mv, int *rate_mv,
  123. int64_t best_rd_sofar, int use_base_mv) {
  124. MACROBLOCKD *xd = &x->e_mbd;
  125. MODE_INFO *mi = xd->mi[0];
  126. struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  127. const int step_param = cpi->sf.mv.fullpel_search_step_param;
  128. const int sadpb = x->sadperbit16;
  129. MV mvp_full;
  130. const int ref = mi->ref_frame[0];
  131. const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  132. MV center_mv;
  133. uint32_t dis;
  134. int rate_mode;
  135. const MvLimits tmp_mv_limits = x->mv_limits;
  136. int rv = 0;
  137. int cost_list[5];
  138. int search_subpel = 1;
  139. const YV12_BUFFER_CONFIG *scaled_ref_frame =
  140. vp9_get_scaled_ref_frame(cpi, ref);
  141. if (scaled_ref_frame) {
  142. int i;
  143. // Swap out the reference frame for a version that's been scaled to
  144. // match the resolution of the current frame, allowing the existing
  145. // motion search code to be used without additional modifications.
  146. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
  147. vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  148. }
  149. vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
  150. // Limit the motion vector search range under large lighting changes.
  151. if (cpi->oxcf.speed > 5 && x->lowvar_highsumdiff) {
  152. x->mv_limits.col_min = VPXMAX(x->mv_limits.col_min, -10);
  153. x->mv_limits.row_min = VPXMAX(x->mv_limits.row_min, -10);
  154. x->mv_limits.col_max = VPXMIN(x->mv_limits.col_max, 10);
  155. x->mv_limits.row_max = VPXMIN(x->mv_limits.row_max, 10);
  156. }
  157. assert(x->mv_best_ref_index[ref] <= 2);
  158. if (x->mv_best_ref_index[ref] < 2)
  159. mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
  160. else
  161. mvp_full = x->pred_mv[ref];
  162. mvp_full.col >>= 3;
  163. mvp_full.row >>= 3;
  164. if (!use_base_mv)
  165. center_mv = ref_mv;
  166. else
  167. center_mv = tmp_mv->as_mv;
  168. if (x->sb_use_mv_part) {
  169. tmp_mv->as_mv.row = x->sb_mvrow_part >> 3;
  170. tmp_mv->as_mv.col = x->sb_mvcol_part >> 3;
  171. } else {
  172. vp9_full_pixel_search(
  173. cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
  174. cond_cost_list(cpi, cost_list), &center_mv, &tmp_mv->as_mv, INT_MAX, 0);
  175. }
  176. x->mv_limits = tmp_mv_limits;
  177. // Calculate the bit cost of the motion vector.
  178. mvp_full.row = tmp_mv->as_mv.row * 8;
  179. mvp_full.col = tmp_mv->as_mv.col * 8;
  180. *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, x->nmvjointcost, x->mvcost,
  181. MV_COST_WEIGHT);
  182. rate_mode =
  183. cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref]][INTER_OFFSET(NEWMV)];
  184. rv =
  185. !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) > best_rd_sofar);
  186. // For SVC on non-reference frame, avoid subpel for (0, 0) motion.
  187. if (cpi->use_svc && cpi->svc.non_reference_frame) {
  188. if (mvp_full.row == 0 && mvp_full.col == 0) search_subpel = 0;
  189. }
  190. if (rv && search_subpel) {
  191. int subpel_force_stop = cpi->sf.mv.subpel_force_stop;
  192. if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2;
  193. cpi->find_fractional_mv_step(
  194. x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
  195. x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
  196. cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
  197. x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
  198. *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
  199. x->mvcost, MV_COST_WEIGHT);
  200. }
  201. if (scaled_ref_frame) {
  202. int i;
  203. for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  204. }
  205. return rv;
  206. }
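  // Accumulate SSE and sum over a w x h area in block_size x block_size steps
  // (8x8 here), keeping the per-sub-block results. The per-sub-block variance
  // follows the usual identity var = sse - sum * sum / N, i.e.
  // sse - ((sum * sum) >> 6) for 64-pixel blocks.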
  207. static void block_variance(const uint8_t *src, int src_stride,
  208. const uint8_t *ref, int ref_stride, int w, int h,
  209. unsigned int *sse, int *sum, int block_size,
  210. #if CONFIG_VP9_HIGHBITDEPTH
  211. int use_highbitdepth, vpx_bit_depth_t bd,
  212. #endif
  213. uint32_t *sse8x8, int *sum8x8, uint32_t *var8x8) {
  214. int i, j, k = 0;
  215. *sse = 0;
  216. *sum = 0;
  217. for (i = 0; i < h; i += block_size) {
  218. for (j = 0; j < w; j += block_size) {
  219. #if CONFIG_VP9_HIGHBITDEPTH
  220. if (use_highbitdepth) {
  221. switch (bd) {
  222. case VPX_BITS_8:
  223. vpx_highbd_8_get8x8var(src + src_stride * i + j, src_stride,
  224. ref + ref_stride * i + j, ref_stride,
  225. &sse8x8[k], &sum8x8[k]);
  226. break;
  227. case VPX_BITS_10:
  228. vpx_highbd_10_get8x8var(src + src_stride * i + j, src_stride,
  229. ref + ref_stride * i + j, ref_stride,
  230. &sse8x8[k], &sum8x8[k]);
  231. break;
  232. case VPX_BITS_12:
  233. vpx_highbd_12_get8x8var(src + src_stride * i + j, src_stride,
  234. ref + ref_stride * i + j, ref_stride,
  235. &sse8x8[k], &sum8x8[k]);
  236. break;
  237. }
  238. } else {
  239. vpx_get8x8var(src + src_stride * i + j, src_stride,
  240. ref + ref_stride * i + j, ref_stride, &sse8x8[k],
  241. &sum8x8[k]);
  242. }
  243. #else
  244. vpx_get8x8var(src + src_stride * i + j, src_stride,
  245. ref + ref_stride * i + j, ref_stride, &sse8x8[k],
  246. &sum8x8[k]);
  247. #endif
  248. *sse += sse8x8[k];
  249. *sum += sum8x8[k];
  250. var8x8[k] = sse8x8[k] - (uint32_t)(((int64_t)sum8x8[k] * sum8x8[k]) >> 6);
  251. k++;
  252. }
  253. }
  254. }
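  // Merge each 2x2 group of sub-block sse/sum values into the next transform
  // size up and recompute the variance at that size; used to lift the 8x8
  // statistics from block_variance() to 16x16 and 32x32.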
  255. static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
  256. unsigned int *sse_i, int *sum_i,
  257. unsigned int *var_o, unsigned int *sse_o,
  258. int *sum_o) {
  259. const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
  260. const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
  261. const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
  262. int i, j, k = 0;
  263. for (i = 0; i < nh; i += 2) {
  264. for (j = 0; j < nw; j += 2) {
  265. sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
  266. sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
  267. sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
  268. sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
  269. var_o[k] = sse_o[k] - (uint32_t)(((int64_t)sum_o[k] * sum_o[k]) >>
  270. (b_width_log2_lookup[unit_size] +
  271. b_height_log2_lookup[unit_size] + 6));
  272. k++;
  273. }
  274. }
  275. }
  276. // Adjust the ac_thr according to speed, width, height and normalized sum
  277. static int ac_thr_factor(const int speed, const int width, const int height,
  278. const int norm_sum) {
  279. if (speed >= 8 && norm_sum < 5) {
  280. if (width <= 640 && height <= 480)
  281. return 4;
  282. else
  283. return 2;
  284. }
  285. return 1;
  286. }
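  // Model the Y-plane rate/distortion from the source/prediction variance
  // (var = sse - sum^2 / N, with N = 16 << (bw + bh) pixels) and run a
  // per-transform-block skip test against AC/DC thresholds derived from the
  // dequant values. If the Y and UV transforms all look skippable, signal
  // *early_term so the caller can stop the mode search for this block.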
  287. static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
  288. MACROBLOCK *x, MACROBLOCKD *xd,
  289. int *out_rate_sum, int64_t *out_dist_sum,
  290. unsigned int *var_y, unsigned int *sse_y,
  291. int mi_row, int mi_col, int *early_term) {
  292. // Note our transform coeffs are 8 times an orthogonal transform.
  293. // Hence quantizer step is also 8 times. To get effective quantizer
  294. // we need to divide by 8 before sending to modeling function.
  295. unsigned int sse;
  296. int rate;
  297. int64_t dist;
  298. struct macroblock_plane *const p = &x->plane[0];
  299. struct macroblockd_plane *const pd = &xd->plane[0];
  300. const uint32_t dc_quant = pd->dequant[0];
  301. const uint32_t ac_quant = pd->dequant[1];
  302. const int64_t dc_thr = dc_quant * dc_quant >> 6;
  303. int64_t ac_thr = ac_quant * ac_quant >> 6;
  304. unsigned int var;
  305. int sum;
  306. int skip_dc = 0;
  307. const int bw = b_width_log2_lookup[bsize];
  308. const int bh = b_height_log2_lookup[bsize];
  309. const int num8x8 = 1 << (bw + bh - 2);
  310. unsigned int sse8x8[64] = { 0 };
  311. int sum8x8[64] = { 0 };
  312. unsigned int var8x8[64] = { 0 };
  313. TX_SIZE tx_size;
  314. int i, k;
  315. #if CONFIG_VP9_HIGHBITDEPTH
  316. const vpx_bit_depth_t bd = cpi->common.bit_depth;
  317. #endif
  318. // Calculate variance for whole partition, and also save 8x8 blocks' variance
  319. // to be used in following transform skipping test.
  320. block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
  321. 4 << bw, 4 << bh, &sse, &sum, 8,
  322. #if CONFIG_VP9_HIGHBITDEPTH
  323. cpi->common.use_highbitdepth, bd,
  324. #endif
  325. sse8x8, sum8x8, var8x8);
  326. var = sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4));
  327. *var_y = var;
  328. *sse_y = sse;
  329. #if CONFIG_VP9_TEMPORAL_DENOISING
  330. if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
  331. cpi->oxcf.speed > 5)
  332. ac_thr = vp9_scale_acskip_thresh(ac_thr, cpi->denoiser.denoising_level,
  333. (abs(sum) >> (bw + bh)),
  334. cpi->svc.temporal_layer_id);
  335. else
  336. ac_thr *= ac_thr_factor(cpi->oxcf.speed, cpi->common.width,
  337. cpi->common.height, abs(sum) >> (bw + bh));
  338. #else
  339. ac_thr *= ac_thr_factor(cpi->oxcf.speed, cpi->common.width,
  340. cpi->common.height, abs(sum) >> (bw + bh));
  341. #endif
  342. if (cpi->common.tx_mode == TX_MODE_SELECT) {
  343. if (sse > (var << 2))
  344. tx_size = VPXMIN(max_txsize_lookup[bsize],
  345. tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  346. else
  347. tx_size = TX_8X8;
  348. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
  349. cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
  350. tx_size = TX_8X8;
  351. else if (tx_size > TX_16X16)
  352. tx_size = TX_16X16;
  353. } else {
  354. tx_size = VPXMIN(max_txsize_lookup[bsize],
  355. tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  356. }
  357. assert(tx_size >= TX_8X8);
  358. xd->mi[0]->tx_size = tx_size;
  359. // Evaluate if the partition block is a skippable block in Y plane.
  360. {
  361. unsigned int sse16x16[16] = { 0 };
  362. int sum16x16[16] = { 0 };
  363. unsigned int var16x16[16] = { 0 };
  364. const int num16x16 = num8x8 >> 2;
  365. unsigned int sse32x32[4] = { 0 };
  366. int sum32x32[4] = { 0 };
  367. unsigned int var32x32[4] = { 0 };
  368. const int num32x32 = num8x8 >> 4;
  369. int ac_test = 1;
  370. int dc_test = 1;
  371. const int num = (tx_size == TX_8X8)
  372. ? num8x8
  373. : ((tx_size == TX_16X16) ? num16x16 : num32x32);
  374. const unsigned int *sse_tx =
  375. (tx_size == TX_8X8) ? sse8x8
  376. : ((tx_size == TX_16X16) ? sse16x16 : sse32x32);
  377. const unsigned int *var_tx =
  378. (tx_size == TX_8X8) ? var8x8
  379. : ((tx_size == TX_16X16) ? var16x16 : var32x32);
  380. // Calculate variance if tx_size > TX_8X8
  381. if (tx_size >= TX_16X16)
  382. calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
  383. sum16x16);
  384. if (tx_size == TX_32X32)
  385. calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
  386. sse32x32, sum32x32);
  387. // Skipping test
  388. x->skip_txfm[0] = SKIP_TXFM_NONE;
  389. for (k = 0; k < num; k++)
  390. // Check if all ac coefficients can be quantized to zero.
  391. if (!(var_tx[k] < ac_thr || var == 0)) {
  392. ac_test = 0;
  393. break;
  394. }
  395. for (k = 0; k < num; k++)
  396. // Check if dc coefficient can be quantized to zero.
  397. if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
  398. dc_test = 0;
  399. break;
  400. }
  401. if (ac_test) {
  402. x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;
  403. if (dc_test) x->skip_txfm[0] = SKIP_TXFM_AC_DC;
  404. } else if (dc_test) {
  405. skip_dc = 1;
  406. }
  407. }
  408. if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
  409. int skip_uv[2] = { 0 };
  410. unsigned int var_uv[2];
  411. unsigned int sse_uv[2];
  412. *out_rate_sum = 0;
  413. *out_dist_sum = sse << 4;
  414. // Transform skipping test in UV planes.
  415. for (i = 1; i <= 2; i++) {
  416. if (cpi->oxcf.speed < 8 || x->color_sensitivity[i - 1]) {
  417. struct macroblock_plane *const p = &x->plane[i];
  418. struct macroblockd_plane *const pd = &xd->plane[i];
  419. const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd);
  420. const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
  421. const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd);
  422. const int uv_bw = b_width_log2_lookup[uv_bsize];
  423. const int uv_bh = b_height_log2_lookup[uv_bsize];
  424. const int sf = (uv_bw - b_width_log2_lookup[unit_size]) +
  425. (uv_bh - b_height_log2_lookup[unit_size]);
  426. const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf);
  427. const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf);
  428. int j = i - 1;
  429. vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);
  430. var_uv[j] = cpi->fn_ptr[uv_bsize].vf(
  431. p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse_uv[j]);
  432. if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
  433. (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
  434. skip_uv[j] = 1;
  435. else
  436. break;
  437. } else {
  438. skip_uv[i - 1] = 1;
  439. }
  440. }
  441. // If the transforms in the YUV planes are all skippable, the mode search
  442. // checks fewer inter modes and does not check intra modes.
  443. if (skip_uv[0] & skip_uv[1]) {
  444. *early_term = 1;
  445. }
  446. return;
  447. }
  448. if (!skip_dc) {
  449. #if CONFIG_VP9_HIGHBITDEPTH
  450. vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
  451. dc_quant >> (xd->bd - 5), &rate, &dist);
  452. #else
  453. vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
  454. dc_quant >> 3, &rate, &dist);
  455. #endif // CONFIG_VP9_HIGHBITDEPTH
  456. }
  457. if (!skip_dc) {
  458. *out_rate_sum = rate >> 1;
  459. *out_dist_sum = dist << 3;
  460. } else {
  461. *out_rate_sum = 0;
  462. *out_dist_sum = (sse - var) << 4;
  463. }
  464. #if CONFIG_VP9_HIGHBITDEPTH
  465. vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
  466. ac_quant >> (xd->bd - 5), &rate, &dist);
  467. #else
  468. vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> 3,
  469. &rate, &dist);
  470. #endif // CONFIG_VP9_HIGHBITDEPTH
  471. *out_rate_sum += rate;
  472. *out_dist_sum += dist << 4;
  473. }
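  // Lighter-weight version of model_rd_for_sb_y_large(): one variance
  // computation for the whole partition and an averaged per-transform-block
  // skip test, with no UV check and no early termination.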
  474. static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
  475. MACROBLOCKD *xd, int *out_rate_sum,
  476. int64_t *out_dist_sum, unsigned int *var_y,
  477. unsigned int *sse_y) {
  478. // Note our transform coeffs are 8 times an orthogonal transform.
  479. // Hence quantizer step is also 8 times. To get effective quantizer
  480. // we need to divide by 8 before sending to modeling function.
  481. unsigned int sse;
  482. int rate;
  483. int64_t dist;
  484. struct macroblock_plane *const p = &x->plane[0];
  485. struct macroblockd_plane *const pd = &xd->plane[0];
  486. const int64_t dc_thr = p->quant_thred[0] >> 6;
  487. const int64_t ac_thr = p->quant_thred[1] >> 6;
  488. const uint32_t dc_quant = pd->dequant[0];
  489. const uint32_t ac_quant = pd->dequant[1];
  490. unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
  491. pd->dst.buf, pd->dst.stride, &sse);
  492. int skip_dc = 0;
  493. *var_y = var;
  494. *sse_y = sse;
  495. if (cpi->common.tx_mode == TX_MODE_SELECT) {
  496. if (sse > (var << 2))
  497. xd->mi[0]->tx_size =
  498. VPXMIN(max_txsize_lookup[bsize],
  499. tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  500. else
  501. xd->mi[0]->tx_size = TX_8X8;
  502. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
  503. cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
  504. xd->mi[0]->tx_size = TX_8X8;
  505. else if (xd->mi[0]->tx_size > TX_16X16)
  506. xd->mi[0]->tx_size = TX_16X16;
  507. } else {
  508. xd->mi[0]->tx_size =
  509. VPXMIN(max_txsize_lookup[bsize],
  510. tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  511. }
  512. // Evaluate if the partition block is a skippable block in Y plane.
  513. {
  514. const BLOCK_SIZE unit_size = txsize_to_bsize[xd->mi[0]->tx_size];
  515. const unsigned int num_blk_log2 =
  516. (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
  517. (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
  518. const unsigned int sse_tx = sse >> num_blk_log2;
  519. const unsigned int var_tx = var >> num_blk_log2;
  520. x->skip_txfm[0] = SKIP_TXFM_NONE;
  521. // Check if all ac coefficients can be quantized to zero.
  522. if (var_tx < ac_thr || var == 0) {
  523. x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;
  524. // Check if dc coefficient can be quantized to zero.
  525. if (sse_tx - var_tx < dc_thr || sse == var)
  526. x->skip_txfm[0] = SKIP_TXFM_AC_DC;
  527. } else {
  528. if (sse_tx - var_tx < dc_thr || sse == var) skip_dc = 1;
  529. }
  530. }
  531. if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
  532. *out_rate_sum = 0;
  533. *out_dist_sum = sse << 4;
  534. return;
  535. }
  536. if (!skip_dc) {
  537. #if CONFIG_VP9_HIGHBITDEPTH
  538. vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
  539. dc_quant >> (xd->bd - 5), &rate, &dist);
  540. #else
  541. vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
  542. dc_quant >> 3, &rate, &dist);
  543. #endif // CONFIG_VP9_HIGHBITDEPTH
  544. }
  545. if (!skip_dc) {
  546. *out_rate_sum = rate >> 1;
  547. *out_dist_sum = dist << 3;
  548. } else {
  549. *out_rate_sum = 0;
  550. *out_dist_sum = (sse - var) << 4;
  551. }
  552. #if CONFIG_VP9_HIGHBITDEPTH
  553. vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
  554. ac_quant >> (xd->bd - 5), &rate, &dist);
  555. #else
  556. vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> 3,
  557. &rate, &dist);
  558. #endif // CONFIG_VP9_HIGHBITDEPTH
  559. *out_rate_sum += rate;
  560. *out_dist_sum += dist << 4;
  561. }
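  // Measure the Y residual cost directly: per transform block, apply a
  // Hadamard transform (or the 4x4 forward transform) and vp9_quantize_fp,
  // approximate rate by the SATD of the quantized coefficients and distortion
  // by vp9_block_error_fp. Falls back to model_rd_for_sb_y() on the high
  // bit-depth and use_simple_block_yrd paths.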
  562. static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
  563. int *skippable, int64_t *sse, BLOCK_SIZE bsize,
  564. TX_SIZE tx_size, int rd_computed) {
  565. MACROBLOCKD *xd = &x->e_mbd;
  566. const struct macroblockd_plane *pd = &xd->plane[0];
  567. struct macroblock_plane *const p = &x->plane[0];
  568. const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  569. const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  570. const int step = 1 << (tx_size << 1);
  571. const int block_step = (1 << tx_size);
  572. int block = 0, r, c;
  573. const int max_blocks_wide =
  574. num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> 5);
  575. const int max_blocks_high =
  576. num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> 5);
  577. int eob_cost = 0;
  578. const int bw = 4 * num_4x4_w;
  579. const int bh = 4 * num_4x4_h;
  580. #if CONFIG_VP9_HIGHBITDEPTH
  581. // TODO(jingning): Implement the high bit-depth Hadamard transforms and
  582. // remove this check condition.
  583. // TODO(marpan): Use this path (model_rd) for 8bit under certain conditions
  584. // for now, as the vp9_quantize_fp below for highbitdepth build is slow.
  585. if (xd->bd != 8 ||
  586. (cpi->oxcf.speed > 5 && cpi->common.frame_type != KEY_FRAME &&
  587. bsize < BLOCK_32X32)) {
  588. unsigned int var_y, sse_y;
  589. (void)tx_size;
  590. if (!rd_computed)
  591. model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist,
  592. &var_y, &sse_y);
  593. *sse = INT_MAX;
  594. *skippable = 0;
  595. return;
  596. }
  597. #endif
  598. if (cpi->sf.use_simple_block_yrd && cpi->common.frame_type != KEY_FRAME &&
  599. (bsize < BLOCK_32X32 ||
  600. (cpi->use_svc &&
  601. (bsize < BLOCK_32X32 || cpi->svc.temporal_layer_id > 0)))) {
  602. unsigned int var_y, sse_y;
  603. (void)tx_size;
  604. if (!rd_computed)
  605. model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist,
  606. &var_y, &sse_y);
  607. *sse = INT_MAX;
  608. *skippable = 0;
  609. return;
  610. }
  611. (void)cpi;
  612. // The max tx_size passed in is TX_16X16.
  613. assert(tx_size != TX_32X32);
  614. vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
  615. pd->dst.buf, pd->dst.stride);
  616. *skippable = 1;
  617. // Keep track of the row and column of the blocks we use so that we know
  618. // if we are in the unrestricted motion border.
  619. for (r = 0; r < max_blocks_high; r += block_step) {
  620. for (c = 0; c < num_4x4_w; c += block_step) {
  621. if (c < max_blocks_wide) {
  622. const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
  623. tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  624. tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  625. tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  626. uint16_t *const eob = &p->eobs[block];
  627. const int diff_stride = bw;
  628. const int16_t *src_diff;
  629. src_diff = &p->src_diff[(r * diff_stride + c) << 2];
  630. switch (tx_size) {
  631. case TX_16X16:
  632. vpx_hadamard_16x16(src_diff, diff_stride, coeff);
  633. vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp,
  634. qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
  635. scan_order->iscan);
  636. break;
  637. case TX_8X8:
  638. vpx_hadamard_8x8(src_diff, diff_stride, coeff);
  639. vp9_quantize_fp(coeff, 64, x->skip_block, p->round_fp, p->quant_fp,
  640. qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
  641. scan_order->iscan);
  642. break;
  643. case TX_4X4:
  644. x->fwd_txfm4x4(src_diff, coeff, diff_stride);
  645. vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
  646. qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
  647. scan_order->iscan);
  648. break;
  649. default: assert(0); break;
  650. }
  651. *skippable &= (*eob == 0);
  652. eob_cost += 1;
  653. }
  654. block += step;
  655. }
  656. }
  657. this_rdc->rate = 0;
  658. if (*sse < INT64_MAX) {
  659. *sse = (*sse << 6) >> 2;
  660. if (*skippable) {
  661. this_rdc->dist = *sse;
  662. return;
  663. }
  664. }
  665. block = 0;
  666. this_rdc->dist = 0;
  667. for (r = 0; r < max_blocks_high; r += block_step) {
  668. for (c = 0; c < num_4x4_w; c += block_step) {
  669. if (c < max_blocks_wide) {
  670. tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  671. tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  672. tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  673. uint16_t *const eob = &p->eobs[block];
  674. if (*eob == 1)
  675. this_rdc->rate += (int)abs(qcoeff[0]);
  676. else if (*eob > 1)
  677. this_rdc->rate += vpx_satd(qcoeff, step << 4);
  678. this_rdc->dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2;
  679. }
  680. block += step;
  681. }
  682. }
  683. // If skippable is set, rate gets clobbered later.
  684. this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT);
  685. this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT);
  686. }
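  // Model rate/distortion for the chroma planes in [start_plane, stop_plane],
  // skipping planes whose color_sensitivity flag is not set, and fold the
  // per-plane variance/SSE back into *var_y / *sse_y.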
  687. static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
  688. MACROBLOCK *x, MACROBLOCKD *xd,
  689. RD_COST *this_rdc, unsigned int *var_y,
  690. unsigned int *sse_y, int start_plane,
  691. int stop_plane) {
  692. // Note our transform coeffs are 8 times an orthogonal transform.
  693. // Hence quantizer step is also 8 times. To get effective quantizer
  694. // we need to divide by 8 before sending to modeling function.
  695. unsigned int sse;
  696. int rate;
  697. int64_t dist;
  698. int i;
  699. #if CONFIG_VP9_HIGHBITDEPTH
  700. uint64_t tot_var = *var_y;
  701. uint64_t tot_sse = *sse_y;
  702. #else
  703. uint32_t tot_var = *var_y;
  704. uint32_t tot_sse = *sse_y;
  705. #endif
  706. this_rdc->rate = 0;
  707. this_rdc->dist = 0;
  708. for (i = start_plane; i <= stop_plane; ++i) {
  709. struct macroblock_plane *const p = &x->plane[i];
  710. struct macroblockd_plane *const pd = &xd->plane[i];
  711. const uint32_t dc_quant = pd->dequant[0];
  712. const uint32_t ac_quant = pd->dequant[1];
  713. const BLOCK_SIZE bs = plane_bsize;
  714. unsigned int var;
  715. if (!x->color_sensitivity[i - 1]) continue;
  716. var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
  717. pd->dst.stride, &sse);
  718. assert(sse >= var);
  719. tot_var += var;
  720. tot_sse += sse;
  721. #if CONFIG_VP9_HIGHBITDEPTH
  722. vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
  723. dc_quant >> (xd->bd - 5), &rate, &dist);
  724. #else
  725. vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
  726. dc_quant >> 3, &rate, &dist);
  727. #endif // CONFIG_VP9_HIGHBITDEPTH
  728. this_rdc->rate += rate >> 1;
  729. this_rdc->dist += dist << 3;
  730. #if CONFIG_VP9_HIGHBITDEPTH
  731. vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
  732. ac_quant >> (xd->bd - 5), &rate, &dist);
  733. #else
  734. vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs], ac_quant >> 3,
  735. &rate, &dist);
  736. #endif // CONFIG_VP9_HIGHBITDEPTH
  737. this_rdc->rate += rate;
  738. this_rdc->dist += dist << 4;
  739. }
  740. #if CONFIG_VP9_HIGHBITDEPTH
  741. *var_y = tot_var > UINT32_MAX ? UINT32_MAX : (uint32_t)tot_var;
  742. *sse_y = tot_sse > UINT32_MAX ? UINT32_MAX : (uint32_t)tot_sse;
  743. #else
  744. *var_y = tot_var;
  745. *sse_y = tot_sse;
  746. #endif
  747. }
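  // Tiny pool allocator over the PRED_BUFFER array used when inter
  // predictions are reused across modes: get_pred_buffer() hands out the
  // first free slot, free_pred_buffer() returns it.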
  748. static int get_pred_buffer(PRED_BUFFER *p, int len) {
  749. int i;
  750. for (i = 0; i < len; i++) {
  751. if (!p[i].in_use) {
  752. p[i].in_use = 1;
  753. return i;
  754. }
  755. }
  756. return -1;
  757. }
  758. static void free_pred_buffer(PRED_BUFFER *p) {
  759. if (p != NULL) p->in_use = 0;
  760. }
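  // Encode-breakout early skip: when motion is small and the Y (then U, V)
  // variance and DC error fall below thresholds derived from
  // x->encode_breakout and the dequant values, set x->skip and charge only
  // the inter mode cost for this block.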
  761. static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
  762. int mi_row, int mi_col,
  763. MV_REFERENCE_FRAME ref_frame,
  764. PREDICTION_MODE this_mode, unsigned int var_y,
  765. unsigned int sse_y,
  766. struct buf_2d yv12_mb[][MAX_MB_PLANE],
  767. int *rate, int64_t *dist) {
  768. MACROBLOCKD *xd = &x->e_mbd;
  769. MODE_INFO *const mi = xd->mi[0];
  770. const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  771. unsigned int var = var_y, sse = sse_y;
  772. // Skipping threshold for ac.
  773. unsigned int thresh_ac;
  774. // Skipping threshold for dc.
  775. unsigned int thresh_dc;
  776. int motion_low = 1;
  777. if (mi->mv[0].as_mv.row > 64 || mi->mv[0].as_mv.row < -64 ||
  778. mi->mv[0].as_mv.col > 64 || mi->mv[0].as_mv.col < -64)
  779. motion_low = 0;
  780. if (x->encode_breakout > 0 && motion_low == 1) {
  781. // Set a maximum threshold to avoid a big PSNR loss in the low bit rate
  782. // case. Use an extremely low threshold for static frames to limit
  783. // skipping.
  784. const unsigned int max_thresh = 36000;
  785. // The encode_breakout input
  786. const unsigned int min_thresh =
  787. VPXMIN(((unsigned int)x->encode_breakout << 4), max_thresh);
  788. #if CONFIG_VP9_HIGHBITDEPTH
  789. const int shift = (xd->bd << 1) - 16;
  790. #endif
  791. // Calculate threshold according to dequant value.
  792. thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3;
  793. #if CONFIG_VP9_HIGHBITDEPTH
  794. if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
  795. thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
  796. }
  797. #endif // CONFIG_VP9_HIGHBITDEPTH
  798. thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
  799. // Adjust ac threshold according to partition size.
  800. thresh_ac >>=
  801. 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
  802. thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
  803. #if CONFIG_VP9_HIGHBITDEPTH
  804. if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
  805. thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
  806. }
  807. #endif // CONFIG_VP9_HIGHBITDEPTH
  808. } else {
  809. thresh_ac = 0;
  810. thresh_dc = 0;
  811. }
  812. // Y skipping condition checking for ac and dc.
  813. if (var <= thresh_ac && (sse - var) <= thresh_dc) {
  814. unsigned int sse_u, sse_v;
  815. unsigned int var_u, var_v;
  816. unsigned int thresh_ac_uv = thresh_ac;
  817. unsigned int thresh_dc_uv = thresh_dc;
  818. if (x->sb_is_skin) {
  819. thresh_ac_uv = 0;
  820. thresh_dc_uv = 0;
  821. }
  822. // Skip UV prediction unless breakout is zero (lossless) to save
  823. // computation with low impact on the result
  824. if (x->encode_breakout == 0) {
  825. xd->plane[1].pre[0] = yv12_mb[ref_frame][1];
  826. xd->plane[2].pre[0] = yv12_mb[ref_frame][2];
  827. vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
  828. }
  829. var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, x->plane[1].src.stride,
  830. xd->plane[1].dst.buf,
  831. xd->plane[1].dst.stride, &sse_u);
  832. // U skipping condition checking
  833. if (((var_u << 2) <= thresh_ac_uv) && (sse_u - var_u <= thresh_dc_uv)) {
  834. var_v = cpi->fn_ptr[uv_size].vf(
  835. x->plane[2].src.buf, x->plane[2].src.stride, xd->plane[2].dst.buf,
  836. xd->plane[2].dst.stride, &sse_v);
  837. // V skipping condition checking
  838. if (((var_v << 2) <= thresh_ac_uv) && (sse_v - var_v <= thresh_dc_uv)) {
  839. x->skip = 1;
  840. // The cost of skip bit needs to be added.
  841. *rate = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
  842. [INTER_OFFSET(this_mode)];
  843. // More on this part of rate
  844. // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
  845. // Scaling factor for SSE from spatial domain to frequency
  846. // domain is 16. Adjust distortion accordingly.
  847. // TODO(yunqingwang): In this function, only y-plane dist is
  848. // calculated.
  849. *dist = (sse << 4); // + ((sse_u + sse_v) << 4);
  850. // *disable_skip = 1;
  851. }
  852. }
  853. }
  854. }
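  // estimate_block_intra() below is the per-transform-block callback passed to
  // vp9_foreach_transformed_block_in_plane(): it predicts the block with
  // args->mode and scores it with block_yrd() (luma) or model_rd_for_sb_uv()
  // (chroma), accumulating rate/distortion into args->rdc.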
  855. struct estimate_block_intra_args {
  856. VP9_COMP *cpi;
  857. MACROBLOCK *x;
  858. PREDICTION_MODE mode;
  859. int skippable;
  860. RD_COST *rdc;
  861. };
  862. static void estimate_block_intra(int plane, int block, int row, int col,
  863. BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
  864. void *arg) {
  865. struct estimate_block_intra_args *const args = arg;
  866. VP9_COMP *const cpi = args->cpi;
  867. MACROBLOCK *const x = args->x;
  868. MACROBLOCKD *const xd = &x->e_mbd;
  869. struct macroblock_plane *const p = &x->plane[0];
  870. struct macroblockd_plane *const pd = &xd->plane[0];
  871. const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
  872. uint8_t *const src_buf_base = p->src.buf;
  873. uint8_t *const dst_buf_base = pd->dst.buf;
  874. const int src_stride = p->src.stride;
  875. const int dst_stride = pd->dst.stride;
  876. RD_COST this_rdc;
  877. (void)block;
  878. p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
  879. pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
  880. // Use source buffer as an approximation for the fully reconstructed buffer.
  881. vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize], tx_size,
  882. args->mode, x->skip_encode ? p->src.buf : pd->dst.buf,
  883. x->skip_encode ? src_stride : dst_stride, pd->dst.buf,
  884. dst_stride, col, row, plane);
  885. if (plane == 0) {
  886. int64_t this_sse = INT64_MAX;
  887. // TODO(jingning): This needs further refactoring.
  888. block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx,
  889. VPXMIN(tx_size, TX_16X16), 0);
  890. } else {
  891. unsigned int var = 0;
  892. unsigned int sse = 0;
  893. model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &var, &sse, plane,
  894. plane);
  895. }
  896. p->src.buf = src_buf_base;
  897. pd->dst.buf = dst_buf_base;
  898. args->rdc->rate += this_rdc.rate;
  899. args->rdc->dist += this_rdc.dist;
  900. }
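  // The tables below map a (reference frame, mode) pair onto the THR_MODES
  // index used for the adaptive RD threshold bookkeeping; mode_offset() gives
  // the column within mode_idx[].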
  901. static const THR_MODES mode_idx[MAX_REF_FRAMES][4] = {
  902. { THR_DC, THR_V_PRED, THR_H_PRED, THR_TM },
  903. { THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV },
  904. { THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG },
  905. { THR_NEARESTA, THR_NEARA, THR_ZEROA, THR_NEWA },
  906. };
  907. static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
  908. TM_PRED };
  909. static int mode_offset(const PREDICTION_MODE mode) {
  910. if (mode >= NEARESTMV) {
  911. return INTER_OFFSET(mode);
  912. } else {
  913. switch (mode) {
  914. case DC_PRED: return 0;
  915. case V_PRED: return 1;
  916. case H_PRED: return 2;
  917. case TM_PRED: return 3;
  918. default: return -1;
  919. }
  920. }
  921. }
  922. static INLINE int rd_less_than_thresh_row_mt(int64_t best_rd, int thresh,
  923. const int *const thresh_fact) {
  924. int is_rd_less_than_thresh;
  925. is_rd_less_than_thresh =
  926. best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) || thresh == INT_MAX;
  927. return is_rd_less_than_thresh;
  928. }
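  // Adaptive RD threshold update (row-mt and non-row-mt variants): when a
  // mode is picked, its factor decays by 1/16; otherwise it grows by
  // RD_THRESH_INC up to a cap, so rarely chosen modes become progressively
  // harder to re-enter.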
  929. static INLINE void update_thresh_freq_fact_row_mt(
  930. VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance,
  931. int thresh_freq_fact_idx, MV_REFERENCE_FRAME ref_frame,
  932. THR_MODES best_mode_idx, PREDICTION_MODE mode) {
  933. THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)];
  934. int freq_fact_idx = thresh_freq_fact_idx + thr_mode_idx;
  935. int *freq_fact = &tile_data->row_base_thresh_freq_fact[freq_fact_idx];
  936. if (thr_mode_idx == best_mode_idx)
  937. *freq_fact -= (*freq_fact >> 4);
  938. else if (cpi->sf.limit_newmv_early_exit && mode == NEWMV &&
  939. ref_frame == LAST_FRAME && source_variance < 5) {
  940. *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, 32);
  941. } else {
  942. *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC,
  943. cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
  944. }
  945. }
  946. static INLINE void update_thresh_freq_fact(
  947. VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance,
  948. BLOCK_SIZE bsize, MV_REFERENCE_FRAME ref_frame, THR_MODES best_mode_idx,
  949. PREDICTION_MODE mode) {
  950. THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)];
  951. int *freq_fact = &tile_data->thresh_freq_fact[bsize][thr_mode_idx];
  952. if (thr_mode_idx == best_mode_idx)
  953. *freq_fact -= (*freq_fact >> 4);
  954. else if (cpi->sf.limit_newmv_early_exit && mode == NEWMV &&
  955. ref_frame == LAST_FRAME && source_variance < 5) {
  956. *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, 32);
  957. } else {
  958. *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC,
  959. cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
  960. }
  961. }
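  // Non-RD intra mode decision: try DC_PRED, V_PRED and H_PRED at the largest
  // allowed transform size and keep the mode with the lowest RDCOST.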
  962. void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
  963. BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  964. MACROBLOCKD *const xd = &x->e_mbd;
  965. MODE_INFO *const mi = xd->mi[0];
  966. RD_COST this_rdc, best_rdc;
  967. PREDICTION_MODE this_mode;
  968. struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
  969. const TX_SIZE intra_tx_size =
  970. VPXMIN(max_txsize_lookup[bsize],
  971. tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  972. MODE_INFO *const mic = xd->mi[0];
  973. int *bmode_costs;
  974. const MODE_INFO *above_mi = xd->above_mi;
  975. const MODE_INFO *left_mi = xd->left_mi;
  976. const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  977. const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  978. bmode_costs = cpi->y_mode_costs[A][L];
  979. (void)ctx;
  980. vp9_rd_cost_reset(&best_rdc);
  981. vp9_rd_cost_reset(&this_rdc);
  982. mi->ref_frame[0] = INTRA_FRAME;
  983. // Initialize interp_filter here so we do not have to check for inter block
  984. // modes in get_pred_context_switchable_interp()
  985. mi->interp_filter = SWITCHABLE_FILTERS;
  986. mi->mv[0].as_int = INVALID_MV;
  987. mi->uv_mode = DC_PRED;
  988. memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
  989. // Change the limit of this loop to add other intra prediction
  990. // mode tests.
  991. for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
  992. this_rdc.dist = this_rdc.rate = 0;
  993. args.mode = this_mode;
  994. args.skippable = 1;
  995. args.rdc = &this_rdc;
  996. mi->tx_size = intra_tx_size;
  997. vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
  998. &args);
  999. if (args.skippable) {
  1000. x->skip_txfm[0] = SKIP_TXFM_AC_DC;
  1001. this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
  1002. } else {
  1003. x->skip_txfm[0] = SKIP_TXFM_NONE;
  1004. this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
  1005. }
  1006. this_rdc.rate += bmode_costs[this_mode];
  1007. this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
  1008. if (this_rdc.rdcost < best_rdc.rdcost) {
  1009. best_rdc = this_rdc;
  1010. mi->mode = this_mode;
  1011. }
  1012. }
  1013. *rd_cost = best_rdc;
  1014. }
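  // Derive per-reference-frame costs from the intra/inter probability and the
  // single-reference tree probabilities.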
  1015. static void init_ref_frame_cost(VP9_COMMON *const cm, MACROBLOCKD *const xd,
  1016. int ref_frame_cost[MAX_REF_FRAMES]) {
  1017. vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
  1018. vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
  1019. vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
  1020. ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
  1021. ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] =
  1022. ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1);
  1023. ref_frame_cost[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
  1024. ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  1025. ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  1026. ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
  1027. ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
  1028. }
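  // (reference frame, prediction mode) pairs evaluated by the non-RD inter
  // mode search, in the order they are tried; the SVC set below restricts the
  // search to LAST and GOLDEN references.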
  1029. typedef struct {
  1030. MV_REFERENCE_FRAME ref_frame;
  1031. PREDICTION_MODE pred_mode;
  1032. } REF_MODE;
  1033. #define RT_INTER_MODES 12
  1034. static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
  1035. { LAST_FRAME, ZEROMV }, { LAST_FRAME, NEARESTMV },
  1036. { GOLDEN_FRAME, ZEROMV }, { LAST_FRAME, NEARMV },
  1037. { LAST_FRAME, NEWMV }, { GOLDEN_FRAME, NEARESTMV },
  1038. { GOLDEN_FRAME, NEARMV }, { GOLDEN_FRAME, NEWMV },
  1039. { ALTREF_FRAME, ZEROMV }, { ALTREF_FRAME, NEARESTMV },
  1040. { ALTREF_FRAME, NEARMV }, { ALTREF_FRAME, NEWMV }
  1041. };
  1042. static const REF_MODE ref_mode_set_svc[RT_INTER_MODES] = {
  1043. { LAST_FRAME, ZEROMV }, { LAST_FRAME, NEARESTMV },
  1044. { LAST_FRAME, NEARMV }, { GOLDEN_FRAME, ZEROMV },
  1045. { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
  1046. { LAST_FRAME, NEWMV }, { GOLDEN_FRAME, NEWMV }
  1047. };
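  // Set up the reference buffers and the NEARESTMV/NEARMV/NEWMV candidates
  // for one reference frame, using vp9_find_mv_refs() when previous-frame MVs
  // are available and mv_refs_rt() otherwise; unavailable references are
  // recorded in *ref_frame_skip_mask.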
  1048. static INLINE void find_predictors(
  1049. VP9_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
  1050. int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
  1051. int const_motion[MAX_REF_FRAMES], int *ref_frame_skip_mask,
  1052. const int flag_list[4], TileDataEnc *tile_data, int mi_row, int mi_col,
  1053. struct buf_2d yv12_mb[4][MAX_MB_PLANE], BLOCK_SIZE bsize,
  1054. int force_skip_low_temp_var) {
  1055. VP9_COMMON *const cm = &cpi->common;
  1056. MACROBLOCKD *const xd = &x->e_mbd;
  1057. const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  1058. TileInfo *const tile_info = &tile_data->tile_info;
  1059. // TODO(jingning) placeholder for inter-frame non-RD mode decision.
  1060. x->pred_mv_sad[ref_frame] = INT_MAX;
  1061. frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
  1062. frame_mv[ZEROMV][ref_frame].as_int = 0;
  1063. // This needs various further optimizations; to be continued.
  1064. if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
  1065. int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  1066. const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  1067. vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
  1068. if (cm->use_prev_frame_mvs) {
  1069. vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, candidates, mi_row, mi_col,
  1070. x->mbmi_ext->mode_context);
  1071. } else {
  1072. const_motion[ref_frame] =
  1073. mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
  1074. candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
  1075. (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
  1076. }
  1077. vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
  1078. &frame_mv[NEARESTMV][ref_frame],
  1079. &frame_mv[NEARMV][ref_frame]);
  1080. // Early exit for golden frame if force_skip_low_temp_var is set.
  1081. if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8 &&
  1082. !(force_skip_low_temp_var && ref_frame == GOLDEN_FRAME)) {
  1083. vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
  1084. bsize);
  1085. }
  1086. } else {
  1087. *ref_frame_skip_mask |= (1 << ref_frame);
  1088. }
  1089. }
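  // Bias the RD cost of NEWMV candidates whose MV differs from the average of
  // the above/left MVs by more than 48 (1/8-pel units), and discount
  // low-motion LAST modes on noisy or low-variance/high-sum-diff content.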
  1090. static void vp9_NEWMV_diff_bias(const NOISE_ESTIMATE *ne, MACROBLOCKD *xd,
  1091. PREDICTION_MODE this_mode, RD_COST *this_rdc,
  1092. BLOCK_SIZE bsize, int mv_row, int mv_col,
  1093. int is_last_frame, int lowvar_highsumdiff,
  1094. int is_skin) {
  1095. // Bias against MVs associated with NEWMV mode that are very different from
  1096. // top/left neighbors.
  1097. if (this_mode == NEWMV) {
  1098. int al_mv_average_row;
  1099. int al_mv_average_col;
  1100. int left_row, left_col;
  1101. int row_diff, col_diff;
  1102. int above_mv_valid = 0;
  1103. int left_mv_valid = 0;
  1104. int above_row = 0;
  1105. int above_col = 0;
  1106. if (xd->above_mi) {
  1107. above_mv_valid = xd->above_mi->mv[0].as_int != INVALID_MV;
  1108. above_row = xd->above_mi->mv[0].as_mv.row;
  1109. above_col = xd->above_mi->mv[0].as_mv.col;
  1110. }
  1111. if (xd->left_mi) {
  1112. left_mv_valid = xd->left_mi->mv[0].as_int != INVALID_MV;
  1113. left_row = xd->left_mi->mv[0].as_mv.row;
  1114. left_col = xd->left_mi->mv[0].as_mv.col;
  1115. }
  1116. if (above_mv_valid && left_mv_valid) {
  1117. al_mv_average_row = (above_row + left_row + 1) >> 1;
  1118. al_mv_average_col = (above_col + left_col + 1) >> 1;
  1119. } else if (above_mv_valid) {
  1120. al_mv_average_row = above_row;
  1121. al_mv_average_col = above_col;
  1122. } else if (left_mv_valid) {
  1123. al_mv_average_row = left_row;
  1124. al_mv_average_col = left_col;
  1125. } else {
  1126. al_mv_average_row = al_mv_average_col = 0;
  1127. }
  1128. row_diff = (al_mv_average_row - mv_row);
  1129. col_diff = (al_mv_average_col - mv_col);
  1130. if (row_diff > 48 || row_diff < -48 || col_diff > 48 || col_diff < -48) {
  1131. if (bsize > BLOCK_32X32)
  1132. this_rdc->rdcost = this_rdc->rdcost << 1;
  1133. else
  1134. this_rdc->rdcost = 3 * this_rdc->rdcost >> 1;
  1135. }
  1136. }
  1137. // If noise estimation is enabled, and estimated level is above threshold,
  1138. // add a bias to LAST reference with small motion, for large blocks.
  1139. if (ne->enabled && ne->level >= kMedium && bsize >= BLOCK_32X32 &&
  1140. is_last_frame && mv_row < 8 && mv_row > -8 && mv_col < 8 && mv_col > -8)
  1141. this_rdc->rdcost = 7 * (this_rdc->rdcost >> 3);
  1142. else if (lowvar_highsumdiff && !is_skin && bsize >= BLOCK_16X16 &&
  1143. is_last_frame && mv_row < 16 && mv_row > -16 && mv_col < 16 &&
  1144. mv_col > -16)
  1145. this_rdc->rdcost = 7 * (this_rdc->rdcost >> 3);
  1146. }
  1147. #if CONFIG_VP9_TEMPORAL_DENOISING
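// Packages the pick-mode state (original zero-mv cost, reference frame costs,
// motion vectors, and the best mode/filter/tx-size decisions) for the temporal
// denoiser, so that ZEROMV can be re-checked after denoising.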
  1148. static void vp9_pickmode_ctx_den_update(
  1149. VP9_PICKMODE_CTX_DEN *ctx_den, int64_t zero_last_cost_orig,
  1150. int ref_frame_cost[MAX_REF_FRAMES],
  1151. int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int reuse_inter_pred,
  1152. TX_SIZE best_tx_size, PREDICTION_MODE best_mode,
  1153. MV_REFERENCE_FRAME best_ref_frame, INTERP_FILTER best_pred_filter,
  1154. uint8_t best_mode_skip_txfm) {
  1155. ctx_den->zero_last_cost_orig = zero_last_cost_orig;
  1156. ctx_den->ref_frame_cost = ref_frame_cost;
  1157. ctx_den->frame_mv = frame_mv;
  1158. ctx_den->reuse_inter_pred = reuse_inter_pred;
  1159. ctx_den->best_tx_size = best_tx_size;
  1160. ctx_den->best_mode = best_mode;
  1161. ctx_den->best_ref_frame = best_ref_frame;
  1162. ctx_den->best_pred_filter = best_pred_filter;
  1163. ctx_den->best_mode_skip_txfm = best_mode_skip_txfm;
  1164. }
  1165. static void recheck_zeromv_after_denoising(
  1166. VP9_COMP *cpi, MODE_INFO *const mi, MACROBLOCK *x, MACROBLOCKD *const xd,
  1167. VP9_DENOISER_DECISION decision, VP9_PICKMODE_CTX_DEN *ctx_den,
  1168. struct buf_2d yv12_mb[4][MAX_MB_PLANE], RD_COST *best_rdc, BLOCK_SIZE bsize,
  1169. int mi_row, int mi_col) {
1170. // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on the
1171. // denoised result. Only do this under noise conditions, and if the rdcost of
1172. // ZEROMV on the original source is not significantly higher than the rdcost
1173. // of the best mode.
  1174. if (cpi->noise_estimate.enabled && cpi->noise_estimate.level > kLow &&
  1175. ctx_den->zero_last_cost_orig < (best_rdc->rdcost << 3) &&
  1176. ((ctx_den->best_ref_frame == INTRA_FRAME && decision >= FILTER_BLOCK) ||
  1177. (ctx_den->best_ref_frame == GOLDEN_FRAME &&
  1178. cpi->svc.number_spatial_layers == 1 &&
  1179. decision == FILTER_ZEROMV_BLOCK))) {
  1180. // Check if we should pick ZEROMV on denoised signal.
  1181. int rate = 0;
  1182. int64_t dist = 0;
  1183. uint32_t var_y = UINT_MAX;
  1184. uint32_t sse_y = UINT_MAX;
  1185. RD_COST this_rdc;
  1186. mi->mode = ZEROMV;
  1187. mi->ref_frame[0] = LAST_FRAME;
  1188. mi->ref_frame[1] = NONE;
  1189. mi->mv[0].as_int = 0;
  1190. mi->interp_filter = EIGHTTAP;
  1191. xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
  1192. vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
  1193. model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
  1194. this_rdc.rate = rate + ctx_den->ref_frame_cost[LAST_FRAME] +
  1195. cpi->inter_mode_cost[x->mbmi_ext->mode_context[LAST_FRAME]]
  1196. [INTER_OFFSET(ZEROMV)];
  1197. this_rdc.dist = dist;
  1198. this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, rate, dist);
  1199. // Don't switch to ZEROMV if the rdcost for ZEROMV on denoised source
  1200. // is higher than best_ref mode (on original source).
  1201. if (this_rdc.rdcost > best_rdc->rdcost) {
  1202. this_rdc = *best_rdc;
  1203. mi->mode = ctx_den->best_mode;
  1204. mi->ref_frame[0] = ctx_den->best_ref_frame;
  1205. mi->interp_filter = ctx_den->best_pred_filter;
  1206. if (ctx_den->best_ref_frame == INTRA_FRAME) {
  1207. mi->mv[0].as_int = INVALID_MV;
  1208. mi->interp_filter = SWITCHABLE_FILTERS;
  1209. } else if (ctx_den->best_ref_frame == GOLDEN_FRAME) {
  1210. mi->mv[0].as_int =
  1211. ctx_den->frame_mv[ctx_den->best_mode][ctx_den->best_ref_frame]
  1212. .as_int;
  1213. if (ctx_den->reuse_inter_pred) {
  1214. xd->plane[0].pre[0] = yv12_mb[GOLDEN_FRAME][0];
  1215. vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
  1216. }
  1217. }
  1218. mi->tx_size = ctx_den->best_tx_size;
  1219. x->skip_txfm[0] = ctx_den->best_mode_skip_txfm;
  1220. } else {
  1221. ctx_den->best_ref_frame = LAST_FRAME;
  1222. *best_rdc = this_rdc;
  1223. }
  1224. }
  1225. }
  1226. #endif // CONFIG_VP9_TEMPORAL_DENOISING
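// Looks up the low temporal variance flag for this block, selecting the
// variance_low[] entry that corresponds to the block size and its offset
// within the 64x64 superblock.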
  1227. static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low, int mi_row,
  1228. int mi_col, BLOCK_SIZE bsize) {
  1229. const int i = (mi_row & 0x7) >> 1;
  1230. const int j = (mi_col & 0x7) >> 1;
  1231. int force_skip_low_temp_var = 0;
  1232. // Set force_skip_low_temp_var based on the block size and block offset.
  1233. if (bsize == BLOCK_64X64) {
  1234. force_skip_low_temp_var = variance_low[0];
  1235. } else if (bsize == BLOCK_64X32) {
  1236. if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
  1237. force_skip_low_temp_var = variance_low[1];
  1238. } else if (!(mi_col & 0x7) && (mi_row & 0x7)) {
  1239. force_skip_low_temp_var = variance_low[2];
  1240. }
  1241. } else if (bsize == BLOCK_32X64) {
  1242. if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
  1243. force_skip_low_temp_var = variance_low[3];
  1244. } else if ((mi_col & 0x7) && !(mi_row & 0x7)) {
  1245. force_skip_low_temp_var = variance_low[4];
  1246. }
  1247. } else if (bsize == BLOCK_32X32) {
  1248. if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
  1249. force_skip_low_temp_var = variance_low[5];
  1250. } else if ((mi_col & 0x7) && !(mi_row & 0x7)) {
  1251. force_skip_low_temp_var = variance_low[6];
  1252. } else if (!(mi_col & 0x7) && (mi_row & 0x7)) {
  1253. force_skip_low_temp_var = variance_low[7];
  1254. } else if ((mi_col & 0x7) && (mi_row & 0x7)) {
  1255. force_skip_low_temp_var = variance_low[8];
  1256. }
  1257. } else if (bsize == BLOCK_16X16) {
  1258. force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
  1259. } else if (bsize == BLOCK_32X16) {
  1260. // The col shift index for the second 16x16 block.
  1261. const int j2 = ((mi_col + 2) & 0x7) >> 1;
  1262. // Only if each 16x16 block inside has low temporal variance.
  1263. force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
  1264. variance_low[pos_shift_16x16[i][j2]];
  1265. } else if (bsize == BLOCK_16X32) {
  1266. // The row shift index for the second 16x16 block.
  1267. const int i2 = ((mi_row + 2) & 0x7) >> 1;
  1268. force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
  1269. variance_low[pos_shift_16x16[i2][j]];
  1270. }
  1271. return force_skip_low_temp_var;
  1272. }
  1273. void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
  1274. int mi_row, int mi_col, RD_COST *rd_cost,
  1275. BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  1276. VP9_COMMON *const cm = &cpi->common;
  1277. SPEED_FEATURES *const sf = &cpi->sf;
  1278. const SVC *const svc = &cpi->svc;
  1279. MACROBLOCKD *const xd = &x->e_mbd;
  1280. MODE_INFO *const mi = xd->mi[0];
  1281. struct macroblockd_plane *const pd = &xd->plane[0];
  1282. PREDICTION_MODE best_mode = ZEROMV;
  1283. MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
  1284. MV_REFERENCE_FRAME usable_ref_frame;
  1285. TX_SIZE best_tx_size = TX_SIZES;
  1286. INTERP_FILTER best_pred_filter = EIGHTTAP;
  1287. int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  1288. uint8_t mode_checked[MB_MODE_COUNT][MAX_REF_FRAMES];
  1289. struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  1290. static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
  1291. VP9_ALT_FLAG };
  1292. RD_COST this_rdc, best_rdc;
  1293. uint8_t skip_txfm = SKIP_TXFM_NONE, best_mode_skip_txfm = SKIP_TXFM_NONE;
1294. // var_y and sse_y are saved to be used in the skip check.
  1295. unsigned int var_y = UINT_MAX;
  1296. unsigned int sse_y = UINT_MAX;
  1297. const int intra_cost_penalty =
  1298. vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q);
  1299. int64_t inter_mode_thresh =
  1300. RDCOST(x->rdmult, x->rddiv, intra_cost_penalty, 0);
  1301. const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize];
  1302. const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  1303. int thresh_freq_fact_idx = (sb_row * BLOCK_SIZES + bsize) * MAX_MODES;
  1304. const int *const rd_thresh_freq_fact =
  1305. (cpi->sf.adaptive_rd_thresh_row_mt)
  1306. ? &(tile_data->row_base_thresh_freq_fact[thresh_freq_fact_idx])
  1307. : tile_data->thresh_freq_fact[bsize];
  1308. INTERP_FILTER filter_ref;
  1309. const int bsl = mi_width_log2_lookup[bsize];
  1310. const int pred_filter_search =
  1311. cm->interp_filter == SWITCHABLE
  1312. ? (((mi_row + mi_col) >> bsl) +
  1313. get_chessboard_index(cm->current_video_frame)) &
  1314. 0x1
  1315. : 0;
  1316. int const_motion[MAX_REF_FRAMES] = { 0 };
  1317. const int bh = num_4x4_blocks_high_lookup[bsize] << 2;
  1318. const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
1319. // For speed 6, the result of the interpolation filter search is reused later
1320. // in the actual encoding process.
1321. // tmp[3] points to the dst buffer; the other three point to allocated buffers.
  1322. PRED_BUFFER tmp[4];
  1323. DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]);
  1324. #if CONFIG_VP9_HIGHBITDEPTH
  1325. DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]);
  1326. #endif
  1327. struct buf_2d orig_dst = pd->dst;
  1328. PRED_BUFFER *best_pred = NULL;
  1329. PRED_BUFFER *this_mode_pred = NULL;
  1330. const int pixels_in_block = bh * bw;
  1331. int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
  1332. int ref_frame_skip_mask = 0;
  1333. int idx;
  1334. int best_pred_sad = INT_MAX;
  1335. int best_early_term = 0;
  1336. int ref_frame_cost[MAX_REF_FRAMES];
  1337. int svc_force_zero_mode[3] = { 0 };
  1338. int perform_intra_pred = 1;
  1339. int use_golden_nonzeromv = 1;
  1340. int force_skip_low_temp_var = 0;
  1341. int skip_ref_find_pred[4] = { 0 };
  1342. unsigned int sse_zeromv_normalized = UINT_MAX;
  1343. unsigned int best_sse_sofar = UINT_MAX;
  1344. unsigned int thresh_svc_skip_golden = 500;
  1345. #if CONFIG_VP9_TEMPORAL_DENOISING
  1346. VP9_PICKMODE_CTX_DEN ctx_den;
  1347. int64_t zero_last_cost_orig = INT64_MAX;
  1348. int denoise_svc_pickmode = 1;
  1349. #endif
  1350. INTERP_FILTER filter_gf_svc = EIGHTTAP;
  1351. init_ref_frame_cost(cm, xd, ref_frame_cost);
  1352. memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES);
  1353. if (reuse_inter_pred) {
  1354. int i;
  1355. for (i = 0; i < 3; i++) {
  1356. #if CONFIG_VP9_HIGHBITDEPTH
  1357. if (cm->use_highbitdepth)
  1358. tmp[i].data = CONVERT_TO_BYTEPTR(&pred_buf_16[pixels_in_block * i]);
  1359. else
  1360. tmp[i].data = &pred_buf[pixels_in_block * i];
  1361. #else
  1362. tmp[i].data = &pred_buf[pixels_in_block * i];
  1363. #endif // CONFIG_VP9_HIGHBITDEPTH
  1364. tmp[i].stride = bw;
  1365. tmp[i].in_use = 0;
  1366. }
  1367. tmp[3].data = pd->dst.buf;
  1368. tmp[3].stride = pd->dst.stride;
  1369. tmp[3].in_use = 0;
  1370. }
  1371. x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  1372. x->skip = 0;
1373. // Instead of using vp9_get_pred_context_switchable_interp(xd) to assign
1374. // filter_ref, we use a less strict condition on assigning filter_ref.
1375. // This is to reduce the probability of entering the flow of not assigning
1376. // filter_ref and then skipping the filter search.
  1377. if (xd->above_mi && is_inter_block(xd->above_mi))
  1378. filter_ref = xd->above_mi->interp_filter;
  1379. else if (xd->left_mi && is_inter_block(xd->left_mi))
  1380. filter_ref = xd->left_mi->interp_filter;
  1381. else
  1382. filter_ref = cm->interp_filter;
1383. // Initialize mode decisions.
  1384. vp9_rd_cost_reset(&best_rdc);
  1385. vp9_rd_cost_reset(rd_cost);
  1386. mi->sb_type = bsize;
  1387. mi->ref_frame[0] = NONE;
  1388. mi->ref_frame[1] = NONE;
  1389. mi->tx_size =
  1390. VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cm->tx_mode]);
  1391. if (sf->short_circuit_flat_blocks || sf->limit_newmv_early_exit) {
  1392. #if CONFIG_VP9_HIGHBITDEPTH
  1393. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
  1394. x->source_variance = vp9_high_get_sby_perpixel_variance(
  1395. cpi, &x->plane[0].src, bsize, xd->bd);
  1396. else
  1397. #endif // CONFIG_VP9_HIGHBITDEPTH
  1398. x->source_variance =
  1399. vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  1400. }
  1401. #if CONFIG_VP9_TEMPORAL_DENOISING
  1402. if (cpi->oxcf.noise_sensitivity > 0) {
  1403. if (cpi->use_svc) {
  1404. int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
  1405. cpi->svc.temporal_layer_id,
  1406. cpi->svc.number_temporal_layers);
  1407. LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
  1408. denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame;
  1409. }
  1410. if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
  1411. vp9_denoiser_reset_frame_stats(ctx);
  1412. }
  1413. #endif
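// Choose the highest reference frame to consider (usable_ref_frame): LAST only
// when frames_since_golden is 0 (non-SVC), otherwise GOLDEN; with VBR and
// lagged encoding, ALTREF for frames in an alt-ref GF group.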
  1414. if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc) {
  1415. usable_ref_frame = LAST_FRAME;
  1416. } else {
  1417. usable_ref_frame = GOLDEN_FRAME;
  1418. }
  1419. if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) {
  1420. if (cpi->rc.alt_ref_gf_group || cpi->rc.is_src_frame_alt_ref)
  1421. usable_ref_frame = ALTREF_FRAME;
  1422. if (cpi->rc.is_src_frame_alt_ref) {
  1423. skip_ref_find_pred[LAST_FRAME] = 1;
  1424. skip_ref_find_pred[GOLDEN_FRAME] = 1;
  1425. }
  1426. }
1427. // For SVC mode, on spatial_layer_id > 0: if the reference has a different
1428. // scale, constrain the inter mode search to test only zero motion.
  1429. if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
  1430. cpi->svc.spatial_layer_id > 0) {
  1431. if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
  1432. struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
  1433. if (vp9_is_scaled(sf)) svc_force_zero_mode[LAST_FRAME - 1] = 1;
  1434. }
  1435. if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) {
  1436. struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
  1437. if (vp9_is_scaled(sf)) svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
  1438. }
  1439. }
  1440. if (cpi->sf.short_circuit_low_temp_var) {
  1441. force_skip_low_temp_var =
  1442. get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
1443. // If force_skip_low_temp_var is set, skip the golden reference for
1444. // short-circuit modes 1 and 3.
  1445. if ((cpi->sf.short_circuit_low_temp_var == 1 ||
  1446. cpi->sf.short_circuit_low_temp_var == 3) &&
  1447. force_skip_low_temp_var) {
  1448. usable_ref_frame = LAST_FRAME;
  1449. }
  1450. }
  1451. if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
  1452. !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
  1453. use_golden_nonzeromv = 0;
  1454. if (cpi->oxcf.speed >= 8 && !cpi->use_svc &&
  1455. ((cpi->rc.frames_since_golden + 1) < x->last_sb_high_content ||
  1456. x->last_sb_high_content > 40 || cpi->rc.frames_since_golden > 120))
  1457. usable_ref_frame = LAST_FRAME;
  1458. for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
  1459. if (!skip_ref_find_pred[ref_frame]) {
  1460. find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
  1461. &ref_frame_skip_mask, flag_list, tile_data, mi_row,
  1462. mi_col, yv12_mb, bsize, force_skip_low_temp_var);
  1463. }
  1464. }
  1465. if (cpi->use_svc || cpi->oxcf.speed <= 7 || bsize < BLOCK_32X32)
  1466. x->sb_use_mv_part = 0;
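// Main non-RD mode loop: evaluate each (prediction mode, reference frame) pair
// in ref_mode_set (ref_mode_set_svc for SVC), pruning candidates with the
// speed-feature and rd-threshold checks below.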
  1467. for (idx = 0; idx < RT_INTER_MODES; ++idx) {
  1468. int rate_mv = 0;
  1469. int mode_rd_thresh;
  1470. int mode_index;
  1471. int i;
  1472. int64_t this_sse;
  1473. int is_skippable;
  1474. int this_early_term = 0;
  1475. int rd_computed = 0;
  1476. int inter_mv_mode = 0;
  1477. int skip_this_mv = 0;
  1478. PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
  1479. ref_frame = ref_mode_set[idx].ref_frame;
  1480. if (cpi->use_svc) {
  1481. this_mode = ref_mode_set_svc[idx].pred_mode;
  1482. ref_frame = ref_mode_set_svc[idx].ref_frame;
  1483. }
  1484. if (ref_frame > usable_ref_frame) continue;
  1485. if (skip_ref_find_pred[ref_frame]) continue;
  1486. // For SVC, skip the golden (spatial) reference search if sse of zeromv_last
  1487. // is below threshold.
  1488. if (cpi->use_svc && ref_frame == GOLDEN_FRAME &&
  1489. sse_zeromv_normalized < thresh_svc_skip_golden)
  1490. continue;
  1491. if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
  1492. this_mode != NEARESTMV) {
  1493. continue;
  1494. }
  1495. if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue;
  1496. if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) {
  1497. if (cpi->rc.is_src_frame_alt_ref &&
  1498. (ref_frame != ALTREF_FRAME ||
  1499. frame_mv[this_mode][ref_frame].as_int != 0))
  1500. continue;
  1501. if (cpi->rc.alt_ref_gf_group && cm->show_frame &&
  1502. cpi->rc.frames_since_golden > (cpi->rc.baseline_gf_interval >> 1) &&
  1503. ref_frame == GOLDEN_FRAME &&
  1504. frame_mv[this_mode][ref_frame].as_int != 0)
  1505. continue;
  1506. if (cpi->rc.alt_ref_gf_group && cm->show_frame &&
  1507. cpi->rc.frames_since_golden > 0 &&
  1508. cpi->rc.frames_since_golden < (cpi->rc.baseline_gf_interval >> 1) &&
  1509. ref_frame == ALTREF_FRAME &&
  1510. frame_mv[this_mode][ref_frame].as_int != 0)
  1511. continue;
  1512. }
  1513. if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
  1514. if (const_motion[ref_frame] && this_mode == NEARMV) continue;
  1515. // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
  1516. // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
  1517. // later.
  1518. if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
  1519. frame_mv[this_mode][ref_frame].as_int != 0) {
  1520. continue;
  1521. }
  1522. if (x->content_state_sb != kVeryHighSad &&
  1523. (cpi->sf.short_circuit_low_temp_var >= 2 ||
  1524. (cpi->sf.short_circuit_low_temp_var == 1 && bsize == BLOCK_64X64)) &&
  1525. force_skip_low_temp_var && ref_frame == LAST_FRAME &&
  1526. this_mode == NEWMV) {
  1527. continue;
  1528. }
  1529. if (cpi->use_svc) {
  1530. if (svc_force_zero_mode[ref_frame - 1] &&
  1531. frame_mv[this_mode][ref_frame].as_int != 0)
  1532. continue;
  1533. }
  1534. if (sf->reference_masking &&
  1535. !(frame_mv[this_mode][ref_frame].as_int == 0 &&
  1536. ref_frame == LAST_FRAME)) {
  1537. if (usable_ref_frame < ALTREF_FRAME) {
  1538. if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
  1539. i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
  1540. if ((cpi->ref_frame_flags & flag_list[i]))
  1541. if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
  1542. ref_frame_skip_mask |= (1 << ref_frame);
  1543. }
  1544. } else if (!cpi->rc.is_src_frame_alt_ref &&
  1545. !(frame_mv[this_mode][ref_frame].as_int == 0 &&
  1546. ref_frame == ALTREF_FRAME)) {
  1547. int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
  1548. int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
  1549. if (((cpi->ref_frame_flags & flag_list[ref1]) &&
  1550. (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
  1551. ((cpi->ref_frame_flags & flag_list[ref2]) &&
  1552. (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
  1553. ref_frame_skip_mask |= (1 << ref_frame);
  1554. }
  1555. }
  1556. if (ref_frame_skip_mask & (1 << ref_frame)) continue;
  1557. // Select prediction reference frames.
  1558. for (i = 0; i < MAX_MB_PLANE; i++)
  1559. xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
  1560. mi->ref_frame[0] = ref_frame;
  1561. set_ref_ptrs(cm, xd, ref_frame, NONE);
  1562. mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
  1563. mode_rd_thresh = best_mode_skip_txfm ? rd_threshes[mode_index] << 1
  1564. : rd_threshes[mode_index];
  1565. // Increase mode_rd_thresh value for GOLDEN_FRAME for improved encoding
  1566. // speed with little/no subjective quality loss.
  1567. if (cpi->sf.bias_golden && ref_frame == GOLDEN_FRAME &&
  1568. cpi->rc.frames_since_golden > 4)
  1569. mode_rd_thresh = mode_rd_thresh << 3;
  1570. if ((cpi->sf.adaptive_rd_thresh_row_mt &&
  1571. rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh,
  1572. &rd_thresh_freq_fact[mode_index])) ||
  1573. (!cpi->sf.adaptive_rd_thresh_row_mt &&
  1574. rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
  1575. &rd_thresh_freq_fact[mode_index])))
  1576. continue;
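// For NEWMV, obtain a motion vector: via integer-projection motion estimation
// plus sub-pel refinement for GOLDEN/ALTREF in non-SVC CBR, by evaluating the
// base-layer MV for SVC spatial layers, or with the full combined motion
// search otherwise.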
  1577. if (this_mode == NEWMV) {
  1578. if (ref_frame > LAST_FRAME && !cpi->use_svc &&
  1579. cpi->oxcf.rc_mode == VPX_CBR) {
  1580. int tmp_sad;
  1581. uint32_t dis;
  1582. int cost_list[5] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX };
  1583. if (bsize < BLOCK_16X16) continue;
  1584. tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
  1585. if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue;
  1586. if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad)
  1587. continue;
  1588. frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int;
  1589. rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
  1590. &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  1591. x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  1592. frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
  1593. frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
  1594. cpi->find_fractional_mv_step(
  1595. x, &frame_mv[NEWMV][ref_frame].as_mv,
  1596. &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  1597. cpi->common.allow_high_precision_mv, x->errorperbit,
  1598. &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
  1599. cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
  1600. x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0,
  1601. 0);
  1602. } else if (svc->use_base_mv && svc->spatial_layer_id) {
  1603. if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) {
  1604. const int pre_stride = xd->plane[0].pre[0].stride;
  1605. unsigned int base_mv_sse = UINT_MAX;
  1606. int scale = (cpi->rc.avg_frame_low_motion > 60) ? 2 : 4;
  1607. const uint8_t *const pre_buf =
  1608. xd->plane[0].pre[0].buf +
  1609. (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride +
  1610. (frame_mv[NEWMV][ref_frame].as_mv.col >> 3);
  1611. cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
  1612. pre_buf, pre_stride, &base_mv_sse);
1613. // Exit the NEWMV search if base_mv is (0,0) and bsize < BLOCK_16X16,
1614. // for SVC encoding.
  1615. if (cpi->use_svc && cpi->svc.use_base_mv && bsize < BLOCK_16X16 &&
  1616. frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
  1617. frame_mv[NEWMV][ref_frame].as_mv.col == 0)
  1618. continue;
  1619. // Exit NEWMV search if base_mv_sse is large.
  1620. if (sf->base_mv_aggressive && base_mv_sse > (best_sse_sofar << scale))
  1621. continue;
  1622. if (base_mv_sse < (best_sse_sofar << 1)) {
  1623. // Base layer mv is good.
  1624. // Exit NEWMV search if the base_mv is (0, 0) and sse is low, since
  1625. // (0, 0) mode is already tested.
  1626. unsigned int base_mv_sse_normalized =
  1627. base_mv_sse >>
  1628. (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
  1629. if (sf->base_mv_aggressive && base_mv_sse <= best_sse_sofar &&
  1630. base_mv_sse_normalized < 400 &&
  1631. frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
  1632. frame_mv[NEWMV][ref_frame].as_mv.col == 0)
  1633. continue;
  1634. if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
  1635. &frame_mv[NEWMV][ref_frame], &rate_mv,
  1636. best_rdc.rdcost, 1)) {
  1637. continue;
  1638. }
  1639. } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
  1640. &frame_mv[NEWMV][ref_frame],
  1641. &rate_mv, best_rdc.rdcost, 0)) {
  1642. continue;
  1643. }
  1644. } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
  1645. &frame_mv[NEWMV][ref_frame],
  1646. &rate_mv, best_rdc.rdcost, 0)) {
  1647. continue;
  1648. }
  1649. } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
  1650. &frame_mv[NEWMV][ref_frame], &rate_mv,
  1651. best_rdc.rdcost, 0)) {
  1652. continue;
  1653. }
  1654. }
1655. // TODO(jianj): Skipping the testing of (duplicate) non-zero motion vectors
1656. // causes some regression; leave this to duplicate zero-mv for now, until the
1657. // regression issue is resolved.
  1658. for (inter_mv_mode = NEARESTMV; inter_mv_mode <= NEWMV; inter_mv_mode++) {
  1659. if (inter_mv_mode == this_mode) continue;
  1660. if (mode_checked[inter_mv_mode][ref_frame] &&
  1661. frame_mv[this_mode][ref_frame].as_int ==
  1662. frame_mv[inter_mv_mode][ref_frame].as_int &&
  1663. frame_mv[inter_mv_mode][ref_frame].as_int == 0) {
  1664. skip_this_mv = 1;
  1665. break;
  1666. }
  1667. }
  1668. if (skip_this_mv) continue;
1669. // If use_golden_nonzeromv is false, NEWMV is skipped for golden, so there is
1670. // no need to compute best_pred_sad, which is only used to skip golden NEWMV.
  1671. if (use_golden_nonzeromv && this_mode == NEWMV && ref_frame == LAST_FRAME &&
  1672. frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
  1673. const int pre_stride = xd->plane[0].pre[0].stride;
  1674. const uint8_t *const pre_buf =
  1675. xd->plane[0].pre[0].buf +
  1676. (frame_mv[NEWMV][LAST_FRAME].as_mv.row >> 3) * pre_stride +
  1677. (frame_mv[NEWMV][LAST_FRAME].as_mv.col >> 3);
  1678. best_pred_sad = cpi->fn_ptr[bsize].sdf(
  1679. x->plane[0].src.buf, x->plane[0].src.stride, pre_buf, pre_stride);
  1680. x->pred_mv_sad[LAST_FRAME] = best_pred_sad;
  1681. }
  1682. if (this_mode != NEARESTMV &&
  1683. frame_mv[this_mode][ref_frame].as_int ==
  1684. frame_mv[NEARESTMV][ref_frame].as_int)
  1685. continue;
  1686. mi->mode = this_mode;
  1687. mi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
1688. // Search for the best prediction filter type, when the resulting
1689. // motion vector has sub-pixel accuracy for the luma component, i.e.,
1690. // the last three bits are not all zeros.
  1691. if (reuse_inter_pred) {
  1692. if (!this_mode_pred) {
  1693. this_mode_pred = &tmp[3];
  1694. } else {
  1695. this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
  1696. pd->dst.buf = this_mode_pred->data;
  1697. pd->dst.stride = bw;
  1698. }
  1699. }
  1700. if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
  1701. pred_filter_search &&
  1702. (ref_frame == LAST_FRAME ||
  1703. (ref_frame == GOLDEN_FRAME &&
  1704. (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
  1705. (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
  1706. int pf_rate[3];
  1707. int64_t pf_dist[3];
  1708. int curr_rate[3];
  1709. unsigned int pf_var[3];
  1710. unsigned int pf_sse[3];
  1711. TX_SIZE pf_tx_size[3];
  1712. int64_t best_cost = INT64_MAX;
  1713. INTERP_FILTER best_filter = SWITCHABLE, filter;
  1714. PRED_BUFFER *current_pred = this_mode_pred;
  1715. rd_computed = 1;
  1716. for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) {
  1717. int64_t cost;
  1718. mi->interp_filter = filter;
  1719. vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
  1720. model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter],
  1721. &pf_var[filter], &pf_sse[filter]);
  1722. curr_rate[filter] = pf_rate[filter];
  1723. pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
  1724. cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
  1725. pf_tx_size[filter] = mi->tx_size;
  1726. if (cost < best_cost) {
  1727. best_filter = filter;
  1728. best_cost = cost;
  1729. skip_txfm = x->skip_txfm[0];
  1730. if (reuse_inter_pred) {
  1731. if (this_mode_pred != current_pred) {
  1732. free_pred_buffer(this_mode_pred);
  1733. this_mode_pred = current_pred;
  1734. }
  1735. current_pred = &tmp[get_pred_buffer(tmp, 3)];
  1736. pd->dst.buf = current_pred->data;
  1737. pd->dst.stride = bw;
  1738. }
  1739. }
  1740. }
  1741. if (reuse_inter_pred && this_mode_pred != current_pred)
  1742. free_pred_buffer(current_pred);
  1743. mi->interp_filter = best_filter;
  1744. mi->tx_size = pf_tx_size[best_filter];
  1745. this_rdc.rate = curr_rate[best_filter];
  1746. this_rdc.dist = pf_dist[best_filter];
  1747. var_y = pf_var[best_filter];
  1748. sse_y = pf_sse[best_filter];
  1749. x->skip_txfm[0] = skip_txfm;
  1750. if (reuse_inter_pred) {
  1751. pd->dst.buf = this_mode_pred->data;
  1752. pd->dst.stride = this_mode_pred->stride;
  1753. }
  1754. } else {
1755. // For low-motion content, use x->sb_is_skin in addition to VeryHighSad
1756. // when setting large_block.
  1757. const int large_block =
  1758. (x->content_state_sb == kVeryHighSad ||
  1759. (x->sb_is_skin && cpi->rc.avg_frame_low_motion > 70) ||
  1760. cpi->oxcf.speed < 7)
  1761. ? bsize > BLOCK_32X32
  1762. : bsize >= BLOCK_32X32;
  1763. mi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
  1764. if (cpi->use_svc && ref_frame == GOLDEN_FRAME &&
  1765. svc_force_zero_mode[ref_frame - 1])
  1766. mi->interp_filter = filter_gf_svc;
  1767. vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
  1768. // For large partition blocks, extra testing is done.
  1769. if (cpi->oxcf.rc_mode == VPX_CBR && large_block &&
  1770. !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
  1771. cm->base_qindex) {
  1772. model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate,
  1773. &this_rdc.dist, &var_y, &sse_y, mi_row, mi_col,
  1774. &this_early_term);
  1775. } else {
  1776. rd_computed = 1;
  1777. model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
  1778. &var_y, &sse_y);
  1779. }
  1780. // Save normalized sse (between current and last frame) for (0, 0) motion.
  1781. if (cpi->use_svc && ref_frame == LAST_FRAME &&
  1782. frame_mv[this_mode][ref_frame].as_int == 0) {
  1783. sse_zeromv_normalized =
  1784. sse_y >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
  1785. }
  1786. if (sse_y < best_sse_sofar) best_sse_sofar = sse_y;
  1787. }
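// Unless the model signaled early termination, compute the actual transform rd
// with block_yrd() and decide whether the block is skippable.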
  1788. if (!this_early_term) {
  1789. this_sse = (int64_t)sse_y;
  1790. block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize,
  1791. VPXMIN(mi->tx_size, TX_16X16), rd_computed);
  1792. x->skip_txfm[0] = is_skippable;
  1793. if (is_skippable) {
  1794. this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
  1795. } else {
  1796. if (RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist) <
  1797. RDCOST(x->rdmult, x->rddiv, 0, this_sse)) {
  1798. this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
  1799. } else {
  1800. this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
  1801. this_rdc.dist = this_sse;
  1802. x->skip_txfm[0] = SKIP_TXFM_AC_DC;
  1803. }
  1804. }
  1805. if (cm->interp_filter == SWITCHABLE) {
  1806. if ((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07)
  1807. this_rdc.rate += vp9_get_switchable_rate(cpi, xd);
  1808. }
  1809. } else {
  1810. this_rdc.rate += cm->interp_filter == SWITCHABLE
  1811. ? vp9_get_switchable_rate(cpi, xd)
  1812. : 0;
  1813. this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
  1814. }
  1815. if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
  1816. RD_COST rdc_uv;
  1817. const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]);
  1818. if (x->color_sensitivity[0])
  1819. vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
  1820. if (x->color_sensitivity[1])
  1821. vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
  1822. model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, &var_y, &sse_y, 1, 2);
  1823. this_rdc.rate += rdc_uv.rate;
  1824. this_rdc.dist += rdc_uv.dist;
  1825. }
  1826. this_rdc.rate += rate_mv;
  1827. this_rdc.rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
  1828. [INTER_OFFSET(this_mode)];
  1829. this_rdc.rate += ref_frame_cost[ref_frame];
  1830. this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
1831. // Bias against a NEWMV that is very different from its neighbors, and bias
1832. // toward small motion on the LAST reference for noisy input.
  1833. if (cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.speed >= 5 &&
  1834. cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
  1835. vp9_NEWMV_diff_bias(&cpi->noise_estimate, xd, this_mode, &this_rdc, bsize,
  1836. frame_mv[this_mode][ref_frame].as_mv.row,
  1837. frame_mv[this_mode][ref_frame].as_mv.col,
  1838. ref_frame == LAST_FRAME, x->lowvar_highsumdiff,
  1839. x->sb_is_skin);
  1840. }
1841. // Skip check: test whether this block can be reconstructed by
1842. // prediction only (encode breakout).
  1843. if (cpi->allow_encode_breakout) {
  1844. encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode,
  1845. var_y, sse_y, yv12_mb, &this_rdc.rate,
  1846. &this_rdc.dist);
  1847. if (x->skip) {
  1848. this_rdc.rate += rate_mv;
  1849. this_rdc.rdcost =
  1850. RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
  1851. }
  1852. }
  1853. #if CONFIG_VP9_TEMPORAL_DENOISING
  1854. if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode &&
  1855. cpi->denoiser.denoising_level > kDenLowLow) {
  1856. vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx);
  1857. // Keep track of zero_last cost.
  1858. if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0)
  1859. zero_last_cost_orig = this_rdc.rdcost;
  1860. }
  1861. #else
  1862. (void)ctx;
  1863. #endif
  1864. mode_checked[this_mode][ref_frame] = 1;
  1865. if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
  1866. best_rdc = this_rdc;
  1867. best_mode = this_mode;
  1868. best_pred_filter = mi->interp_filter;
  1869. best_tx_size = mi->tx_size;
  1870. best_ref_frame = ref_frame;
  1871. best_mode_skip_txfm = x->skip_txfm[0];
  1872. best_early_term = this_early_term;
  1873. if (reuse_inter_pred) {
  1874. free_pred_buffer(best_pred);
  1875. best_pred = this_mode_pred;
  1876. }
  1877. } else {
  1878. if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
  1879. }
  1880. if (x->skip) break;
  1881. // If early termination flag is 1 and at least 2 modes are checked,
  1882. // the mode search is terminated.
  1883. if (best_early_term && idx > 0) {
  1884. x->skip = 1;
  1885. break;
  1886. }
  1887. }
  1888. mi->mode = best_mode;
  1889. mi->interp_filter = best_pred_filter;
  1890. mi->tx_size = best_tx_size;
  1891. mi->ref_frame[0] = best_ref_frame;
  1892. mi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
  1893. xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int;
  1894. x->skip_txfm[0] = best_mode_skip_txfm;
1895. // For a spatial enhancement layer: perform intra prediction only if the base
1896. // layer is chosen as the reference. Always perform intra prediction if
1897. // LAST is the only reference or is_key_frame is set.
  1898. if (cpi->svc.spatial_layer_id) {
  1899. perform_intra_pred =
  1900. cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
  1901. !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
  1902. (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
  1903. svc_force_zero_mode[best_ref_frame - 1]);
  1904. inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
  1905. }
  1906. if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
  1907. cpi->rc.is_src_frame_alt_ref)
  1908. perform_intra_pred = 0;
1909. // Perform the intra prediction search if the best inter rd cost is above a
1910. // certain threshold.
  1911. if (best_rdc.rdcost == INT64_MAX ||
  1912. ((!force_skip_low_temp_var || bsize < BLOCK_32X32 ||
  1913. x->content_state_sb == kVeryHighSad) &&
  1914. perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh &&
  1915. bsize <= cpi->sf.max_intra_bsize && !x->skip_low_source_sad &&
  1916. !x->lowvar_highsumdiff)) {
  1917. struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
  1918. int i;
  1919. TX_SIZE best_intra_tx_size = TX_SIZES;
  1920. TX_SIZE intra_tx_size =
  1921. VPXMIN(max_txsize_lookup[bsize],
  1922. tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  1923. if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16)
  1924. intra_tx_size = TX_16X16;
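// If the best inter prediction so far was written directly into the dst
// buffer, copy it to a scratch buffer so the intra search below can reuse
// the dst buffer.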
  1925. if (reuse_inter_pred && best_pred != NULL) {
  1926. if (best_pred->data == orig_dst.buf) {
  1927. this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
  1928. #if CONFIG_VP9_HIGHBITDEPTH
  1929. if (cm->use_highbitdepth)
  1930. vpx_highbd_convolve_copy(
  1931. CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
  1932. CONVERT_TO_SHORTPTR(this_mode_pred->data), this_mode_pred->stride,
  1933. NULL, 0, 0, 0, 0, bw, bh, xd->bd);
  1934. else
  1935. vpx_convolve_copy(best_pred->data, best_pred->stride,
  1936. this_mode_pred->data, this_mode_pred->stride, NULL,
  1937. 0, 0, 0, 0, bw, bh);
  1938. #else
  1939. vpx_convolve_copy(best_pred->data, best_pred->stride,
  1940. this_mode_pred->data, this_mode_pred->stride, NULL, 0,
  1941. 0, 0, 0, bw, bh);
  1942. #endif // CONFIG_VP9_HIGHBITDEPTH
  1943. best_pred = this_mode_pred;
  1944. }
  1945. }
  1946. pd->dst = orig_dst;
  1947. for (i = 0; i < 4; ++i) {
  1948. const PREDICTION_MODE this_mode = intra_mode_list[i];
  1949. THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)];
  1950. int mode_rd_thresh = rd_threshes[mode_index];
  1951. if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
  1952. this_mode != DC_PRED) {
  1953. continue;
  1954. }
  1955. if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
  1956. continue;
  1957. if ((cpi->sf.adaptive_rd_thresh_row_mt &&
  1958. rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh,
  1959. &rd_thresh_freq_fact[mode_index])) ||
  1960. (!cpi->sf.adaptive_rd_thresh_row_mt &&
  1961. rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
  1962. &rd_thresh_freq_fact[mode_index])))
  1963. continue;
  1964. mi->mode = this_mode;
  1965. mi->ref_frame[0] = INTRA_FRAME;
  1966. this_rdc.dist = this_rdc.rate = 0;
  1967. args.mode = this_mode;
  1968. args.skippable = 1;
  1969. args.rdc = &this_rdc;
  1970. mi->tx_size = intra_tx_size;
  1971. vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
  1972. &args);
1973. // Check the skip cost here since skippable is not set for uv; this
1974. // mirrors the behavior used by inter.
  1975. if (args.skippable) {
  1976. x->skip_txfm[0] = SKIP_TXFM_AC_DC;
  1977. this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
  1978. } else {
  1979. x->skip_txfm[0] = SKIP_TXFM_NONE;
  1980. this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
  1981. }
  1982. // Inter and intra RD will mismatch in scale for non-screen content.
  1983. if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
  1984. if (x->color_sensitivity[0])
  1985. vp9_foreach_transformed_block_in_plane(xd, bsize, 1,
  1986. estimate_block_intra, &args);
  1987. if (x->color_sensitivity[1])
  1988. vp9_foreach_transformed_block_in_plane(xd, bsize, 2,
  1989. estimate_block_intra, &args);
  1990. }
  1991. this_rdc.rate += cpi->mbmode_cost[this_mode];
  1992. this_rdc.rate += ref_frame_cost[INTRA_FRAME];
  1993. this_rdc.rate += intra_cost_penalty;
  1994. this_rdc.rdcost =
  1995. RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
  1996. if (this_rdc.rdcost < best_rdc.rdcost) {
  1997. best_rdc = this_rdc;
  1998. best_mode = this_mode;
  1999. best_intra_tx_size = mi->tx_size;
  2000. best_ref_frame = INTRA_FRAME;
  2001. mi->uv_mode = this_mode;
  2002. mi->mv[0].as_int = INVALID_MV;
  2003. best_mode_skip_txfm = x->skip_txfm[0];
  2004. }
  2005. }
  2006. // Reset mb_mode_info to the best inter mode.
  2007. if (best_ref_frame != INTRA_FRAME) {
  2008. mi->tx_size = best_tx_size;
  2009. } else {
  2010. mi->tx_size = best_intra_tx_size;
  2011. }
  2012. }
  2013. pd->dst = orig_dst;
  2014. mi->mode = best_mode;
  2015. mi->ref_frame[0] = best_ref_frame;
  2016. x->skip_txfm[0] = best_mode_skip_txfm;
  2017. if (!is_inter_block(mi)) {
  2018. mi->interp_filter = SWITCHABLE_FILTERS;
  2019. }
  2020. if (reuse_inter_pred && best_pred != NULL) {
  2021. if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
  2022. #if CONFIG_VP9_HIGHBITDEPTH
  2023. if (cm->use_highbitdepth)
  2024. vpx_highbd_convolve_copy(
  2025. CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
  2026. CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride, NULL, 0, 0, 0, 0,
  2027. bw, bh, xd->bd);
  2028. else
  2029. vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
  2030. pd->dst.stride, NULL, 0, 0, 0, 0, bw, bh);
  2031. #else
  2032. vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
  2033. pd->dst.stride, NULL, 0, 0, 0, 0, bw, bh);
  2034. #endif // CONFIG_VP9_HIGHBITDEPTH
  2035. }
  2036. }
  2037. #if CONFIG_VP9_TEMPORAL_DENOISING
  2038. if (cpi->oxcf.noise_sensitivity > 0 && cpi->resize_pending == 0 &&
  2039. denoise_svc_pickmode && cpi->denoiser.denoising_level > kDenLowLow &&
  2040. cpi->denoiser.reset == 0) {
  2041. VP9_DENOISER_DECISION decision = COPY_BLOCK;
  2042. ctx->sb_skip_denoising = 0;
  2043. // TODO(marpan): There is an issue with denoising when the
  2044. // superblock partitioning scheme is based on the pickmode.
  2045. // Remove this condition when the issue is resolved.
  2046. if (x->sb_pickmode_part) ctx->sb_skip_denoising = 1;
  2047. vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost,
  2048. frame_mv, reuse_inter_pred, best_tx_size,
  2049. best_mode, best_ref_frame, best_pred_filter,
  2050. best_mode_skip_txfm);
  2051. vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision);
  2052. recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb,
  2053. &best_rdc, bsize, mi_row, mi_col);
  2054. best_ref_frame = ctx_den.best_ref_frame;
  2055. }
  2056. #endif
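// Update the rd threshold frequency factors based on the winning mode, so that
// modes which rarely win are pruned more aggressively in later blocks.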
  2057. if (cpi->sf.adaptive_rd_thresh) {
  2058. THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mi->mode)];
  2059. if (best_ref_frame == INTRA_FRAME) {
  2060. // Only consider the modes that are included in the intra_mode_list.
  2061. int intra_modes = sizeof(intra_mode_list) / sizeof(PREDICTION_MODE);
  2062. int i;
  2063. // TODO(yunqingwang): Check intra mode mask and only update freq_fact
  2064. // for those valid modes.
  2065. for (i = 0; i < intra_modes; i++) {
  2066. if (cpi->sf.adaptive_rd_thresh_row_mt)
  2067. update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance,
  2068. thresh_freq_fact_idx, INTRA_FRAME,
  2069. best_mode_idx, intra_mode_list[i]);
  2070. else
  2071. update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize,
  2072. INTRA_FRAME, best_mode_idx,
  2073. intra_mode_list[i]);
  2074. }
  2075. } else {
  2076. for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
  2077. PREDICTION_MODE this_mode;
  2078. if (best_ref_frame != ref_frame) continue;
  2079. for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
  2080. if (cpi->sf.adaptive_rd_thresh_row_mt)
  2081. update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance,
  2082. thresh_freq_fact_idx, ref_frame,
  2083. best_mode_idx, this_mode);
  2084. else
  2085. update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize,
  2086. ref_frame, best_mode_idx, this_mode);
  2087. }
  2088. }
  2089. }
  2090. }
  2091. *rd_cost = best_rdc;
  2092. }
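// Non-RD mode selection for sub-8x8 partitions: for each allowed reference
// frame, pick the best of NEARESTMV/NEARMV/ZEROMV/NEWMV per sub-block and
// keep the reference frame with the lowest total rd cost.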
  2093. void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
  2094. int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize,
  2095. PICK_MODE_CONTEXT *ctx) {
  2096. VP9_COMMON *const cm = &cpi->common;
  2097. SPEED_FEATURES *const sf = &cpi->sf;
  2098. MACROBLOCKD *const xd = &x->e_mbd;
  2099. MODE_INFO *const mi = xd->mi[0];
  2100. MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  2101. const struct segmentation *const seg = &cm->seg;
  2102. MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE;
  2103. MV_REFERENCE_FRAME best_ref_frame = NONE;
  2104. unsigned char segment_id = mi->segment_id;
  2105. struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  2106. static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
  2107. VP9_ALT_FLAG };
  2108. int64_t best_rd = INT64_MAX;
  2109. b_mode_info bsi[MAX_REF_FRAMES][4];
  2110. int ref_frame_skip_mask = 0;
  2111. const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  2112. const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  2113. int idx, idy;
  2114. x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  2115. ctx->pred_pixel_ready = 0;
  2116. for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
  2117. const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  2118. int_mv dummy_mv[2];
  2119. x->pred_mv_sad[ref_frame] = INT_MAX;
  2120. if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
  2121. int_mv *const candidates = mbmi_ext->ref_mvs[ref_frame];
  2122. const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  2123. vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf,
  2124. sf);
  2125. vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, candidates, mi_row, mi_col,
  2126. mbmi_ext->mode_context);
  2127. vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
  2128. &dummy_mv[0], &dummy_mv[1]);
  2129. } else {
  2130. ref_frame_skip_mask |= (1 << ref_frame);
  2131. }
  2132. }
  2133. mi->sb_type = bsize;
  2134. mi->tx_size = TX_4X4;
  2135. mi->uv_mode = DC_PRED;
  2136. mi->ref_frame[0] = LAST_FRAME;
  2137. mi->ref_frame[1] = NONE;
  2138. mi->interp_filter =
  2139. cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
  2140. for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
  2141. int64_t this_rd = 0;
  2142. int plane;
  2143. if (ref_frame_skip_mask & (1 << ref_frame)) continue;
  2144. #if CONFIG_BETTER_HW_COMPATIBILITY
  2145. if ((bsize == BLOCK_8X4 || bsize == BLOCK_4X8) && ref_frame > INTRA_FRAME &&
  2146. vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
  2147. continue;
  2148. #endif
  2149. // TODO(jingning, agrange): Scaling reference frame not supported for
  2150. // sub8x8 blocks. Is this supported now?
  2151. if (ref_frame > INTRA_FRAME &&
  2152. vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
  2153. continue;
2154. // If the segment reference frame feature is enabled, then do nothing if the
2155. // current ref frame is not allowed.
  2156. if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
  2157. get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
  2158. continue;
  2159. mi->ref_frame[0] = ref_frame;
  2160. x->skip = 0;
  2161. set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
  2162. // Select prediction reference frames.
  2163. for (plane = 0; plane < MAX_MB_PLANE; plane++)
  2164. xd->plane[plane].pre[0] = yv12_mb[ref_frame][plane];
  2165. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
  2166. for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
  2167. int_mv b_mv[MB_MODE_COUNT];
  2168. int64_t b_best_rd = INT64_MAX;
  2169. const int i = idy * 2 + idx;
  2170. PREDICTION_MODE this_mode;
  2171. RD_COST this_rdc;
  2172. unsigned int var_y, sse_y;
  2173. struct macroblock_plane *p = &x->plane[0];
  2174. struct macroblockd_plane *pd = &xd->plane[0];
  2175. const struct buf_2d orig_src = p->src;
  2176. const struct buf_2d orig_dst = pd->dst;
  2177. struct buf_2d orig_pre[2];
  2178. memcpy(orig_pre, xd->plane[0].pre, sizeof(orig_pre));
2179. // Set buffer pointers for the sub8x8 motion search.
  2180. p->src.buf =
  2181. &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  2182. pd->dst.buf =
  2183. &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
  2184. pd->pre[0].buf =
  2185. &pd->pre[0]
  2186. .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
  2187. b_mv[ZEROMV].as_int = 0;
  2188. b_mv[NEWMV].as_int = INVALID_MV;
  2189. vp9_append_sub8x8_mvs_for_idx(cm, xd, i, 0, mi_row, mi_col,
  2190. &b_mv[NEARESTMV], &b_mv[NEARMV],
  2191. mbmi_ext->mode_context);
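// Try NEARESTMV, NEARMV, ZEROMV and NEWMV for this sub-block; NEWMV seeds a
// full-pel search from the first sub-block's MV (or from NEARESTMV for the
// first sub-block) and then refines to sub-pel.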
  2192. for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
  2193. int b_rate = 0;
  2194. xd->mi[0]->bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int;
  2195. if (this_mode == NEWMV) {
  2196. const int step_param = cpi->sf.mv.fullpel_search_step_param;
  2197. MV mvp_full;
  2198. MV tmp_mv;
  2199. int cost_list[5];
  2200. const MvLimits tmp_mv_limits = x->mv_limits;
  2201. uint32_t dummy_dist;
  2202. if (i == 0) {
  2203. mvp_full.row = b_mv[NEARESTMV].as_mv.row >> 3;
  2204. mvp_full.col = b_mv[NEARESTMV].as_mv.col >> 3;
  2205. } else {
  2206. mvp_full.row = xd->mi[0]->bmi[0].as_mv[0].as_mv.row >> 3;
  2207. mvp_full.col = xd->mi[0]->bmi[0].as_mv[0].as_mv.col >> 3;
  2208. }
  2209. vp9_set_mv_search_range(&x->mv_limits,
  2210. &mbmi_ext->ref_mvs[ref_frame][0].as_mv);
  2211. vp9_full_pixel_search(
  2212. cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method,
  2213. x->sadperbit4, cond_cost_list(cpi, cost_list),
  2214. &mbmi_ext->ref_mvs[ref_frame][0].as_mv, &tmp_mv, INT_MAX, 0);
  2215. x->mv_limits = tmp_mv_limits;
2216. // Calculate the bit cost of the motion vector.
  2217. mvp_full.row = tmp_mv.row * 8;
  2218. mvp_full.col = tmp_mv.col * 8;
  2219. b_rate += vp9_mv_bit_cost(
  2220. &mvp_full, &mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  2221. x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  2222. b_rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
  2223. [INTER_OFFSET(NEWMV)];
  2224. if (RDCOST(x->rdmult, x->rddiv, b_rate, 0) > b_best_rd) continue;
  2225. cpi->find_fractional_mv_step(
  2226. x, &tmp_mv, &mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  2227. cpi->common.allow_high_precision_mv, x->errorperbit,
  2228. &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
  2229. cpi->sf.mv.subpel_iters_per_step,
  2230. cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
  2231. &dummy_dist, &x->pred_sse[ref_frame], NULL, 0, 0);
  2232. xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv;
  2233. } else {
  2234. b_rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
  2235. [INTER_OFFSET(this_mode)];
  2236. }
  2237. #if CONFIG_VP9_HIGHBITDEPTH
  2238. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  2239. vp9_highbd_build_inter_predictor(
  2240. CONVERT_TO_SHORTPTR(pd->pre[0].buf), pd->pre[0].stride,
  2241. CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride,
  2242. &xd->mi[0]->bmi[i].as_mv[0].as_mv, &xd->block_refs[0]->sf,
  2243. 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0,
  2244. vp9_filter_kernels[mi->interp_filter], MV_PRECISION_Q3,
  2245. mi_col * MI_SIZE + 4 * (i & 0x01),
  2246. mi_row * MI_SIZE + 4 * (i >> 1), xd->bd);
  2247. } else {
  2248. #endif
  2249. vp9_build_inter_predictor(
  2250. pd->pre[0].buf, pd->pre[0].stride, pd->dst.buf, pd->dst.stride,
  2251. &xd->mi[0]->bmi[i].as_mv[0].as_mv, &xd->block_refs[0]->sf,
  2252. 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0,
  2253. vp9_filter_kernels[mi->interp_filter], MV_PRECISION_Q3,
  2254. mi_col * MI_SIZE + 4 * (i & 0x01),
  2255. mi_row * MI_SIZE + 4 * (i >> 1));
  2256. #if CONFIG_VP9_HIGHBITDEPTH
  2257. }
  2258. #endif
  2259. model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
  2260. &var_y, &sse_y);
  2261. this_rdc.rate += b_rate;
  2262. this_rdc.rdcost =
  2263. RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
  2264. if (this_rdc.rdcost < b_best_rd) {
  2265. b_best_rd = this_rdc.rdcost;
  2266. bsi[ref_frame][i].as_mode = this_mode;
  2267. bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0]->bmi[i].as_mv[0].as_mv;
  2268. }
  2269. } // mode search
2270. // Restore the source and prediction buffer pointers.
  2271. p->src = orig_src;
  2272. pd->pre[0] = orig_pre[0];
  2273. pd->dst = orig_dst;
  2274. this_rd += b_best_rd;
  2275. xd->mi[0]->bmi[i] = bsi[ref_frame][i];
  2276. if (num_4x4_blocks_wide > 1) xd->mi[0]->bmi[i + 1] = xd->mi[0]->bmi[i];
  2277. if (num_4x4_blocks_high > 1) xd->mi[0]->bmi[i + 2] = xd->mi[0]->bmi[i];
  2278. }
  2279. } // loop through sub8x8 blocks
  2280. if (this_rd < best_rd) {
  2281. best_rd = this_rd;
  2282. best_ref_frame = ref_frame;
  2283. }
  2284. } // reference frames
  2285. mi->tx_size = TX_4X4;
  2286. mi->ref_frame[0] = best_ref_frame;
  2287. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
  2288. for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
  2289. const int block = idy * 2 + idx;
  2290. xd->mi[0]->bmi[block] = bsi[best_ref_frame][block];
  2291. if (num_4x4_blocks_wide > 1)
  2292. xd->mi[0]->bmi[block + 1] = bsi[best_ref_frame][block];
  2293. if (num_4x4_blocks_high > 1)
  2294. xd->mi[0]->bmi[block + 2] = bsi[best_ref_frame][block];
  2295. }
  2296. }
  2297. mi->mode = xd->mi[0]->bmi[3].as_mode;
  2298. ctx->mic = *(xd->mi[0]);
  2299. ctx->mbmi_ext = *x->mbmi_ext;
  2300. ctx->skip_txfm[0] = SKIP_TXFM_NONE;
  2301. ctx->skip = 0;
  2302. // Dummy assignment for speed -5. No effect in speed -6.
  2303. rd_cost->rdcost = best_rd;
  2304. }