// vp9_rdopt.c

/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_seg_common.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_aq_variance.h"

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
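// Each *_MODE_MASK above contains every reference frame except its namesake;
// SECOND_REF_FRAME_MASK covers ALTREF_FRAME plus bit 0 (NONE). They act as
// bitmasks over MV_REFERENCE_FRAME values when pruning mode candidates.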
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8

typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;

struct rdcost_block_args {
  const VP9_COMP *cpi;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;
};

#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  { NEARESTMV, { LAST_FRAME, NONE } },
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },

  { DC_PRED, { INTRA_FRAME, NONE } },

  { NEWMV, { LAST_FRAME, NONE } },
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },

  { NEARMV, { LAST_FRAME, NONE } },
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },

  { ZEROMV, { LAST_FRAME, NONE } },
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },

  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { TM_PRED, { INTRA_FRAME, NONE } },

  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
};
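// Note the ordering of vp9_mode_order: the cheap, statistically likely
// winners (NEARESTMV on each single reference, then DC_PRED) come first so
// that per-mode-index rd thresholds can prune the more expensive compound
// and oblique-intra candidates near the tail of the list.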
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  { { LAST_FRAME, NONE } },           { { GOLDEN_FRAME, NONE } },
  { { ALTREF_FRAME, NONE } },         { { LAST_FRAME, ALTREF_FRAME } },
  { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NONE } },
};

static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n,
                           int min_plane, int max_plane) {
  int i;

  for (i = min_plane; i < max_plane; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[i];

    p->coeff = ctx->coeff_pbuf[i][m];
    p->qcoeff = ctx->qcoeff_pbuf[i][m];
    pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
    p->eobs = ctx->eobs_pbuf[i][m];

    ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
    ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
    ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
    ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];

    ctx->coeff_pbuf[i][n] = p->coeff;
    ctx->qcoeff_pbuf[i][n] = p->qcoeff;
    ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
    ctx->eobs_pbuf[i][n] = p->eobs;
  }
}

static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                            MACROBLOCKD *xd, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int64_t dist;
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                    3;
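  // Worked example of the scaling note above, assuming 8-bit depth: the
  // stored dequant step is 8x the orthonormal-transform step, so ">> 3"
  // recovers the effective quantizer. At higher bit depths, ">> (bd - 5)"
  // additionally strips the (bd - 8) extra bits of coefficient headroom.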
  unsigned int qstep_vec[MAX_MB_PLANE];
  unsigned int nlog2_vec[MAX_MB_PLANE];
  unsigned int sum_sse_vec[MAX_MB_PLANE];
  int any_zero_sum_sse = 0;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    unsigned int sum_sse = 0;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lw);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst,
                                        pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        if (skip_flag && !low_err_skip) skip_flag = 0;

        if (i == 0) x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;
    sum_sse_vec[i] = sum_sse;
    any_zero_sum_sse = any_zero_sum_sse || (sum_sse == 0);
    qstep_vec[i] = pd->dequant[1] >> dequant_shift;
    nlog2_vec[i] = num_pels_log2_lookup[bs];
  }
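  // A minimal worked example for the simple model below, assuming
  // VP9_PROB_COST_SHIFT == 9 and one plane with sum_sse == 1000 and
  // qstep == 40:
  //   rate = (1000 * (280 - 40)) >> (16 - 9) = 240000 >> 7 = 1875
  //   dist = (1000 * 40) >> 8 = 156
  // Modeled rate decays linearly to zero at quantizer 280, and is pinned to
  // zero outright once qstep >= 120; distortion grows linearly with qstep.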
  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    for (i = 0; i < MAX_MB_PLANE; ++i) {
      int64_t rate;
      const int64_t square_error = sum_sse_vec[i];
      int quantizer = qstep_vec[i];

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >>
               (16 - VP9_PROB_COST_SHIFT);
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    }
  } else {
    if (any_zero_sum_sse) {
      for (i = 0; i < MAX_MB_PLANE; ++i) {
        int rate;
        vp9_model_rd_from_var_lapndz(sum_sse_vec[i], nlog2_vec[i], qstep_vec[i],
                                     &rate, &dist);
        rate_sum += rate;
        dist_sum += dist;
      }
    } else {
      vp9_model_rd_from_var_lapndz_vec(sum_sse_vec, nlog2_vec, qstep_vec,
                                       &rate_sum, &dist_sum);
    }
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

#if CONFIG_VP9_HIGHBITDEPTH
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
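// With shift == 2 * (bd - 8), the rounded right shifts above bring 10- and
// 12-bit squared errors back to the 8-bit scale, keeping high-bit-depth RD
// costs comparable with the 8-bit path in vp9_block_error_c below.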
static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
                                               const tran_low_t *dqcoeff,
                                               intptr_t block_size,
                                               int64_t *ssz, int bd) {
  if (bd == 8) {
    return vp9_block_error(coeff, dqcoeff, block_size, ssz);
  } else {
    return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4, 3, 16 - 13, 0 },
  { 1, 2, 3, 4, 11, 64 - 21, 0 },
  { 1, 2, 3, 4, 11, 256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
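// Sanity check on the rows above: with the '0' terminator dropped, each row
// sums to the coefficient count of its transform size, e.g. for TX_4X4:
// 1 + 2 + 3 + 4 + 3 + (16 - 13) = 16, and for TX_8X8:
// 1 + 2 + 3 + 4 + 11 + (64 - 21) = 64.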
static int cost_coeffs(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int pt, const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const struct macroblock_plane *p = &x->plane[plane];
  const PLANE_TYPE type = get_plane_type(plane);
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mi)];
  uint8_t token_cache[32 * 32];
  int cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y
             ? mi->tx_size == tx_size
             : get_uv_tx_size(mi, &xd->plane[plane]) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
  } else {
    if (use_fast_coef_costing) {
      int band_left = *band_count++;
      int c;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost);
      cost += (*token_costs)[0][pt][prev_t];

      token_cache[0] = vp9_pt_energy_class[prev_t];
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += vp9_get_token_cost(v, &t, cat6_high_cost);
        cost += (*token_costs)[!prev_t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
      }

      // eob token
      if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;
      int c;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
      cost = vp9_get_token_cost(v, &tok, cat6_high_cost);
      cost += (*token_costs)[0][pt][tok];

      token_cache[0] = vp9_pt_energy_class[tok];
      ++token_costs;

      tok_cost_ptr = &((*token_costs)[!tok]);

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += vp9_get_token_cost(v, &tok, cat6_high_cost);
        pt = get_coef_context(nb, token_cache, c);
        cost += (*tok_cost_ptr)[pt][tok];
        token_cache[rc] = vp9_pt_energy_class[tok];
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
        tok_cost_ptr = &((*token_costs)[!tok]);
      }

      // eob token
      if (band_left) {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  return cost;
}

static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim,
                                  int subsampling_dim, int blk_dim) {
  return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim;
}
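// A unit sketch for the shift above (assuming, per vp9_blockd.h, that
// mb_to_*_edge is kept in 1/8-pel units): ">> 3" converts to pixels and a
// further ">> 2" to 4x4-block columns/rows; adding the chroma subsampling
// shift gives the combined ">> (5 + subsampling_dim)".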
// Compute the pixel domain sum square error on all visible 4x4s in the
// transform block.
static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd,
                          const struct macroblockd_plane *const pd,
                          const uint8_t *src, const int src_stride,
                          const uint8_t *dst, const int dst_stride, int blk_row,
                          int blk_col, const BLOCK_SIZE plane_bsize,
                          const BLOCK_SIZE tx_bsize) {
  unsigned int sse = 0;
  const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
  const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
  int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
                                            pd->subsampling_x, blk_col);
  int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
                                             pd->subsampling_y, blk_row);
  if (tx_bsize == BLOCK_4X4 ||
      (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
  } else {
    const vpx_variance_fn_t vf_4x4 = cpi->fn_ptr[BLOCK_4X4].vf;
    int r, c;
    unsigned this_sse = 0;
    int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
    int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
    sse = 0;
    // We are in the unrestricted motion border: skip the sub blocks that
    // lie wholly within the UMV.
    for (r = 0; r < max_r; ++r) {
      for (c = 0; c < max_c; ++c) {
        vf_4x4(src + r * src_stride * 4 + c * 4, src_stride,
               dst + r * dst_stride * 4 + c * 4, dst_stride, &this_sse);
        sse += this_sse;
      }
    }
  }
  return sse;
}

// Compute the sum of squares over all visible 4x4s in the transform block.
static int64_t sum_squares_visible(const MACROBLOCKD *xd,
                                   const struct macroblockd_plane *const pd,
                                   const int16_t *diff, const int diff_stride,
                                   int blk_row, int blk_col,
                                   const BLOCK_SIZE plane_bsize,
                                   const BLOCK_SIZE tx_bsize) {
  int64_t sse;
  const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
  const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
  int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
                                            pd->subsampling_x, blk_col);
  int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
                                             pd->subsampling_y, blk_row);
  if (tx_bsize == BLOCK_4X4 ||
      (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
    assert(tx_4x4_w == tx_4x4_h);
    sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_4x4_w << 2);
  } else {
    int r, c;
    int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
    int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
    sse = 0;
    // We are in the unrestricted motion border: skip the sub blocks that
    // lie wholly within the UMV.
    for (r = 0; r < max_r; ++r) {
      for (c = 0; c < max_c; ++c) {
        sse += (int64_t)vpx_sum_squares_2d_i16(
            diff + r * diff_stride * 4 + c * 4, diff_stride, 4);
      }
    }
  }
  return sse;
}

static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
                       BLOCK_SIZE plane_bsize, int block, int blk_row,
                       int blk_col, TX_SIZE tx_size, int64_t *out_dist,
                       int64_t *out_sse) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];

  if (x->block_tx_domain) {
    const int ss_txfrm_size = tx_size << 1;
    int64_t this_sse;
    const int shift = tx_size == TX_32X32 ? 0 : 2;
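    // Hedged note on the shift: transform-domain SSE carries the forward
    // transform's gain. The 4x4..16x16 forward transforms appear to run at
    // twice orthonormal amplitude (4x in squared error, hence ">> 2"),
    // while the 32x32 transform keeps an extra internal down-shift and
    // needs no correction.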
    const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    const tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    *out_dist = vp9_highbd_block_error_dispatch(
                    coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd) >>
                shift;
#else
    *out_dist =
        vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >>
        shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_sse = this_sse >> shift;

    if (x->skip_encode && !is_inter_block(xd->mi[0])) {
      // TODO(jingning): tune the model to better capture the distortion.
      const int64_t p =
          (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
          (shift + 2 + (bd - 8) * 2);
#else
          (shift + 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      *out_dist += (p >> 4);
      *out_sse += p;
    }
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
    const int bs = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
    const int src_idx = 4 * (blk_row * src_stride + blk_col);
    const int dst_idx = 4 * (blk_row * dst_stride + blk_col);
    const uint8_t *src = &p->src.buf[src_idx];
    const uint8_t *dst = &pd->dst.buf[dst_idx];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    const uint16_t *eob = &p->eobs[block];
    unsigned int tmp;

    tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row,
                    blk_col, plane_bsize, tx_bsize);
    *out_sse = (int64_t)tmp * 16;

    if (*eob) {
#if CONFIG_VP9_HIGHBITDEPTH
      DECLARE_ALIGNED(16, uint16_t, recon16[1024]);
      uint8_t *recon = (uint8_t *)recon16;
#else
      DECLARE_ALIGNED(16, uint8_t, recon[1024]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
                                 32, NULL, 0, 0, 0, 0, bs, bs, xd->bd);
        if (xd->lossless) {
          vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
        } else {
          switch (tx_size) {
            case TX_4X4:
              vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
              break;
            case TX_8X8:
              vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, *eob, xd->bd);
              break;
            case TX_16X16:
              vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, *eob, xd->bd);
              break;
            case TX_32X32:
              vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, *eob, xd->bd);
              break;
            default: assert(0 && "Invalid transform size");
          }
        }
        recon = CONVERT_TO_BYTEPTR(recon16);
      } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
        vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs);
        switch (tx_size) {
          case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break;
          case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break;
          case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, *eob); break;
          case TX_4X4:
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1, which is significant (not just an optimization) for
            // the lossless case.
            x->inv_txfm_add(dqcoeff, recon, 32, *eob);
            break;
          default: assert(0 && "Invalid transform size"); break;
        }
#if CONFIG_VP9_HIGHBITDEPTH
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH

      tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col,
                      plane_bsize, tx_bsize);
    }

    *out_dist = (int64_t)tmp * 16;
  }
}

static int rate_block(int plane, int block, TX_SIZE tx_size, int coeff_ctx,
                      struct rdcost_block_args *args) {
  return cost_coeffs(args->x, plane, block, tx_size, coeff_ctx, args->so->scan,
                     args->so->neighbors, args->use_fast_coef_costing);
}

static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;
  const int coeff_ctx =
      combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]);

  if (args->exit_early) return;

  if (!is_inter_block(mi)) {
    struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above,
                                       args->t_left, &mi->skip };
    vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
                           &intra_arg);
    if (x->block_tx_domain) {
      dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                 tx_size, &dist, &sse);
    } else {
      const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
      const struct macroblock_plane *const p = &x->plane[plane];
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const int src_stride = p->src.stride;
      const int dst_stride = pd->dst.stride;
      const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
      const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
      const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
      const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
      unsigned int tmp;
      sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col,
                                plane_bsize, tx_bsize);
#if CONFIG_VP9_HIGHBITDEPTH
      if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->bd > 8))
        sse = ROUND64_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      sse = sse * 16;
      tmp = pixel_sse(args->cpi, xd, pd, src, src_stride, dst, dst_stride,
                      blk_row, blk_col, plane_bsize, tx_bsize);
      dist = (int64_t)tmp * 16;
    }
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
      if (x->block_qcoeff_opt)
        vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
      dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                 tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize,
                         tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32) dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform. Because this is handled here, the quantization
      // does not need to do it.
      x->plane[plane].eobs[block] = 0;
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
    if (x->block_qcoeff_opt)
      vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
    dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
               tx_size, &dist, &sse);
  }

  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, tx_size, coeff_ctx, args);
  args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
  args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
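  // rd1 prices coding the coefficients (rate plus residual distortion);
  // rd2 prices skipping the block outright (zero rate, distortion equal to
  // the full prediction sse). Taking the minimum credits a per-block skip
  // whenever the coefficients are not worth their bits.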
  if (plane == 0) {
    x->zcoeff_blk[tx_size][block] =
        !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless);
    x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block];
  }

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}

static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                             int64_t *distortion, int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
                             TX_SIZE tx_size, int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;
  vp9_zero(args);
  args.cpi = cpi;
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_costing;
  args.skippable = 1;

  if (plane == 0) xd->mi[0]->tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, get_plane_type(plane), 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                         &args);
  if (args.exit_early) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}

static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip, int64_t *sse,
                                   int64_t ref_best_rd, BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];

  mi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
                   mi->tx_size, cpi->sf.use_fast_coef_costing);
}

static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip,
                                   int64_t *psse, int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = { { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX } };
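  // Two RD tallies are kept per transform size: index [0] excludes and
  // index [1] includes the rate of signaling tx_size itself, so the final
  // *rate readout can use r[tx_size][1] only when the bitstream actually
  // codes the choice (cm->tx_mode == TX_MODE_SELECT).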
  int n, m;
  int s0, s1;
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;
  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  if (cm->tx_mode == TX_MODE_SELECT) {
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
    TX_SIZE chosen_tx_size =
        VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  for (n = start_tx; n >= end_tx; n--) {
    int r_tx_size = 0;
    for (m = 0; m <= n - (n == (int)max_tx_size); m++) {
      if (m == n)
        r_tx_size += vp9_cost_zero(tx_probs[m]);
      else
        r_tx_size += vp9_cost_one(tx_probs[m]);
    }
    txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0,
                     bs, n, cpi->sf.use_fast_coef_costing);
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      r[n][1] += r_tx_size;
    }
    if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      if (is_inter_block(mi)) {
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        r[n][1] -= r_tx_size;
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
      }
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
      rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
      rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
    }

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
         (n < (int)max_tx_size && rd[n][1] > rd[n + 1][1]) || s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mi->tx_size = best_tx;

  *distortion = d[mi->tx_size];
  *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mi->tx_size];
  *psse = sse[mi->tx_size];
}

static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip, int64_t *psse,
                            BLOCK_SIZE bs, int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0]->sb_type);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  }
}
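// Prune an oblique intra mode unless one of its two flanking directional
// modes is the best so far; e.g. D117_PRED predicts along an angle between
// V_PRED and D135_PRED, so it is only worth testing when one of those two
// currently leads.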
  829. static int conditional_skipintra(PREDICTION_MODE mode,
  830. PREDICTION_MODE best_intra_mode) {
  831. if (mode == D117_PRED && best_intra_mode != V_PRED &&
  832. best_intra_mode != D135_PRED)
  833. return 1;
  834. if (mode == D63_PRED && best_intra_mode != V_PRED &&
  835. best_intra_mode != D45_PRED)
  836. return 1;
  837. if (mode == D207_PRED && best_intra_mode != H_PRED &&
  838. best_intra_mode != D45_PRED)
  839. return 1;
  840. if (mode == D153_PRED && best_intra_mode != H_PRED &&
  841. best_intra_mode != D135_PRED)
  842. return 1;
  843. return 0;
  844. }
  845. static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row,
  846. int col, PREDICTION_MODE *best_mode,
  847. const int *bmode_costs, ENTROPY_CONTEXT *a,
  848. ENTROPY_CONTEXT *l, int *bestrate,
  849. int *bestratey, int64_t *bestdistortion,
  850. BLOCK_SIZE bsize, int64_t rd_thresh) {
  851. PREDICTION_MODE mode;
  852. MACROBLOCKD *const xd = &x->e_mbd;
  853. int64_t best_rd = rd_thresh;
  854. struct macroblock_plane *p = &x->plane[0];
  855. struct macroblockd_plane *pd = &xd->plane[0];
  856. const int src_stride = p->src.stride;
  857. const int dst_stride = pd->dst.stride;
  858. const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  859. uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
  860. ENTROPY_CONTEXT ta[2], tempa[2];
  861. ENTROPY_CONTEXT tl[2], templ[2];
  862. const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  863. const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  864. int idx, idy;
  865. uint8_t best_dst[8 * 8];
  866. #if CONFIG_VP9_HIGHBITDEPTH
  867. uint16_t best_dst16[8 * 8];
  868. #endif
  869. memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
  870. memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
  871. xd->mi[0]->tx_size = TX_4X4;
  872. #if CONFIG_VP9_HIGHBITDEPTH
  873. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  874. for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
  875. int64_t this_rd;
  876. int ratey = 0;
  877. int64_t distortion = 0;
  878. int rate = bmode_costs[mode];
  879. if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
  880. // Only do the oblique modes if the best so far is
  881. // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode)) continue;
      }

      memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
      memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = (row + idy) * 2 + (col + idx);
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
          int16_t *const src_diff =
              vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0]->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, 1, TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride, dst,
                                  dst_stride, col + idx, row + idy, 0);
          vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
                                    dst_stride, xd->bd);
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
            const int coeff_ctx =
                combine_entropy_contexts(tempa[idx], templ[idy]);
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst16,
                                   dst_stride, p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
            const int coeff_ctx =
                combine_entropy_contexts(tempa[idx], templ[idy]);
            if (tx_type == DCT_DCT)
              vpx_highbd_fdct4x4(src_diff, coeff, 8);
            else
              vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            distortion += vp9_highbd_block_error_dispatch(
                              coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16,
                              &unused, xd->bd) >>
                          2;
            tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst16, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
        memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd : {}
    }

    if (best_rd >= rd_thresh || x->skip_encode) return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
             best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif // CONFIG_VP9_HIGHBITDEPTH
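
  // Low bit-depth path: mirrors the high bit-depth mode loop above, but uses
  // the 8-bit forward/inverse transforms and reconstruction buffers.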
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode)) continue;
    }

    memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
    memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = (row + idy) * 2 + (col + idx);
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff =
            vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, 1, TX_4X4, mode, x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride, dst,
                                dst_stride, col + idx, row + idy, 0);
        vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          const int coeff_ctx =
              combine_entropy_contexts(tempa[idx], templ[idy]);
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                               so->neighbors, cpi->sf.use_fast_coef_costing);
          tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          const int coeff_ctx =
              combine_entropy_contexts(tempa[idx], templ[idy]);
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                               so->neighbors, cpi->sf.use_fast_coef_costing);
          tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >>
                        2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), dst,
                         dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
      memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
               num_4x4_blocks_wide * 4);
    }
  next : {}
  }

  if (best_rd >= rd_thresh || x->skip_encode) return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
           num_4x4_blocks_wide * 4);

  return best_rd;
}
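
// Picks the best intra mode independently for each sub-8x8 partition of an
// 8x8 block, accumulating rate and distortion as it goes; returns the
// combined RD cost, or INT64_MAX once the running total exceeds best_rd.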
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  const int *bmode_costs = cpi->mbmode_cost;

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
        const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);

        bmode_costs = cpi->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(
          cpi, mb, idy, idx, &best_mode, bmode_costs,
          xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
          &ry, &d, bsize, best_rd - total_rd);

      if (this_rd >= best_rd - total_rd) return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd) return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

// This function is used only for intra_only frames
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                      int *rate_tokenonly, int64_t *distortion,
                                      int *skippable, BLOCK_SIZE bsize,
                                      int64_t best_rd) {
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    if (cpi->sf.use_nonrd_pick_mode) {
      // These speed features are turned on in hybrid non-RD and RD mode
      // for key frame coding in the context of real-time setting.
      if (conditional_skipintra(mode, mode_selected)) continue;
      if (*skippable) break;
    }
    mic->mode = mode;

    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                    bsize, best_rd);

    if (this_rate_tokenonly == INT_MAX) continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->tx_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  mic->mode = mode_selected;
  mic->tx_size = best_tx;

  return best_rd;
}

// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skippable, int64_t *sse,
                            BLOCK_SIZE bsize, int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;
  int is_cost_valid = 1;

  if (ref_best_rd < 0) is_cost_valid = 0;

  if (is_inter_block(mi) && is_cost_valid) {
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp9_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
                     plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }

  return is_cost_valid;
}
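
// Full RD search over the chroma intra prediction modes, keeping the mode
// with the lowest rate-distortion cost.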
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx, int *rate,
                                       int *rate_tokenonly, int64_t *distortion,
                                       int *skippable, BLOCK_SIZE bsize,
                                       TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue;
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
        (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode])
      continue;
#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH

    xd->mi[0]->uv_mode = mode;

    if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
                          &this_sse, bsize, best_rd))
      continue;
    this_rate =
        this_rate_tokenonly +
        cpi->intra_uv_mode_cost[cpi->common.frame_type][xd->mi[0]->mode][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_tx_size) swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
    }
  }

  xd->mi[0]->uv_mode = mode_selected;
  return best_rd;
}
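
// Cheap chroma estimate that evaluates DC_PRED only; used by the
// use_uv_intra_rd_estimate speed feature in place of the full search above.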
static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                              int *rate_tokenonly, int64_t *distortion,
                              int *skippable, BLOCK_SIZE bsize) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t unused;

  x->e_mbd.mi[0]->uv_mode = DC_PRED;
  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused,
                   bsize, INT64_MAX);
  *rate =
      *rate_tokenonly +
      cpi->intra_uv_mode_cost[cm->frame_type][x->e_mbd.mi[0]->mode][DC_PRED];
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}

static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x,
                                 PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
                                 TX_SIZE max_tx_size, int *rate_uv,
                                 int *rate_uv_tokenonly, int64_t *dist_uv,
                                 int *skip_uv, PREDICTION_MODE *mode_uv) {
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                   bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
    // Else do a proper rd search for each possible transform size that may
    // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv,
                            skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
                            max_tx_size);
  }
  *mode_uv = x->e_mbd.mi[0]->uv_mode;
}

static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
                       int mode_context) {
  assert(is_inter_mode(mode));
  return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
}
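
// Assigns the motion vector(s) implied by 'mode' to sub-8x8 block i (and to
// the other 4x4 units it covers) and returns the rate cost of signalling the
// mode plus, for NEWMV, the motion vector bits.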
static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                int i, PREDICTION_MODE mode, int_mv this_mv[2],
                                int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                int_mv seg_mvs[MAX_REF_FRAMES],
                                int_mv *best_ref_mv[2], const int *mvjcost,
                                int *mvcost[2]) {
  MODE_INFO *const mi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type];
  const int is_compound = has_second_ref(mi);

  switch (mode) {
    case NEWMV:
      this_mv[0].as_int = seg_mvs[mi->ref_frame[0]].as_int;
      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                    mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARMV:
    case NEARESTMV:
      this_mv[0].as_int = frame_mv[mode][mi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound) this_mv[1].as_int = 0;
      break;
    default: break;
  }

  mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound) mi->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mi->bmi[i].as_mode = mode;

  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      memmove(&mi->bmi[i + idy * 2 + idx], &mi->bmi[i], sizeof(mi->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mi->ref_frame[0]]) +
         thismvcost;
}
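
// Builds the inter predictor for sub-8x8 label i, then transforms, quantizes
// and costs the residual with 4x4 transforms. Returns the label's RD cost,
// or INT64_MAX as soon as the running cost can no longer beat best_yrd.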
static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCK *x,
                                       int64_t best_yrd, int i, int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;
  const uint8_t *const src =
      &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  uint8_t *const dst =
      &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const scan_order *so = &vp9_default_scan_orders[TX_4X4];
  const int is_compound = has_second_ref(mi);
  const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];

  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const int bw = b_width_log2_lookup[BLOCK_8X8];
    const int h = 4 * (i >> bw);
    const int w = 4 * (i & ((1 << bw) - 1));
    const struct scale_factors *sf = &xd->block_refs[ref]->sf;
    int y_stride = pd->pre[ref].stride;
    uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);

    if (vp9_is_scaled(sf)) {
      const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
      const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));

      y_stride = xd->block_refs[ref]->buf->y_stride;
      pre = xd->block_refs[ref]->buf->y_buffer;
      pre += scaled_buffer_offset(x_start + w, y_start + h, y_stride, sf);
    }
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(
          CONVERT_TO_SHORTPTR(pre), y_stride, CONVERT_TO_SHORTPTR(dst),
          pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
          &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
          mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2),
          xd->bd);
    } else {
      vp9_build_inter_predictor(
          pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
          &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
          mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
    }
#else
    vp9_build_inter_predictor(
        pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
        &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
        mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
#endif // CONFIG_VP9_HIGHBITDEPTH
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(
        height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
  } else {
    vpx_subtract_block(height, width,
                       vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                       8, src, p->src.stride, dst, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width,
                     vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                     8, src, p->src.stride, dst, pd->dst.stride);
#endif // CONFIG_VP9_HIGHBITDEPTH

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
#if CONFIG_VP9_HIGHBITDEPTH
      const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
#endif
      int64_t ssz, rd, rd1, rd2;
      tran_low_t *coeff;
      int coeff_ctx;
      k += (idy * 2 + idx);
      coeff_ctx = combine_entropy_contexts(ta[k & 1], tl[k >> 1]);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txfm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                     coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
      thisdistortion += vp9_highbd_block_error_dispatch(
          coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd);
#else
      thisdistortion +=
          vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz);
#endif // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, TX_4X4, coeff_ctx, so->scan,
                              so->neighbors, cpi->sf.use_fast_coef_costing);
      ta[k & 1] = tl[k >> 1] = (x->plane[0].eobs[k] > 0) ? 1 : 0;
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = VPXMIN(rd1, rd2);
      if (rd >= best_yrd) return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
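
// Bookkeeping for the sub-8x8 mode search below: per-label rate/distortion
// statistics (SEG_RDSTAT) and the best segmentation found so far
// (BEST_SEG_INFO).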
typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;
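
// Returns nonzero if the full-pel position of the MV falls outside the
// current motion search limits.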
static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
  return (mv->row >> 3) < mv_limits->row_min ||
         (mv->row >> 3) > mv_limits->row_max ||
         (mv->col >> 3) < mv_limits->col_min ||
         (mv->col >> 3) > mv_limits->col_max;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MODE_INFO *const mi = x->e_mbd.mi[0];
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf =
      &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf =
      &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
  if (has_second_ref(mi))
    pd->pre[1].buf =
        &pd->pre[1]
             .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)];
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MODE_INFO *mi = x->e_mbd.mi[0];
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mi)) x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(const VP9_COMP *cpi,
                              const uint8_t mode_context[MAX_REF_FRAMES],
                              int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                              int this_mode,
                              const MV_REFERENCE_FRAME ref_frames[2]) {
  if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
      (ref_frames[1] == NONE ||
       frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
    int rfc = mode_context[ref_frames[0]];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      if (ref_frames[1] == NONE) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  }
  return 1;
}

static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                int_mv *frame_mv, int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  const VP9_COMMON *const cm = &cpi->common;
  const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const int refs[2] = { mi->ref_frame[0],
                        mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] };
  int_mv ref_mv[2];
  int ite, ref;
  const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
  struct scale_factors sf;

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  uint32_t last_besterr[2] = { UINT_MAX, UINT_MAX };
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]),
    vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1])
  };

// Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
  uint8_t *second_pred;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
#endif // CONFIG_VP9_HIGHBITDEPTH

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                           NULL);
    }

    frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
  }

// Since we have scaled the reference frames to match the size of the current
// frame we must use a unit scaling factor during mode selection.
#if CONFIG_VP9_HIGHBITDEPTH
  vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height, cm->use_highbitdepth);
#else
  vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height);
#endif // CONFIG_VP9_HIGHBITDEPTH

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    uint32_t bestsme = UINT_MAX;
    int sadpb = x->sadperbit16;
    MV tmp_mv;
    int search_range = 3;
    const MvLimits tmp_mv_limits = x->mv_limits;
    int id = ite % 2;  // Even iterations search in the first reference frame,
                       // odd iterations search in the second. The predictor
                       // found for the 'other' reference frame is factored in.

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

// Get the prediction block from the 'other' reference frame.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      vp9_highbd_build_inter_predictor(
          CONVERT_TO_SHORTPTR(ref_yv12[!id].buf), ref_yv12[!id].stride,
          second_pred_alloc_16, pw, &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0,
          kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
      vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
                                second_pred, pw, &frame_mv[refs[!id]].as_mv,
                                &sf, pw, ph, 0, kernel, MV_PRECISION_Q3,
                                mi_col * MI_SIZE, mi_row * MI_SIZE);
    }
#else
    vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
                              second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf,
                              pw, ph, 0, kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);
#endif // CONFIG_VP9_HIGHBITDEPTH

    // Do compound motion search on the current reference frame.
    if (id) xd->plane[0].pre[0] = ref_yv12[id];
    vp9_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);

    // Use the mv result from the single mode as mv predictor.
    tmp_mv = frame_mv[refs[id]].as_mv;

    tmp_mv.col >>= 3;
    tmp_mv.row >>= 3;

    // Small-range full-pixel motion search.
    bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range,
                                       &cpi->fn_ptr[bsize], &ref_mv[id].as_mv,
                                       second_pred);
    if (bestsme < UINT_MAX)
      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
                                      second_pred, &cpi->fn_ptr[bsize], 1);

    x->mv_limits = tmp_mv_limits;

    if (bestsme < UINT_MAX) {
      uint32_t dis; /* TODO: use dis in distortion calculation later. */
      uint32_t sse;
      bestsme = cpi->find_fractional_mv_step(
          x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
          x->errorperbit, &cpi->fn_ptr[bsize], 0,
          cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred, pw, ph);
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (id) xd->plane[0].pre[0] = ref_yv12[0];

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_mv = tmp_mv;
      last_besterr[id] = bestsme;
    } else {
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the prediction frame pointers to their unscaled versions.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }
}
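
// Rate-distortion search over the inter modes of each sub-8x8 label. For
// later interpolation filters (filter_idx > 0), results cached for an earlier
// filter are reused when the motion vectors are full-pel and identical.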
static int64_t rd_pick_best_sub8x8_mode(
    VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv,
    int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate,
    int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
    int mvthresh, int_mv seg_mvs[4][MAX_REF_FRAMES], BEST_SEG_INFO *bsi_buf,
    int filter_idx, int mi_row, int mi_col) {
  int i;
  BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  int mode_idx;
  int k, br = 0, idx, idy;
  int64_t bd = 0, block_sse = 0;
  PREDICTION_MODE this_mode;
  VP9_COMMON *cm = &cpi->common;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int label_count = 4;
  int64_t this_segment_rd = 0;
  int label_mv_thresh;
  int segmentyrate = 0;
  const BLOCK_SIZE bsize = mi->sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  ENTROPY_CONTEXT t_above[2], t_left[2];
  int subpelmv = 1, have_ref = 0;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int has_second_rf = has_second_ref(mi);
  const int inter_mode_mask = sf->inter_mode_mask[bsize];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  vp9_zero(*bsi);

  bsi->segment_rd = best_rd;
  bsi->ref_mv[0] = best_ref_mv;
  bsi->ref_mv[1] = second_best_ref_mv;
  bsi->mvp.as_int = best_ref_mv->as_int;
  bsi->mvthresh = mvthresh;

  for (i = 0; i < 4; i++) bsi->modes[i] = ZEROMV;

  memcpy(t_above, pd->above_context, sizeof(t_above));
  memcpy(t_left, pd->left_context, sizeof(t_left));

  // A multiplier of 64 would make this threshold so large that we would very
  // rarely check mvs on segments; a multiplier of 1 makes the mv threshold
  // roughly equal to what it is for macroblocks.
  label_mv_thresh = 1 * bsi->mvthresh / label_count;

  // Segmentation method overheads
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      // TODO(jingning,rbultje): rewrite the rate-distortion optimization
      // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT][2];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;
      const int i = idy * 2 + idx;
      int ref;

      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
        const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
        frame_mv[ZEROMV][frame].as_int = 0;
        vp9_append_sub8x8_mvs_for_idx(
            cm, xd, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame],
            &frame_mv[NEARMV][frame], mbmi_ext->mode_context);
      }

      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
        if (!(inter_mode_mask & (1 << this_mode))) continue;

        if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
                                this_mode, mi->ref_frame))
          continue;

        memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
               sizeof(bsi->rdstat[i][mode_idx].ta));
        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
               sizeof(bsi->rdstat[i][mode_idx].tl));

        // motion search for newmv (single predictor case only)
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) {
          MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
          int step_param = 0;
          uint32_t bestsme = UINT_MAX;
          int sadpb = x->sadperbit4;
          MV mvp_full;
          int max_mv;
          int cost_list[5];
          const MvLimits tmp_mv_limits = x->mv_limits;

          /* Is the best so far sufficiently good that we can't justify doing
           * a new motion search. */
          if (best_rd < label_mv_thresh) break;

          if (cpi->oxcf.mode != BEST) {
            // use previous block's result as next block's MV predictor.
            if (i > 0) {
              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
              if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mi->ref_frame[0]];
          else
            max_mv =
                VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;

          if (sf->mv.auto_mv_step_size && cm->show_frame) {
            // Take wtd average of the step_params based on the last frame's
            // max mv magnitude and the best ref mvs of the current block for
            // the given reference.
            step_param =
                (vp9_init_search_range(max_mv) + cpi->mv_step_param) / 2;
          } else {
            step_param = cpi->mv_step_param;
          }

          mvp_full.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.col = bsi->mvp.as_mv.col >> 3;

          if (sf->adaptive_motion_search) {
            mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3;
            mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3;
            step_param = VPXMAX(step_param, 8);
          }

          // adjust src pointer for this block
          mi_buf_shift(x, i);

          vp9_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv);

          bestsme = vp9_full_pixel_search(
              cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method,
              sadpb,
              sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
              &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1);

          x->mv_limits = tmp_mv_limits;

          if (bestsme < UINT_MAX) {
            uint32_t distortion;
            cpi->find_fractional_mv_step(
                x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
                x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop,
                sf->mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
                x->nmvjointcost, x->mvcost, &distortion,
                &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0);

            // save motion search result for use in compound prediction
            seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;
          }

          if (sf->adaptive_motion_search)
            x->pred_mv[mi->ref_frame[0]] = *new_mv;

          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        if (has_second_rf) {
          if (seg_mvs[i][mi->ref_frame[1]].as_int == INVALID_MV ||
              seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV)
            continue;
        }

        if (has_second_rf && this_mode == NEWMV &&
            mi->interp_filter == EIGHTTAP) {
          // adjust src pointers
          mi_buf_shift(x, i);
          if (sf->comp_inter_joint_search_thresh <= bsize) {
            int rate_mv;
            joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
                                mi_col, seg_mvs[i], &rate_mv);
            seg_mvs[i][mi->ref_frame[0]].as_int =
                frame_mv[this_mode][mi->ref_frame[0]].as_int;
            seg_mvs[i][mi->ref_frame[1]].as_int =
                frame_mv[this_mode][mi->ref_frame[1]].as_int;
          }
          // restore src pointers
          mi_buf_restore(x, orig_src, orig_pre);
        }

        bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs(
            cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv, seg_mvs[i],
            bsi->ref_mv, x->nmvjointcost, x->mvcost);

        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
              mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
                mode_mv[this_mode][ref].as_int;
        }

        // Trap vectors that reach beyond the UMV borders
        if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) ||
            (has_second_rf &&
             mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv)))
          continue;

        if (filter_idx > 0) {
          BEST_SEG_INFO *ref_bsi = bsi_buf;
          subpelmv = 0;
          have_ref = 1;

          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
            have_ref &= mode_mv[this_mode][ref].as_int ==
                        ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (filter_idx > 1 && !subpelmv && !have_ref) {
            ref_bsi = bsi_buf + 1;
            have_ref = 1;
            for (ref = 0; ref < 1 + has_second_rf; ++ref)
              have_ref &= mode_mv[this_mode][ref].as_int ==
                          ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
          }

          if (!subpelmv && have_ref &&
              ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
            memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
                   sizeof(SEG_RDSTAT));
            if (num_4x4_blocks_wide > 1)
              bsi->rdstat[i + 1][mode_idx].eobs =
                  ref_bsi->rdstat[i + 1][mode_idx].eobs;
            if (num_4x4_blocks_high > 1)
              bsi->rdstat[i + 2][mode_idx].eobs =
                  ref_bsi->rdstat[i + 2][mode_idx].eobs;

            if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
              mode_selected = this_mode;
              best_rd = bsi->rdstat[i][mode_idx].brdcost;
            }
            continue;
          }
        }

        bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment(
            cpi, x, bsi->segment_rd - this_segment_rd, i,
            &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist,
            &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta,
            bsi->rdstat[i][mode_idx].tl, mi_row, mi_col);
        if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
          bsi->rdstat[i][mode_idx].brdcost +=
              RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
          bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
          bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
          if (num_4x4_blocks_wide > 1)
            bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
          if (num_4x4_blocks_high > 1)
            bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
        }

        if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
          mode_selected = this_mode;
          best_rd = bsi->rdstat[i][mode_idx].brdcost;
        }
      } /*for each 4x4 mode*/

      if (best_rd == INT64_MAX) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }

      mode_idx = INTER_OFFSET(mode_selected);
      memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
      memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));

      set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
                           frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
                           x->mvcost);

      br += bsi->rdstat[i][mode_idx].brate;
      bd += bsi->rdstat[i][mode_idx].bdist;
      block_sse += bsi->rdstat[i][mode_idx].bsse;
      segmentyrate += bsi->rdstat[i][mode_idx].byrate;
      this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;

      if (this_segment_rd > bsi->segment_rd) {
        int iy, midx;
        for (iy = i + 1; iy < 4; ++iy)
          for (midx = 0; midx < INTER_MODES; ++midx)
            bsi->rdstat[iy][midx].brdcost = INT64_MAX;
        bsi->segment_rd = INT64_MAX;
        return INT64_MAX;
      }
    }
  } /* for each label */

  bsi->r = br;
  bsi->d = bd;
  bsi->segment_yrate = segmentyrate;
  bsi->segment_rd = this_segment_rd;
  bsi->sse = block_sse;

  // update the coding decisions
  for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode;

  if (bsi->segment_rd > best_rd) return INT64_MAX;
  /* set it to the best */
  for (i = 0; i < 4; i++) {
    mode_idx = INTER_OFFSET(bsi->modes[i]);
    mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
    if (has_second_ref(mi))
      mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
    x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
    mi->bmi[i].as_mode = bsi->modes[i];
  }

  /*
   * used to set mbmi->mv.as_int
   */
  *returntotrate = bsi->r;
  *returndistortion = bsi->d;
  *returnyrate = bsi->segment_yrate;
  *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
  *psse = bsi->sse;
  mi->mode = bsi->modes[3];

  return bsi->segment_rd;
}
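
// Precomputes the bit cost of signalling each single and compound reference
// frame for this segment, along with the compound-mode probability.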
static void estimate_ref_frame_costs(const VP9_COMMON *cm,
                                     const MACROBLOCKD *xd, int segment_id,
                                     unsigned int *ref_costs_single,
                                     unsigned int *ref_costs_comp,
                                     vpx_prob *comp_mode_p) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
    memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
    *comp_mode_p = 128;
  } else {
    vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
    vpx_prob comp_inter_p = 128;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
      *comp_mode_p = comp_inter_p;
    } else {
      *comp_mode_p = 128;
    }

    ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
      vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp9_cost_bit(comp_inter_p, 0);

      ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
          ref_costs_single[ALTREF_FRAME] = base_cost;
      ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
      ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
    } else {
      ref_costs_single[LAST_FRAME] = 512;
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }
    if (cm->reference_mode != SINGLE_REFERENCE) {
      vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
      unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        base_cost += vp9_cost_bit(comp_inter_p, 1);

      ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
      ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
    } else {
      ref_costs_comp[LAST_FRAME] = 512;
      ref_costs_comp[GOLDEN_FRAME] = 512;
    }
  }
}

static void store_coding_context(
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
    int64_t comp_pred_diff[REFERENCE_MODES],
    int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->skip = x->skip;
  ctx->skippable = skippable;
  ctx->best_mode_index = mode_index;
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];

  memcpy(ctx->best_filter_diff, best_filter_diff,
         sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}

static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                               MV_REFERENCE_FRAME ref_frame,
                               BLOCK_SIZE block_size, int mi_row, int mi_col,
                               int_mv frame_nearest_mv[MAX_REF_FRAMES],
                               int_mv frame_near_mv[MAX_REF_FRAMES],
                               struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
  const VP9_COMMON *cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  assert(yv12 != NULL);

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);

  // Gets an initial list of candidate vectors from neighbours and orders them
  vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
                   mbmi_ext->mode_context);

  // Candidate refinement carried out at encoder and decoder
  vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame],
                        &frame_near_mv[ref_frame]);

  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
  if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
    vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
                block_size);
}
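
// Full-pel followed by sub-pel motion search against a single reference
// frame; writes the best MV to tmp_mv and its signalling cost to rate_mv.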
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col, int_mv *tmp_mv,
                                 int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const VP9_COMMON *cm = &cpi->common;
  MODE_INFO *mi = xd->mi[0];
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mi->ref_frame[0];
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  const MvLimits tmp_mv_limits = x->mv_limits;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      vp9_get_scaled_ref_frame(cpi, ref);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];

    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take wtd average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param =
        (vp9_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
        2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
    int boffset =
        2 * (b_width_log2_lookup[BLOCK_64X64] -
             VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = VPXMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5) step_param += 2;

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
      int i;
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          tmp_mv->as_int = INVALID_MV;

          if (scaled_ref_frame) {
            int i;
            for (i = 0; i < MAX_MB_PLANE; ++i)
              xd->plane[i].pre[0] = backup_yv12[i];
          }
          return;
        }
      }
    }
  }

  // Note: MV limits are modified here. Always restore the original values
  // after full-pixel motion search.
  vp9_set_mv_search_range(&x->mv_limits, &ref_mv);

  mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  bestsme = vp9_full_pixel_search(
      cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
      cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);

  x->mv_limits = tmp_mv_limits;

  if (bestsme < INT_MAX) {
    uint32_t dis; /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(
        x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
        &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
        cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
  }
  *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
                             x->mvcost, MV_COST_WEIGHT);

  if (cpi->sf.adaptive_motion_search) x->pred_mv[ref] = tmp_mv->as_mv;

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  }
}

static INLINE void restore_dst_buf(MACROBLOCKD *xd,
                                   uint8_t *orig_dst[MAX_MB_PLANE],
                                   int orig_dst_stride[MAX_MB_PLANE]) {
  int i;
  for (i = 0; i < MAX_MB_PLANE; i++) {
    xd->plane[i].dst.buf = orig_dst[i];
    xd->plane[i].dst.stride = orig_dst_stride[i];
  }
}

// In some situations we want to discount the apparent cost of a new motion
// vector. Where there is a subtle motion field and especially where there is
// low spatial complexity then it can be hard to cover the cost of a new motion
// vector in a single block, even if that motion vector reduces distortion.
// However, once established that vector may be usable through the nearest and
// near mv modes to reduce distortion in subsequent blocks and also improve
// visual quality.
static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
                               int_mv this_mv,
                               int_mv (*mode_mv)[MAX_REF_FRAMES],
                               int ref_frame) {
  return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
          (this_mv.as_int != 0) &&
          ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
           (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
          ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
           (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
}
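
// Evaluates a single inter mode end to end: motion vector search and rate,
// prediction into the destination or a temporary buffer, and (further down)
// interpolation filter selection and transform RD. Returns the mode's RD
// cost, or INT64_MAX when the mode is rejected early.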
  2165. static int64_t handle_inter_mode(
  2166. VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
  2167. int64_t *distortion, int *skippable, int *rate_y, int *rate_uv,
  2168. int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row,
  2169. int mi_col, int_mv single_newmv[MAX_REF_FRAMES],
  2170. INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
  2171. int (*single_skippable)[MAX_REF_FRAMES], int64_t *psse,
  2172. const int64_t ref_best_rd, int64_t *mask_filter, int64_t filter_cache[]) {
  2173. VP9_COMMON *cm = &cpi->common;
  2174. MACROBLOCKD *xd = &x->e_mbd;
  2175. MODE_INFO *mi = xd->mi[0];
  2176. MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  2177. const int is_comp_pred = has_second_ref(mi);
  2178. const int this_mode = mi->mode;
  2179. int_mv *frame_mv = mode_mv[this_mode];
  2180. int i;
  2181. int refs[2] = { mi->ref_frame[0],
  2182. (mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]) };
  2183. int_mv cur_mv[2];
  2184. #if CONFIG_VP9_HIGHBITDEPTH
  2185. DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
  2186. uint8_t *tmp_buf;
  2187. #else
  2188. DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
  2189. #endif // CONFIG_VP9_HIGHBITDEPTH
  2190. int pred_exists = 0;
  2191. int intpel_mv;
  2192. int64_t rd, tmp_rd, best_rd = INT64_MAX;
  2193. int best_needs_copy = 0;
  2194. uint8_t *orig_dst[MAX_MB_PLANE];
  2195. int orig_dst_stride[MAX_MB_PLANE];
  2196. int rs = 0;
  2197. INTERP_FILTER best_filter = SWITCHABLE;
  2198. uint8_t skip_txfm[MAX_MB_PLANE << 2] = { 0 };
  2199. int64_t bsse[MAX_MB_PLANE << 2] = { 0 };
  2200. int bsl = mi_width_log2_lookup[bsize];
  2201. int pred_filter_search =
  2202. cpi->sf.cb_pred_filter_search
  2203. ? (((mi_row + mi_col) >> bsl) +
  2204. get_chessboard_index(cm->current_video_frame)) &
  2205. 0x1
  2206. : 0;
  2207. int skip_txfm_sb = 0;
  2208. int64_t skip_sse_sb = INT64_MAX;
  2209. int64_t distortion_y = 0, distortion_uv = 0;
  2210. #if CONFIG_VP9_HIGHBITDEPTH
  2211. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  2212. tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
  2213. } else {
  2214. tmp_buf = (uint8_t *)tmp_buf16;
  2215. }
  2216. #endif // CONFIG_VP9_HIGHBITDEPTH
  if (pred_filter_search) {
    INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
    if (xd->above_mi && is_inter_block(xd->above_mi))
      af = xd->above_mi->interp_filter;
    if (xd->left_mi && is_inter_block(xd->left_mi))
      lf = xd->left_mi->interp_filter;

    if ((this_mode != NEWMV) || (af == lf)) best_filter = af;
  }

  if (is_comp_pred) {
    if (frame_mv[refs[0]].as_int == INVALID_MV ||
        frame_mv[refs[1]].as_int == INVALID_MV)
      return INT64_MAX;

    if (cpi->sf.adaptive_mode_search) {
      if (single_filter[this_mode][refs[0]] ==
          single_filter[this_mode][refs[1]])
        best_filter = single_filter[this_mode][refs[0]];
    }
  }

  if (this_mode == NEWMV) {
    int rate_mv;
    if (is_comp_pred) {
      // Initialize mv using single prediction mode result.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col,
                            single_newmv, &rate_mv);
      } else {
        rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
      *rate2 += rate_mv;
    } else {
      int_mv tmp_mv;
      single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
      if (tmp_mv.as_int == INVALID_MV) return INT64_MAX;

      frame_mv[refs[0]].as_int = xd->mi[0]->bmi[0].as_mv[0].as_int =
          tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;

      // Estimate the rate implications of a new mv but discount this
      // under certain circumstances where we want to help initiate a weak
      // motion field, where the distortion gain for a single block may not
      // be enough to overcome the cost of a new mv.
      if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
        *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
      } else {
        *rate2 += rate_mv;
      }
    }
  }

  for (i = 0; i < is_comp_pred + 1; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend too far out of the image.
    if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);

    if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
    mi->mv[i].as_int = cur_mv[i].as_int;
  }

  // Do the first prediction into the destination buffer and subsequent
  // predictions into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  for (i = 0; i < MAX_MB_PLANE; i++) {
    orig_dst[i] = xd->plane[i].dst.buf;
    orig_dst_stride[i] = xd->plane[i].dst.stride;
  }

  // We don't include the cost of the second reference here, because there
  // are only two options: Last/ARF or Golden/ARF; the second one is always
  // known, which is ARF.
  //
  // Under some circumstances we discount the cost of new mv mode to encourage
  // initiation of a motion field.
  if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
                          refs[0])) {
    *rate2 +=
        VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]),
               cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]]));
  } else {
    *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
  }

  if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
      mi->mode != NEARESTMV)
    return INT64_MAX;

  pred_exists = 0;
  // Are all MVs integer pel for Y and UV?
  intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv);
  if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv);

  // Search for the best switchable filter by checking the variance of
  // pred error irrespective of whether the filter will be used.
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;
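  // Filter search note: when all MVs are integer pel (intpel_mv), the
  // predictor is identical for every filter, so the rate/distortion measured
  // with the first filter (cached in tmp_rate_sum/tmp_dist_sum below) is
  // reused for the remaining filters rather than re-running prediction.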
  if (cm->interp_filter != BILINEAR) {
    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
      best_filter = EIGHTTAP;
    } else if (best_filter == SWITCHABLE) {
      int newbest;
      int tmp_rate_sum = 0;
      int64_t tmp_dist_sum = 0;

      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        int j;
        int64_t rs_rd;
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;

        mi->interp_filter = i;
        rs = vp9_get_switchable_rate(cpi, xd);
        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

        if (i > 0 && intpel_mv) {
          rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
          *mask_filter = VPXMAX(*mask_filter, rd);
        } else {
          int rate_sum = 0;
          int64_t dist_sum = 0;
          if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
              (cpi->sf.interp_filter_search_mask & (1 << i))) {
            rate_sum = INT_MAX;
            dist_sum = INT64_MAX;
            continue;
          }

          if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) ||
              (cm->interp_filter != SWITCHABLE &&
               (cm->interp_filter == mi->interp_filter ||
                (i == 0 && intpel_mv)))) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
          } else {
            for (j = 0; j < MAX_MB_PLANE; j++) {
              xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
              xd->plane[j].dst.stride = 64;
            }
          }
          vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
          model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &tmp_skip_sb,
                          &tmp_skip_sse);

          rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
          *mask_filter = VPXMAX(*mask_filter, rd);

          if (i == 0 && intpel_mv) {
            tmp_rate_sum = rate_sum;
            tmp_dist_sum = dist_sum;
          }
        }

        if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
          if (rd / 2 > ref_best_rd) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
            return INT64_MAX;
          }
        }
        newbest = i == 0 || rd < best_rd;

        if (newbest) {
          best_rd = rd;
          best_filter = mi->interp_filter;
          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
            best_needs_copy = !best_needs_copy;
        }

        if ((cm->interp_filter == SWITCHABLE && newbest) ||
            (cm->interp_filter != SWITCHABLE &&
             cm->interp_filter == mi->interp_filter)) {
          pred_exists = 1;
          tmp_rd = best_rd;

          skip_txfm_sb = tmp_skip_sb;
          skip_sse_sb = tmp_skip_sse;
          memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
          memcpy(bsse, x->bsse, sizeof(bsse));
        }
      }
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
    }
  }
  // Set the appropriate filter.
  mi->interp_filter =
      cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter;
  rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0;

  if (pred_exists) {
    if (best_needs_copy) {
      // Again temporarily set the buffers to local memory to prevent a memcpy.
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
        xd->plane[i].dst.stride = 64;
      }
    }
    rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
  } else {
    int tmp_rate;
    int64_t tmp_dist;
    // Handles the special case when a filter that is not in the
    // switchable list (e.g. bilinear) is indicated at the frame level, or
    // the skip condition holds.
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb,
                    &skip_sse_sb);
    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
    memcpy(bsse, x->bsse, sizeof(bsse));
  }

  if (!is_comp_pred) single_filter[this_mode][refs[0]] = mi->interp_filter;
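  // If both single-reference predictions for this mode were fully skippable,
  // assume the compound prediction will be as well and mark every block to
  // skip the transform search (SKIP_TXFM_AC_DC).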
  if (cpi->sf.adaptive_mode_search)
    if (is_comp_pred)
      if (single_skippable[this_mode][refs[0]] &&
          single_skippable[this_mode][refs[1]])
        memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));

  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    // If the current modeled pred_error rd is substantially more than the
    // best so far, do not bother doing a full rd search.
    if (rd / 2 > ref_best_rd) {
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }
  }

  if (cm->interp_filter == SWITCHABLE) *rate2 += rs;

  memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
  memcpy(x->bsse, bsse, sizeof(bsse));
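  // When the model predicts a visible residual (!skip_txfm_sb), measure the
  // actual Y and UV rate/distortion below; otherwise cost the block as a
  // skipped one using the modeled SSE.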
  if (!skip_txfm_sb) {
    int skippable_y, skippable_uv;
    int64_t sseuv = INT64_MAX;
    int64_t rdcosty = INT64_MAX;

    // Y cost and distortion
    vp9_subtract_plane(x, bsize, 0);
    super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize,
                    ref_best_rd);

    if (*rate_y == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *rate2 += *rate_y;
    *distortion += distortion_y;

    rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
    rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));

    if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
                          &sseuv, bsize, ref_best_rd - rdcosty)) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *psse += sseuv;
    *rate2 += *rate_uv;
    *distortion += distortion_uv;
    *skippable = skippable_y && skippable_uv;
  } else {
    x->skip = 1;
    *disable_skip = 1;

    // The cost of the skip bit needs to be added.
    *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

    *distortion = skip_sse_sb;
  }

  if (!is_comp_pred) single_skippable[this_mode][refs[0]] = *skippable;

  restore_dst_buf(xd, orig_dst, orig_dst_stride);
  return 0;  // The rate-distortion cost will be re-calculated by caller.
}
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                               int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = xd->plane;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;
  x->skip_encode = 0;
  ctx->skip = 0;
  xd->mi[0]->ref_frame[0] = INTRA_FRAME;
  xd->mi[0]->ref_frame[1] = NONE;
  // Initialize interp_filter here so we do not have to check for inter block
  // modes in get_pred_context_switchable_interp().
  xd->mi[0]->interp_filter = SWITCHABLE_FILTERS;

  if (bsize >= BLOCK_8X8) {
    if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                               &y_skip, bsize, best_rd) >= best_rd) {
      rd_cost->rate = INT_MAX;
      return;
    }
  } else {
    y_skip = 0;
    if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                     &dist_y, best_rd) >= best_rd) {
      rd_cost->rate = INT_MAX;
      return;
    }
  }
  max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->tx_size]
                                   [pd[1].subsampling_x][pd[1].subsampling_y];
  rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                          &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size);

  if (y_skip && uv_skip) {
    rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                    vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
    rd_cost->dist = dist_y + dist_uv;
  } else {
    rd_cost->rate =
        rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
    rd_cost->dist = dist_y + dist_uv;
  }

  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
  rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
}
// This function is designed to apply a bias or adjustment to an rd value based
// on the relative variance of the source and reconstruction.
#define LOW_VAR_THRESH 16
#define VLOW_ADJ_MAX 25
#define VHIGH_ADJ_MAX 8
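// Worked example (illustrative): with source_variance = 100 and
// recon_variance = 50, var_error = 100 - (200 * 100 * 50) / (100^2 + 50^2)
// = 100 - 80 = 20, and absvar_diff = 50. For an intra mode whose source
// variance is above the threshold and above the reconstruction's,
// var_factor = VPXMIN(50, VPXMIN(25, 20)) = 20, so the rd cost is inflated
// by 20%.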
static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
                                   BLOCK_SIZE bsize, int64_t *this_rd,
                                   MV_REFERENCE_FRAME ref_frame,
                                   unsigned int source_variance) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned int recon_variance;
  unsigned int absvar_diff = 0;
  int64_t var_error = 0;
  int64_t var_factor = 0;

  if (*this_rd == INT64_MAX) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    recon_variance = vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst,
                                                        bsize, xd->bd);
  } else {
    recon_variance =
        vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
  }
#else
  recon_variance = vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
    absvar_diff = (source_variance > recon_variance)
                      ? (source_variance - recon_variance)
                      : (recon_variance - source_variance);

    var_error = ((int64_t)200 * source_variance * recon_variance) /
                (((int64_t)source_variance * source_variance) +
                 ((int64_t)recon_variance * recon_variance));
    var_error = 100 - var_error;
  }

  // Source variance above a threshold and ref frame is intra.
  // This case is targeted mainly at discouraging intra modes that give rise
  // to a predictor with a low spatial complexity compared to the source.
  if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
      (source_variance > recon_variance)) {
    var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
    // A second possible case of interest is where the source variance
    // is very low and we wish to discourage false texture or motion trails.
  } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
             (recon_variance > source_variance)) {
    var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
  }
  *this_rd += (*this_rd * var_factor) / 100;
}
// Do we have an internal image edge (e.g. formatting bars).
int vp9_internal_image_edge(VP9_COMP *cpi) {
  return (cpi->oxcf.pass == 2) &&
         ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
          (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
}

// Checks to see if a super block is on a horizontal image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
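// For example, if the two-pass stats report inactive_zone_rows == 2 (in MB
// units), the active region starts 4 mi rows into the frame, so a super
// block whose rows span that boundary is treated as being on an edge.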
int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) {
  int top_edge = 0;
  int bottom_edge = cpi->common.mi_rows;
  int is_active_h_edge = 0;

  // For two pass account for any formatting bars detected.
  if (cpi->oxcf.pass == 2) {
    TWO_PASS *twopass = &cpi->twopass;

    // The inactive region is specified in MBs not mi units.
    // The image edge is in the following MB row.
    top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);

    bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
    bottom_edge = VPXMAX(top_edge, bottom_edge);
  }

  if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
      ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
    is_active_h_edge = 1;
  }
  return is_active_h_edge;
}

// Checks to see if a super block is on a vertical image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) {
  int left_edge = 0;
  int right_edge = cpi->common.mi_cols;
  int is_active_v_edge = 0;

  // For two pass account for any formatting bars detected.
  if (cpi->oxcf.pass == 2) {
    TWO_PASS *twopass = &cpi->twopass;

    // The inactive region is specified in MBs not mi units.
    // The image edge is in the following MB column.
    left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);

    right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
    right_edge = VPXMAX(left_edge, right_edge);
  }

  if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
      ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
    is_active_v_edge = 1;
  }
  return is_active_v_edge;
}

// Checks to see if a super block is at the edge of the active image.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) {
  return vp9_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
         vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
}
void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
                               MACROBLOCK *x, int mi_row, int mi_col,
                               RD_COST *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  RD_OPT *const rd_opt = &cpi->rd;
  SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const struct segmentation *const seg = &cm->seg;
  PREDICTION_MODE this_mode;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mi->segment_id;
  int comp_pred, i, k;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
  INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
  int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int64_t best_rd = best_rd_so_far;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  MODE_INFO best_mbmode;
  int best_mode_skippable = 0;
  int midx, best_mode_index = -1;
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  int64_t best_intra_rd = INT64_MAX;
  unsigned int best_pred_sse = UINT_MAX;
  PREDICTION_MODE best_intra_mode = DC_PRED;
  int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
  int64_t dist_uv[TX_SIZES];
  int skip_uv[TX_SIZES];
  PREDICTION_MODE mode_uv[TX_SIZES];
  const int intra_cost_penalty =
      vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q);
  int best_skip2 = 0;
  uint8_t ref_frame_skip_mask[2] = { 0 };
  uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
  int mode_skip_start = sf->mode_skip_start + 1;
  const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
  int64_t mode_threshold[MAX_MODES];
  int *tile_mode_map = tile_data->mode_map[bsize];
  int mode_map[MAX_MODES];  // Maintain mode_map information locally to avoid
                            // the lock mechanism involved with reads from
                            // tile_mode_map.
  const int mode_search_skip_flags = sf->mode_search_skip_flags;
  int64_t mask_filter = 0;
  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];

  vp9_zero(best_mbmode);

  x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
    best_filter_rd[i] = INT64_MAX;
  for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX;
  for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = 0; i < MB_MODE_COUNT; ++i) {
    for (k = 0; k < MAX_REF_FRAMES; ++k) {
      single_inter_filter[i][k] = SWITCHABLE;
      single_skippable[i][k] = 0;
    }
  }

  rd_cost->rate = INT_MAX;

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
      setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
                         frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
      // Skip checking missing references in both single and compound reference
      // modes. Note that a mode will be skipped if both reference frames
      // are masked out.
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    } else if (sf->reference_masking) {
      for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
        // Skip fixed mv modes for poor references.
        if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
          mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
          break;
        }
      }
    }
    // If the segment reference frame feature is enabled, then do nothing if
    // the current ref frame is not allowed.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    }
  }

  // Disable this drop out case if the ref frame
  // segment level feature is enabled for this segment. This is to
  // prevent the possibility that we end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
      ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
      ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
      mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
        mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
      if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
        mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->alt_ref_search_fp) {
      mode_skip_mask[ALTREF_FRAME] = 0;
      ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
      ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
    }
  }

  if (sf->alt_ref_search_fp)
    if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
      if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
        mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;

  if (sf->adaptive_mode_search) {
    if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
        cpi->rc.frames_since_golden >= 3)
      if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
        mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
  }

  if (bsize > sf->max_intra_bsize) {
    ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
    ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
  }

  mode_skip_mask[INTRA_FRAME] |=
      ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);

  for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
  for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
    mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
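  // Each mode's pruning threshold is its base rd threshold scaled by a
  // per-mode frequency factor; the >> 5 divides the product by 32, so the
  // factor is effectively in 1/32 units. Modes whose threshold exceeds the
  // best rd seen so far are skipped in the loop below. The while loop that
  // follows makes repeated bubble passes over tile_mode_map[5..midx),
  // ordering those modes by ascending threshold so that cheaper candidates
  // are visited first.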
  midx = sf->schedule_mode_search ? mode_skip_start : 0;
  while (midx > 4) {
    uint8_t end_pos = 0;
    for (i = 5; i < midx; ++i) {
      if (mode_threshold[tile_mode_map[i - 1]] >
          mode_threshold[tile_mode_map[i]]) {
        uint8_t tmp = tile_mode_map[i];
        tile_mode_map[i] = tile_mode_map[i - 1];
        tile_mode_map[i - 1] = tmp;
        end_pos = i;
      }
    }
    midx = end_pos;
  }

  memcpy(mode_map, tile_mode_map, sizeof(mode_map));

  for (midx = 0; midx < MAX_MODES; ++midx) {
    int mode_index = mode_map[midx];
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    int skippable = 0;
    int this_skip2 = 0;
    int64_t total_sse = INT64_MAX;
    int early_term = 0;

    this_mode = vp9_mode_order[mode_index].mode;
    ref_frame = vp9_mode_order[mode_index].ref_frame[0];
    second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
    vp9_zero(x->sum_y_eobs);

    // Look at the reference frame of the best mode so far and set the
    // skip mask to look at a subset of the remaining modes.
    if (midx == mode_skip_start && best_mode_index >= 0) {
      switch (best_mbmode.ref_frame[0]) {
        case INTRA_FRAME: break;
        case LAST_FRAME:
          ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
          ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
          break;
        case GOLDEN_FRAME:
          ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
          ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
          break;
        case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break;
        case NONE:
        case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break;
      }
    }

    if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
        (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
      continue;

    if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;

    // Test best rd so far against threshold for trying this mode.
    if (best_mode_skippable && sf->schedule_mode_search)
      mode_threshold[mode_index] <<= 1;

    if (best_rd < mode_threshold[mode_index]) continue;

    // This is only used in motion vector unit test.
    if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;

    if (sf->motion_field_mode_search) {
      const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize],
                                  tile_info->mi_col_end - mi_col);
      const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize],
                                   tile_info->mi_row_end - mi_row);
      const int bsl = mi_width_log2_lookup[bsize];
      int cb_partition_search_ctrl =
          (((mi_row + mi_col) >> bsl) +
           get_chessboard_index(cm->current_video_frame)) &
          0x1;
      MODE_INFO *ref_mi;
      int const_motion = 1;
      int skip_ref_frame = !cb_partition_search_ctrl;
      MV_REFERENCE_FRAME rf = NONE;
      int_mv ref_mv;
      ref_mv.as_int = INVALID_MV;

      if ((mi_row - 1) >= tile_info->mi_row_start) {
        ref_mv = xd->mi[-xd->mi_stride]->mv[0];
        rf = xd->mi[-xd->mi_stride]->ref_frame[0];
        for (i = 0; i < mi_width; ++i) {
          ref_mi = xd->mi[-xd->mi_stride + i];
          const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
                          (ref_frame == ref_mi->ref_frame[0]);
          skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
        }
      }

      if ((mi_col - 1) >= tile_info->mi_col_start) {
        if (ref_mv.as_int == INVALID_MV) ref_mv = xd->mi[-1]->mv[0];
        if (rf == NONE) rf = xd->mi[-1]->ref_frame[0];
        for (i = 0; i < mi_height; ++i) {
          ref_mi = xd->mi[i * xd->mi_stride - 1];
          const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
                          (ref_frame == ref_mi->ref_frame[0]);
          skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
        }
      }

      if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV)
        if (rf > INTRA_FRAME)
          if (ref_frame != rf) continue;

      if (const_motion)
        if (this_mode == NEARMV || this_mode == ZEROMV) continue;
    }

    comp_pred = second_ref_frame > INTRA_FRAME;
    if (comp_pred) {
      if (!cpi->allow_comp_inter_inter) continue;

      // Skip compound inter modes if ARF is not available.
      if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;

      // Do not allow compound prediction if the segment level reference frame
      // feature is in use as in this case there can only be one reference.
      if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;

      if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
          best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
        continue;

      mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
    } else {
      if (ref_frame != INTRA_FRAME)
        mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
    }

    if (ref_frame == INTRA_FRAME) {
      if (sf->adaptive_mode_search)
        if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
          continue;

      if (this_mode != DC_PRED) {
        // Disable intra modes other than DC_PRED for blocks with low variance.
        // Threshold for intra skipping based on source variance.
        // TODO(debargha): Specialize the threshold for super block sizes.
        const unsigned int skip_intra_var_thresh = 64;
        if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
            x->source_variance < skip_intra_var_thresh)
          continue;
        // Only search the oblique modes if the best so far is
        // one of the neighboring directional modes.
        if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
            (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
          if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
            continue;
        }
        if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
          if (conditional_skipintra(this_mode, best_intra_mode)) continue;
        }
      }
    } else {
      const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
      if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, this_mode,
                              ref_frames))
        continue;
    }

    mi->mode = this_mode;
    mi->uv_mode = DC_PRED;
    mi->ref_frame[0] = ref_frame;
    mi->ref_frame[1] = second_ref_frame;
    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mi->interp_filter =
        cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
    mi->mv[0].as_int = mi->mv[1].as_int = 0;

    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    if (ref_frame == INTRA_FRAME) {
      TX_SIZE uv_tx;
      struct macroblockd_plane *const pd = &xd->plane[1];
      memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
      super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize,
                      best_rd);
      if (rate_y == INT_MAX) continue;

      uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x]
                              [pd->subsampling_y];
      if (rate_uv_intra[uv_tx] == INT_MAX) {
        choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
                             &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
                             &skip_uv[uv_tx], &mode_uv[uv_tx]);
      }

      rate_uv = rate_uv_tokenonly[uv_tx];
      distortion_uv = dist_uv[uv_tx];
      skippable = skippable && skip_uv[uv_tx];
      mi->uv_mode = mode_uv[uv_tx];

      rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx];
      if (this_mode != DC_PRED && this_mode != TM_PRED)
        rate2 += intra_cost_penalty;
      distortion2 = distortion_y + distortion_uv;
    } else {
      this_rd = handle_inter_mode(
          cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv,
          &disable_skip, frame_mv, mi_row, mi_col, single_newmv,
          single_inter_filter, single_skippable, &total_sse, best_rd,
          &mask_filter, filter_cache);
      if (this_rd == INT64_MAX) continue;

      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);

      if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (comp_pred) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
      const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
      const int skip_cost1 = vp9_cost_bit(skip_prob, 1);

      if (skippable) {
        // Back out the coefficient coding costs.
        rate2 -= (rate_y + rate_uv);
        // Cost the skip mb case.
        rate2 += skip_cost1;
      } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0,
                   distortion2) <
            RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
          // Add in the cost of the no skip flag.
          rate2 += skip_cost0;
        } else {
          // FIXME(rbultje) make this work for splitmv also.
          assert(total_sse >= 0);
          rate2 += skip_cost1;
          distortion2 = total_sse;
          rate2 -= (rate_y + rate_uv);
          this_skip2 = 1;
        }
      } else {
        // Add in the cost of the no skip flag.
        rate2 += skip_cost0;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }

    // Apply an adjustment to the rd value based on the similarity of the
    // source variance and reconstructed variance.
    rd_variance_adjustment(cpi, x, bsize, &this_rd, ref_frame,
                           x->source_variance);

    if (ref_frame == INTRA_FRAME) {
      // Keep a record of the best intra rd.
      if (this_rd < best_intra_rd) {
        best_intra_rd = this_rd;
        best_intra_mode = mi->mode;
      }
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < REFERENCE_MODES; ++i)
        best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
    }

    // Did this mode help, i.e. is it the new best mode?
    if (this_rd < best_rd || x->skip) {
      int max_plane = MAX_MB_PLANE;
      if (!mode_excluded) {
        // Note the index of the best mode so far.
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mi->mv[0].as_int = 0;
          max_plane = 1;
          // Initialize interp_filter here so we do not have to check for
          // inter block modes in get_pred_context_switchable_interp().
          mi->interp_filter = SWITCHABLE_FILTERS;
        } else {
          best_pred_sse = x->pred_sse[ref_frame];
        }

        rd_cost->rate = rate2;
        rd_cost->dist = distortion2;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        best_mbmode = *mi;
        best_skip2 = this_skip2;
        best_mode_skippable = skippable;

        if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
        memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size],
               sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
        ctx->sum_y_eobs = x->sum_y_eobs[mi->tx_size];

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history.
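        // The early termination below treats a mode as near-transparent when
        // distortion2 is under roughly qstep^2 / scale, i.e. the remaining
        // error is already small relative to one quantizer step, so trying
        // further (more expensive) modes is unlikely to help.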
        if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (mode_index > MIN_EARLY_TERM_INDEX)) {
          int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index.
          int scale = 4;
#if CONFIG_VP9_HIGHBITDEPTH
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            qstep >>= (xd->bd - 8);
          }
#endif  // CONFIG_VP9_HIGHBITDEPTH
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }

    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (!comp_pred) {
        if (single_rd < best_pred_rd[SINGLE_REFERENCE])
          best_pred_rd[SINGLE_REFERENCE] = single_rd;
      } else {
        if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
          best_pred_rd[COMPOUND_REFERENCE] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
        best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;

      /* keep record of best filter type */
      if (!mode_excluded && cm->interp_filter != BILINEAR) {
        int64_t ref =
            filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS
                                                         : cm->interp_filter];

        for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
          int64_t adj_rd;
          if (ref == INT64_MAX)
            adj_rd = 0;
          else if (filter_cache[i] == INT64_MAX)
            // When early termination is triggered, the encoder does not have
            // access to the rate-distortion cost. It only knows that the cost
            // should be above the maximum valid value, so it takes the known
            // maximum plus an arbitrary constant as the rate-distortion cost.
            adj_rd = mask_filter - ref + 10;
          else
            adj_rd = filter_cache[i] - ref;

          adj_rd += this_rd;
          best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
        }
      }
    }

    if (early_term) break;

    if (x->skip && !comp_pred) break;
  }

  // The inter modes' rate costs are not calculated precisely in some cases.
  // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
  // ZEROMV. Here, checks are added for those cases, and the mode decisions
  // are corrected.
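  // For example, if the NEWMV search happened to return exactly the NEARESTMV
  // candidate vector, the block is re-labelled NEARESTMV below so the cheaper
  // mode is signaled in the bitstream for the same prediction.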
  if (best_mbmode.mode == NEWMV) {
    const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
                                         best_mbmode.ref_frame[1] };
    int comp_pred_mode = refs[1] > INTRA_FRAME;

    if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
        ((comp_pred_mode &&
          frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
         !comp_pred_mode))
      best_mbmode.mode = NEARESTMV;
    else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
             ((comp_pred_mode &&
               frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
              !comp_pred_mode))
      best_mbmode.mode = NEARMV;
    else if (best_mbmode.mv[0].as_int == 0 &&
             ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) ||
              !comp_pred_mode))
      best_mbmode.mode = ZEROMV;
  }

  if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
    // If adaptive interp filter is enabled, then the current leaf node of 8x8
    // data is needed for sub8x8. Hence preserve the context.
    if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  // If we used an estimate for the uv intra rd in the loop above...
  if (sf->use_uv_intra_rd_estimate) {
    // Do intra UV best rd mode selection if the best mode choice above was
    // intra.
    if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
      TX_SIZE uv_tx_size;
      *mi = best_mbmode;
      uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
                              &rate_uv_tokenonly[uv_tx_size],
                              &dist_uv[uv_tx_size], &skip_uv[uv_tx_size],
                              bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
                              uv_tx_size);
    }
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));

  if (!cpi->rc.is_src_frame_alt_ref)
    vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                              sf->adaptive_rd_thresh, bsize, best_mode_index);

  // macroblock modes
  *mi = best_mbmode;
  x->skip |= best_skip2;

  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->interp_filter == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }

  // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
  // updating code causes a PSNR loss. Need to figure out the conflict.
  x->skip |= best_mode_skippable;

  if (!x->skip && !x->select_tx_size) {
    int has_high_freq_coeff = 0;
    int plane;
    int max_plane = is_inter_block(xd->mi[0]) ? MAX_MB_PLANE : 1;
    for (plane = 0; plane < max_plane; ++plane) {
      x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
      has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
    }

    for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
      x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
      has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
    }

    best_mode_skippable |= !has_high_freq_coeff;
  }

  assert(best_mode_index >= 0);

  store_coding_context(x, ctx, best_mode_index, best_pred_diff,
                       best_filter_diff, best_mode_skippable);
}
void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data,
                                        MACROBLOCK *x, RD_COST *rd_cost,
                                        BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  unsigned char segment_id = mi->segment_id;
  const int comp_pred = 0;
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  vpx_prob comp_mode_p;
  INTERP_FILTER best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  mi->mode = ZEROMV;
  mi->uv_mode = DC_PRED;
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE;
  mi->mv[0].as_int = 0;
  x->skip = 1;
  ctx->sum_y_eobs = 0;

  if (cm->interp_filter != BILINEAR) {
    best_filter = EIGHTTAP;
    if (cm->interp_filter == SWITCHABLE &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mi->interp_filter = i;
        rs = vp9_get_switchable_rate(cpi, xd);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mi->interp_filter;
        }
      }
    }
  }
  // Set the appropriate filter.
  if (cm->interp_filter == SWITCHABLE) {
    mi->interp_filter = best_filter;
    rate2 += vp9_get_switchable_rate(cpi, xd);
  } else {
    mi->interp_filter = cm->interp_filter;
  }

  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    rate2 += vp9_cost_bit(comp_mode_p, comp_pred);

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == mi->interp_filter));

  vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                            cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);

  vp9_zero(best_pred_diff);
  vp9_zero(best_filter_diff);

  if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
  store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, best_filter_diff, 0);
}
  3297. void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
  3298. MACROBLOCK *x, int mi_row, int mi_col,
  3299. RD_COST *rd_cost, BLOCK_SIZE bsize,
  3300. PICK_MODE_CONTEXT *ctx,
  3301. int64_t best_rd_so_far) {
  3302. VP9_COMMON *const cm = &cpi->common;
  3303. RD_OPT *const rd_opt = &cpi->rd;
  3304. SPEED_FEATURES *const sf = &cpi->sf;
  3305. MACROBLOCKD *const xd = &x->e_mbd;
  3306. MODE_INFO *const mi = xd->mi[0];
  3307. const struct segmentation *const seg = &cm->seg;
  3308. MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  3309. unsigned char segment_id = mi->segment_id;
  3310. int comp_pred, i;
  3311. int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  3312. struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  3313. static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
  3314. VP9_ALT_FLAG };
  3315. int64_t best_rd = best_rd_so_far;
  3316. int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
  3317. int64_t best_pred_diff[REFERENCE_MODES];
  3318. int64_t best_pred_rd[REFERENCE_MODES];
  3319. int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
  3320. int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  3321. MODE_INFO best_mbmode;
  3322. int ref_index, best_ref_index = 0;
  3323. unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
  3324. vpx_prob comp_mode_p;
  3325. INTERP_FILTER tmp_best_filter = SWITCHABLE;
  3326. int rate_uv_intra, rate_uv_tokenonly;
  3327. int64_t dist_uv;
  3328. int skip_uv;
  3329. PREDICTION_MODE mode_uv = DC_PRED;
  3330. const int intra_cost_penalty =
  3331. vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q);
  3332. int_mv seg_mvs[4][MAX_REF_FRAMES];
  3333. b_mode_info best_bmodes[4];
  3334. int best_skip2 = 0;
  3335. int ref_frame_skip_mask[2] = { 0 };
  3336. int64_t mask_filter = 0;
  3337. int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
  3338. int internal_active_edge =
  3339. vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi);
  3340. const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
  3341. x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  3342. memset(x->zcoeff_blk[TX_4X4], 0, 4);
  3343. vp9_zero(best_mbmode);
  3344. for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;
  3345. for (i = 0; i < 4; i++) {
  3346. int j;
  3347. for (j = 0; j < MAX_REF_FRAMES; j++) seg_mvs[i][j].as_int = INVALID_MV;
  3348. }
  3349. estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
  3350. &comp_mode_p);
  3351. for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
  3352. for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
  3353. best_filter_rd[i] = INT64_MAX;
  3354. rate_uv_intra = INT_MAX;
  3355. rd_cost->rate = INT_MAX;
  3356. for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
  3357. if (cpi->ref_frame_flags & flag_list[ref_frame]) {
  3358. setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
  3359. frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
  3360. } else {
  3361. ref_frame_skip_mask[0] |= (1 << ref_frame);
  3362. ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
  3363. }
  3364. frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
  3365. frame_mv[ZEROMV][ref_frame].as_int = 0;
  3366. }
  3367. for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
  3368. int mode_excluded = 0;
  3369. int64_t this_rd = INT64_MAX;
  3370. int disable_skip = 0;
  3371. int compmode_cost = 0;
  3372. int rate2 = 0, rate_y = 0, rate_uv = 0;
  3373. int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
  3374. int skippable = 0;
  3375. int i;
  3376. int this_skip2 = 0;
  3377. int64_t total_sse = INT_MAX;
  3378. int early_term = 0;
  3379. struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  3380. ref_frame = vp9_ref_order[ref_index].ref_frame[0];
  3381. second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
  3382. vp9_zero(x->sum_y_eobs);
  3383. #if CONFIG_BETTER_HW_COMPATIBILITY
  3384. // forbid 8X4 and 4X8 partitions if any reference frame is scaled.
  3385. if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
  3386. int ref_scaled = vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf);
  3387. if (second_ref_frame > INTRA_FRAME)
  3388. ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf);
  3389. if (ref_scaled) continue;
  3390. }
  3391. #endif
  3392. // Look at the reference frame of the best mode so far and set the
  3393. // skip mask to look at a subset of the remaining modes.
  3394. if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
  3395. if (ref_index == 3) {
  3396. switch (best_mbmode.ref_frame[0]) {
  3397. case INTRA_FRAME: break;
  3398. case LAST_FRAME:
  3399. ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
  3400. ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
  3401. break;
  3402. case GOLDEN_FRAME:
  3403. ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
  3404. ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
  3405. break;
  3406. case ALTREF_FRAME:
  3407. ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
  3408. break;
  3409. case NONE:
  3410. case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break;
  3411. }
  3412. }
  3413. }
  3414. if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
  3415. (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
  3416. continue;
  3417. // Test best rd so far against threshold for trying this mode.
  3418. if (!internal_active_edge &&
  3419. rd_less_than_thresh(best_rd,
  3420. rd_opt->threshes[segment_id][bsize][ref_index],
  3421. &rd_thresh_freq_fact[ref_index]))
  3422. continue;
  3423. // This is only used in motion vector unit test.
  3424. if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
  3425. comp_pred = second_ref_frame > INTRA_FRAME;
  3426. if (comp_pred) {
  3427. if (!cpi->allow_comp_inter_inter) continue;
  3428. if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
  3429. // Do not allow compound prediction if the segment level reference frame
  3430. // feature is in use as in this case there can only be one reference.
  3431. if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
  3432. if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
  3433. best_mbmode.ref_frame[0] == INTRA_FRAME)
  3434. continue;
  3435. }
  3436. if (comp_pred)
  3437. mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
  3438. else if (ref_frame != INTRA_FRAME)
  3439. mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
  3440. // If the segment reference frame feature is enabled....
  3441. // then do nothing if the current ref frame is not allowed..
  3442. if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
  3443. get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
  3444. continue;
  3445. // Disable this drop out case if the ref frame
  3446. // segment level feature is enabled for this segment. This is to
  3447. // prevent the possibility that we end up unable to pick any mode.
  3448. } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
  3449. // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
  3450. // unless ARNR filtering is enabled in which case we want
  3451. // an unfiltered alternative. We allow near/nearest as well
  3452. // because they may result in zero-zero MVs but be cheaper.
  3453. if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
  3454. continue;
  3455. }
    mi->tx_size = TX_4X4;
    mi->uv_mode = DC_PRED;
    mi->ref_frame[0] = ref_frame;
    mi->ref_frame[1] = second_ref_frame;
    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.
    mi->interp_filter =
        cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }
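    // Intra path: search the per-4x4 luma modes, then reuse (or compute once
    // per block) the best chroma mode.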
    if (ref_frame == INTRA_FRAME) {
      int rate;
      if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y,
                                       best_rd) >= best_rd)
        continue;
      rate2 += rate;
      rate2 += intra_cost_penalty;
      distortion2 += distortion_y;

      if (rate_uv_intra == INT_MAX) {
        choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra,
                             &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv);
      }
      rate2 += rate_uv_intra;
      rate_uv = rate_uv_tokenonly;
      distortion2 += dist_uv;
      distortion_uv = dist_uv;
      mi->uv_mode = mode_uv;
    } else {
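      // Inter path: jointly search the sub-8x8 motion vectors and the
      // interpolation filter, then add the UV cost if the mode still fits
      // under the best rd seen so far.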
      int rate;
      int64_t distortion;
      int64_t this_rd_thresh;
      int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
      int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
      int64_t tmp_best_distortion = INT64_MAX, tmp_best_sse, uv_sse;
      int tmp_best_skippable = 0;
      int switchable_filter_index;
      int_mv *second_ref =
          comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
      b_mode_info tmp_best_bmodes[16];
      MODE_INFO tmp_best_mbmode;
      BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
      int pred_exists = 0;
      int uv_skippable;
      YV12_BUFFER_CONFIG *scaled_ref_frame[2] = { NULL, NULL };
      int ref;

      for (ref = 0; ref < 2; ++ref) {
        scaled_ref_frame[ref] =
            mi->ref_frame[ref] > INTRA_FRAME
                ? vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref])
                : NULL;

        if (scaled_ref_frame[ref]) {
          int i;
          // Swap out the reference frame for a version that's been scaled to
          // match the resolution of the current frame, allowing the existing
          // motion search code to be used without additional modifications.
          for (i = 0; i < MAX_MB_PLANE; i++)
            backup_yv12[ref][i] = xd->plane[i].pre[ref];
          vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                               NULL);
        }
      }
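      // Use the rd threshold of the single-reference mode that matches this
      // candidate's first reference frame.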
      this_rd_thresh = (ref_frame == LAST_FRAME)
                           ? rd_opt->threshes[segment_id][bsize][THR_LAST]
                           : rd_opt->threshes[segment_id][bsize][THR_ALTR];
      this_rd_thresh = (ref_frame == GOLDEN_FRAME)
                           ? rd_opt->threshes[segment_id][bsize][THR_GOLD]
                           : this_rd_thresh;
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
        filter_cache[i] = INT64_MAX;
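      // Decide the interpolation filter. Speed features may pin the choice to
      // EIGHTTAP or reuse the prediction cached in the coding context;
      // otherwise every switchable filter is searched and its rd cached.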
      if (cm->interp_filter != BILINEAR) {
        tmp_best_filter = EIGHTTAP;
        if (x->source_variance < sf->disable_filter_search_var_thresh) {
          tmp_best_filter = EIGHTTAP;
        } else if (sf->adaptive_pred_interp_filter == 1 &&
                   ctx->pred_interp_filter < SWITCHABLE) {
          tmp_best_filter = ctx->pred_interp_filter;
        } else if (sf->adaptive_pred_interp_filter == 2) {
          tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE
                                ? ctx->pred_interp_filter
                                : 0;
        } else {
          for (switchable_filter_index = 0;
               switchable_filter_index < SWITCHABLE_FILTERS;
               ++switchable_filter_index) {
            int newbest, rs;
            int64_t rs_rd;
            MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
            mi->interp_filter = switchable_filter_index;
            tmp_rd = rd_pick_best_sub8x8_mode(
                cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
                &rate, &rate_y, &distortion, &skippable, &total_sse,
                (int)this_rd_thresh, seg_mvs, bsi, switchable_filter_index,
                mi_row, mi_col);
            if (tmp_rd == INT64_MAX) continue;
            rs = vp9_get_switchable_rate(cpi, xd);
            rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
            filter_cache[switchable_filter_index] = tmp_rd;
            filter_cache[SWITCHABLE_FILTERS] =
                VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
            if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd;

            mask_filter = VPXMAX(mask_filter, tmp_rd);

            newbest = (tmp_rd < tmp_best_rd);
            if (newbest) {
              tmp_best_filter = mi->interp_filter;
              tmp_best_rd = tmp_rd;
            }

            if ((newbest && cm->interp_filter == SWITCHABLE) ||
                (mi->interp_filter == cm->interp_filter &&
                 cm->interp_filter != SWITCHABLE)) {
              tmp_best_rdu = tmp_rd;
              tmp_best_rate = rate;
              tmp_best_ratey = rate_y;
              tmp_best_distortion = distortion;
              tmp_best_sse = total_sse;
              tmp_best_skippable = skippable;
              tmp_best_mbmode = *mi;
              for (i = 0; i < 4; i++) {
                tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
                x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
                x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i];
              }
              pred_exists = 1;
              if (switchable_filter_index == 0 && sf->use_rd_breakout &&
                  best_rd < INT64_MAX) {
                if (tmp_best_rdu / 2 > best_rd) {
                  // Skip searching the other filters if the first is
                  // already substantially larger than the best so far.
                  tmp_best_filter = mi->interp_filter;
                  tmp_best_rdu = INT64_MAX;
                  break;
                }
              }
            }
          }  // switchable_filter_index loop
        }
      }
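      // The rd breakout above signals an abandoned filter search by leaving
      // tmp_best_rdu at INT64_MAX; give up on this candidate in that case.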
      if (tmp_best_rdu == INT64_MAX && pred_exists) continue;

      mi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter
                                                           : cm->interp_filter);

      if (!pred_exists) {
        // Handles the special case when a filter that is not in the
        // switchable list (bilinear) is indicated at the frame level.
        tmp_rd = rd_pick_best_sub8x8_mode(
            cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
            &rate, &rate_y, &distortion, &skippable, &total_sse,
            (int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col);
        if (tmp_rd == INT64_MAX) continue;
      } else {
        total_sse = tmp_best_sse;
        rate = tmp_best_rate;
        rate_y = tmp_best_ratey;
        distortion = tmp_best_distortion;
        skippable = tmp_best_skippable;
        *mi = tmp_best_mbmode;
        for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
      }

      rate2 += rate;
      distortion2 += distortion;

      if (cm->interp_filter == SWITCHABLE)
        rate2 += vp9_get_switchable_rate(cpi, xd);

      if (!mode_excluded)
        mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
                                  : cm->reference_mode == COMPOUND_REFERENCE;

      compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
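      // Bound this mode's cost from below by the cheaper of coding the
      // residual (rate2/distortion2 so far) or skipping it outright (paying
      // total_sse in distortion). Whatever headroom remains under best_rd is
      // the budget available for the UV planes.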
      tmp_best_rdu =
          best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
                           RDCOST(x->rdmult, x->rddiv, 0, total_sse));

      if (tmp_best_rdu > 0) {
        // If even the 'Y' rd value of split is higher than best so far
        // then don't bother looking at UV.
        vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8);
        memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
        if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
                              &uv_sse, BLOCK_8X8, tmp_best_rdu)) {
          for (ref = 0; ref < 2; ++ref) {
            if (scaled_ref_frame[ref]) {
              int i;
              for (i = 0; i < MAX_MB_PLANE; ++i)
                xd->plane[i].pre[ref] = backup_yv12[ref][i];
            }
          }
          continue;
        }

        rate2 += rate_uv;
        distortion2 += distortion_uv;
        skippable = skippable && uv_skippable;
        total_sse += uv_sse;
      }

      for (ref = 0; ref < 2; ++ref) {
        if (scaled_ref_frame[ref]) {
          // Restore the prediction frame pointers to their unscaled versions.
          int i;
          for (i = 0; i < MAX_MB_PLANE; ++i)
            xd->plane[i].pre[ref] = backup_yv12[ref][i];
        }
      }
    }
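    // When the frame header leaves the reference mode open
    // (REFERENCE_MODE_SELECT), the compound-vs-single decision itself must be
    // signaled, so its cost is charged here.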
    if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (second_ref_frame > INTRA_FRAME) {
      rate2 += ref_costs_comp[ref_frame];
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

    if (!disable_skip) {
      const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
      const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
      const int skip_cost1 = vp9_cost_bit(skip_prob, 1);

      // Skip is never coded at the segment level for sub8x8 blocks and instead
      // always coded in the bitstream at the mode info level.
      if (ref_frame != INTRA_FRAME && !xd->lossless) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0,
                   distortion2) <
            RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
          // Add in the cost of the no skip flag.
          rate2 += skip_cost0;
        } else {
          // FIXME(rbultje) make this work for splitmv also
          rate2 += skip_cost1;
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          rate_y = 0;
          rate_uv = 0;
          this_skip2 = 1;
        }
      } else {
        // Add in the cost of the no skip flag.
        rate2 += skip_cost0;
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    }
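    // An intra rd value is valid under every reference mode and filter
    // hypothesis, so fold it into all of the running records.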
    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < REFERENCE_MODES; ++i)
        best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
        best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
    }
    // Did this mode help, i.e. is it the new best mode so far?
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        int max_plane = MAX_MB_PLANE;
        // Note index of best mode so far.
        best_ref_index = ref_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mi->mv[0].as_int = 0;
          max_plane = 1;
          // Initialize interp_filter here so we do not have to check for
          // inter block modes in get_pred_context_switchable_interp().
          mi->interp_filter = SWITCHABLE_FILTERS;
        }

        rd_cost->rate = rate2;
        rd_cost->dist = distortion2;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        best_yrd =
            best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
        best_mbmode = *mi;
        best_skip2 = this_skip2;
        if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
        memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
               sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
        ctx->sum_y_eobs = x->sum_y_eobs[TX_4X4];

        for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];

        // TODO(debargha): enhance this test with a better distortion
        // prediction based on qp, activity mask and history.
        if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
            (ref_index > MIN_EARLY_TERM_INDEX)) {
          int qstep = xd->plane[0].dequant[1];
          // TODO(debargha): Enhance this by specializing for each mode_index.
          int scale = 4;
#if CONFIG_VP9_HIGHBITDEPTH
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            qstep >>= (xd->bd - 8);
          }
#endif  // CONFIG_VP9_HIGHBITDEPTH
          if (x->source_variance < UINT_MAX) {
            const int var_adjust = (x->source_variance < 16);
            scale -= var_adjust;
          }
          if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
            early_term = 1;
          }
        }
      }
    }
    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
        best_pred_rd[SINGLE_REFERENCE] = single_rd;
      else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
        best_pred_rd[COMPOUND_REFERENCE] = single_rd;
      if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
        best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
    }
    /* keep record of best filter type */
    if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
        cm->interp_filter != BILINEAR) {
      int64_t ref =
          filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS
                                                       : cm->interp_filter];
      int64_t adj_rd;
      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
        if (ref == INT64_MAX)
          adj_rd = 0;
        else if (filter_cache[i] == INT64_MAX)
          // When early termination is triggered, the encoder does not have
          // access to the rate-distortion cost. It only knows that the cost
          // should be above the maximum valid value. Hence it takes the known
          // maximum plus an arbitrary constant as the rate-distortion cost.
          adj_rd = mask_filter - ref + 10;
        else
          adj_rd = filter_cache[i] - ref;

        adj_rd += this_rd;
        best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
      }
    }
    if (early_term) break;

    if (x->skip && !comp_pred) break;
  }
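  // End of the ref_index candidate loop. Everything below finalizes the best
  // mode found and stores the coding context for this block.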
  if (best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  // If we used an estimate for the uv intra rd in the loop above...
  if (sf->use_uv_intra_rd_estimate) {
    // Do Intra UV best rd mode selection if best mode choice above was intra.
    if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
      *mi = best_mbmode;
      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, &rate_uv_tokenonly,
                              &dist_uv, &skip_uv, BLOCK_8X8, TX_4X4);
    }
  }

  if (best_rd == INT64_MAX) {
    rd_cost->rate = INT_MAX;
    rd_cost->dist = INT64_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));

  vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh,
                            bsize, best_ref_index);

  // macroblock modes
  *mi = best_mbmode;
  x->skip |= best_skip2;
  if (!is_inter_block(&best_mbmode)) {
    for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
  } else {
    for (i = 0; i < 4; ++i)
      memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));

    mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
    mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
  }
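  // Convert the running reference-mode and filter records into diffs against
  // the winning rd; entries that were never updated keep sentinel values.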
  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  if (!x->skip) {
    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
      if (best_filter_rd[i] == INT64_MAX)
        best_filter_diff[i] = 0;
      else
        best_filter_diff[i] = best_rd - best_filter_rd[i];
    }
    if (cm->interp_filter == SWITCHABLE)
      assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
  } else {
    vp9_zero(best_filter_diff);
  }

  store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_filter_diff,
                       0);
}