vp9_encodeframe.c 186 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <limits.h>
  11. #include <math.h>
  12. #include <stdio.h>
  13. #include "./vp9_rtcd.h"
  14. #include "./vpx_dsp_rtcd.h"
  15. #include "./vpx_config.h"
  16. #include "vpx_dsp/vpx_dsp_common.h"
  17. #include "vpx_ports/mem.h"
  18. #include "vpx_ports/vpx_timer.h"
  19. #include "vpx_ports/system_state.h"
  20. #include "vp9/common/vp9_common.h"
  21. #include "vp9/common/vp9_entropy.h"
  22. #include "vp9/common/vp9_entropymode.h"
  23. #include "vp9/common/vp9_idct.h"
  24. #include "vp9/common/vp9_mvref_common.h"
  25. #include "vp9/common/vp9_pred_common.h"
  26. #include "vp9/common/vp9_quant_common.h"
  27. #include "vp9/common/vp9_reconintra.h"
  28. #include "vp9/common/vp9_reconinter.h"
  29. #include "vp9/common/vp9_seg_common.h"
  30. #include "vp9/common/vp9_tile_common.h"
  31. #include "vp9/encoder/vp9_aq_360.h"
  32. #include "vp9/encoder/vp9_aq_complexity.h"
  33. #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
  34. #include "vp9/encoder/vp9_aq_variance.h"
  35. #include "vp9/encoder/vp9_encodeframe.h"
  36. #include "vp9/encoder/vp9_encodemb.h"
  37. #include "vp9/encoder/vp9_encodemv.h"
  38. #include "vp9/encoder/vp9_ethread.h"
  39. #include "vp9/encoder/vp9_extend.h"
  40. #include "vp9/encoder/vp9_pickmode.h"
  41. #include "vp9/encoder/vp9_rd.h"
  42. #include "vp9/encoder/vp9_rdopt.h"
  43. #include "vp9/encoder/vp9_segmentation.h"
  44. #include "vp9/encoder/vp9_tokenize.h"
  45. static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
  46. int output_enabled, int mi_row, int mi_col,
  47. BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
// Machine learning-based early termination parameters.
// Per-feature normalization means for the early-termination classifier.
// 24 entries: presumably 8 features for each of 3 block-size groups —
// TODO(review): confirm grouping against the training pipeline.
static const double train_mean[24] = {
  303501.697372, 3042630.372158, 24.694696, 1.392182,
  689.413511, 162.027012, 1.478213, 0.0,
  135382.260230, 912738.513263, 28.845217, 1.515230,
  544.158492, 131.807995, 1.436863, 0.0,
  43682.377587, 208131.711766, 28.084737, 1.356677,
  138.254122, 119.522553, 1.252322, 0.0
};

// Per-feature normalization standard deviations, matching train_mean[]
// entry-for-entry (name suggests "standard deviation of mean" — confirm).
static const double train_stdm[24] = {
  673689.212982, 5996652.516628, 0.024449, 1.989792,
  985.880847, 0.014638, 2.001898, 0.0,
  208798.775332, 1812548.443284, 0.018693, 1.838009,
  396.986910, 0.015657, 1.332541, 0.0,
  55888.847031, 448587.962714, 0.017900, 1.904776,
  98.652832, 0.016598, 1.320992, 0.0
};

// Classifier weights. Error tolerance: 0.01% - 0.05% - 0.1%
static const double classifiers[24] = {
  0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863,
  0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134,
  0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700,
  0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211
};
// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
// Flat mid-gray (128) 8x8 block used as the "zero" reference by the
// per-pixel variance helpers below.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth variants of the flat reference block: same mid-gray value
// expressed at 8-bit, 10-bit (128 * 4) and 12-bit (128 * 16) depth.
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};

static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};

static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16
};
#endif  // CONFIG_VP9_HIGHBITDEPTH
  114. unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
  115. const struct buf_2d *ref,
  116. BLOCK_SIZE bs) {
  117. unsigned int sse;
  118. const unsigned int var =
  119. cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse);
  120. return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
  121. }
  122. #if CONFIG_VP9_HIGHBITDEPTH
  123. unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
  124. const struct buf_2d *ref,
  125. BLOCK_SIZE bs, int bd) {
  126. unsigned int var, sse;
  127. switch (bd) {
  128. case 10:
  129. var =
  130. cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
  131. CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse);
  132. break;
  133. case 12:
  134. var =
  135. cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
  136. CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse);
  137. break;
  138. case 8:
  139. default:
  140. var =
  141. cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
  142. CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse);
  143. break;
  144. }
  145. return (unsigned int)ROUND64_POWER_OF_TWO((int64_t)var,
  146. num_pels_log2_lookup[bs]);
  147. }
  148. #endif // CONFIG_VP9_HIGHBITDEPTH
  149. static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
  150. const struct buf_2d *ref,
  151. int mi_row, int mi_col,
  152. BLOCK_SIZE bs) {
  153. unsigned int sse, var;
  154. uint8_t *last_y;
  155. const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
  156. assert(last != NULL);
  157. last_y =
  158. &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
  159. var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
  160. return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
  161. }
  162. static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
  163. int mi_row, int mi_col) {
  164. unsigned int var = get_sby_perpixel_diff_variance(
  165. cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
  166. if (var < 8)
  167. return BLOCK_64X64;
  168. else if (var < 128)
  169. return BLOCK_32X32;
  170. else if (var < 2048)
  171. return BLOCK_16X16;
  172. else
  173. return BLOCK_8X8;
  174. }
  175. // Lighter version of set_offsets that only sets the mode info
  176. // pointers.
  177. static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
  178. MACROBLOCK *const x,
  179. MACROBLOCKD *const xd, int mi_row,
  180. int mi_col) {
  181. const int idx_str = xd->mi_stride * mi_row + mi_col;
  182. xd->mi = cm->mi_grid_visible + idx_str;
  183. xd->mi[0] = cm->mi + idx_str;
  184. x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
  185. }
// Prepare the encoder context for coding the block of size |bsize| at mi
// grid position (mi_row, mi_col): mode-info pointers, destination and
// source plane pointers, MV search limits, R/D multipliers and segment ID.
static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const struct segmentation *const seg = &cm->seg;
  MvLimits *const mv_limits = &x->mv_limits;

  set_skip_context(xd, mi_row, mi_col);

  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
  mi = xd->mi[0];

  // Set up destination pointers.
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);

  // Set up limit values for MV components.
  // Mv beyond the range do not produce new/different prediction block.
  mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
  mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;

  // Set up distance of MB to edge of frame in 1/8th pel units.
  // Block positions must be aligned to the block size.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);

  // Set up source buffers.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D setup.
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;

  // Setup segment ID.
  if (seg->enabled) {
    // NOTE(review): VARIANCE/LOOKAHEAD/EQUATOR360 AQ modes appear to assign
    // segment ids through their own paths, so the map lookup is skipped for
    // them — confirm against the respective aq_* modules.
    if (cpi->oxcf.aq_mode != VARIANCE_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ &&
        cpi->oxcf.aq_mode != EQUATOR360_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    vp9_init_plane_quantizers(cpi, x);
    x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id];
  } else {
    mi->segment_id = 0;
    x->encode_breakout = cpi->encode_breakout;
  }

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
}
  233. static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
  234. int mi_row, int mi_col,
  235. BLOCK_SIZE bsize) {
  236. const int block_width =
  237. VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
  238. const int block_height =
  239. VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
  240. const int mi_stride = xd->mi_stride;
  241. MODE_INFO *const src_mi = xd->mi[0];
  242. int i, j;
  243. for (j = 0; j < block_height; ++j)
  244. for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
  245. }
  246. static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
  247. MACROBLOCKD *const xd, int mi_row, int mi_col,
  248. BLOCK_SIZE bsize) {
  249. if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
  250. set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
  251. xd->mi[0]->sb_type = bsize;
  252. }
  253. }
// Accumulator for the variance of a group of pixels. The raw sums are kept
// here; the variance field itself is only filled in by get_variance().
typedef struct {
  int64_t sum_square_error;
  int64_t sum_error;
  int log2_count;  // log2 of the pixel count covered by the sums.
  int variance;
} var;

// Variances for the three ways one tree level can be partitioned:
// whole block (none), two horizontal halves, two vertical halves.
typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

// Variance tree: each vNxN node holds its own partition variances plus four
// child entries one level down; at the v4x4 level the children are leaf
// accumulators rather than sub-trees.
typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

// Type-erased view of a single tree level, filled in by tree_to_node().
typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;
// Build a size-erased variance_node for the tree level `bsize`: point
// node->part_variances at the level's own accumulators and node->split[i]
// at each child's "none" accumulator. The casts rely on every vNxN type
// beginning with a partition_variance member (see type definitions above).
// For BLOCK_4X4 the children are bare `var` leaves rather than sub-trees.
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int i;
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].part_variances.none;
      break;
    }
    case BLOCK_4X4: {
      v4x4 *vt = (v4x4 *)data;
      node->part_variances = &vt->part_variances;
      for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
      break;
    }
    default: {
      // Only square sizes 4x4 .. 64x64 participate in the variance tree.
      assert(0);
      break;
    }
  }
}
  338. // Set variance values given sum square error, sum error, count.
  339. static void fill_variance(int64_t s2, int64_t s, int c, var *v) {
  340. v->sum_square_error = s2;
  341. v->sum_error = s;
  342. v->log2_count = c;
  343. }
// Derive the scaled variance from the accumulated sums:
//   variance = 256 * (sse - sum^2 / n) / n, with n = 2^log2_count.
// The factor of 256 preserves precision when the sample count is small.
static void get_variance(var *v) {
  v->variance =
      (int)(256 * (v->sum_square_error -
                   ((v->sum_error * v->sum_error) >> v->log2_count)) >>
            v->log2_count);
}
  350. static void sum_2_variances(const var *a, const var *b, var *r) {
  351. assert(a->log2_count == b->log2_count);
  352. fill_variance(a->sum_square_error + b->sum_square_error,
  353. a->sum_error + b->sum_error, a->log2_count + 1, r);
  354. }
  355. static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  356. variance_node node;
  357. memset(&node, 0, sizeof(node));
  358. tree_to_node(data, bsize, &node);
  359. sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
  360. sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
  361. sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
  362. sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
  363. sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
  364. &node.part_variances->none);
  365. }
// Try to settle the partition for a block of size bsize at (mi_row, mi_col)
// using the variance tree `data`. Returns 1 if a partition (none, horz, or
// vert) was selected and written through set_block_size(); returns 0 if the
// caller should split further. `threshold` is the variance cutoff for the
// non-split choices, `bsize_min` is the smallest size this pass may assign,
// and a nonzero `force_split` vetoes any selection immediately.
static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];
  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);
  if (force_split == 1) return 0;
  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
    // Only select bsize when the block lies fully enough inside the frame.
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (cm->frame_type == KEY_FRAME &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    // Check vertical split: both halves must be low-variance and the
    // corresponding chroma block size must be valid.
    if (mi_row + block_height / 2 < cm->mi_rows) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split.
    if (mi_col + block_width / 2 < cm->mi_cols) {
      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }
    return 0;
  }
  return 0;
}
  437. static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
  438. int width, int height,
  439. int content_state) {
  440. if (speed >= 8) {
  441. if (width <= 640 && height <= 480)
  442. return (5 * threshold_base) >> 2;
  443. else if ((content_state == kLowSadLowSumdiff) ||
  444. (content_state == kHighSadLowSumdiff) ||
  445. (content_state == kLowVarHighSumdiff))
  446. return (5 * threshold_base) >> 2;
  447. } else if (speed == 7) {
  448. if ((content_state == kLowSadLowSumdiff) ||
  449. (content_state == kHighSadLowSumdiff) ||
  450. (content_state == kLowVarHighSumdiff)) {
  451. return (5 * threshold_base) >> 2;
  452. }
  453. }
  454. return threshold_base;
  455. }
// Set the variance split thresholds for following the block sizes:
// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
// currently only used on key frame.
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
  VP9_COMMON *const cm = &cpi->common;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);
  // Key frames scale the base threshold up by 20x since everything is
  // intra coded; inter frames use the dequant step directly.
  const int threshold_multiplier = is_key_frame ? 20 : 1;
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base >> 2;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base << 2;
  } else {
    // Increase base variance threshold based on estimated noise level.
    if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
      NOISE_LEVEL noise_level =
          vp9_noise_estimate_extract_level(&cpi->noise_estimate);
      if (noise_level == kHigh)
        threshold_base = 3 * threshold_base;
      else if (noise_level == kMedium)
        threshold_base = threshold_base << 1;
      else if (noise_level < kLow)
        // Very low noise: reduce the threshold slightly instead.
        threshold_base = (7 * threshold_base) >> 3;
    }
#if CONFIG_VP9_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
        cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
      threshold_base =
          vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
                                content_state, cpi->svc.temporal_layer_id);
    else
      threshold_base =
          scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width,
                                    cm->height, content_state);
#else
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);
#endif
    // thresholds[3] (split to 4x4) is intentionally not set here; per the
    // header comment it is only used on key frames.
    thresholds[0] = threshold_base;
    thresholds[2] = threshold_base << cpi->oxcf.speed;
    if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7)
      thresholds[2] = thresholds[2] << 1;
    // Resolution-dependent tuning of the 64x64/32x32/16x16 thresholds.
    if (cm->width <= 352 && cm->height <= 288) {
      thresholds[0] = threshold_base >> 3;
      thresholds[1] = threshold_base >> 1;
      thresholds[2] = threshold_base << 3;
    } else if (cm->width < 1280 && cm->height < 720) {
      thresholds[1] = (5 * threshold_base) >> 2;
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[1] = threshold_base << 1;
    } else {
      thresholds[1] = (5 * threshold_base) >> 1;
    }
  }
}
  516. void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
  517. int content_state) {
  518. VP9_COMMON *const cm = &cpi->common;
  519. SPEED_FEATURES *const sf = &cpi->sf;
  520. const int is_key_frame = (cm->frame_type == KEY_FRAME);
  521. if (sf->partition_search_type != VAR_BASED_PARTITION &&
  522. sf->partition_search_type != REFERENCE_PARTITION) {
  523. return;
  524. } else {
  525. set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
  526. // The thresholds below are not changed locally.
  527. if (is_key_frame) {
  528. cpi->vbp_threshold_sad = 0;
  529. cpi->vbp_threshold_copy = 0;
  530. cpi->vbp_bsize_min = BLOCK_8X8;
  531. } else {
  532. if (cm->width <= 352 && cm->height <= 288)
  533. cpi->vbp_threshold_sad = 10;
  534. else
  535. cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
  536. ? (cpi->y_dequant[q][1] << 1)
  537. : 1000;
  538. cpi->vbp_bsize_min = BLOCK_16X16;
  539. if (cm->width <= 352 && cm->height <= 288)
  540. cpi->vbp_threshold_copy = 4000;
  541. else if (cm->width <= 640 && cm->height <= 360)
  542. cpi->vbp_threshold_copy = 8000;
  543. else
  544. cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
  545. ? (cpi->y_dequant[q][1] << 3)
  546. : 8000;
  547. }
  548. cpi->vbp_threshold_minmax = 15 + (q >> 3);
  549. }
  550. }
  551. // Compute the minmax over the 8x8 subblocks.
  552. static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
  553. int dp, int x16_idx, int y16_idx,
  554. #if CONFIG_VP9_HIGHBITDEPTH
  555. int highbd_flag,
  556. #endif
  557. int pixels_wide, int pixels_high) {
  558. int k;
  559. int minmax_max = 0;
  560. int minmax_min = 255;
  561. // Loop over the 4 8x8 subblocks.
  562. for (k = 0; k < 4; k++) {
  563. int x8_idx = x16_idx + ((k & 1) << 3);
  564. int y8_idx = y16_idx + ((k >> 1) << 3);
  565. int min = 0;
  566. int max = 0;
  567. if (x8_idx < pixels_wide && y8_idx < pixels_high) {
  568. #if CONFIG_VP9_HIGHBITDEPTH
  569. if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
  570. vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp,
  571. d + y8_idx * dp + x8_idx, dp, &min, &max);
  572. } else {
  573. vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx,
  574. dp, &min, &max);
  575. }
  576. #else
  577. vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
  578. &min, &max);
  579. #endif
  580. if ((max - min) > minmax_max) minmax_max = (max - min);
  581. if ((max - min) < minmax_min) minmax_min = (max - min);
  582. }
  583. }
  584. return (minmax_max - minmax_min);
  585. }
  586. static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
  587. int dp, int x8_idx, int y8_idx, v8x8 *vst,
  588. #if CONFIG_VP9_HIGHBITDEPTH
  589. int highbd_flag,
  590. #endif
  591. int pixels_wide, int pixels_high,
  592. int is_key_frame) {
  593. int k;
  594. for (k = 0; k < 4; k++) {
  595. int x4_idx = x8_idx + ((k & 1) << 2);
  596. int y4_idx = y8_idx + ((k >> 1) << 2);
  597. unsigned int sse = 0;
  598. int sum = 0;
  599. if (x4_idx < pixels_wide && y4_idx < pixels_high) {
  600. int s_avg;
  601. int d_avg = 128;
  602. #if CONFIG_VP9_HIGHBITDEPTH
  603. if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
  604. s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
  605. if (!is_key_frame)
  606. d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
  607. } else {
  608. s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
  609. if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
  610. }
  611. #else
  612. s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
  613. if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
  614. #endif
  615. sum = s_avg - d_avg;
  616. sse = sum * sum;
  617. }
  618. fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  619. }
  620. }
  621. static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
  622. int dp, int x16_idx, int y16_idx, v16x16 *vst,
  623. #if CONFIG_VP9_HIGHBITDEPTH
  624. int highbd_flag,
  625. #endif
  626. int pixels_wide, int pixels_high,
  627. int is_key_frame) {
  628. int k;
  629. for (k = 0; k < 4; k++) {
  630. int x8_idx = x16_idx + ((k & 1) << 3);
  631. int y8_idx = y16_idx + ((k >> 1) << 3);
  632. unsigned int sse = 0;
  633. int sum = 0;
  634. if (x8_idx < pixels_wide && y8_idx < pixels_high) {
  635. int s_avg;
  636. int d_avg = 128;
  637. #if CONFIG_VP9_HIGHBITDEPTH
  638. if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
  639. s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
  640. if (!is_key_frame)
  641. d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
  642. } else {
  643. s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
  644. if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
  645. }
  646. #else
  647. s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
  648. if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
  649. #endif
  650. sum = s_avg - d_avg;
  651. sse = sum * sum;
  652. }
  653. fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  654. }
  655. }
  656. // Check if most of the superblock is skin content, and if so, force split to
  657. // 32x32, and set x->sb_is_skin for use in mode selection.
  658. static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
  659. int mi_row, int mi_col, int *force_split) {
  660. VP9_COMMON *const cm = &cpi->common;
  661. #if CONFIG_VP9_HIGHBITDEPTH
  662. if (cm->use_highbitdepth) return 0;
  663. #endif
  664. // Avoid checking superblocks on/near boundary and avoid low resolutions.
  665. // Note superblock may still pick 64X64 if y_sad is very small
  666. // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
  667. if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
  668. mi_row + 8 < cm->mi_rows)) {
  669. int num_16x16_skin = 0;
  670. int num_16x16_nonskin = 0;
  671. uint8_t *ysignal = x->plane[0].src.buf;
  672. uint8_t *usignal = x->plane[1].src.buf;
  673. uint8_t *vsignal = x->plane[2].src.buf;
  674. int sp = x->plane[0].src.stride;
  675. int spuv = x->plane[1].src.stride;
  676. const int block_index = mi_row * cm->mi_cols + mi_col;
  677. const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
  678. const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
  679. const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
  680. const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
  681. // Loop through the 16x16 sub-blocks.
  682. int i, j;
  683. for (i = 0; i < ymis; i += 2) {
  684. for (j = 0; j < xmis; j += 2) {
  685. int bl_index = block_index + i * cm->mi_cols + j;
  686. int is_skin = cpi->skin_map[bl_index];
  687. num_16x16_skin += is_skin;
  688. num_16x16_nonskin += (1 - is_skin);
  689. if (num_16x16_nonskin > 3) {
  690. // Exit loop if at least 4 of the 16x16 blocks are not skin.
  691. i = ymis;
  692. break;
  693. }
  694. ysignal += 16;
  695. usignal += 8;
  696. vsignal += 8;
  697. }
  698. ysignal += (sp << 4) - 64;
  699. usignal += (spuv << 3) - 32;
  700. vsignal += (spuv << 3) - 32;
  701. }
  702. if (num_16x16_skin > 12) {
  703. *force_split = 1;
  704. return 1;
  705. }
  706. }
  707. return 0;
  708. }
// Set x->variance_low[] flags for (sub)blocks whose temporal variance is
// well below the partition thresholds, so later mode search can
// short-circuit. Applies only when partitioning used LAST_FRAME and
// (unless short_circuit_low_temp_var == 1) the int_pro motion vector is
// small. variance_low index layout (from the writes below): 0 = 64x64,
// 1-2 = 64x32 halves, 3-4 = 32x64 halves, 5-8 = the four 32x32 quadrants,
// 9-24 = the sixteen 16x16 blocks.
static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                  v64x64 *vt, int64_t thresholds[],
                                  MV_REFERENCE_FRAME ref_frame_partition,
                                  int mi_col, int mi_row) {
  int i, j;
  VP9_COMMON *const cm = &cpi->common;
  const int mv_thr = cm->width > 640 ? 8 : 4;
  // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
  // int_pro mv is small. If the temporal variance is small set the flag
  // variance_low for the block. The variance threshold can be adjusted, the
  // higher the more aggressive.
  if (ref_frame_partition == LAST_FRAME &&
      (cpi->sf.short_circuit_low_temp_var == 1 ||
       (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
        xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
        xd->mi[0]->mv[0].as_mv.row < mv_thr &&
        xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
    if (xd->mi[0]->sb_type == BLOCK_64X64) {
      if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
        x->variance_low[0] = 1;
    } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 1] = 1;
      }
    } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
      for (i = 0; i < 2; i++) {
        if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
          x->variance_low[i + 3] = 1;
      }
    } else {
      // Superblock was split further: inspect each 32x32 quadrant.
      for (i = 0; i < 4; i++) {
        const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
        const int idx_str =
            cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
        MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
        // Skip quadrants that fall outside the visible frame.
        if (cm->mi_cols <= mi_col + idx[i][1] ||
            cm->mi_rows <= mi_row + idx[i][0])
          continue;
        if ((*this_mi)->sb_type == BLOCK_32X32) {
          // Modes 1 and 3 use a slightly more aggressive 32x32 threshold.
          int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
                                     cpi->sf.short_circuit_low_temp_var == 3)
                                        ? ((5 * thresholds[1]) >> 3)
                                        : (thresholds[1] >> 1);
          if (vt->split[i].part_variances.none.variance < threshold_32x32)
            x->variance_low[i + 5] = 1;
        } else if (cpi->sf.short_circuit_low_temp_var >= 2) {
          // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
          // inside.
          if ((*this_mi)->sb_type == BLOCK_16X16 ||
              (*this_mi)->sb_type == BLOCK_32X16 ||
              (*this_mi)->sb_type == BLOCK_16X32) {
            for (j = 0; j < 4; j++) {
              if (vt->split[i].split[j].part_variances.none.variance <
                  (thresholds[2] >> 8))
                x->variance_low[(i << 2) + j + 9] = 1;
            }
          }
        }
      }
    }
  }
}
// Recursively replay the previous frame's partitioning (cpi->prev_partition)
// into the current mode-info grid, starting at (mi_row, mi_col) with block
// size bsize. Partition choices that would go below 8x8 are clamped to
// bsize. Out-of-frame positions return without writing.
static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
                                     MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                     int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->prev_partition;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  // bs is half the block size in mi (8x8) units; used to address the
  // second half of HORZ/VERT partitions and the four SPLIT quadrants.
  const int bs = (1 << bsl) / 4;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
  // Derive the partition type from the previously stored block size.
  partition = partition_lookup[bsl][prev_part[start_pos]];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
        break;
      case PARTITION_HORZ:
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize);
        break;
      case PARTITION_VERT:
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize);
        break;
      case PARTITION_SPLIT:
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs);
        copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs);
        break;
      default: assert(0);
    }
  }
}
  810. static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
  811. int mi_row, int mi_col, int segment_id,
  812. int sb_offset) {
  813. int svc_copy_allowed = 1;
  814. int frames_since_key_thresh = 1;
  815. if (cpi->use_svc) {
  816. // For SVC, don't allow copy if base spatial layer is key frame, or if
  817. // frame is not a temporal enhancement layer frame.
  818. int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id,
  819. cpi->svc.number_temporal_layers);
  820. const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
  821. if (lc->is_key_frame ||
  822. (cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1 &&
  823. cpi->svc.number_temporal_layers > 1))
  824. svc_copy_allowed = 0;
  825. frames_since_key_thresh = cpi->svc.number_spatial_layers << 1;
  826. }
  827. if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed &&
  828. !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE &&
  829. cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE &&
  830. cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) {
  831. if (cpi->prev_partition != NULL) {
  832. copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col);
  833. cpi->copied_frame_cnt[sb_offset] += 1;
  834. memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]),
  835. sizeof(x->variance_low));
  836. return 1;
  837. }
  838. }
  839. return 0;
  840. }
// Record the partitioning chosen for the current frame into
// cpi->prev_partition, reading block sizes from the mi grid. Mirrors
// copy_partitioning_helper(), but in the write direction, and with
// explicit bounds checks before writing the second half of HORZ/VERT
// partitions.
static void update_prev_partition(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                                  int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  BLOCK_SIZE *prev_part = cpi->prev_partition;
  int start_pos = mi_row * cm->mi_stride + mi_col;
  const int bsl = b_width_log2_lookup[bsize];
  // Half the block size in mi (8x8) units.
  const int bs = (1 << bsl) / 4;
  BLOCK_SIZE subsize;
  PARTITION_TYPE partition;
  const MODE_INFO *mi = NULL;
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
  mi = cm->mi_grid_visible[start_pos];
  // Derive how this bsize was partitioned from the stored sb_type.
  partition = partition_lookup[bsl][mi->sb_type];
  subsize = get_subsize(bsize, partition);
  if (subsize < BLOCK_8X8) {
    prev_part[start_pos] = bsize;
  } else {
    switch (partition) {
      case PARTITION_NONE: prev_part[start_pos] = bsize; break;
      case PARTITION_HORZ:
        prev_part[start_pos] = subsize;
        if (mi_row + bs < cm->mi_rows)
          prev_part[start_pos + bs * cm->mi_stride] = subsize;
        break;
      case PARTITION_VERT:
        prev_part[start_pos] = subsize;
        if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
        break;
      case PARTITION_SPLIT:
        update_prev_partition(cpi, subsize, mi_row, mi_col);
        update_prev_partition(cpi, subsize, mi_row + bs, mi_col);
        update_prev_partition(cpi, subsize, mi_row, mi_col + bs);
        update_prev_partition(cpi, subsize, mi_row + bs, mi_col + bs);
        break;
      default: assert(0);
    }
  }
}
  879. static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
  880. unsigned int y_sad, int is_key_frame) {
  881. int i;
  882. MACROBLOCKD *xd = &x->e_mbd;
  883. if (is_key_frame) return;
  884. // For speed >= 8, avoid the chroma check if y_sad is above threshold.
  885. if (cpi->oxcf.speed >= 8) {
  886. if (y_sad > cpi->vbp_thresholds[1] &&
  887. (!cpi->noise_estimate.enabled ||
  888. vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium))
  889. return;
  890. }
  891. for (i = 1; i <= 2; ++i) {
  892. unsigned int uv_sad = UINT_MAX;
  893. struct macroblock_plane *p = &x->plane[i];
  894. struct macroblockd_plane *pd = &xd->plane[i];
  895. const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  896. if (bs != BLOCK_INVALID)
  897. uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
  898. pd->dst.stride);
  899. // TODO(marpan): Investigate if we should lower this threshold if
  900. // superblock is detected as skin.
  901. x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
  902. }
  903. }
// Measure 64x64 SAD and variance between the current and last source frames
// at buffer offset `shift`, classify x->content_state_sb from the results,
// and maintain the per-superblock consecutive-low-sad counter
// (content_state_sb_fd). Returns the measured SAD; returns 0 for high
// bitdepth, where this check is skipped entirely.
static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
                               int sb_offset) {
  unsigned int tmp_sse;
  uint64_t tmp_sad;
  unsigned int tmp_variance;
  const BLOCK_SIZE bsize = BLOCK_64X64;
  uint8_t *src_y = cpi->Source->y_buffer;
  int src_ystride = cpi->Source->y_stride;
  uint8_t *last_src_y = cpi->Last_Source->y_buffer;
  int last_src_ystride = cpi->Last_Source->y_stride;
  uint64_t avg_source_sad_threshold = 10000;
  uint64_t avg_source_sad_threshold2 = 12000;
#if CONFIG_VP9_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth) return 0;
#endif
  src_y += shift;
  last_src_y += shift;
  tmp_sad =
      cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride);
  tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y,
                                   last_src_ystride, &tmp_sse);
  // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12),
  // i.e. the scaled squared mean frame difference ("sum-diff").
  if (tmp_sad < avg_source_sad_threshold)
    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff
                                                          : kLowSadHighSumdiff;
  else
    x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff
                                                          : kHighSadHighSumdiff;
  // Detect large lighting change: low variance but a large mean difference
  // (CBR, non-screen content only).
  if (cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
      cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) &&
      (tmp_sse - tmp_variance) > 10000)
    x->content_state_sb = kLowVarHighSumdiff;
  else if (tmp_sad > (avg_source_sad_threshold << 1))
    x->content_state_sb = kVeryHighSad;
  if (cpi->content_state_sb_fd != NULL) {
    if (tmp_sad < avg_source_sad_threshold2) {
      // Cap the increment to 255.
      if (cpi->content_state_sb_fd[sb_offset] < 255)
        cpi->content_state_sb_fd[sb_offset]++;
    } else {
      // High-SAD superblock: reset the consecutive-low-sad counter.
      cpi->content_state_sb_fd[sb_offset] = 0;
    }
  }
  return tmp_sad;
}
  950. // This function chooses partitioning based on the variance between source and
  951. // reconstructed last, where variance is computed for down-sampled inputs.
  952. static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
  953. MACROBLOCK *x, int mi_row, int mi_col) {
  954. VP9_COMMON *const cm = &cpi->common;
  955. MACROBLOCKD *xd = &x->e_mbd;
  956. int i, j, k, m;
  957. v64x64 vt;
  958. v16x16 vt2[16];
  959. int force_split[21];
  960. int avg_32x32;
  961. int max_var_32x32 = 0;
  962. int min_var_32x32 = INT_MAX;
  963. int var_32x32;
  964. int avg_16x16[4];
  965. int maxvar_16x16[4];
  966. int minvar_16x16[4];
  967. int64_t threshold_4x4avg;
  968. NOISE_LEVEL noise_level = kLow;
  969. int content_state = 0;
  970. uint8_t *s;
  971. const uint8_t *d;
  972. int sp;
  973. int dp;
  974. unsigned int y_sad = UINT_MAX;
  975. BLOCK_SIZE bsize = BLOCK_64X64;
  976. // Ref frame used in partitioning.
  977. MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
  978. int pixels_wide = 64, pixels_high = 64;
  979. int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
  980. cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };
  981. // For the variance computation under SVC mode, we treat the frame as key if
  982. // the reference (base layer frame) is key frame (i.e., is_key_frame == 1).
  983. const int is_key_frame =
  984. (cm->frame_type == KEY_FRAME ||
  985. (is_one_pass_cbr_svc(cpi) &&
  986. cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
  987. // Always use 4x4 partition for key frame.
  988. const int use_4x4_partition = cm->frame_type == KEY_FRAME;
  989. const int low_res = (cm->width <= 352 && cm->height <= 288);
  990. int variance4x4downsample[16];
  991. int segment_id;
  992. int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3);
  993. set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
  994. segment_id = xd->mi[0]->segment_id;
  995. if (cpi->sf.use_source_sad && !is_key_frame) {
  996. int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
  997. content_state = x->content_state_sb;
  998. x->skip_low_source_sad = (content_state == kLowSadLowSumdiff ||
  999. content_state == kLowSadHighSumdiff)
  1000. ? 1
  1001. : 0;
  1002. x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0;
  1003. if (cpi->content_state_sb_fd != NULL)
  1004. x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2];
  1005. // If source_sad is low copy the partition without computing the y_sad.
  1006. if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
  1007. copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
  1008. x->sb_use_mv_part = 1;
  1009. return 0;
  1010. }
  1011. }
  1012. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
  1013. cyclic_refresh_segment_id_boosted(segment_id)) {
  1014. int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
  1015. set_vbp_thresholds(cpi, thresholds, q, content_state);
  1016. } else {
  1017. set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);
  1018. }
  1019. // For non keyframes, disable 4x4 average for low resolution when speed = 8
  1020. threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX;
  1021. memset(x->variance_low, 0, sizeof(x->variance_low));
  1022. if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  1023. if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
  1024. s = x->plane[0].src.buf;
  1025. sp = x->plane[0].src.stride;
  1026. // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  1027. // 5-20 for the 16x16 blocks.
  1028. force_split[0] = 0;
  1029. if (!is_key_frame) {
  1030. // In the case of spatial/temporal scalable coding, the assumption here is
  1031. // that the temporal reference frame will always be of type LAST_FRAME.
  1032. // TODO(marpan): If that assumption is broken, we need to revisit this code.
  1033. MODE_INFO *mi = xd->mi[0];
  1034. YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
  1035. const YV12_BUFFER_CONFIG *yv12_g = NULL;
  1036. unsigned int y_sad_g, y_sad_thr, y_sad_last;
  1037. bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
  1038. (mi_row + 4 < cm->mi_rows);
  1039. assert(yv12 != NULL);
  1040. if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
  1041. // For now, GOLDEN will not be used for non-zero spatial layers, since
  1042. // it may not be a temporal reference.
  1043. yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  1044. }
  1045. // Only compute y_sad_g (sad for golden reference) for speed < 8.
  1046. if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
  1047. (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
  1048. vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
  1049. &cm->frame_refs[GOLDEN_FRAME - 1].sf);
  1050. y_sad_g = cpi->fn_ptr[bsize].sdf(
  1051. x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
  1052. xd->plane[0].pre[0].stride);
  1053. } else {
  1054. y_sad_g = UINT_MAX;
  1055. }
  1056. if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
  1057. cpi->rc.is_src_frame_alt_ref) {
  1058. yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
  1059. vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
  1060. &cm->frame_refs[ALTREF_FRAME - 1].sf);
  1061. mi->ref_frame[0] = ALTREF_FRAME;
  1062. y_sad_g = UINT_MAX;
  1063. } else {
  1064. vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
  1065. &cm->frame_refs[LAST_FRAME - 1].sf);
  1066. mi->ref_frame[0] = LAST_FRAME;
  1067. }
  1068. mi->ref_frame[1] = NONE;
  1069. mi->sb_type = BLOCK_64X64;
  1070. mi->mv[0].as_int = 0;
  1071. mi->interp_filter = BILINEAR;
  1072. if (cpi->oxcf.speed >= 8 && !low_res &&
  1073. x->content_state_sb != kVeryHighSad) {
  1074. y_sad = cpi->fn_ptr[bsize].sdf(
  1075. x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
  1076. xd->plane[0].pre[0].stride);
  1077. } else {
  1078. y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
  1079. x->sb_use_mv_part = 1;
  1080. x->sb_mvcol_part = mi->mv[0].as_mv.col;
  1081. x->sb_mvrow_part = mi->mv[0].as_mv.row;
  1082. }
  1083. y_sad_last = y_sad;
  1084. // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
  1085. // are close if short_circuit_low_temp_var is on.
  1086. y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
  1087. if (y_sad_g < y_sad_thr) {
  1088. vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
  1089. &cm->frame_refs[GOLDEN_FRAME - 1].sf);
  1090. mi->ref_frame[0] = GOLDEN_FRAME;
  1091. mi->mv[0].as_int = 0;
  1092. y_sad = y_sad_g;
  1093. ref_frame_partition = GOLDEN_FRAME;
  1094. } else {
  1095. x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
  1096. ref_frame_partition = LAST_FRAME;
  1097. }
  1098. set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
  1099. vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
  1100. if (cpi->use_skin_detection)
  1101. x->sb_is_skin =
  1102. skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);
  1103. d = xd->plane[0].dst.buf;
  1104. dp = xd->plane[0].dst.stride;
  1105. // If the y_sad is very small, take 64x64 as partition and exit.
  1106. // Don't check on boosted segment for now, as 64x64 is suppressed there.
  1107. if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
  1108. const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
  1109. const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
  1110. if (mi_col + block_width / 2 < cm->mi_cols &&
  1111. mi_row + block_height / 2 < cm->mi_rows) {
  1112. set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
  1113. x->variance_low[0] = 1;
  1114. chroma_check(cpi, x, bsize, y_sad, is_key_frame);
  1115. return 0;
  1116. }
  1117. }
  1118. // If the y_sad is small enough, copy the partition of the superblock in the
  1119. // last frame to current frame only if the last frame is not a keyframe.
  1120. // Stop the copy every cpi->max_copied_frame to refresh the partition.
  1121. // TODO(jianj) : tune the threshold.
  1122. if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy &&
  1123. copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
  1124. chroma_check(cpi, x, bsize, y_sad, is_key_frame);
  1125. return 0;
  1126. }
  1127. } else {
  1128. d = VP9_VAR_OFFS;
  1129. dp = 0;
  1130. #if CONFIG_VP9_HIGHBITDEPTH
  1131. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  1132. switch (xd->bd) {
  1133. case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break;
  1134. case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break;
  1135. case 8:
  1136. default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break;
  1137. }
  1138. }
  1139. #endif // CONFIG_VP9_HIGHBITDEPTH
  1140. }
  1141. // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  1142. // for splits.
  1143. for (i = 0; i < 4; i++) {
  1144. const int x32_idx = ((i & 1) << 5);
  1145. const int y32_idx = ((i >> 1) << 5);
  1146. const int i2 = i << 2;
  1147. force_split[i + 1] = 0;
  1148. avg_16x16[i] = 0;
  1149. maxvar_16x16[i] = 0;
  1150. minvar_16x16[i] = INT_MAX;
  1151. for (j = 0; j < 4; j++) {
  1152. const int x16_idx = x32_idx + ((j & 1) << 4);
  1153. const int y16_idx = y32_idx + ((j >> 1) << 4);
  1154. const int split_index = 5 + i2 + j;
  1155. v16x16 *vst = &vt.split[i].split[j];
  1156. force_split[split_index] = 0;
  1157. variance4x4downsample[i2 + j] = 0;
  1158. if (!is_key_frame) {
  1159. fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
  1160. #if CONFIG_VP9_HIGHBITDEPTH
  1161. xd->cur_buf->flags,
  1162. #endif
  1163. pixels_wide, pixels_high, is_key_frame);
  1164. fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
  1165. get_variance(&vt.split[i].split[j].part_variances.none);
  1166. avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
  1167. if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i])
  1168. minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
  1169. if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i])
  1170. maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance;
  1171. if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) {
  1172. // 16X16 variance is above threshold for split, so force split to 8x8
  1173. // for this 16x16 block (this also forces splits for upper levels).
  1174. force_split[split_index] = 1;
  1175. force_split[i + 1] = 1;
  1176. force_split[0] = 1;
  1177. } else if (cpi->oxcf.speed < 8 &&
  1178. vt.split[i].split[j].part_variances.none.variance >
  1179. thresholds[1] &&
  1180. !cyclic_refresh_segment_id_boosted(segment_id)) {
  1181. // We have some nominal amount of 16x16 variance (based on average),
  1182. // compute the minmax over the 8x8 sub-blocks, and if above threshold,
  1183. // force split to 8x8 block for this 16x16 block.
  1184. int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
  1185. #if CONFIG_VP9_HIGHBITDEPTH
  1186. xd->cur_buf->flags,
  1187. #endif
  1188. pixels_wide, pixels_high);
  1189. int thresh_minmax = (int)cpi->vbp_threshold_minmax;
  1190. if (x->content_state_sb == kVeryHighSad)
  1191. thresh_minmax = thresh_minmax << 1;
  1192. if (minmax > thresh_minmax) {
  1193. force_split[split_index] = 1;
  1194. force_split[i + 1] = 1;
  1195. force_split[0] = 1;
  1196. }
  1197. }
  1198. }
  1199. if (is_key_frame || (low_res &&
  1200. vt.split[i].split[j].part_variances.none.variance >
  1201. threshold_4x4avg)) {
  1202. force_split[split_index] = 0;
  1203. // Go down to 4x4 down-sampling for variance.
  1204. variance4x4downsample[i2 + j] = 1;
  1205. for (k = 0; k < 4; k++) {
  1206. int x8_idx = x16_idx + ((k & 1) << 3);
  1207. int y8_idx = y16_idx + ((k >> 1) << 3);
  1208. v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
  1209. fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
  1210. #if CONFIG_VP9_HIGHBITDEPTH
  1211. xd->cur_buf->flags,
  1212. #endif
  1213. pixels_wide, pixels_high, is_key_frame);
  1214. }
  1215. }
  1216. }
  1217. }
  1218. if (cpi->noise_estimate.enabled)
  1219. noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
  1220. // Fill the rest of the variance tree by summing split partition values.
  1221. avg_32x32 = 0;
  1222. for (i = 0; i < 4; i++) {
  1223. const int i2 = i << 2;
  1224. for (j = 0; j < 4; j++) {
  1225. if (variance4x4downsample[i2 + j] == 1) {
  1226. v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j];
  1227. for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
  1228. fill_variance_tree(vtemp, BLOCK_16X16);
  1229. // If variance of this 16x16 block is above the threshold, force block
  1230. // to split. This also forces a split on the upper levels.
  1231. get_variance(&vtemp->part_variances.none);
  1232. if (vtemp->part_variances.none.variance > thresholds[2]) {
  1233. force_split[5 + i2 + j] = 1;
  1234. force_split[i + 1] = 1;
  1235. force_split[0] = 1;
  1236. }
  1237. }
  1238. }
  1239. fill_variance_tree(&vt.split[i], BLOCK_32X32);
  1240. // If variance of this 32x32 block is above the threshold, or if its above
  1241. // (some threshold of) the average variance over the sub-16x16 blocks, then
  1242. // force this block to split. This also forces a split on the upper
  1243. // (64x64) level.
  1244. if (!force_split[i + 1]) {
  1245. get_variance(&vt.split[i].part_variances.none);
  1246. var_32x32 = vt.split[i].part_variances.none.variance;
  1247. max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
  1248. min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
  1249. if (vt.split[i].part_variances.none.variance > thresholds[1] ||
  1250. (!is_key_frame &&
  1251. vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
  1252. vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
  1253. force_split[i + 1] = 1;
  1254. force_split[0] = 1;
  1255. } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 &&
  1256. (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) &&
  1257. maxvar_16x16[i] > thresholds[1]) {
  1258. force_split[i + 1] = 1;
  1259. force_split[0] = 1;
  1260. }
  1261. avg_32x32 += var_32x32;
  1262. }
  1263. }
  1264. if (!force_split[0]) {
  1265. fill_variance_tree(&vt, BLOCK_64X64);
  1266. get_variance(&vt.part_variances.none);
  1267. // If variance of this 64x64 block is above (some threshold of) the average
  1268. // variance over the sub-32x32 blocks, then force this block to split.
  1269. // Only checking this for noise level >= medium for now.
  1270. if (!is_key_frame && noise_level >= kMedium &&
  1271. vt.part_variances.none.variance > (9 * avg_32x32) >> 5)
  1272. force_split[0] = 1;
  1273. // Else if the maximum 32x32 variance minus the miniumum 32x32 variance in
  1274. // a 64x64 block is greater than threshold and the maximum 32x32 variance is
  1275. // above a miniumum threshold, then force the split of a 64x64 block
  1276. // Only check this for low noise.
  1277. else if (!is_key_frame && noise_level < kMedium &&
  1278. (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
  1279. max_var_32x32 > thresholds[0] >> 1)
  1280. force_split[0] = 1;
  1281. }
  1282. // Now go through the entire structure, splitting every block size until
  1283. // we get to one that's got a variance lower than our threshold.
  1284. if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
  1285. !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col,
  1286. thresholds[0], BLOCK_16X16, force_split[0])) {
  1287. for (i = 0; i < 4; ++i) {
  1288. const int x32_idx = ((i & 1) << 2);
  1289. const int y32_idx = ((i >> 1) << 2);
  1290. const int i2 = i << 2;
  1291. if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32,
  1292. (mi_row + y32_idx), (mi_col + x32_idx),
  1293. thresholds[1], BLOCK_16X16,
  1294. force_split[i + 1])) {
  1295. for (j = 0; j < 4; ++j) {
  1296. const int x16_idx = ((j & 1) << 1);
  1297. const int y16_idx = ((j >> 1) << 1);
  1298. // For inter frames: if variance4x4downsample[] == 1 for this 16x16
  1299. // block, then the variance is based on 4x4 down-sampling, so use vt2
  1300. // in set_vt_partioning(), otherwise use vt.
  1301. v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1)
  1302. ? &vt2[i2 + j]
  1303. : &vt.split[i].split[j];
  1304. if (!set_vt_partitioning(
  1305. cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx,
  1306. mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min,
  1307. force_split[5 + i2 + j])) {
  1308. for (k = 0; k < 4; ++k) {
  1309. const int x8_idx = (k & 1);
  1310. const int y8_idx = (k >> 1);
  1311. if (use_4x4_partition) {
  1312. if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
  1313. BLOCK_8X8,
  1314. mi_row + y32_idx + y16_idx + y8_idx,
  1315. mi_col + x32_idx + x16_idx + x8_idx,
  1316. thresholds[3], BLOCK_8X8, 0)) {
  1317. set_block_size(
  1318. cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
  1319. (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4);
  1320. }
  1321. } else {
  1322. set_block_size(
  1323. cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
  1324. (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8);
  1325. }
  1326. }
  1327. }
  1328. }
  1329. }
  1330. }
  1331. }
  1332. if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag) {
  1333. update_prev_partition(cpi, BLOCK_64X64, mi_row, mi_col);
  1334. cpi->prev_segment_id[sb_offset] = segment_id;
  1335. memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low,
  1336. sizeof(x->variance_low));
  1337. // Reset the counter for copy partitioning
  1338. if (cpi->copied_frame_cnt[sb_offset] == cpi->max_copied_frame)
  1339. cpi->copied_frame_cnt[sb_offset] = 0;
  1340. }
  1341. if (cpi->sf.short_circuit_low_temp_var) {
  1342. set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition,
  1343. mi_col, mi_row);
  1344. }
  1345. chroma_check(cpi, x, bsize, y_sad, is_key_frame);
  1346. return 0;
  1347. }
// Commit the mode decision held in |ctx| for the block at (mi_row, mi_col):
// copies the picked MODE_INFO into the frame-level mi grid, refreshes the
// segment id for AQ modes, points the coefficient buffers at the set saved by
// the pick stage, and (when output_enabled) accumulates mode/filter counts,
// RD diffs, and per-mi motion vectors for the current frame.
static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         int output_enabled) {
  int i, x_idx, y;
  VP9_COMMON *const cm = &cpi->common;
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  MODE_INFO *mi = &ctx->mic;       // Mode info decided during the pick stage.
  MODE_INFO *const xdmi = xd->mi[0];
  MODE_INFO *mi_addr = xd->mi[0];  // Destination slot in the frame mi grid.
  const struct segmentation *const seg = &cm->seg;
  const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
  const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
  // Clamp the block extent (in mi units) to the frame boundary.
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
  MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
  int w, h;
  const int mis = cm->mi_stride;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  int max_plane;

  assert(mi->sb_type == bsize);

  // Copy the picked mode info into the frame-level grid slot; all covered
  // mi cells are later pointed at this one MODE_INFO.
  *mi_addr = *mi;
  *x->mbmi_ext = ctx->mbmi_ext;

  // If segmentation in use
  if (seg->enabled) {
    // For in frame complexity AQ copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    // Else for cyclic refresh mode update the segment map, set the segment id
    // and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip, p);
    }
  }

  // Select the coefficient buffer set saved by the pick stage: set [1] for
  // the planes that were coded, set [2] for the remaining planes.
  max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1;
  for (i = 0; i < max_plane; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][1];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
    p[i].eobs = ctx->eobs_pbuf[i][1];
  }

  for (i = max_plane; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][2];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
    p[i].eobs = ctx->eobs_pbuf[i][2];
  }

  // Restore the coding context of the MB to that that was in place
  // when the mode was picked for it
  for (y = 0; y < mi_height; y++)
    for (x_idx = 0; x_idx < mi_width; x_idx++)
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        // Every in-frame mi cell of the block shares the same MODE_INFO.
        xd->mi[x_idx + y * mis] = mi_addr;
      }

  if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x);

  // Sub-8x8 inter blocks keep their MVs in bmi[]; mirror the last sub-block's
  // MVs into the top-level mv fields.
  if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) {
    xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
    xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
  }

  x->skip = ctx->skip;
  memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk,
         sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);

  // Everything below only applies to a real (non-trial) encode pass.
  if (!output_enabled) return;

#if CONFIG_INTERNAL_STATS
  if (frame_is_intra_only(cm)) {
    static const int kf_mode_index[] = {
      THR_DC /*DC_PRED*/,          THR_V_PRED /*V_PRED*/,
      THR_H_PRED /*H_PRED*/,       THR_D45_PRED /*D45_PRED*/,
      THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
      THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
      THR_D63_PRED /*D63_PRED*/,   THR_TM /*TM_PRED*/,
    };
    ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]];
  } else {
    // Note how often each mode chosen as best
    ++cpi->mode_chosen_counts[ctx->best_mode_index];
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(xdmi)) {
      vp9_update_mv_count(td);

      if (cm->interp_filter == SWITCHABLE) {
        // NOTE: this |ctx| intentionally shadows the function parameter.
        const int ctx = get_pred_context_switchable_interp(xd);
        ++td->counts->switchable_interp[ctx][xdmi->interp_filter];
      }
    }

    // Accumulate per-thread RD diffs for the reference-mode and filter
    // selection decisions.
    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      rdc->filter_diff[i] += ctx->best_filter_diff[i];
  }

  // Store reference frames and motion vectors for every covered mi cell so
  // the next frame can use them for MV prediction.
  for (h = 0; h < y_mis; ++h) {
    MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
    for (w = 0; w < x_mis; ++w) {
      MV_REF *const mv = frame_mv + w;
      mv->ref_frame[0] = mi->ref_frame[0];
      mv->ref_frame[1] = mi->ref_frame[1];
      mv->mv[0].as_int = mi->mv[0].as_int;
      mv->mv[1].as_int = mi->mv[1].as_int;
    }
  }
}
  1460. void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
  1461. int mi_row, int mi_col) {
  1462. uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer };
  1463. const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
  1464. int i;
  1465. // Set current frame pointer.
  1466. x->e_mbd.cur_buf = src;
  1467. for (i = 0; i < MAX_MB_PLANE; i++)
  1468. setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
  1469. NULL, x->e_mbd.plane[i].subsampling_x,
  1470. x->e_mbd.plane[i].subsampling_y);
  1471. }
  1472. static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
  1473. RD_COST *rd_cost, BLOCK_SIZE bsize) {
  1474. MACROBLOCKD *const xd = &x->e_mbd;
  1475. MODE_INFO *const mi = xd->mi[0];
  1476. INTERP_FILTER filter_ref;
  1477. filter_ref = get_pred_context_switchable_interp(xd);
  1478. if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP;
  1479. mi->sb_type = bsize;
  1480. mi->mode = ZEROMV;
  1481. mi->tx_size =
  1482. VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);
  1483. mi->skip = 1;
  1484. mi->uv_mode = DC_PRED;
  1485. mi->ref_frame[0] = LAST_FRAME;
  1486. mi->ref_frame[1] = NONE;
  1487. mi->mv[0].as_int = 0;
  1488. mi->interp_filter = filter_ref;
  1489. xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
  1490. x->skip = 1;
  1491. vp9_rd_cost_init(rd_cost);
  1492. }
  1493. static int set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
  1494. int8_t segment_id) {
  1495. int segment_qindex;
  1496. VP9_COMMON *const cm = &cpi->common;
  1497. vp9_init_plane_quantizers(cpi, x);
  1498. vpx_clear_system_state();
  1499. segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
  1500. return vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
  1501. }
// Run the rate-distortion mode search for the block at (mi_row, mi_col):
// sets up offsets and coefficient buffers, applies the active AQ mode's
// segment/rdmult adjustments, dispatches to the intra / inter / sub-8x8
// pickers, and stores the resulting rate and distortion in |rd_cost| and
// |ctx|. |best_rd| lets the pickers terminate early.
static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                             MACROBLOCK *const x, int mi_row, int mi_col,
                             RD_COST *rd_cost, BLOCK_SIZE bsize,
                             PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  int i, orig_rdmult;

  vpx_clear_system_state();

  // Use the lower precision, but faster, 32x32 fdct for mode selection.
  x->use_lp32x32fdct = 1;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mi = xd->mi[0];
  mi->sb_type = bsize;

  // The mode search writes into coefficient buffer set [0]; the winning
  // coefficients are moved to sets [1]/[2] elsewhere (see update_state).
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][0];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
    p[i].eobs = ctx->eobs_pbuf[i][0];
  }
  ctx->is_coded = 0;
  ctx->skippable = 0;
  ctx->pred_pixel_ready = 0;
  x->skip_recode = 0;

  // Set to zero to make sure we do not use the previous encoded frame stats
  mi->skip = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->source_variance = vp9_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
#else
  x->source_variance =
      vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Save rdmult before it might be changed, so it can be restored later.
  orig_rdmult = x->rdmult;

  if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
    double logvar = vp9_log_block_var(cpi, x, bsize);
    // Check block complexity as part of descision on using pixel or transform
    // domain distortion in rd tests.
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
                         (logvar >= cpi->sf.tx_domain_thresh);
    // Check block complexity as part of descision on using quantized
    // coefficient optimisation inside the rd loop.
    x->block_qcoeff_opt =
        cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
  } else {
    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
    x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
  }

  // Per-AQ-mode segment selection and rdmult adjustment. Each branch decides
  // whether the segment id comes from a fresh classification or from the
  // (last-frame or updated) segment map.
  if (aq_mode == VARIANCE_AQ) {
    const int energy =
        bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize);

    if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
        cpi->force_update_segmentation ||
        (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
      mi->segment_id = vp9_vaq_segment_id(energy);
    } else {
      const uint8_t *const map =
          cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
  } else if (aq_mode == LOOKAHEAD_AQ) {
    const uint8_t *const map = cpi->segmentation_map;

    // I do not change rdmult here consciously.
    mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
  } else if (aq_mode == EQUATOR360_AQ) {
    if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) {
      mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
    } else {
      const uint8_t *const map =
          cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
  } else if (aq_mode == COMPLEXITY_AQ) {
    x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
  } else if (aq_mode == CYCLIC_REFRESH_AQ) {
    const uint8_t *const map =
        cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
    // If segment is boosted, use rdmult for that segment.
    if (cyclic_refresh_segment_id_boosted(
            get_segment_id(cm, map, bsize, mi_row, mi_col)))
      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
  }

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
  } else {
    if (bsize >= BLOCK_8X8) {
      if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
        vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
                                           ctx, best_rd);
      else
        vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                  bsize, ctx, best_rd);
    } else {
      vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                    bsize, ctx, best_rd);
    }
  }

  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
      (bsize >= BLOCK_16X16) &&
      (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = orig_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handle.
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}
// Accumulate entropy-coding counts for the just-encoded block on inter
// frames: intra/inter decision, reference-frame choices (compound vs single,
// which reference), and the inter prediction mode(s) — per sub-block for
// sizes below 8x8. Intra-only frames are not counted here.
static void update_stats(VP9_COMMON *cm, ThreadData *td) {
  const MACROBLOCK *x = &td->mb;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MODE_INFO *const mi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const BLOCK_SIZE bsize = mi->sb_type;

  if (!frame_is_intra_only(cm)) {
    FRAME_COUNTS *const counts = td->counts;
    const int inter_block = is_inter_block(mi);
    const int seg_ref_active =
        segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME);
    if (!seg_ref_active) {
      counts->intra_inter[get_intra_inter_context(xd)][inter_block]++;
      // If the segment reference feature is enabled we have only a single
      // reference frame allowed for the segment so exclude it from
      // the reference frame counts used to work out probabilities.
      if (inter_block) {
        const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
        if (cm->reference_mode == REFERENCE_MODE_SELECT)
          counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
                            [has_second_ref(mi)]++;
        if (has_second_ref(mi)) {
          counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
                          [ref0 == GOLDEN_FRAME]++;
        } else {
          // Single reference: count LAST vs non-LAST first, then GOLDEN vs
          // ALTREF when the first choice was non-LAST.
          counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
                            [ref0 != LAST_FRAME]++;
          if (ref0 != LAST_FRAME)
            counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
                              [ref0 != GOLDEN_FRAME]++;
        }
      }
    }
    // Inter mode counts are skipped for segments with the skip feature,
    // since those blocks carry no coded mode.
    if (inter_block &&
        !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
      const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
      if (bsize >= BLOCK_8X8) {
        const PREDICTION_MODE mode = mi->mode;
        ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
      } else {
        // Sub-8x8: each 4x4-granularity sub-block has its own mode.
        const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
        const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
        int idx, idy;
        for (idy = 0; idy < 2; idy += num_4x4_h) {
          for (idx = 0; idx < 2; idx += num_4x4_w) {
            const int j = idy * 2 + idx;
            const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
            ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
          }
        }
      }
    }
  }
}
  1681. static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
  1682. ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
  1683. ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
  1684. PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
  1685. BLOCK_SIZE bsize) {
  1686. MACROBLOCKD *const xd = &x->e_mbd;
  1687. int p;
  1688. const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  1689. const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  1690. int mi_width = num_8x8_blocks_wide_lookup[bsize];
  1691. int mi_height = num_8x8_blocks_high_lookup[bsize];
  1692. for (p = 0; p < MAX_MB_PLANE; p++) {
  1693. memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
  1694. a + num_4x4_blocks_wide * p,
  1695. (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
  1696. xd->plane[p].subsampling_x);
  1697. memcpy(xd->left_context[p] +
  1698. ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
  1699. l + num_4x4_blocks_high * p,
  1700. (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
  1701. xd->plane[p].subsampling_y);
  1702. }
  1703. memcpy(xd->above_seg_context + mi_col, sa,
  1704. sizeof(*xd->above_seg_context) * mi_width);
  1705. memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
  1706. sizeof(xd->left_seg_context[0]) * mi_height);
  1707. }
  1708. static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
  1709. ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
  1710. ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
  1711. PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
  1712. BLOCK_SIZE bsize) {
  1713. const MACROBLOCKD *const xd = &x->e_mbd;
  1714. int p;
  1715. const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  1716. const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  1717. int mi_width = num_8x8_blocks_wide_lookup[bsize];
  1718. int mi_height = num_8x8_blocks_high_lookup[bsize];
  1719. // buffer the above/left context information of the block in search.
  1720. for (p = 0; p < MAX_MB_PLANE; ++p) {
  1721. memcpy(a + num_4x4_blocks_wide * p,
  1722. xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
  1723. (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
  1724. xd->plane[p].subsampling_x);
  1725. memcpy(l + num_4x4_blocks_high * p,
  1726. xd->left_context[p] +
  1727. ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
  1728. (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
  1729. xd->plane[p].subsampling_y);
  1730. }
  1731. memcpy(sa, xd->above_seg_context + mi_col,
  1732. sizeof(*xd->above_seg_context) * mi_width);
  1733. memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
  1734. sizeof(xd->left_seg_context[0]) * mi_height);
  1735. }
  1736. static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
  1737. TOKENEXTRA **tp, int mi_row, int mi_col,
  1738. int output_enabled, BLOCK_SIZE bsize,
  1739. PICK_MODE_CONTEXT *ctx) {
  1740. MACROBLOCK *const x = &td->mb;
  1741. set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
  1742. update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
  1743. encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
  1744. if (output_enabled) {
  1745. update_stats(&cpi->common, td);
  1746. (*tp)->token = EOSB_TOKEN;
  1747. (*tp)++;
  1748. }
  1749. }
// Recursively encode the coding tree rooted at (mi_row, mi_col) according to
// the partitioning stored in |pc_tree|, calling encode_b() on each leaf.
// Partition counts are updated only when output_enabled, and boundary blocks
// that fall outside the frame are skipped.
static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
                      TOKENEXTRA **tp, int mi_row, int mi_col,
                      int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  // hbs: half the block's span in mi (8x8) units, used to locate sub-blocks.
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  int ctx;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize = bsize;

  // Entirely outside the visible frame: nothing to encode.
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (bsize >= BLOCK_8X8) {
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = get_subsize(bsize, pc_tree->partitioning);
  } else {
    // 4x4-level blocks have no partition context of their own.
    ctx = 0;
    subsize = BLOCK_4X4;
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->none);
      break;
    case PARTITION_VERT:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->vertical[0]);
      // The second half is only coded when it lies inside the frame.
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
                 subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->horizontal[0]);
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
                 subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      if (bsize == BLOCK_8X8) {
        // An 8x8 split is a leaf (four 4x4s coded via one context).
        encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
                 pc_tree->leaf_split[0]);
      } else {
        encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  pc_tree->split[0]);
        encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                  subsize, pc_tree->split[1]);
        encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                  subsize, pc_tree->split[2]);
        encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
                  subsize, pc_tree->split[3]);
      }
      break;
    default: assert(0 && "Invalid partition type."); break;
  }

  // For PARTITION_SPLIT the context is updated by the recursive calls,
  // except at the 8x8 leaf level.
  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
  1812. // Check to see if the given partition size is allowed for a specified number
  1813. // of 8x8 block rows and columns remaining in the image.
  1814. // If not then return the largest allowed partition size
  1815. static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
  1816. int cols_left, int *bh, int *bw) {
  1817. if (rows_left <= 0 || cols_left <= 0) {
  1818. return VPXMIN(bsize, BLOCK_8X8);
  1819. } else {
  1820. for (; bsize > 0; bsize -= 3) {
  1821. *bh = num_8x8_blocks_high_lookup[bsize];
  1822. *bw = num_8x8_blocks_wide_lookup[bsize];
  1823. if ((*bh <= rows_left) && (*bw <= cols_left)) {
  1824. break;
  1825. }
  1826. }
  1827. }
  1828. return bsize;
  1829. }
// Fill the mode-info grid of a partially-visible SB64 with the largest legal
// partition sizes. Starts from bh_in/bw_in and lets find_partition_size()
// shrink bh/bw in place near the bottom/right edges, so later iterations
// advance by the (possibly smaller) adjusted step.
static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in,
                                         int bw_in, int row8x8_remaining,
                                         int col8x8_remaining, BLOCK_SIZE bsize,
                                         MODE_INFO **mi_8x8) {
  int bh = bh_in;
  int r, c;
  for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
    // Reset the column step at the start of each row; it may have been
    // shrunk by the previous row's edge handling.
    int bw = bw_in;
    for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
      const int index = r * mis + c;
      mi_8x8[index] = mi + index;
      // find_partition_size() updates bh/bw to the size actually chosen.
      mi_8x8[index]->sb_type = find_partition_size(
          bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
    }
  }
}
// This function attempts to set all mode info entries in a given SB64
// to the same block partition size.
// However, at the bottom and right borders of the image the requested size
// may not be allowed in which case this code attempts to choose the largest
// allowable partition.
static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
                                   MODE_INFO **mi_8x8, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mi_stride;
  // Remaining extent of the tile, in 8x8 mi units.
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  int block_row, block_col;
  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
  int bh = num_8x8_blocks_high_lookup[bsize];
  int bw = num_8x8_blocks_wide_lookup[bsize];

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  // Apply the requested partition size to the SB64 if it is all "in image"
  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
      (row8x8_remaining >= MI_BLOCK_SIZE)) {
    for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
      for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
        int index = block_row * mis + block_col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = bsize;
      }
    }
  } else {
    // Else this is a partial SB64.
    set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
                                 col8x8_remaining, bsize, mi_8x8);
  }
}
// (row, col) offsets, in 8x8 mi units, of the sixteen 16x16 blocks inside a
// 64x64 superblock, grouped four at a time by their enclosing 32x32 quadrant
// (raster order within each quadrant). Indexed as coord_lookup[i * 4 + j]
// where i selects the 32x32 quadrant and j the 16x16 block within it.
static const struct {
  int row;
  int col;
} coord_lookup[16] = {
  // 32x32 index = 0
  { 0, 0 },
  { 0, 2 },
  { 2, 0 },
  { 2, 2 },
  // 32x32 index = 1
  { 0, 4 },
  { 0, 6 },
  { 2, 4 },
  { 2, 6 },
  // 32x32 index = 2
  { 4, 0 },
  { 4, 2 },
  { 6, 0 },
  { 6, 2 },
  // 32x32 index = 3
  { 4, 4 },
  { 4, 6 },
  { 6, 4 },
  { 6, 6 },
};
// Choose a partitioning for one SB64 from precomputed source-difference
// variances (cpi->source_diff_var). Starts from 16x16, merges each 32x32
// quadrant whose four 16x16 variances are all below the threshold, and
// merges to 64x64 if all four quadrants merged and pass a doubled threshold.
static void set_source_var_based_partition(VP9_COMP *cpi,
                                           const TileInfo *const tile,
                                           MACROBLOCK *const x,
                                           MODE_INFO **mi_8x8, int mi_row,
                                           int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mi_stride;
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;

  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  // In-image SB64
  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
      (row8x8_remaining >= MI_BLOCK_SIZE)) {
    int i, j;
    int index;
    diff d32[4];
    // Offset of this SB64 in the per-16x16 (macroblock-grid) diff array.
    const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
    int is_larger_better = 0;
    int use32x32 = 0;
    unsigned int thr = cpi->source_var_thresh;

    memset(d32, 0, 4 * sizeof(diff));

    for (i = 0; i < 4; i++) {
      diff *d16[4];

      for (j = 0; j < 4; j++) {
        // Locate the 16x16 block and its precomputed diff entry.
        int b_mi_row = coord_lookup[i * 4 + j].row;
        int b_mi_col = coord_lookup[i * 4 + j].col;
        int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2;

        d16[j] = cpi->source_diff_var + offset + boffset;

        index = b_mi_row * mis + b_mi_col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = BLOCK_16X16;

        // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
        // size to further improve quality.
      }

      is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) &&
                         (d16[2]->var < thr) && (d16[3]->var < thr);

      // Use 32x32 partition
      if (is_larger_better) {
        use32x32 += 1;

        for (j = 0; j < 4; j++) {
          d32[i].sse += d16[j]->sse;
          d32[i].sum += d16[j]->sum;
        }

        // var = sse - sum^2 / 1024 (1024 = pixels in a 32x32 block).
        d32[i].var =
            (unsigned int)(d32[i].sse -
                           (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >>
                                          10));

        index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->sb_type = BLOCK_32X32;
      }
    }

    if (use32x32 == 4) {
      // Relax the threshold for the 64x64 decision.
      thr <<= 1;
      is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) &&
                         (d32[2].var < thr) && (d32[3].var < thr);

      // Use 64x64 partition
      if (is_larger_better) {
        mi_8x8[0] = mi_upper_left;
        mi_8x8[0]->sb_type = BLOCK_64X64;
      }
    }
  } else {  // partial in-image SB64
    int bh = num_8x8_blocks_high_lookup[BLOCK_16X16];
    int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16];
    set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
                                 col8x8_remaining, BLOCK_16X16, mi_8x8);
  }
}
// Real-time-path version of update_state(): commit the picked mode in |ctx|
// to the frame-level mode info and update segment ids, interp-filter and MV
// counters, and the frame MV buffer.
static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
                            PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                            int bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct macroblock_plane *const p = x->plane;
  const struct segmentation *const seg = &cm->seg;
  const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
  const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
  // Clamp the mi extent to the visible frame.
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);

  // Copy the chosen mode decision into the shared mode-info structures.
  *(xd->mi[0]) = ctx->mic;
  *(x->mbmi_ext) = ctx->mbmi_ext;

  if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) {
    // For in frame complexity AQ or variance AQ, copy segment_id from
    // segmentation_map.
    if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    } else {
      // Setting segmentation map for cyclic_refresh.
      vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip, p);
    }
    // Segment may have changed; re-derive per-plane quantizers.
    vp9_init_plane_quantizers(cpi, x);
  }

  if (is_inter_block(mi)) {
    vp9_update_mv_count(td);
    if (cm->interp_filter == SWITCHABLE) {
      const int pred_ctx = get_pred_context_switchable_interp(xd);
      ++td->counts->switchable_interp[pred_ctx][mi->interp_filter];
    }

    if (mi->sb_type < BLOCK_8X8) {
      // Sub-8x8: expose the last sub-block's MVs at the block level.
      mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
      mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
    }
  }

  // Store refs/MVs into the frame MV buffer when a later frame may read them
  // (prev-frame MV prediction, or SVC base-layer MV reuse).
  if (cm->use_prev_frame_mvs || !cm->error_resilient_mode ||
      (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 &&
       cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
    MV_REF *const frame_mvs =
        cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
    int w, h;

    for (h = 0; h < y_mis; ++h) {
      MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
      for (w = 0; w < x_mis; ++w) {
        MV_REF *const mv = frame_mv + w;
        mv->ref_frame[0] = mi->ref_frame[0];
        mv->ref_frame[1] = mi->ref_frame[1];
        mv->mv[0].as_int = mi->mv[0].as_int;
        mv->mv[1].as_int = mi->mv[1].as_int;
      }
    }
  }

  x->skip = ctx->skip;
  // Skip-txfm shortcut is only trusted for segment 0.
  x->skip_txfm[0] = mi->segment_id ? 0 : ctx->skip_txfm[0];
}
// Real-time-path block encode: like encode_b() but uses update_state_rt()
// and always updates stats and emits the end-of-superblock token.
static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
                        const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
                        int mi_col, int output_enabled, BLOCK_SIZE bsize,
                        PICK_MODE_CONTEXT *ctx) {
  MACROBLOCK *const x = &td->mb;
  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
  update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);

  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
  update_stats(&cpi->common, td);

  // Terminate this block's token run for the bitstream packer.
  (*tp)->token = EOSB_TOKEN;
  (*tp)++;
}
// Real-time-path recursive superblock encode. Unlike encode_sb(), the
// partitioning is read from the mode-info grid (mi_8x8[0]->sb_type) rather
// than from pc_tree->partitioning.
static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
                         const TileInfo *const tile, TOKENEXTRA **tp,
                         int mi_row, int mi_col, int output_enabled,
                         BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;

  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  int ctx;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  if (bsize >= BLOCK_8X8) {
    const int idx_str = xd->mi_stride * mi_row + mi_col;
    MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    // The chosen sub-block size was stored in the mi grid by the picker.
    subsize = mi_8x8[0]->sb_type;
  } else {
    ctx = 0;
    subsize = BLOCK_4X4;
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->none);
      break;
    case PARTITION_VERT:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->vertical[0]);
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                    subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                  &pc_tree->horizontal[0]);
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                    subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                   pc_tree->split[0]);
      encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                   subsize, pc_tree->split[1]);
      encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                   subsize, pc_tree->split[2]);
      encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
                   output_enabled, subsize, pc_tree->split[3]);
      break;
    default: assert(0 && "Invalid partition type."); break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
// RD-evaluate an existing partitioning (read from the mi grid, e.g. carried
// over from the previous frame or set by a fixed-partition policy), optionally
// compare it against PARTITION_NONE and a one-level PARTITION_SPLIT
// alternative, and commit the best choice into |pc_tree| / the mi grid.
// Outputs the chosen rate/distortion via |rate| and |dist|; when |do_recon|
// is set, encodes the chosen partitioning (output enabled only at 64x64).
static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, MODE_INFO **mi_8x8,
                             TOKENEXTRA **tp, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
                             int do_recon, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mis = cm->mi_stride;
  const int bsl = b_width_log2_lookup[bsize];
  const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
  const int bss = (1 << bsl) / 4;
  int i, pl;
  PARTITION_TYPE partition = PARTITION_NONE;
  BLOCK_SIZE subsize;
  // Saved entropy/partition contexts so trial encodes can be undone.
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  RD_COST last_part_rdc, none_rdc, chosen_rdc;
  BLOCK_SIZE sub_subsize = BLOCK_4X4;
  int splits_below = 0;
  BLOCK_SIZE bs_type = mi_8x8[0]->sb_type;
  int do_partition_search = 1;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  // This code path assumes square blocks.
  assert(num_4x4_blocks_wide_lookup[bsize] ==
         num_4x4_blocks_high_lookup[bsize]);

  vp9_rd_cost_reset(&last_part_rdc);
  vp9_rd_cost_reset(&none_rdc);
  vp9_rd_cost_reset(&chosen_rdc);

  // The "last" partitioning under evaluation comes from the mi grid.
  partition = partition_lookup[bsl][bs_type];
  subsize = get_subsize(bsize, partition);

  pc_tree->partitioning = partition;
  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    x->mb_energy = vp9_block_energy(cpi, x, bsize);
  }

  if (do_partition_search &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      cpi->sf.adjust_partitioning_from_last_frame) {
    // Check if any of the sub blocks are further split.
    if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
      sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
      splits_below = 1;
      for (i = 0; i < 4; i++) {
        int jj = i >> 1, ii = i & 0x01;
        MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
        if (this_mi && this_mi->sb_type >= sub_subsize) {
          splits_below = 0;
        }
      }
    }

    // If partition is not none try none unless each of the 4 splits are split
    // even further..
    if (partition != PARTITION_NONE && !splits_below &&
        mi_row + (mi_step >> 1) < cm->mi_rows &&
        mi_col + (mi_step >> 1) < cm->mi_cols) {
      pc_tree->partitioning = PARTITION_NONE;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx,
                       INT64_MAX);

      pl = partition_plane_context(xd, mi_row, mi_col, bsize);

      if (none_rdc.rate < INT_MAX) {
        none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
        none_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist);
      }

      // Undo the trial and restore the original partitioning.
      restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
      mi_8x8[0]->sb_type = bs_type;
      pc_tree->partitioning = partition;
    }
  }

  // Evaluate the partitioning carried by the mi grid ("last part").
  switch (partition) {
    case PARTITION_NONE:
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize,
                       ctx, INT64_MAX);
      break;
    case PARTITION_HORZ:
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       subsize, &pc_tree->horizontal[0], INT64_MAX);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_row + (mi_step >> 1) < cm->mi_rows) {
        RD_COST tmp_rdc;
        PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
        vp9_rd_cost_init(&tmp_rdc);
        // Commit the first half so the second half sees its reconstruction.
        update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
        encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
        rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
                         &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_VERT:
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       subsize, &pc_tree->vertical[0], INT64_MAX);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_col + (mi_step >> 1) < cm->mi_cols) {
        RD_COST tmp_rdc;
        PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
        vp9_rd_cost_init(&tmp_rdc);
        update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
        encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
        // NOTE(review): index is (bsize > BLOCK_8X8), unlike the horizontal
        // case which always uses [1] — presumably intentional for the 8x8
        // sub-block context layout; verify against pc_tree definition.
        rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
                         &tmp_rdc, subsize,
                         &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_SPLIT:
      if (bsize == BLOCK_8X8) {
        rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                         subsize, pc_tree->leaf_split[0], INT64_MAX);
        break;
      }
      last_part_rdc.rate = 0;
      last_part_rdc.dist = 0;
      last_part_rdc.rdcost = 0;
      // Recurse into the four quadrants, accumulating rate/distortion.
      for (i = 0; i < 4; i++) {
        int x_idx = (i & 1) * (mi_step >> 1);
        int y_idx = (i >> 1) * (mi_step >> 1);
        int jj = i >> 1, ii = i & 0x01;
        RD_COST tmp_rdc;
        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
          continue;

        vp9_rd_cost_init(&tmp_rdc);
        rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss,
                         tp, mi_row + y_idx, mi_col + x_idx, subsize,
                         &tmp_rdc.rate, &tmp_rdc.dist, i != 3,
                         pc_tree->split[i]);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
      }
      break;
    default: assert(0); break;
  }

  pl = partition_plane_context(xd, mi_row, mi_col, bsize);
  if (last_part_rdc.rate < INT_MAX) {
    last_part_rdc.rate += cpi->partition_cost[pl][partition];
    last_part_rdc.rdcost =
        RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist);
  }

  // Optionally also try a one-level split (each quadrant forced to
  // PARTITION_NONE) as a third alternative, for blocks fully (or exactly)
  // inside the frame.
  if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
      (mi_row + mi_step < cm->mi_rows ||
       mi_row + (mi_step >> 1) == cm->mi_rows) &&
      (mi_col + mi_step < cm->mi_cols ||
       mi_col + (mi_step >> 1) == cm->mi_cols)) {
    BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
    chosen_rdc.rate = 0;
    chosen_rdc.dist = 0;
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
    pc_tree->partitioning = PARTITION_SPLIT;

    // Split partition.
    for (i = 0; i < 4; i++) {
      int x_idx = (i & 1) * (mi_step >> 1);
      int y_idx = (i >> 1) * (mi_step >> 1);
      RD_COST tmp_rdc;
      // Per-quadrant context save/restore (shadows the outer buffers).
      ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
      PARTITION_CONTEXT sl[8], sa[8];

      if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
        continue;

      save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
      pc_tree->split[i]->partitioning = PARTITION_NONE;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
                       &tmp_rdc, split_subsize, &pc_tree->split[i]->none,
                       INT64_MAX);

      restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

      if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
        vp9_rd_cost_reset(&chosen_rdc);
        break;
      }

      chosen_rdc.rate += tmp_rdc.rate;
      chosen_rdc.dist += tmp_rdc.dist;

      // Encode the first three quadrants so later quadrants see their
      // reconstruction; the fourth is left for the final encode pass.
      if (i != 3)
        encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
                  split_subsize, pc_tree->split[i]);

      pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
                                   split_subsize);
      chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
    }
    pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    if (chosen_rdc.rate < INT_MAX) {
      chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
      chosen_rdc.rdcost =
          RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist);
    }
  }

  // If last_part is better set the partitioning to that.
  if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
    mi_8x8[0]->sb_type = bsize;
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
    chosen_rdc = last_part_rdc;
  }
  // If none was better set the partitioning to that.
  if (none_rdc.rdcost < chosen_rdc.rdcost) {
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
    chosen_rdc = none_rdc;
  }

  restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  // We must have chosen a partitioning and encoding or we'll fail later on.
  // No other opportunities for success.
  if (bsize == BLOCK_64X64)
    assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);

  if (do_recon) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
              pc_tree);
  }

  *rate = chosen_rdc.rate;
  *dist = chosen_rdc.dist;
}
// Relaxation tables indexed by an observed BLOCK_SIZE: map an observed
// minimum down a step and an observed maximum up a step, widening the
// partition search range derived from neighboring/previous blocks.
static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
  BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,  BLOCK_4X4, BLOCK_4X4,
  BLOCK_4X4,   BLOCK_8X8,   BLOCK_8X8,  BLOCK_8X8, BLOCK_16X16,
  BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
};
static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
  BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
  BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
};
  2346. // Look at all the mode_info entries for blocks that are part of this
  2347. // partition and find the min and max values for sb_type.
  2348. // At the moment this is designed to work on a 64x64 SB but could be
  2349. // adjusted to use a size parameter.
  2350. //
  2351. // The min and max are assumed to have been initialized prior to calling this
  2352. // function so repeat calls can accumulate a min and max of more than one sb64.
  2353. static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
  2354. BLOCK_SIZE *min_block_size,
  2355. BLOCK_SIZE *max_block_size,
  2356. int bs_hist[BLOCK_SIZES]) {
  2357. int sb_width_in_blocks = MI_BLOCK_SIZE;
  2358. int sb_height_in_blocks = MI_BLOCK_SIZE;
  2359. int i, j;
  2360. int index = 0;
  2361. // Check the sb_type for each block that belongs to this region.
  2362. for (i = 0; i < sb_height_in_blocks; ++i) {
  2363. for (j = 0; j < sb_width_in_blocks; ++j) {
  2364. MODE_INFO *mi = mi_8x8[index + j];
  2365. BLOCK_SIZE sb_type = mi ? mi->sb_type : 0;
  2366. bs_hist[sb_type]++;
  2367. *min_block_size = VPXMIN(*min_block_size, sb_type);
  2368. *max_block_size = VPXMAX(*max_block_size, sb_type);
  2369. }
  2370. index += xd->mi_stride;
  2371. }
  2372. }
// Next square block size less or equal than current block size.
// (Indexed by BLOCK_SIZE; non-square entries map down to the contained
// square size.)
static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
  BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,   BLOCK_8X8,
  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
  BLOCK_32X32, BLOCK_32X32, BLOCK_64X64
};
// Look at neighboring blocks and set a min and max partition size based on
// what they chose: the co-located previous-frame SB64, the left SB64, and
// the above SB64 each contribute to the observed size range, which is then
// optionally relaxed and clamped to legal sizes at the frame border.
static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
                                    MACROBLOCKD *const xd, int mi_row,
                                    int mi_col, BLOCK_SIZE *min_block_size,
                                    BLOCK_SIZE *max_block_size) {
  VP9_COMMON *const cm = &cpi->common;
  MODE_INFO **mi = xd->mi;
  const int left_in_image = !!xd->left_mi;
  const int above_in_image = !!xd->above_mi;
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  int bh, bw;
  BLOCK_SIZE min_size = BLOCK_4X4;
  BLOCK_SIZE max_size = BLOCK_64X64;
  int bs_hist[BLOCK_SIZES] = { 0 };

  // Trap case where we do not have a prediction.
  if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
    // Default "min to max" and "max to min"
    min_size = BLOCK_64X64;
    max_size = BLOCK_4X4;

    // NOTE: each call to get_sb_partition_size_range() uses the previous
    // passed in values for min and max as a starting point.
    // Find the min and max partition used in previous frame at this location
    if (cm->frame_type != KEY_FRAME) {
      MODE_INFO **prev_mi =
          &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
      get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist);
    }
    // Find the min and max partition sizes used in the left SB64
    if (left_in_image) {
      MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
      get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size,
                                  bs_hist);
    }
    // Find the min and max partition sizes used in the above SB64.
    if (above_in_image) {
      MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
      get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size,
                                  bs_hist);
    }

    // Adjust observed min and max for "relaxed" auto partition case.
    if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
      min_size = min_partition_size[min_size];
      max_size = max_partition_size[max_size];
    }
  }

  // Check border cases where max and min from neighbors may not be legal.
  max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
                                 &bh, &bw);
  // Test for blocks at the edge of the active image.
  // This may be the actual edge of the image or where there are formatting
  // bars.
  if (vp9_active_edge_sb(cpi, mi_row, mi_col)) {
    min_size = BLOCK_4X4;
  } else {
    min_size =
        VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size));
  }

  // When use_square_partition_only is true, make sure at least one square
  // partition is allowed by selecting the next smaller square size as
  // *min_block_size.
  if (cpi->sf.use_square_partition_only &&
      next_square_size[max_size] < min_size) {
    min_size = next_square_size[max_size];
  }

  *min_block_size = min_size;
  *max_block_size = max_size;
}
// TODO(jingning) refactor functions setting partition search range
// Derive [*min_bs, *max_bs] for the partition search at (mi_row, mi_col) from
// the sizes used by the co-located previous-frame blocks and the immediate
// left/above neighbors; widen the range one step when it collapses to a
// single size.
static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row,
                                int mi_col, BLOCK_SIZE bsize,
                                BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
  int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int mi_height = num_8x8_blocks_high_lookup[bsize];
  int idx, idy;

  MODE_INFO *mi;
  const int idx_str = cm->mi_stride * mi_row + mi_col;
  MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
  BLOCK_SIZE bs, min_size, max_size;

  min_size = BLOCK_64X64;
  max_size = BLOCK_4X4;

  // Scan the co-located region of the previous frame.
  if (prev_mi) {
    for (idy = 0; idy < mi_height; ++idy) {
      for (idx = 0; idx < mi_width; ++idx) {
        mi = prev_mi[idy * cm->mi_stride + idx];
        // Missing entries fall back to the current block size.
        bs = mi ? mi->sb_type : bsize;
        min_size = VPXMIN(min_size, bs);
        max_size = VPXMAX(max_size, bs);
      }
    }
  }

  // Scan the column immediately to the left.
  if (xd->left_mi) {
    for (idy = 0; idy < mi_height; ++idy) {
      mi = xd->mi[idy * cm->mi_stride - 1];
      bs = mi ? mi->sb_type : bsize;
      min_size = VPXMIN(min_size, bs);
      max_size = VPXMAX(max_size, bs);
    }
  }

  // Scan the row immediately above.
  if (xd->above_mi) {
    for (idx = 0; idx < mi_width; ++idx) {
      mi = xd->mi[idx - cm->mi_stride];
      bs = mi ? mi->sb_type : bsize;
      min_size = VPXMIN(min_size, bs);
      max_size = VPXMAX(max_size, bs);
    }
  }

  // Degenerate range: widen by one step in each direction.
  if (min_size == max_size) {
    min_size = min_partition_size[min_size];
    max_size = max_partition_size[max_size];
  }

  *min_bs = min_size;
  *max_bs = max_size;
}
// Save the current motion-vector predictors into |ctx| so a later search
// stage can restore them.
static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
}

// Restore the motion-vector predictors previously saved by store_pred_mv().
static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
}
#if CONFIG_FP_MB_STATS
// Number of 16x16 units spanned by each BLOCK_SIZE, per dimension.
const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
                                                        1, 2, 2, 2, 4, 4 };
const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
                                                        2, 1, 2, 4, 2, 4 };
// Per-block-size qindex thresholds used by the first-pass-stats-based
// partition pruning (skip / split decisions).
const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {
  0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120
};
const int qindex_split_threshold_lookup[BLOCK_SIZES] = {
  0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120
};
// Per-block-size thresholds on the number of "complex" 16x16 sub-blocks.
const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6
};
// Coarse motion direction of a first-pass 16x16 block, decoded from the
// FPMB_MOTION_* mask bits; used to measure motion-field consistency between
// neighboring blocks.
typedef enum {
  MV_ZERO = 0,
  MV_LEFT = 1,
  MV_UP = 2,
  MV_RIGHT = 3,
  MV_DOWN = 4,
  MV_INVALID
} MOTION_DIRECTION;
  2522. static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
  2523. if (fp_byte & FPMB_MOTION_ZERO_MASK) {
  2524. return MV_ZERO;
  2525. } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
  2526. return MV_LEFT;
  2527. } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
  2528. return MV_RIGHT;
  2529. } else if (fp_byte & FPMB_MOTION_UP_MASK) {
  2530. return MV_UP;
  2531. } else {
  2532. return MV_DOWN;
  2533. }
  2534. }
  2535. static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
  2536. MOTION_DIRECTION that_mv) {
  2537. if (this_mv == that_mv) {
  2538. return 0;
  2539. } else {
  2540. return abs(this_mv - that_mv) == 2 ? 2 : 1;
  2541. }
  2542. }
  2543. #endif
// Calculate the score used in machine-learning based partition search early
// termination.
//
// Evaluates a linear classifier (weights in 'classifiers', normalization
// parameters in 'train_mean'/'train_stdm', selected per block size via
// 'offset') on features of the PARTITION_NONE result: rate, distortion,
// motion-vector magnitude, neighbor/last-frame partitioning context, luma
// EOB count and base qindex.  The caller treats a negative score as
// "terminate further partition search".
static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                            PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                            BLOCK_SIZE bsize) {
  const double *clf;
  const double *mean;
  const double *sd;
  // L1 magnitude of the best mode's first motion vector.
  const int mag_mv =
      abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
  const int left_in_image = !!xd->left_mi;
  const int above_in_image = !!xd->above_mi;
  // Co-located mode info in the previous visible frame.
  MODE_INFO **prev_mi =
      &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row];
  int above_par = 0;  // above_partitioning
  int left_par = 0;   // left_partitioning
  int last_par = 0;   // last_partitioning
  BLOCK_SIZE context_size;
  double score;
  int offset = 0;
  // Only square block sizes are supported by the classifier.
  assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
  // Context features: 2 = neighbor split smaller than bsize,
  // 1 = same size, 0 = larger (or unavailable).
  if (above_in_image) {
    context_size = xd->above_mi->sb_type;
    if (context_size < bsize)
      above_par = 2;
    else if (context_size == bsize)
      above_par = 1;
  }
  if (left_in_image) {
    context_size = xd->left_mi->sb_type;
    if (context_size < bsize)
      left_par = 2;
    else if (context_size == bsize)
      left_par = 1;
  }
  if (prev_mi) {
    context_size = prev_mi[0]->sb_type;
    if (context_size < bsize)
      last_par = 2;
    else if (context_size == bsize)
      last_par = 1;
  }
  // Select the per-block-size classifier: 8 weights per size, ordered
  // 64x64, 32x32, 16x16.
  if (bsize == BLOCK_64X64)
    offset = 0;
  else if (bsize == BLOCK_32X32)
    offset = 8;
  else if (bsize == BLOCK_16X16)
    offset = 16;
  // early termination score calculation
  // NOTE(review): some features are divided by sd[] while others are
  // multiplied by it — presumably train_stdm stores reciprocal standard
  // deviations for the multiplied entries; confirm against the training
  // pipeline before changing.
  clf = &classifiers[offset];
  mean = &train_mean[offset];
  sd = &train_stdm[offset];
  score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) +
          clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) +
          clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) +
          clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) * sd[3]) +
          clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) +
          clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) +
          clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7];
  return score;
}
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
//
// Recursive rate-distortion based partition search.  For the block of size
// 'bsize' at (mi_row, mi_col) it evaluates PARTITION_NONE, PARTITION_SPLIT
// (recursing into the four quadrants), PARTITION_HORZ and PARTITION_VERT,
// keeps the cheapest result in *rd_cost and pc_tree->partitioning, and
// finally encodes the winning partitioning.  'best_rd' is an upper bound on
// the RD cost used for pruning; several speed features further restrict
// which partition types are tried.
static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                              TileDataEnc *tile_data, TOKENEXTRA **tp,
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
                              RD_COST *rd_cost, int64_t best_rd,
                              PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Half the block's width in mode-info (8x8) units; offset to the second
  // half for HORZ/VERT/SPLIT partitions.
  const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
  int i;
  const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
  BLOCK_SIZE subsize;
  RD_COST this_rdc, sum_rdc, best_rdc;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;
  INTERP_FILTER pred_interp_filter;

  // Override skipping rectangular partition operations for edge blocks
  const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
  const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;

  BLOCK_SIZE min_size = x->min_partition_size;
  BLOCK_SIZE max_size = x->max_partition_size;

#if CONFIG_FP_MB_STATS
  unsigned int src_diff_var = UINT_MAX;
  int none_complexity = 0;
#endif

  int partition_none_allowed = !force_horz_split && !force_vert_split;
  // Rectangular partitions are only tried when chroma subsampling does not
  // rule them out (yss/xss comparison) and the block is at least 8x8.
  int partition_horz_allowed =
      !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
  int partition_vert_allowed =
      !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;

  int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
  int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;

  (void)*tp_orig;

  assert(num_8x8_blocks_wide_lookup[bsize] ==
         num_8x8_blocks_high_lookup[bsize]);

  // Adjust dist breakout threshold according to the partition size.
  dist_breakout_thr >>=
      8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
  rate_breakout_thr *= num_pels_log2_lookup[bsize];

  vp9_rd_cost_init(&this_rdc);
  vp9_rd_cost_init(&sum_rdc);
  vp9_rd_cost_reset(&best_rdc);
  best_rdc.rdcost = best_rd;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ &&
      cpi->oxcf.aq_mode != LOOKAHEAD_AQ)
    x->mb_energy = vp9_block_energy(cpi, x, bsize);

  // Chessboard pattern: on alternating blocks/frames, narrow the allowed
  // partition range from the neighborhood before searching.
  if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
    int cb_partition_search_ctrl =
        ((pc_tree->index == 0 || pc_tree->index == 3) +
         get_chessboard_index(cm->current_video_frame)) &
        0x1;

    if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
      set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
  }

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (cpi->sf.auto_min_max_partition_size) {
    partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
    partition_horz_allowed &=
        ((bsize <= max_size && bsize > min_size) || force_horz_split);
    partition_vert_allowed &=
        ((bsize <= max_size && bsize > min_size) || force_vert_split);
    do_split &= bsize > min_size;
  }

  // Square-only mode: keep rectangular partitions only where an edge (or
  // active-map edge in SVC) forces them.
  if (cpi->sf.use_square_partition_only &&
      bsize > cpi->sf.use_square_only_threshold) {
    if (cpi->use_svc) {
      if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
        partition_horz_allowed &= force_horz_split;
      if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
        partition_vert_allowed &= force_vert_split;
    } else {
      partition_horz_allowed &= force_horz_split;
      partition_vert_allowed &= force_vert_split;
    }
  }

  // Entropy/partition contexts are mutated by each candidate search; save
  // them here and restore after every partition type is evaluated.
  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

#if CONFIG_FP_MB_STATS
  if (cpi->use_fp_mb_stats) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
                                                  mi_col, bsize);
  }
#endif

#if CONFIG_FP_MB_STATS
  // Decide whether we shall split directly and skip searching NONE by using
  // the first pass block statistics
  if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
      partition_none_allowed && src_diff_var > 4 &&
      cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
    int mb_row = mi_row >> 1;
    int mb_col = mi_col >> 1;
    int mb_row_end =
        VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
    int mb_col_end =
        VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
    int r, c;

    // compute a complexity measure, basically measure inconsistency of motion
    // vectors obtained from the first pass in the current block
    for (r = mb_row; r < mb_row_end; r++) {
      for (c = mb_col; c < mb_col_end; c++) {
        const int mb_index = r * cm->mb_cols + c;

        MOTION_DIRECTION this_mv;
        MOTION_DIRECTION right_mv;
        MOTION_DIRECTION bottom_mv;

        this_mv =
            get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);

        // to its right
        if (c != mb_col_end - 1) {
          right_mv = get_motion_direction_fp(
              cpi->twopass.this_frame_mb_stats[mb_index + 1]);
          none_complexity += get_motion_inconsistency(this_mv, right_mv);
        }

        // to its bottom
        if (r != mb_row_end - 1) {
          bottom_mv = get_motion_direction_fp(
              cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
          none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
        }

        // do not count its left and top neighbors to avoid double counting
      }
    }

    if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
      partition_none_allowed = 0;
    }
  }
#endif

  // PARTITION_NONE
  if (partition_none_allowed) {
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
                     best_rdc.rdcost);
    if (this_rdc.rate != INT_MAX) {
      if (bsize >= BLOCK_8X8) {
        this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
        this_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
      }

      if (this_rdc.rdcost < best_rdc.rdcost) {
        MODE_INFO *mi = xd->mi[0];

        best_rdc = this_rdc;
        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;

        if (!cpi->sf.ml_partition_search_early_termination) {
          // If all y, u, v transform blocks in this partition are skippable,
          // and the dist & rate are within the thresholds, the partition search
          // is terminated for current branch of the partition search tree.
          if (!x->e_mbd.lossless && ctx->skippable &&
              ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
               (best_rdc.dist < dist_breakout_thr &&
                best_rdc.rate < rate_breakout_thr))) {
            do_split = 0;
            do_rect = 0;
          }
        } else {
          // Currently, the machine-learning based partition search early
          // termination is only used while bsize is 16x16, 32x32 or 64x64,
          // VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
          if (!x->e_mbd.lossless &&
              !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
              ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
            if (compute_score(cm, xd, ctx, mi_row, mi_col, bsize) < 0.0) {
              do_split = 0;
              do_rect = 0;
            }
          }
        }

#if CONFIG_FP_MB_STATS
        // Check if every 16x16 first pass block statistics has zero
        // motion and the corresponding first pass residue is small enough.
        // If that is the case, check the difference variance between the
        // current frame and the last frame. If the variance is small enough,
        // stop further splitting in RD optimization
        if (cpi->use_fp_mb_stats && do_split != 0 &&
            cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
          int mb_row = mi_row >> 1;
          int mb_col = mi_col >> 1;
          int mb_row_end =
              VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
          int mb_col_end =
              VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
          int r, c;
          int skip = 1;
          for (r = mb_row; r < mb_row_end; r++) {
            for (c = mb_col; c < mb_col_end; c++) {
              const int mb_index = r * cm->mb_cols + c;
              if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
                    FPMB_MOTION_ZERO_MASK) ||
                  !(cpi->twopass.this_frame_mb_stats[mb_index] &
                    FPMB_ERROR_SMALL_MASK)) {
                skip = 0;
                break;
              }
            }
            if (skip == 0) {
              break;
            }
          }
          if (skip) {
            if (src_diff_var == UINT_MAX) {
              set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
              src_diff_var = get_sby_perpixel_diff_variance(
                  cpi, &x->plane[0].src, mi_row, mi_col, bsize);
            }
            if (src_diff_var < 8) {
              do_split = 0;
              do_rect = 0;
            }
          }
        }
#endif
      }
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // store estimated motion vector
  if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx);

  // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an
  // intra block and used for context purposes.
  if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) {
    pred_interp_filter = EIGHTTAP;
  } else {
    pred_interp_filter = ctx->mic.interp_filter;
  }

  // PARTITION_SPLIT
  // TODO(jingning): use the motion vectors given by the above search as
  // the starting point of motion search in the following partition type check.
  if (do_split) {
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    if (bsize == BLOCK_8X8) {
      // 8x8 splits to sub-8x8: a single leaf search covers all four.
      i = 4;
      if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
        pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;

      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                       pc_tree->leaf_split[0], best_rdc.rdcost);

      if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX;
    } else {
      // Recurse into the four quadrants; stop as soon as the running sum
      // exceeds the best cost found so far.
      for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
        const int x_idx = (i & 1) * mi_step;
        const int y_idx = (i >> 1) * mi_step;

        if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
          continue;

        if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);

        pc_tree->split[i]->index = i;
        rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
                          mi_col + x_idx, subsize, &this_rdc,
                          best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);

        if (this_rdc.rate == INT_MAX) {
          sum_rdc.rdcost = INT64_MAX;
          break;
        } else {
          sum_rdc.rate += this_rdc.rate;
          sum_rdc.dist += this_rdc.dist;
          sum_rdc.rdcost += this_rdc.rdcost;
        }
      }
    }

    // i == 4 means no quadrant was aborted (off-frame quadrants are skipped
    // but still counted by the loop increment).
    if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
      sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
      sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);

      if (sum_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = sum_rdc;
        pc_tree->partitioning = PARTITION_SPLIT;

        // Rate and distortion based partition search termination clause.
        if (!cpi->sf.ml_partition_search_early_termination &&
            !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
                                   (best_rdc.dist < dist_breakout_thr &&
                                    best_rdc.rate < rate_breakout_thr))) {
          do_rect = 0;
        }
      }
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if ((cpi->sf.less_rectangular_check) &&
          ((bsize > cpi->sf.use_square_only_threshold) ||
           (best_rdc.dist < dist_breakout_thr)))
        do_rect &= !partition_none_allowed;
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // PARTITION_HORZ
  if (partition_horz_allowed &&
      (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
    subsize = get_subsize(bsize, PARTITION_HORZ);
    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
        partition_none_allowed)
      pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter;
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                     &pc_tree->horizontal[0], best_rdc.rdcost);

    if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
        bsize > BLOCK_8X8) {
      // NOTE: this 'ctx' intentionally shadows the outer PARTITION_NONE
      // context; the top half's state seeds the bottom half's search.
      PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
      update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
      encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);

      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
          partition_none_allowed)
        pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
                       subsize, &pc_tree->horizontal[1],
                       best_rdc.rdcost - sum_rdc.rdcost);
      if (this_rdc.rate == INT_MAX) {
        sum_rdc.rdcost = INT64_MAX;
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
      sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      if (sum_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = sum_rdc;
        pc_tree->partitioning = PARTITION_HORZ;

        if ((cpi->sf.less_rectangular_check) &&
            (bsize > cpi->sf.use_square_only_threshold))
          do_rect = 0;
      }
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // PARTITION_VERT
  if (partition_vert_allowed &&
      (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
    subsize = get_subsize(bsize, PARTITION_VERT);

    if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
        partition_none_allowed)
      pc_tree->vertical[0].pred_interp_filter = pred_interp_filter;
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                     &pc_tree->vertical[0], best_rdc.rdcost);
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
        bsize > BLOCK_8X8) {
      update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
      encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
                        &pc_tree->vertical[0]);

      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
      if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
          partition_none_allowed)
        pc_tree->vertical[1].pred_interp_filter = pred_interp_filter;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
                       subsize, &pc_tree->vertical[1],
                       best_rdc.rdcost - sum_rdc.rdcost);
      if (this_rdc.rate == INT_MAX) {
        sum_rdc.rdcost = INT64_MAX;
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
      sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      if (sum_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = sum_rdc;
        pc_tree->partitioning = PARTITION_VERT;
      }
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // TODO(jbb): This code added so that we avoid static analysis
  // warning related to the fact that best_rd isn't used after this
  // point.  This code should be refactored so that the duplicate
  // checks occur in some sub function and thus are used...
  (void)best_rd;
  *rd_cost = best_rdc;

  // Encode the chosen partitioning; index == 3 (last quadrant of a parent
  // split) is deferred to the parent's encode_sb call.
  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
      pc_tree->index != 3) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
              pc_tree);
  }

  if (bsize == BLOCK_64X64) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}
// Encode one 64x64-superblock row of a tile with the RD-based search.
// For each superblock, synchronizes with row-multithreading neighbors,
// resets per-SB state, then dispatches to fixed partitioning, variance-based
// partitioning, or the full recursive rd_pick_partition depending on the
// active speed features and segmentation.
static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, int mi_row,
                             TOKENEXTRA **tp) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int mi_col_start = tile_info->mi_col_start;
  const int mi_col_end = tile_info->mi_col_end;
  int mi_col;
  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  const int num_sb_cols =
      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
  int sb_col_in_tile;

  // Initialize the left context for the new SB row
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  // Code each SB in the row
  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
       mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) {
    const struct segmentation *const seg = &cm->seg;
    int dummy_rate;
    int64_t dummy_dist;
    RD_COST dummy_rdc;
    int i;
    int seg_skip = 0;

    const int idx_str = cm->mi_stride * mi_row + mi_col;
    MODE_INFO **mi = cm->mi_grid_visible + idx_str;

    // Wait for the dependent SB in the row above (row-based multithreading).
    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                   sb_col_in_tile);

    if (sf->adaptive_pred_interp_filter) {
      // Reset cached prediction filters for all leaf and tree contexts.
      for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;

      for (i = 0; i < 64; ++i) {
        td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
        td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
      }
    }

    vp9_zero(x->pred_mv);
    td->pc_root->index = 0;

    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    x->source_variance = UINT_MAX;
    if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
      // Fixed partitioning (or forced 64x64 for skip segments).
      const BLOCK_SIZE bsize =
          seg_skip ? BLOCK_64X64 : sf->always_this_block_size;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else if (cpi->partition_search_skippable_frame) {
      // Variance-derived fixed partitioning for skippable frames.
      BLOCK_SIZE bsize;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
      set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
               cm->frame_type != KEY_FRAME) {
      // Source-variance based partitioning on inter frames.
      choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
      rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                       &dummy_rate, &dummy_dist, 1, td->pc_root);
    } else {
      // Full recursive RD partition search.
      // If required set upper and lower partition size limits
      if (sf->auto_min_max_partition_size) {
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
                                &x->min_partition_size, &x->max_partition_size);
      }
      rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rdc, INT64_MAX, td->pc_root);
    }
    // Signal completion of this SB to the row below.
    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile, num_sb_cols);
  }
}
// Per-frame initialization of macroblock encode state: attach source
// planes, configure plane subsampling, and clear the above
// entropy/segmentation contexts for the whole frame width.
static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->td.mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);

  // Copy data over into macro block data structures.
  vp9_setup_src_planes(x, cpi->Source, 0, 0);

  vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);

  // Note: this memset assumes above_context[0], [1] and [2]
  // are allocated as part of the same buffer.
  memset(xd->above_context[0], 0,
         sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE);
  memset(xd->above_seg_context, 0,
         sizeof(*xd->above_seg_context) * aligned_mi_cols);
}
  3095. static int check_dual_ref_flags(VP9_COMP *cpi) {
  3096. const int ref_flags = cpi->ref_frame_flags;
  3097. if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
  3098. return 0;
  3099. } else {
  3100. return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) +
  3101. !!(ref_flags & VP9_ALT_FLAG)) >= 2;
  3102. }
  3103. }
  3104. static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
  3105. int mi_row, mi_col;
  3106. const int mis = cm->mi_stride;
  3107. MODE_INFO **mi_ptr = cm->mi_grid_visible;
  3108. for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
  3109. for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
  3110. if (mi_ptr[mi_col]->tx_size > max_tx_size)
  3111. mi_ptr[mi_col]->tx_size = max_tx_size;
  3112. }
  3113. }
  3114. }
  3115. static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
  3116. if (frame_is_intra_only(&cpi->common))
  3117. return INTRA_FRAME;
  3118. else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
  3119. return ALTREF_FRAME;
  3120. else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
  3121. return GOLDEN_FRAME;
  3122. else
  3123. return LAST_FRAME;
  3124. }
  3125. static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
  3126. if (xd->lossless) return ONLY_4X4;
  3127. if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode)
  3128. return ALLOW_16X16;
  3129. if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
  3130. return ALLOW_32X32;
  3131. else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
  3132. cpi->sf.tx_size_search_method == USE_TX_8X8)
  3133. return TX_MODE_SELECT;
  3134. else
  3135. return cpi->common.tx_mode;
  3136. }
  3137. static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
  3138. RD_COST *rd_cost, BLOCK_SIZE bsize,
  3139. PICK_MODE_CONTEXT *ctx) {
  3140. if (bsize < BLOCK_16X16)
  3141. vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
  3142. else
  3143. vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
  3144. }
// Non-RD mode decision for a single block.  Saves the above/left entropy
// contexts, runs the appropriate fast mode picker (intra for key frames,
// seg-skip shortcut, inter for >=8x8, sub-8x8 inter otherwise), propagates
// the result across the block's mode-info grid, then restores the contexts.
static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
                                MACROBLOCK *const x, int mi_row, int mi_col,
                                RD_COST *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8);  // processing unit block size
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
  int plane;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mi = xd->mi[0];
  mi->sb_type = bsize;

  // Save the per-plane above/left contexts so they can be restored after
  // the mode search mutates them.
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *pd = &xd->plane[plane];
    memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
           (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
    memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
           (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
  }

  // Cyclic-refresh AQ: use the boosted rdmult for boosted segments.
  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
    if (cyclic_refresh_segment_id_boosted(mi->segment_id))
      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);

  if (cm->frame_type == KEY_FRAME)
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
  else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
    set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
  else if (bsize >= BLOCK_8X8)
    vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
  else
    vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);

  duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);

  // Restore the saved contexts.
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *pd = &xd->plane[plane];
    memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
           (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
    memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
           (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
  }

  if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost);

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}
// Recursively copy the mode decisions cached in the PC_TREE back into the
// frame's mode-info grid for the block at (mi_row, mi_col).  For each
// partition leaf, the stored mic/mbmi_ext are written to the grid and
// replicated across the leaf's area; PARTITION_SPLIT recurses into the
// four quadrants.  Off-frame positions are skipped.
static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
                              int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
  MACROBLOCKD *xd = &x->e_mbd;
  // hbs: half the block size in mode-info units.
  int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  PARTITION_TYPE partition = pc_tree->partitioning;
  BLOCK_SIZE subsize = get_subsize(bsize, partition);

  assert(bsize >= BLOCK_8X8);

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  switch (partition) {
    case PARTITION_NONE:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->none.mic;
      *(x->mbmi_ext) = pc_tree->none.mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
      break;
    case PARTITION_VERT:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->vertical[0].mic;
      *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);

      // Right half, only if it is inside the frame.
      if (mi_col + hbs < cm->mi_cols) {
        set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs);
        *(xd->mi[0]) = pc_tree->vertical[1].mic;
        *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext;
        duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
      }
      break;
    case PARTITION_HORZ:
      set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
      *(xd->mi[0]) = pc_tree->horizontal[0].mic;
      *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext;
      duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);

      // Bottom half, only if it is inside the frame.
      if (mi_row + hbs < cm->mi_rows) {
        set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col);
        *(xd->mi[0]) = pc_tree->horizontal[1].mic;
        *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext;
        duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
      }
      break;
    case PARTITION_SPLIT: {
      fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]);
      fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
                        pc_tree->split[1]);
      fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
                        pc_tree->split[2]);
      fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
                        pc_tree->split[3]);
      break;
    }
    default: break;
  }
}
  3243. // Reset the prediction pixel ready flag recursively.
  3244. static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
  3245. pc_tree->none.pred_pixel_ready = 0;
  3246. pc_tree->horizontal[0].pred_pixel_ready = 0;
  3247. pc_tree->horizontal[1].pred_pixel_ready = 0;
  3248. pc_tree->vertical[0].pred_pixel_ready = 0;
  3249. pc_tree->vertical[1].pred_pixel_ready = 0;
  3250. if (bsize > BLOCK_8X8) {
  3251. BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
  3252. int i;
  3253. for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize);
  3254. }
  3255. }
// Non-RD (real-time) recursive partition search for the block at
// (mi_row, mi_col). Evaluates PARTITION_NONE, PARTITION_SPLIT and, when
// allowed, PARTITION_HORZ/VERT, keeps the cheapest result in *rd_cost and
// records the winning layout in pc_tree. On success the decisions are
// written back into the mode-info array; when do_recon is set the block is
// re-encoded via encode_sb_rt() (output enabled only at the 64x64 root).
static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data, TOKENEXTRA **tp,
                                 int mi_row, int mi_col, BLOCK_SIZE bsize,
                                 RD_COST *rd_cost, int do_recon,
                                 int64_t best_rd, PC_TREE *pc_tree) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  // ms: half the block size in mode-info (8x8) units.
  const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
  int i;
  BLOCK_SIZE subsize = bsize;
  RD_COST this_rdc, sum_rdc, best_rdc;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;
  // Override skipping rectangular partition operations for edge blocks.
  // A forced split means the block extends past the frame boundary in that
  // direction, so the square PARTITION_NONE choice is not available.
  const int force_horz_split = (mi_row + ms >= cm->mi_rows);
  const int force_vert_split = (mi_col + ms >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;
  int partition_none_allowed = !force_horz_split && !force_vert_split;
  int partition_horz_allowed =
      !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
  int partition_vert_allowed =
      !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
  // tp_orig is referenced only by the asserts at the end of this function;
  // this keeps it "used" in builds where asserts compile out.
  (void)*tp_orig;

  // Avoid checking for rectangular partitions for speed >= 6.
  if (cpi->oxcf.speed >= 6) do_rect = 0;

  assert(num_8x8_blocks_wide_lookup[bsize] ==
         num_8x8_blocks_high_lookup[bsize]);

  vp9_rd_cost_init(&sum_rdc);
  vp9_rd_cost_reset(&best_rdc);
  best_rdc.rdcost = best_rd;

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (sf->auto_min_max_partition_size) {
    partition_none_allowed &=
        (bsize <= x->max_partition_size && bsize >= x->min_partition_size);
    partition_horz_allowed &=
        ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
         force_horz_split);
    partition_vert_allowed &=
        ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
         force_vert_split);
    do_split &= bsize > x->min_partition_size;
  }
  if (sf->use_square_partition_only) {
    partition_horz_allowed &= force_horz_split;
    partition_vert_allowed &= force_vert_split;
  }

  // Prediction pixels are ready only when no further partitioning of this
  // block will be tried.
  ctx->pred_pixel_ready =
      !(partition_vert_allowed || partition_horz_allowed || do_split);

  // PARTITION_NONE
  if (partition_none_allowed) {
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
                        ctx);
    // Cache the picked mode so it can be restored if NONE wins.
    ctx->mic = *xd->mi[0];
    ctx->mbmi_ext = *x->mbmi_ext;
    ctx->skip_txfm[0] = x->skip_txfm[0];
    ctx->skip = x->skip;

    if (this_rdc.rate != INT_MAX) {
      int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
      this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
      this_rdc.rdcost =
          RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
      if (this_rdc.rdcost < best_rdc.rdcost) {
        // Breakout: if NONE is already cheap enough (scaled by block size),
        // skip the split and rectangular searches entirely.
        int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
        int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;

        dist_breakout_thr >>=
            8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

        rate_breakout_thr *= num_pels_log2_lookup[bsize];

        best_rdc = this_rdc;
        if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;

        if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
            this_rdc.dist < dist_breakout_thr) {
          do_split = 0;
          do_rect = 0;
        }
      }
    }
  }

  // store estimated motion vector
  store_pred_mv(x, ctx);

  // PARTITION_SPLIT
  if (do_split) {
    int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
    sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    // Recurse into the four quadrants, bailing out early once the running
    // cost exceeds the best found so far.
    for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
      const int x_idx = (i & 1) * ms;
      const int y_idx = (i >> 1) * ms;

      // Skip quadrants that fall entirely outside the frame.
      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
        continue;
      load_pred_mv(x, ctx);
      nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
                           mi_col + x_idx, subsize, &this_rdc, 0,
                           best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);

      if (this_rdc.rate == INT_MAX) {
        // A failed quadrant invalidates the whole split candidate.
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_SPLIT;
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if (sf->less_rectangular_check) do_rect &= !partition_none_allowed;
    }
  }

  // PARTITION_HORZ
  if (partition_horz_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_HORZ);
    if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
    pc_tree->horizontal[0].pred_pixel_ready = 1;
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->horizontal[0]);

    pc_tree->horizontal[0].mic = *xd->mi[0];
    pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->horizontal[0].skip = x->skip;

    // Search the bottom half only if the top half is still competitive and
    // the bottom half lies inside the frame.
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
      load_pred_mv(x, ctx);
      pc_tree->horizontal[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc,
                          subsize, &pc_tree->horizontal[1]);

      pc_tree->horizontal[1].mic = *xd->mi[0];
      pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_HORZ;
    } else {
      // HORZ lost: its contexts' prediction pixels are no longer valid.
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  // PARTITION_VERT
  if (partition_vert_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_VERT);
    if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
    pc_tree->vertical[0].pred_pixel_ready = 1;
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->vertical[0]);
    pc_tree->vertical[0].mic = *xd->mi[0];
    pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->vertical[0].skip = x->skip;

    // Search the right half only if the left half is still competitive and
    // the right half lies inside the frame.
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
      load_pred_mv(x, ctx);
      pc_tree->vertical[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc,
                          subsize, &pc_tree->vertical[1]);
      pc_tree->vertical[1].mic = *xd->mi[0];
      pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_VERT;
    } else {
      // VERT lost: its contexts' prediction pixels are no longer valid.
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  *rd_cost = best_rdc;

  if (best_rdc.rate == INT_MAX) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }

  // update mode info array
  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree);

  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
                 pc_tree);
  }

  if (bsize == BLOCK_64X64 && do_recon) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}
// Non-RD partition selection that starts from an existing partitioning
// (read from the mi grid) and selectively refines it: 32x32 and 16x16
// blocks matching the conditions below are re-searched with
// nonrd_pick_partition(), everything else reuses the given partition and
// just picks modes per sub-block, accumulating rate/dist into *rd_cost.
static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
                                   TileDataEnc *tile_data, MODE_INFO **mi,
                                   TOKENEXTRA **tp, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize, int output_enabled,
                                   RD_COST *rd_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;
  RD_COST this_rdc;
  // Threshold sub-size above which a partitioned 32x32 is re-searched;
  // lowered to 8x8 when source-SAD-adaptive partitioning is on.
  BLOCK_SIZE subsize_ref =
      (cpi->sf.adapt_partition_source_sad) ? BLOCK_8X8 : BLOCK_16X16;

  vp9_rd_cost_reset(&this_rdc);
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  // Derive the existing partition type from the stored sb_type.
  subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
    // Unsplit 32x32: re-search down to 16x16.
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_16X16;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
             subsize >= subsize_ref) {
    // Partitioned 32x32 with large enough sub-blocks: full re-search.
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
    // Partitioned 16x16: re-search between 8x8 and 16x16.
    x->max_partition_size = BLOCK_16X16;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
                         0, INT64_MAX, pc_tree);
  } else {
    // Keep the existing partition; just pick modes for each sub-block.
    switch (partition) {
      case PARTITION_NONE:
        pc_tree->none.pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->none);
        pc_tree->none.mic = *xd->mi[0];
        pc_tree->none.mbmi_ext = *x->mbmi_ext;
        pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
        pc_tree->none.skip = x->skip;
        break;
      case PARTITION_VERT:
        pc_tree->vertical[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->vertical[0]);
        pc_tree->vertical[0].mic = *xd->mi[0];
        pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[0].skip = x->skip;
        if (mi_col + hbs < cm->mi_cols) {
          pc_tree->vertical[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
                              &this_rdc, subsize, &pc_tree->vertical[1]);
          pc_tree->vertical[1].mic = *xd->mi[0];
          pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->vertical[1].skip = x->skip;
          // Accumulate only when both halves produced valid costs.
          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
            rd_cost->rate += this_rdc.rate;
            rd_cost->dist += this_rdc.dist;
          }
        }
        break;
      case PARTITION_HORZ:
        pc_tree->horizontal[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
                            &pc_tree->horizontal[0]);
        pc_tree->horizontal[0].mic = *xd->mi[0];
        pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[0].skip = x->skip;
        if (mi_row + hbs < cm->mi_rows) {
          pc_tree->horizontal[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
                              &this_rdc, subsize, &pc_tree->horizontal[1]);
          pc_tree->horizontal[1].mic = *xd->mi[0];
          pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->horizontal[1].skip = x->skip;
          // Accumulate only when both halves produced valid costs.
          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
            rd_cost->rate += this_rdc.rate;
            rd_cost->dist += this_rdc.dist;
          }
        }
        break;
      case PARTITION_SPLIT:
        subsize = get_subsize(bsize, PARTITION_SPLIT);
        // First quadrant accumulates directly into rd_cost; the other three
        // go through this_rdc and are merged in when valid.
        nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                               subsize, output_enabled, rd_cost,
                               pc_tree->split[0]);
        nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
                               mi_col + hbs, subsize, output_enabled, &this_rdc,
                               pc_tree->split[1]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                               mi_row + hbs, mi_col, subsize, output_enabled,
                               &this_rdc, pc_tree->split[2]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                               mi_row + hbs, mi_col + hbs, subsize,
                               output_enabled, &this_rdc, pc_tree->split[3]);
        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
          rd_cost->rate += this_rdc.rate;
          rd_cost->dist += this_rdc.dist;
        }
        break;
      default: assert(0 && "Invalid partition type."); break;
    }
  }

  // Encode the whole superblock once the 64x64 root has been processed.
  if (bsize == BLOCK_64X64 && output_enabled)
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
}
// Non-RD encoding of a block using a fixed, pre-determined partitioning
// (read from the mi grid): no partition search is performed — each leaf gets
// its modes picked and is encoded immediately via encode_b_rt().
static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, MODE_INFO **mi,
                                TOKENEXTRA **tp, int mi_row, int mi_col,
                                BLOCK_SIZE bsize, int output_enabled,
                                RD_COST *dummy_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  // Derive the partition type from the stored sb_type.
  subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  if (output_enabled && bsize != BLOCK_4X4) {
    int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    td->counts->partition[ctx][partition]++;
  }

  switch (partition) {
    case PARTITION_NONE:
      pc_tree->none.pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->none);
      pc_tree->none.mic = *xd->mi[0];
      pc_tree->none.mbmi_ext = *x->mbmi_ext;
      pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
      pc_tree->none.skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->none);
      break;
    case PARTITION_VERT:
      pc_tree->vertical[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->vertical[0]);
      pc_tree->vertical[0].mic = *xd->mi[0];
      pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->vertical[0]);
      // Right half: only if inside the frame and the block is larger than
      // 8x8 (8x8 VERT has no separately coded second half here).
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        pc_tree->vertical[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost,
                            subsize, &pc_tree->vertical[1]);
        pc_tree->vertical[1].mic = *xd->mi[0];
        pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
                    output_enabled, subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      pc_tree->horizontal[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->horizontal[0]);
      pc_tree->horizontal[0].mic = *xd->mi[0];
      pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->horizontal[0]);
      // Bottom half: same in-frame and >8x8 conditions as VERT above.
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        pc_tree->horizontal[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost,
                            subsize, &pc_tree->horizontal[1]);
        pc_tree->horizontal[1].mic = *xd->mi[0];
        pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
                    output_enabled, subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      if (bsize == BLOCK_8X8) {
        // 8x8 split uses the dedicated leaf context instead of recursing.
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                            subsize, pc_tree->leaf_split[0]);
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                    subsize, pc_tree->leaf_split[0]);
      } else {
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize,
                            output_enabled, dummy_cost, pc_tree->split[0]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
                            mi_col + hbs, subsize, output_enabled, dummy_cost,
                            pc_tree->split[1]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                            mi_row + hbs, mi_col, subsize, output_enabled,
                            dummy_cost, pc_tree->split[2]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                            mi_row + hbs, mi_col + hbs, subsize, output_enabled,
                            dummy_cost, pc_tree->split[3]);
      }
      break;
    default: assert(0 && "Invalid partition type."); break;
  }

  // SPLIT recursion updates the context at its leaves; everything else
  // (including the 8x8 leaf-split case) updates it here.
  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
// Encode one superblock row of a tile in the non-RD (real-time) path.
// For each 64x64 superblock: reset per-SB state, optionally compute source
// SAD / skin detection, choose the partition-search strategy from the speed
// features (and segmentation / source-SAD overrides), then dispatch to the
// matching partitioner. Row-multithreading sync hooks bracket each SB.
static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, int mi_row,
                                TOKENEXTRA **tp) {
  SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_col_start = tile_info->mi_col_start;
  const int mi_col_end = tile_info->mi_col_end;
  int mi_col;
  const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
  const int num_sb_cols =
      get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
  int sb_col_in_tile;

  // Initialize the left context for the new SB row
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  // Code each SB in the row
  for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
       mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) {
    const struct segmentation *const seg = &cm->seg;
    RD_COST dummy_rdc;
    const int idx_str = cm->mi_stride * mi_row + mi_col;
    MODE_INFO **mi = cm->mi_grid_visible + idx_str;
    PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
    BLOCK_SIZE bsize = BLOCK_64X64;
    int seg_skip = 0;

    // Wait until the dependency for this SB is satisfied (row MT).
    (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                   sb_col_in_tile);

    if (cpi->use_skin_detection) {
      vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col);
    }

    // Reset per-superblock state.
    x->source_variance = UINT_MAX;
    vp9_zero(x->pred_mv);
    vp9_rd_cost_init(&dummy_rdc);
    x->color_sensitivity[0] = 0;
    x->color_sensitivity[1] = 0;
    x->sb_is_skin = 0;
    x->skip_low_source_sad = 0;
    x->lowvar_highsumdiff = 0;
    x->content_state_sb = 0;
    x->sb_use_mv_part = 0;
    x->sb_mvcol_part = 0;
    x->sb_mvrow_part = 0;
    x->sb_pickmode_part = 0;

    // Segmentation override: a SEG_LVL_SKIP segment forces the fixed
    // partitioning path.
    if (seg->enabled) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
      if (seg_skip) {
        partition_search_type = FIXED_PARTITION;
      }
    }

    // Source-SAD override: high-SAD SBs on golden-refresh VBR frames switch
    // to the reference-partition search.
    if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) {
      int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3);
      int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
      int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2);
      if (sf->adapt_partition_source_sad &&
          (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref &&
           source_sad > sf->adapt_partition_thresh &&
           cpi->refresh_golden_frame))
        partition_search_type = REFERENCE_PARTITION;
    }

    // Set the partition type of the 64X64 block
    switch (partition_search_type) {
      case VAR_BASED_PARTITION:
        // TODO(jingning, marpan): The mode decision and encoding process
        // support both intra and inter sub8x8 block coding for RTC mode.
        // Tune the thresholds accordingly to use sub8x8 block coding for
        // coding performance improvement.
        choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case SOURCE_VAR_BASED_PARTITION:
        set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case FIXED_PARTITION:
        // seg_skip keeps the default 64x64 size; otherwise the speed
        // feature dictates the uniform block size.
        if (!seg_skip) bsize = sf->always_this_block_size;
        set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        break;
      case REFERENCE_PARTITION:
        x->sb_pickmode_part = 1;
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        // Use nonrd_pick_partition on scene-cut for VBR mode.
        // nonrd_pick_partition does not support 4x4 partition, so avoid it
        // on key frame for now.
        if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
             cm->frame_type != KEY_FRAME)) {
          // Use lower max_partition_size for low resoultions.
          if (cm->width <= 352 && cm->height <= 288)
            x->max_partition_size = BLOCK_32X32;
          else
            x->max_partition_size = BLOCK_64X64;
          x->min_partition_size = BLOCK_8X8;
          nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
                               BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
                               td->pc_root);
        } else {
          choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
          // TODO(marpan): Seems like nonrd_select_partition does not support
          // 4x4 partition. Since 4x4 is used on key frame, use this switch
          // for now.
          if (cm->frame_type == KEY_FRAME)
            nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                                BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
          else
            nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                                   BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
        }
        break;
      default: assert(0); break;
    }

    // Signal completion of this SB to dependent threads (row MT).
    (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile, num_sb_cols);
  }
}
// End of the RTC (non-RD, real-time) encoding path.
  3825. static INLINE uint32_t variance(const diff *const d) {
  3826. return d->sse - (uint32_t)(((int64_t)d->sum * d->sum) >> 8);
  3827. }
  3828. #if CONFIG_VP9_HIGHBITDEPTH
  3829. static INLINE uint32_t variance_highbd(diff *const d) {
  3830. const int64_t var = (int64_t)d->sse - (((int64_t)d->sum * d->sum) >> 8);
  3831. return (var >= 0) ? (uint32_t)var : 0;
  3832. }
  3833. #endif // CONFIG_VP9_HIGHBITDEPTH
// Build a histogram of per-16x16 source-vs-last-source variances and derive
// cpi->source_var_thresh from it. Returns 0 when a threshold was set, -1 on
// an unsupported bit depth, and sf->search_type_check_frequency when no
// threshold could be derived (too many high-variance blocks).
static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const VP9_COMMON *const cm = &cpi->common;

  const uint8_t *src = cpi->Source->y_buffer;
  const uint8_t *last_src = cpi->Last_Source->y_buffer;
  const int src_stride = cpi->Source->y_stride;
  const int last_stride = cpi->Last_Source->y_stride;

  // Pick cutoff threshold
  const int cutoff = (VPXMIN(cm->width, cm->height) >= 720)
                         ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100)
                         : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
  DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
  diff *var16 = cpi->source_diff_var;

  int sum = 0;
  int i, j;

  memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));

  // Walk the frame one 16x16 macroblock at a time, computing the variance
  // of the difference against the previous source frame.
  for (i = 0; i < cm->mb_rows; i++) {
    for (j = 0; j < cm->mb_cols; j++) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth) {
        switch (cm->bit_depth) {
          case VPX_BITS_8:
            vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
                                     &var16->sse, &var16->sum);
            var16->var = variance(var16);
            break;
          case VPX_BITS_10:
            vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
            var16->var = variance_highbd(var16);
            break;
          case VPX_BITS_12:
            vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
                                      &var16->sse, &var16->sum);
            var16->var = variance_highbd(var16);
            break;
          default:
            assert(0 &&
                   "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
                   " or VPX_BITS_12");
            return -1;
        }
      } else {
        vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
                        &var16->sum);
        var16->var = variance(var16);
      }
#else
      vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
                      &var16->sum);
      var16->var = variance(var16);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      // Bucket the variance; everything at/above the background cap goes
      // into the last bin.
      if (var16->var >= VAR_HIST_MAX_BG_VAR)
        hist[VAR_HIST_BINS - 1]++;
      else
        hist[var16->var / VAR_HIST_FACTOR]++;

      src += 16;
      last_src += 16;
      var16++;
    }
    // Step both pointers down one macroblock row.
    src = src - cm->mb_cols * 16 + 16 * src_stride;
    last_src = last_src - cm->mb_cols * 16 + 16 * last_stride;
  }

  cpi->source_var_thresh = 0;

  // Derive the threshold: the smallest bin boundary below which at least
  // `cutoff` blocks fall, provided the top bin itself is under the cutoff.
  if (hist[VAR_HIST_BINS - 1] < cutoff) {
    for (i = 0; i < VAR_HIST_BINS - 1; i++) {
      sum += hist[i];

      if (sum > cutoff) {
        cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR;
        return 0;
      }
    }
  }

  return sf->search_type_check_frequency;
}
  3909. static void source_var_based_partition_search_method(VP9_COMP *cpi) {
  3910. VP9_COMMON *const cm = &cpi->common;
  3911. SPEED_FEATURES *const sf = &cpi->sf;
  3912. if (cm->frame_type == KEY_FRAME) {
  3913. // For key frame, use SEARCH_PARTITION.
  3914. sf->partition_search_type = SEARCH_PARTITION;
  3915. } else if (cm->intra_only) {
  3916. sf->partition_search_type = FIXED_PARTITION;
  3917. } else {
  3918. if (cm->last_width != cm->width || cm->last_height != cm->height) {
  3919. if (cpi->source_diff_var) vpx_free(cpi->source_diff_var);
  3920. CHECK_MEM_ERROR(cm, cpi->source_diff_var,
  3921. vpx_calloc(cm->MBs, sizeof(diff)));
  3922. }
  3923. if (!cpi->frames_till_next_var_check)
  3924. cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);
  3925. if (cpi->frames_till_next_var_check > 0) {
  3926. sf->partition_search_type = FIXED_PARTITION;
  3927. cpi->frames_till_next_var_check--;
  3928. }
  3929. }
  3930. }
  3931. static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
  3932. unsigned int intra_count = 0, inter_count = 0;
  3933. int j;
  3934. for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
  3935. intra_count += td->counts->intra_inter[j][0];
  3936. inter_count += td->counts->intra_inter[j][1];
  3937. }
  3938. return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME &&
  3939. cm->show_frame;
  3940. }
  3941. void vp9_init_tile_data(VP9_COMP *cpi) {
  3942. VP9_COMMON *const cm = &cpi->common;
  3943. const int tile_cols = 1 << cm->log2_tile_cols;
  3944. const int tile_rows = 1 << cm->log2_tile_rows;
  3945. int tile_col, tile_row;
  3946. TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
  3947. TOKENLIST *tplist = cpi->tplist[0][0];
  3948. int tile_tok = 0;
  3949. int tplist_count = 0;
  3950. if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
  3951. if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
  3952. CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows *
  3953. sizeof(*cpi->tile_data)));
  3954. cpi->allocated_tiles = tile_cols * tile_rows;
  3955. for (tile_row = 0; tile_row < tile_rows; ++tile_row)
  3956. for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
  3957. TileDataEnc *tile_data =
  3958. &cpi->tile_data[tile_row * tile_cols + tile_col];
  3959. int i, j;
  3960. for (i = 0; i < BLOCK_SIZES; ++i) {
  3961. for (j = 0; j < MAX_MODES; ++j) {
  3962. tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
  3963. tile_data->mode_map[i][j] = j;
  3964. }
  3965. }
  3966. #if CONFIG_MULTITHREAD
  3967. tile_data->row_base_thresh_freq_fact = NULL;
  3968. #endif
  3969. }
  3970. }
  3971. for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
  3972. for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
  3973. TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  3974. TileInfo *tile_info = &this_tile->tile_info;
  3975. vp9_tile_init(tile_info, cm, tile_row, tile_col);
  3976. cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
  3977. pre_tok = cpi->tile_tok[tile_row][tile_col];
  3978. tile_tok = allocated_tokens(*tile_info);
  3979. cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
  3980. tplist = cpi->tplist[tile_row][tile_col];
  3981. tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
  3982. }
  3983. }
  3984. }
  3985. void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row,
  3986. int tile_col, int mi_row) {
  3987. VP9_COMMON *const cm = &cpi->common;
  3988. const int tile_cols = 1 << cm->log2_tile_cols;
  3989. TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  3990. const TileInfo *const tile_info = &this_tile->tile_info;
  3991. TOKENEXTRA *tok = NULL;
  3992. int tile_sb_row;
  3993. int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1;
  3994. tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >>
  3995. MI_BLOCK_SIZE_LOG2;
  3996. get_start_tok(cpi, tile_row, tile_col, mi_row, &tok);
  3997. cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok;
  3998. if (cpi->sf.use_nonrd_pick_mode)
  3999. encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
  4000. else
  4001. encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
  4002. cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok;
  4003. cpi->tplist[tile_row][tile_col][tile_sb_row].count =
  4004. (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop -
  4005. cpi->tplist[tile_row][tile_col][tile_sb_row].start);
  4006. assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <=
  4007. get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols));
  4008. (void)tile_mb_cols;
  4009. }
  4010. void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
  4011. int tile_col) {
  4012. VP9_COMMON *const cm = &cpi->common;
  4013. const int tile_cols = 1 << cm->log2_tile_cols;
  4014. TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  4015. const TileInfo *const tile_info = &this_tile->tile_info;
  4016. const int mi_row_start = tile_info->mi_row_start;
  4017. const int mi_row_end = tile_info->mi_row_end;
  4018. int mi_row;
  4019. for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
  4020. vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
  4021. }
  4022. static void encode_tiles(VP9_COMP *cpi) {
  4023. VP9_COMMON *const cm = &cpi->common;
  4024. const int tile_cols = 1 << cm->log2_tile_cols;
  4025. const int tile_rows = 1 << cm->log2_tile_rows;
  4026. int tile_col, tile_row;
  4027. vp9_init_tile_data(cpi);
  4028. for (tile_row = 0; tile_row < tile_rows; ++tile_row)
  4029. for (tile_col = 0; tile_col < tile_cols; ++tile_col)
  4030. vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col);
  4031. }
  4032. #if CONFIG_FP_MB_STATS
  4033. static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
  4034. VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {
  4035. uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start +
  4036. cm->current_video_frame * cm->MBs * sizeof(uint8_t);
  4037. if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF;
  4038. *this_frame_mb_stats = mb_stats_in;
  4039. return 1;
  4040. }
  4041. #endif
// Runs the main encoding pass over the frame: resets counters, configures
// per-frame state (lossless mode, 4x4 transform function pointers, quantizer,
// RD and ME constants, prev-frame MV reuse), then encodes all tiles either
// serially, tile-threaded, or row-multithreaded, timing the tile encode.
static void encode_frame_internal(VP9_COMP *cpi) {
  SPEED_FEATURES *const sf = &cpi->sf;
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;

  // Reset per-frame symbol counts and RD counters.
  vp9_zero(*td->counts);
  vp9_zero(cpi->td.rd_counts);

  // Lossless coding requires a zero base quantizer and all delta-Qs zero.
  xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 &&
                 cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
#if CONFIG_VP9_HIGHBITDEPTH
  // Lossless mode pairs the Walsh-Hadamard transform with its inverse;
  // lossy mode uses the DCT.
  if (cm->use_highbitdepth)
    x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
  else
    x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
  x->highbd_inv_txfm_add =
      xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
#else
  x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
  // Coefficient optimization is disabled in lossless mode.
  if (xd->lossless) x->optimize = 0;

  cm->tx_mode = select_tx_mode(cpi, xd);

  vp9_frame_init_quantizer(cpi);

  vp9_initialize_rd_consts(cpi);
  vp9_initialize_me_consts(cpi, x, cm->base_qindex);
  init_encode_frame_mb_context(cpi);
  // Previous-frame MVs are reusable only when the last frame had the same
  // dimensions, was shown, was not intra-only, and error resilience is off.
  cm->use_prev_frame_mvs =
      !cm->error_resilient_mode && cm->width == cm->last_width &&
      cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;
  // Special case: set prev_mi to NULL when the previous mode info
  // context cannot be used.
  cm->prev_mi =
      cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL;

  x->quant_fp = cpi->sf.use_quant_fp;
  vp9_zero(x->skip_txfm);
  if (sf->use_nonrd_pick_mode) {
    // Initialize internal buffer pointers for rtc coding, where non-RD
    // mode decision is used and hence no buffer pointer swap needed.
    int i;
    struct macroblock_plane *const p = x->plane;
    struct macroblockd_plane *const pd = xd->plane;
    PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none;

    for (i = 0; i < MAX_MB_PLANE; ++i) {
      p[i].coeff = ctx->coeff_pbuf[i][0];
      p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
      pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
      p[i].eobs = ctx->eobs_pbuf[i][0];
    }
    vp9_zero(x->zcoeff_blk);

    // NOTE(review): drops the GOLDEN reference on the frame right after a
    // golden refresh (non-SVC, non-lagged-VBR only) -- presumably because
    // golden then duplicates the last reference; confirm against rate
    // control behavior.
    if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 &&
        !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) &&
        !cpi->use_svc)
      cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);

    if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
      source_var_based_partition_search_method(cpi);
  }

  {
    // Time the tile-encode portion of the frame.
    struct vpx_usec_timer emr_timer;
    vpx_usec_timer_start(&emr_timer);

#if CONFIG_FP_MB_STATS
    if (cpi->use_fp_mb_stats) {
      input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
                       &cpi->twopass.this_frame_mb_stats);
    }
#endif

    if (!cpi->row_mt) {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
      // If allowed, encoding tiles in parallel with one thread handling one
      // tile when row based multi-threading is disabled.
      if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
        vp9_encode_tiles_mt(cpi);
      else
        encode_tiles(cpi);
    } else {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
      vp9_encode_tiles_row_mt(cpi);
    }

    vpx_usec_timer_mark(&emr_timer);
    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
  }

  sf->skip_encode_frame =
      sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0;

#if 0
  // Keep record of the total distortion this time around for future use
  cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
  4134. static INTERP_FILTER get_interp_filter(
  4135. const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
  4136. if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
  4137. threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
  4138. threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
  4139. return EIGHTTAP_SMOOTH;
  4140. } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
  4141. threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
  4142. return EIGHTTAP_SHARP;
  4143. } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
  4144. return EIGHTTAP;
  4145. } else {
  4146. return SWITCHABLE;
  4147. }
  4148. }
  4149. static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
  4150. VP9_COMMON *const cm = &cpi->common;
  4151. MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
  4152. struct segmentation *const seg = &cm->seg;
  4153. int mi_row, mi_col;
  4154. int sum_delta = 0;
  4155. int map_index = 0;
  4156. int qdelta_index;
  4157. int segment_id;
  4158. for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
  4159. MODE_INFO **mi_8x8 = mi_8x8_ptr;
  4160. for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) {
  4161. segment_id = mi_8x8[0]->segment_id;
  4162. qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
  4163. sum_delta += qdelta_index;
  4164. map_index++;
  4165. }
  4166. mi_8x8_ptr += cm->mi_stride;
  4167. }
  4168. return sum_delta / (cm->mi_rows * cm->mi_cols);
  4169. }
// Top-level frame encode: decides whether compound prediction is allowed,
// runs the internal encoding pass, and (when frame_parameter_update is on)
// updates the running prediction-mode and filter thresholds, then narrows
// reference_mode and tx_mode based on the symbol counts actually observed.
void vp9_encode_frame(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;

  // In the longer term the encoder should be generalized to match the
  // decoder such that we allow compound where one of the 3 buffers has a
  // different sign bias and that buffer is then the fixed ref. However, this
  // requires further work in the rd loop. For now the only supported encoder
  // side behavior is where the ALT ref buffer has opposite sign bias to
  // the other two.
  if (!frame_is_intra_only(cm)) {
    if ((cm->ref_frame_sign_bias[ALTREF_FRAME] ==
         cm->ref_frame_sign_bias[GOLDEN_FRAME]) ||
        (cm->ref_frame_sign_bias[ALTREF_FRAME] ==
         cm->ref_frame_sign_bias[LAST_FRAME])) {
      cpi->allow_comp_inter_inter = 0;
    } else {
      cpi->allow_comp_inter_inter = 1;
      // ALTREF is the fixed reference; LAST/GOLDEN are the variable pair.
      cm->comp_fixed_ref = ALTREF_FRAME;
      cm->comp_var_ref[0] = LAST_FRAME;
      cm->comp_var_ref[1] = GOLDEN_FRAME;
    }
  }

  if (cpi->sf.frame_parameter_update) {
    int i;
    RD_OPT *const rd_opt = &cpi->rd;
    FRAME_COUNTS *counts = cpi->td.counts;
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    // This code does a single RD pass over the whole frame assuming
    // either compound, single or hybrid prediction as per whatever has
    // worked best for that type of frame in the past.
    // It also predicts whether another coding mode would have worked
    // better than this coding mode. If that is the case, it remembers
    // that for subsequent frames.
    // It also does the same analysis for transform size selection.
    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
    int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
    const int is_alt_ref = frame_type == ALTREF_FRAME;

    /* prediction (compound, single or hybrid) mode selection */
    if (is_alt_ref || !cpi->allow_comp_inter_inter)
      cm->reference_mode = SINGLE_REFERENCE;
    else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
             mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] &&
             check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
      cm->reference_mode = COMPOUND_REFERENCE;
    else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT])
      cm->reference_mode = SINGLE_REFERENCE;
    else
      cm->reference_mode = REFERENCE_MODE_SELECT;

    if (cm->interp_filter == SWITCHABLE)
      cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);

    encode_frame_internal(cpi);

    // Blend this frame's per-MB RD diffs into the running thresholds
    // (exponential moving average with weight 1/2).
    for (i = 0; i < REFERENCE_MODES; ++i)
      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      int single_count_zero = 0;
      int comp_count_zero = 0;

      for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
        single_count_zero += counts->comp_inter[i][0];
        comp_count_zero += counts->comp_inter[i][1];
      }

      // If only one of the two modes was ever used, commit to it and clear
      // the counts so the signalled mode matches the bitstream.
      if (comp_count_zero == 0) {
        cm->reference_mode = SINGLE_REFERENCE;
        vp9_zero(counts->comp_inter);
      } else if (single_count_zero == 0) {
        cm->reference_mode = COMPOUND_REFERENCE;
        vp9_zero(counts->comp_inter);
      }
    }

    if (cm->tx_mode == TX_MODE_SELECT) {
      int count4x4 = 0;
      int count8x8_lp = 0, count8x8_8x8p = 0;
      int count16x16_16x16p = 0, count16x16_lp = 0;
      int count32x32 = 0;

      // Tally how often each transform size was chosen in each partition
      // context (_lp = chosen inside a larger partition, _NxNp = chosen at
      // its own partition size).
      for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
        count4x4 += counts->tx.p32x32[i][TX_4X4];
        count4x4 += counts->tx.p16x16[i][TX_4X4];
        count4x4 += counts->tx.p8x8[i][TX_4X4];

        count8x8_lp += counts->tx.p32x32[i][TX_8X8];
        count8x8_lp += counts->tx.p16x16[i][TX_8X8];
        count8x8_8x8p += counts->tx.p8x8[i][TX_8X8];

        count16x16_16x16p += counts->tx.p16x16[i][TX_16X16];
        count16x16_lp += counts->tx.p32x32[i][TX_16X16];
        count32x32 += counts->tx.p32x32[i][TX_32X32];
      }
      // Downgrade TX_MODE_SELECT to a fixed tx mode when the counts show
      // only one size range was ever used.
      if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
          count32x32 == 0) {
        cm->tx_mode = ALLOW_8X8;
        reset_skip_tx_size(cm, TX_8X8);
      } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
                 count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
        cm->tx_mode = ONLY_4X4;
        reset_skip_tx_size(cm, TX_4X4);
      } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_32X32;
      } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_16X16;
        reset_skip_tx_size(cm, TX_16X16);
      }
    }
  } else {
    cm->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
  }

  // If segmented AQ is enabled compute the average AQ weighting.
  if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) &&
      (cm->seg.update_map || cm->seg.update_data)) {
    cm->seg.aq_av_offset = compute_frame_aq_offset(cpi);
  }
}
  4281. static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
  4282. const PREDICTION_MODE y_mode = mi->mode;
  4283. const PREDICTION_MODE uv_mode = mi->uv_mode;
  4284. const BLOCK_SIZE bsize = mi->sb_type;
  4285. if (bsize < BLOCK_8X8) {
  4286. int idx, idy;
  4287. const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  4288. const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  4289. for (idy = 0; idy < 2; idy += num_4x4_h)
  4290. for (idx = 0; idx < 2; idx += num_4x4_w)
  4291. ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode];
  4292. } else {
  4293. ++counts->y_mode[size_group_lookup[bsize]][y_mode];
  4294. }
  4295. ++counts->uv_mode[y_mode][uv_mode];
  4296. }
  4297. static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi,
  4298. int mi_row, int mi_col, BLOCK_SIZE bsize) {
  4299. const VP9_COMMON *const cm = &cpi->common;
  4300. MV mv = mi->mv[0].as_mv;
  4301. const int bw = num_8x8_blocks_wide_lookup[bsize];
  4302. const int bh = num_8x8_blocks_high_lookup[bsize];
  4303. const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
  4304. const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
  4305. const int block_index = mi_row * cm->mi_cols + mi_col;
  4306. int x, y;
  4307. for (y = 0; y < ymis; y++)
  4308. for (x = 0; x < xmis; x++) {
  4309. int map_offset = block_index + y * cm->mi_cols + x;
  4310. if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
  4311. if (abs(mv.row) < 8 && abs(mv.col) < 8) {
  4312. if (cpi->consec_zero_mv[map_offset] < 255)
  4313. cpi->consec_zero_mv[map_offset]++;
  4314. } else {
  4315. cpi->consec_zero_mv[map_offset] = 0;
  4316. }
  4317. }
  4318. }
  4319. }
// Performs the final encode of one block using the mode decision stored in
// xd->mi[0]: builds the prediction (intra or inter), transforms/quantizes
// the residual, tokenizes coefficients, and, when output is enabled, updates
// mode/tx-size counts and cyclic-refresh / zero-MV bookkeeping.
static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const int seg_skip =
      segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP);

  // Recode can be skipped only for >=8x8 blocks with a fixed tx size and
  // AQ modes that don't perturb the quantizer per block.
  x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 &&
                   cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
                   cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
                   cpi->sf.allow_skip_recode;

  if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode)
    memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  // Skip coefficient optimization if this context was already coded once.
  x->skip_optimize = ctx->is_coded;
  ctx->is_coded = 1;
  x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
  x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
                    x->q_index < QIDX_SKIP_THRESH);

  if (x->skip_encode) return;

  if (!is_inter_block(mi)) {
    // Intra path: encode each plane with intra prediction, then tokenize.
    int plane;
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
        (xd->above_mi == NULL || xd->left_mi == NULL) &&
        need_top_left[mi->uv_mode])
      assert(0);
#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    mi->skip = 1;
    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
      vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
    if (output_enabled) sum_intra_stats(td->counts, mi);
    vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
                    VPXMAX(bsize, BLOCK_8X8));
  } else {
    // Inter path: set up reference planes, build the inter prediction,
    // encode the residual, then tokenize.
    int ref;
    const int is_compound = has_second_ref(mi);
    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    for (ref = 0; ref < 1 + is_compound; ++ref) {
      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]);
      assert(cfg != NULL);
      vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           &xd->block_refs[ref]->sf);
    }
    // The luma predictor may already be in place from the pick-mode stage
    // (reuse_inter_pred_sby); rebuild it otherwise or when seg-skip forces it.
    if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
      vp9_build_inter_predictors_sby(xd, mi_row, mi_col,
                                     VPXMAX(bsize, BLOCK_8X8));

    vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col,
                                    VPXMAX(bsize, BLOCK_8X8));

    vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
    vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
                    VPXMAX(bsize, BLOCK_8X8));
  }

  if (seg_skip) {
    assert(mi->skip);
  }

  if (output_enabled) {
    // Count the chosen tx size when it was actually signalled; otherwise
    // normalize mi->tx_size to the value implied by the fixed tx mode.
    if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 &&
        !(is_inter_block(mi) && mi->skip)) {
      ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
                      &td->counts->tx)[mi->tx_size];
    } else {
      // The new intra coding scheme requires no change of transform size
      if (is_inter_block(mi)) {
        mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
                             max_txsize_lookup[bsize]);
      } else {
        mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4;
      }
    }

    ++td->counts->tx.tx_totals[mi->tx_size];
    ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
    if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
      vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
    if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0)
      update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
  }
}