aoptx86.pas (167 KB) — note: the original paste included a collapsed line-number gutter here (runs of concatenated digits); it carried no content and has been removed.
{
    Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe

    This unit contains the peephole optimizer.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  31. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  32. protected
  33. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  34. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  35. { checks whether reading the value in reg1 depends on the value of reg2. This
  36. is very similar to SuperRegisterEquals, except it takes into account that
  37. R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
  38. depend on the value in AH). }
  39. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  40. procedure DebugMsg(const s : string; p : tai);inline;
  41. class function IsExitCode(p : tai) : boolean;
  42. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  43. procedure RemoveLastDeallocForFuncRes(p : tai);
  44. function DoSubAddOpt(var p : tai) : Boolean;
  45. function PrePeepholeOptSxx(var p : tai) : boolean;
  46. function PrePeepholeOptIMUL(var p : tai) : boolean;
  47. function OptPass1AND(var p : tai) : boolean;
  48. function OptPass1VMOVAP(var p : tai) : boolean;
  49. function OptPass1VOP(var p : tai) : boolean;
  50. function OptPass1MOV(var p : tai) : boolean;
  51. function OptPass1Movx(var p : tai) : boolean;
  52. function OptPass1MOVAP(var p : tai) : boolean;
  53. function OptPass1MOVXX(var p : tai) : boolean;
  54. function OptPass1OP(var p : tai) : boolean;
  55. function OptPass1LEA(var p : tai) : boolean;
  56. function OptPass1Sub(var p : tai) : boolean;
  57. function OptPass1SHLSAL(var p : tai) : boolean;
  58. function OptPass1SETcc(var p: tai): boolean;
  59. function OptPass1FSTP(var p: tai): boolean;
  60. function OptPass1FLD(var p: tai): boolean;
  61. function OptPass2MOV(var p : tai) : boolean;
  62. function OptPass2Imul(var p : tai) : boolean;
  63. function OptPass2Jmp(var p : tai) : boolean;
  64. function OptPass2Jcc(var p : tai) : boolean;
  65. function PostPeepholeOptMov(var p : tai) : Boolean;
  66. {$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
  67. function PostPeepholeOptMovzx(var p : tai) : Boolean;
  68. function PostPeepholeOptXor(var p : tai) : Boolean;
  69. {$endif}
  70. function PostPeepholeOptCmp(var p : tai) : Boolean;
  71. function PostPeepholeOptTestOr(var p : tai) : Boolean;
  72. function PostPeepholeOptCall(var p : tai) : Boolean;
  73. function PostPeepholeOptLea(var p : tai) : Boolean;
  74. procedure OptReferences;
  75. end;
  76. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  77. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  78. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  79. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  80. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  81. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  82. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  83. function RefsEqual(const r1, r2: treference): boolean;
  84. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  85. { returns true, if ref is a reference using only the registers passed as base and index
  86. and having an offset }
  87. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  88. {$ifdef DEBUG_AOPTCPU}
  89. const
  90. SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
  91. {$else DEBUG_AOPTCPU}
  92. { Empty strings help the optimizer to remove string concatenations that won't
  93. ever appear to the user on release builds. [Kit] }
  94. const
  95. SPeepholeOptimization = '';
  96. {$endif DEBUG_AOPTCPU}
  97. implementation
  98. uses
  99. cutils,verbose,
  100. globals,
  101. cpuinfo,
  102. procinfo,
  103. aasmbase,
  104. aoptutils,
  105. symconst,symsym,
  106. cgx86,
  107. itcpugas;
  108. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  109. begin
  110. result :=
  111. (instr.typ = ait_instruction) and
  112. (taicpu(instr).opcode = op) and
  113. ((opsize = []) or (taicpu(instr).opsize in opsize));
  114. end;
  115. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  116. begin
  117. result :=
  118. (instr.typ = ait_instruction) and
  119. ((taicpu(instr).opcode = op1) or
  120. (taicpu(instr).opcode = op2)
  121. ) and
  122. ((opsize = []) or (taicpu(instr).opsize in opsize));
  123. end;
  124. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  125. begin
  126. result :=
  127. (instr.typ = ait_instruction) and
  128. ((taicpu(instr).opcode = op1) or
  129. (taicpu(instr).opcode = op2) or
  130. (taicpu(instr).opcode = op3)
  131. ) and
  132. ((opsize = []) or (taicpu(instr).opsize in opsize));
  133. end;
  134. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  135. const opsize : topsizes) : boolean;
  136. var
  137. op : TAsmOp;
  138. begin
  139. result:=false;
  140. for op in ops do
  141. begin
  142. if (instr.typ = ait_instruction) and
  143. (taicpu(instr).opcode = op) and
  144. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  145. begin
  146. result:=true;
  147. exit;
  148. end;
  149. end;
  150. end;
  151. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  152. begin
  153. result := (oper.typ = top_reg) and (oper.reg = reg);
  154. end;
  155. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  156. begin
  157. result := (oper.typ = top_const) and (oper.val = a);
  158. end;
  159. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  160. begin
  161. result := oper1.typ = oper2.typ;
  162. if result then
  163. case oper1.typ of
  164. top_const:
  165. Result:=oper1.val = oper2.val;
  166. top_reg:
  167. Result:=oper1.reg = oper2.reg;
  168. top_ref:
  169. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  170. else
  171. internalerror(2013102801);
  172. end
  173. end;
  174. function RefsEqual(const r1, r2: treference): boolean;
  175. begin
  176. RefsEqual :=
  177. (r1.offset = r2.offset) and
  178. (r1.segment = r2.segment) and (r1.base = r2.base) and
  179. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  180. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  181. (r1.relsymbol = r2.relsymbol) and
  182. (r1.volatility=[]) and
  183. (r2.volatility=[]);
  184. end;
  185. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  186. begin
  187. Result:=(ref.offset=0) and
  188. (ref.scalefactor in [0,1]) and
  189. (ref.segment=NR_NO) and
  190. (ref.symbol=nil) and
  191. (ref.relsymbol=nil) and
  192. ((base=NR_INVALID) or
  193. (ref.base=base)) and
  194. ((index=NR_INVALID) or
  195. (ref.index=index)) and
  196. (ref.volatility=[]);
  197. end;
  198. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  199. begin
  200. Result:=(ref.scalefactor in [0,1]) and
  201. (ref.segment=NR_NO) and
  202. (ref.symbol=nil) and
  203. (ref.relsymbol=nil) and
  204. ((base=NR_INVALID) or
  205. (ref.base=base)) and
  206. ((index=NR_INVALID) or
  207. (ref.index=index)) and
  208. (ref.volatility=[]);
  209. end;
  210. function InstrReadsFlags(p: tai): boolean;
  211. begin
  212. InstrReadsFlags := true;
  213. case p.typ of
  214. ait_instruction:
  215. if InsProp[taicpu(p).opcode].Ch*
  216. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  217. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  218. Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
  219. exit;
  220. ait_label:
  221. exit;
  222. else
  223. ;
  224. end;
  225. InstrReadsFlags := false;
  226. end;
  227. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  228. begin
  229. Result:=RegReadByInstruction(reg,hp);
  230. end;
  231. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  232. var
  233. p: taicpu;
  234. opcount: longint;
  235. begin
  236. RegReadByInstruction := false;
  237. if hp.typ <> ait_instruction then
  238. exit;
  239. p := taicpu(hp);
  240. case p.opcode of
  241. A_CALL:
  242. regreadbyinstruction := true;
  243. A_IMUL:
  244. case p.ops of
  245. 1:
  246. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  247. (
  248. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  249. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  250. );
  251. 2,3:
  252. regReadByInstruction :=
  253. reginop(reg,p.oper[0]^) or
  254. reginop(reg,p.oper[1]^);
  255. end;
  256. A_MUL:
  257. begin
  258. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  259. (
  260. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  261. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  262. );
  263. end;
  264. A_IDIV,A_DIV:
  265. begin
  266. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  267. (
  268. (getregtype(reg)=R_INTREGISTER) and
  269. (
  270. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  271. )
  272. );
  273. end;
  274. else
  275. begin
  276. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  277. begin
  278. RegReadByInstruction := false;
  279. exit;
  280. end;
  281. for opcount := 0 to p.ops-1 do
  282. if (p.oper[opCount]^.typ = top_ref) and
  283. RegInRef(reg,p.oper[opcount]^.ref^) then
  284. begin
  285. RegReadByInstruction := true;
  286. exit
  287. end;
  288. { special handling for SSE MOVSD }
  289. if (p.opcode=A_MOVSD) and (p.ops>0) then
  290. begin
  291. if p.ops<>2 then
  292. internalerror(2017042702);
  293. regReadByInstruction := reginop(reg,p.oper[0]^) or
  294. (
  295. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  296. );
  297. exit;
  298. end;
  299. with insprop[p.opcode] do
  300. begin
  301. if getregtype(reg)=R_INTREGISTER then
  302. begin
  303. case getsupreg(reg) of
  304. RS_EAX:
  305. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  306. begin
  307. RegReadByInstruction := true;
  308. exit
  309. end;
  310. RS_ECX:
  311. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  312. begin
  313. RegReadByInstruction := true;
  314. exit
  315. end;
  316. RS_EDX:
  317. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  318. begin
  319. RegReadByInstruction := true;
  320. exit
  321. end;
  322. RS_EBX:
  323. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  324. begin
  325. RegReadByInstruction := true;
  326. exit
  327. end;
  328. RS_ESP:
  329. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  330. begin
  331. RegReadByInstruction := true;
  332. exit
  333. end;
  334. RS_EBP:
  335. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  336. begin
  337. RegReadByInstruction := true;
  338. exit
  339. end;
  340. RS_ESI:
  341. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  342. begin
  343. RegReadByInstruction := true;
  344. exit
  345. end;
  346. RS_EDI:
  347. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  348. begin
  349. RegReadByInstruction := true;
  350. exit
  351. end;
  352. end;
  353. end;
  354. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  355. begin
  356. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  357. begin
  358. case p.condition of
  359. C_A,C_NBE, { CF=0 and ZF=0 }
  360. C_BE,C_NA: { CF=1 or ZF=1 }
  361. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  362. C_AE,C_NB,C_NC, { CF=0 }
  363. C_B,C_NAE,C_C: { CF=1 }
  364. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  365. C_NE,C_NZ, { ZF=0 }
  366. C_E,C_Z: { ZF=1 }
  367. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  368. C_G,C_NLE, { ZF=0 and SF=OF }
  369. C_LE,C_NG: { ZF=1 or SF<>OF }
  370. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  371. C_GE,C_NL, { SF=OF }
  372. C_L,C_NGE: { SF<>OF }
  373. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  374. C_NO, { OF=0 }
  375. C_O: { OF=1 }
  376. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  377. C_NP,C_PO, { PF=0 }
  378. C_P,C_PE: { PF=1 }
  379. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  380. C_NS, { SF=0 }
  381. C_S: { SF=1 }
  382. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  383. else
  384. internalerror(2017042701);
  385. end;
  386. if RegReadByInstruction then
  387. exit;
  388. end;
  389. case getsubreg(reg) of
  390. R_SUBW,R_SUBD,R_SUBQ:
  391. RegReadByInstruction :=
  392. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  393. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  394. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  395. R_SUBFLAGCARRY:
  396. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  397. R_SUBFLAGPARITY:
  398. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  399. R_SUBFLAGAUXILIARY:
  400. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  401. R_SUBFLAGZERO:
  402. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  403. R_SUBFLAGSIGN:
  404. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  405. R_SUBFLAGOVERFLOW:
  406. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  407. R_SUBFLAGINTERRUPT:
  408. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  409. R_SUBFLAGDIRECTION:
  410. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  411. else
  412. internalerror(2017042601);
  413. end;
  414. exit;
  415. end;
  416. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  417. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  418. (p.oper[0]^.reg=p.oper[1]^.reg) then
  419. exit;
  420. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  421. begin
  422. RegReadByInstruction := true;
  423. exit
  424. end;
  425. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  426. begin
  427. RegReadByInstruction := true;
  428. exit
  429. end;
  430. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  431. begin
  432. RegReadByInstruction := true;
  433. exit
  434. end;
  435. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  436. begin
  437. RegReadByInstruction := true;
  438. exit
  439. end;
  440. end;
  441. end;
  442. end;
  443. end;
  444. {$ifdef DEBUG_AOPTCPU}
  445. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  446. begin
  447. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  448. end;
  449. function debug_tostr(i: tcgint): string; inline;
  450. begin
  451. Result := tostr(i);
  452. end;
  453. function debug_regname(r: TRegister): string; inline;
  454. begin
  455. Result := '%' + std_regname(r);
  456. end;
  457. { Debug output function - creates a string representation of an operator }
  458. function debug_operstr(oper: TOper): string;
  459. begin
  460. case oper.typ of
  461. top_const:
  462. Result := '$' + debug_tostr(oper.val);
  463. top_reg:
  464. Result := debug_regname(oper.reg);
  465. top_ref:
  466. begin
  467. if oper.ref^.offset <> 0 then
  468. Result := debug_tostr(oper.ref^.offset) + '('
  469. else
  470. Result := '(';
  471. if (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO) then
  472. begin
  473. Result := Result + debug_regname(oper.ref^.base);
  474. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  475. Result := Result + ',' + debug_regname(oper.ref^.index);
  476. end
  477. else
  478. if (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO) then
  479. Result := Result + debug_regname(oper.ref^.index);
  480. if (oper.ref^.scalefactor > 1) then
  481. Result := Result + ',' + debug_tostr(oper.ref^.scalefactor) + ')'
  482. else
  483. Result := Result + ')';
  484. end;
  485. else
  486. Result := '[UNKNOWN]';
  487. end;
  488. end;
  489. function debug_op2str(opcode: tasmop): string; inline;
  490. begin
  491. Result := std_op2str[opcode];
  492. end;
  493. function debug_opsize2str(opsize: topsize): string; inline;
  494. begin
  495. Result := gas_opsize2str[opsize];
  496. end;
  497. {$else DEBUG_AOPTCPU}
{ No-op stub: peephole debug messages are only emitted when the
  compiler is built with DEBUG_AOPTCPU defined. }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
  end;
  501. function debug_tostr(i: tcgint): string; inline;
  502. begin
  503. Result := '';
  504. end;
  505. function debug_regname(r: TRegister): string; inline;
  506. begin
  507. Result := '';
  508. end;
  509. function debug_operstr(oper: TOper): string; inline;
  510. begin
  511. Result := '';
  512. end;
  513. function debug_op2str(opcode: tasmop): string; inline;
  514. begin
  515. Result := '';
  516. end;
  517. function debug_opsize2str(opsize: topsize): string; inline;
  518. begin
  519. Result := '';
  520. end;
  521. {$endif DEBUG_AOPTCPU}
  522. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  523. begin
  524. if not SuperRegistersEqual(reg1,reg2) then
  525. exit(false);
  526. if getregtype(reg1)<>R_INTREGISTER then
  527. exit(true); {because SuperRegisterEqual is true}
  528. case getsubreg(reg1) of
  529. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  530. higher, it preserves the high bits, so the new value depends on
  531. reg2's previous value. In other words, it is equivalent to doing:
  532. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  533. R_SUBL:
  534. exit(getsubreg(reg2)=R_SUBL);
  535. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  536. higher, it actually does a:
  537. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  538. R_SUBH:
  539. exit(getsubreg(reg2)=R_SUBH);
  540. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  541. bits of reg2:
  542. reg2 := (reg2 and $ffff0000) or word(reg1); }
  543. R_SUBW:
  544. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  545. { a write to R_SUBD always overwrites every other subregister,
  546. because it clears the high 32 bits of R_SUBQ on x86_64 }
  547. R_SUBD,
  548. R_SUBQ:
  549. exit(true);
  550. else
  551. internalerror(2017042801);
  552. end;
  553. end;
  554. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  555. begin
  556. if not SuperRegistersEqual(reg1,reg2) then
  557. exit(false);
  558. if getregtype(reg1)<>R_INTREGISTER then
  559. exit(true); {because SuperRegisterEqual is true}
  560. case getsubreg(reg1) of
  561. R_SUBL:
  562. exit(getsubreg(reg2)<>R_SUBH);
  563. R_SUBH:
  564. exit(getsubreg(reg2)<>R_SUBL);
  565. R_SUBW,
  566. R_SUBD,
  567. R_SUBQ:
  568. exit(true);
  569. else
  570. internalerror(2017042802);
  571. end;
  572. end;
{ Pre-peephole pass for shift instructions (p is a SHR or SAR with a
  constant count). Combines a following SHL by a constant on the same
  operand into a cheaper shift+and / and+shift / single and. }
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;
  begin
    result:=false;
    { changes the code sequence
      shr/sar const1, x
      shl const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    { NOTE(review): result is left false even when the branches below rewrite
      or remove instructions -- confirm callers do not rely on it to detect
      changes }
    if GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_SHL,[]) and
      (taicpu(p).oper[0]^.typ = top_const) and
      (taicpu(hp1).oper[0]^.typ = top_const) and
      (taicpu(hp1).opsize = taicpu(p).opsize) and
      (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
      OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        { the first two forms are larger than the original pair, so they are
          skipped when optimizing for size }
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 > const2 }
            { keep the shr/sar, but only by the net amount const1-const2 ... }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            { ...and turn the shl into an and that clears the low const2 bits
              which the original pair would have zeroed }
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            { l = 2^const2-1; xor with the all-ones pattern of the operand
              size yields the complement mask }
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 < const2 }
            { the shl shifts by the net amount const2-const1 ... }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            { ...while p becomes an and clearing the low const1 bits first }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 = const2 }
            { the pair only clears the low const1 bits, so a single and
              replaces both instructions }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor tcgint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor tcgint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            asml.remove(hp1);
            hp1.free;
          end;
      end;
  end;
{ Pre-peephole pass for IMUL with a constant first operand: removes
  multiplications by 1 and rewrites suitable constants as lea(+shl). }
function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
  var
    opsize : topsize;
    hp1 : tai;
    tmpref : treference;
    ShiftValue : Cardinal;
    BaseValue : TCGInt;
  begin
    result:=false;
    opsize:=taicpu(p).opsize;
    { changes certain "imul const, %reg"'s to lea sequences }
    if (MatchOpType(taicpu(p),top_const,top_reg) or
      MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
      (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
      if (taicpu(p).oper[0]^.val = 1) then
        if (taicpu(p).ops = 2) then
          { remove "imul $1, reg" }
          begin
            hp1 := tai(p.Next);
            asml.remove(p);
            { NOTE(review): DebugMsg is called with p after it has been
              unlinked from the list -- confirm this is intended in debug
              builds }
            DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
            p.free;
            p := hp1;
            result:=true;
          end
        else
          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
          begin
            hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
            p.free;
            p := hp1;
          end
      else if
        { a three-operand form needs a register destination }
        ((taicpu(p).ops <= 2) or
        (taicpu(p).oper[2]^.typ = Top_Reg)) and
        not(cs_opt_size in current_settings.optimizerswitches) and
        { lea/shl do not set the overflow flag, so the rewrite is illegal if
          a jo/jno consumes it }
        (not(GetNextInstruction(p, hp1)) or
        not((tai(hp1).typ = ait_instruction) and
        ((taicpu(hp1).opcode=A_Jcc) and
        (taicpu(hp1).condition in [C_O,C_NO])))) then
        begin
          {
            imul X, reg1, reg2 to
            lea (reg1,reg1,Y), reg2
            shl ZZ,reg2
            imul XX, reg1 to
            lea (reg1,reg1,YY), reg1
            shl ZZ,reg2
            This optimization makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
            it does not exist as a separate optimization target in FPC though.
            This optimization can be applied as long as only two bits are set in the constant and those two bits are separated by
            at most two zeros
          }
          reference_reset(tmpref,1,[]);
          { the constant must be 2^ShiftValue * BaseValue with exactly two
            set bits at most three positions apart }
          if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
            begin
              ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
              BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
              TmpRef.base := taicpu(p).oper[1]^.reg;
              TmpRef.index := taicpu(p).oper[1]^.reg;
              { with two set bits <=3 apart, the odd factor is 3, 5 or 9 }
              if not(BaseValue in [3,5,9]) then
                Internalerror(2018110101);
              { lea (reg,reg,B-1) computes reg*B for B in [3,5,9] }
              TmpRef.ScaleFactor := BaseValue-1;
              if (taicpu(p).ops = 2) then
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
              AsmL.InsertAfter(hp1,p);
              DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
              AsmL.Remove(p);
              taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
              p.free;
              p := hp1;
              { the remaining power-of-two factor becomes a shl on the
                destination }
              if ShiftValue>0 then
                AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
            end;
        end;
  end;
  728. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  729. var
  730. p: taicpu;
  731. begin
  732. if not assigned(hp) or
  733. (hp.typ <> ait_instruction) then
  734. begin
  735. Result := false;
  736. exit;
  737. end;
  738. p := taicpu(hp);
  739. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  740. with insprop[p.opcode] do
  741. begin
  742. case getsubreg(reg) of
  743. R_SUBW,R_SUBD,R_SUBQ:
  744. Result:=
  745. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  746. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  747. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  748. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  749. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  750. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  751. R_SUBFLAGCARRY:
  752. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  753. R_SUBFLAGPARITY:
  754. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  755. R_SUBFLAGAUXILIARY:
  756. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  757. R_SUBFLAGZERO:
  758. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  759. R_SUBFLAGSIGN:
  760. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  761. R_SUBFLAGOVERFLOW:
  762. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  763. R_SUBFLAGINTERRUPT:
  764. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  765. R_SUBFLAGDIRECTION:
  766. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  767. else
  768. begin
  769. writeln(getsubreg(reg));
  770. internalerror(2017050501);
  771. end;
  772. end;
  773. exit;
  774. end;
  775. Result :=
  776. (((p.opcode = A_MOV) or
  777. (p.opcode = A_MOVZX) or
  778. (p.opcode = A_MOVSX) or
  779. (p.opcode = A_LEA) or
  780. (p.opcode = A_VMOVSS) or
  781. (p.opcode = A_VMOVSD) or
  782. (p.opcode = A_VMOVAPD) or
  783. (p.opcode = A_VMOVAPS) or
  784. (p.opcode = A_VMOVQ) or
  785. (p.opcode = A_MOVSS) or
  786. (p.opcode = A_MOVSD) or
  787. (p.opcode = A_MOVQ) or
  788. (p.opcode = A_MOVAPD) or
  789. (p.opcode = A_MOVAPS) or
  790. {$ifndef x86_64}
  791. (p.opcode = A_LDS) or
  792. (p.opcode = A_LES) or
  793. {$endif not x86_64}
  794. (p.opcode = A_LFS) or
  795. (p.opcode = A_LGS) or
  796. (p.opcode = A_LSS)) and
  797. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  798. (p.oper[1]^.typ = top_reg) and
  799. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  800. ((p.oper[0]^.typ = top_const) or
  801. ((p.oper[0]^.typ = top_reg) and
  802. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  803. ((p.oper[0]^.typ = top_ref) and
  804. not RegInRef(reg,p.oper[0]^.ref^)))) or
  805. ((p.opcode = A_POP) and
  806. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  807. ((p.opcode = A_IMUL) and
  808. (p.ops=3) and
  809. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  810. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  811. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  812. ((((p.opcode = A_IMUL) or
  813. (p.opcode = A_MUL)) and
  814. (p.ops=1)) and
  815. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  816. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  817. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  818. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  819. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  820. {$ifdef x86_64}
  821. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  822. {$endif x86_64}
  823. )) or
  824. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  825. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  826. {$ifdef x86_64}
  827. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  828. {$endif x86_64}
  829. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  830. {$ifndef x86_64}
  831. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  832. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  833. {$endif not x86_64}
  834. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  835. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  836. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  837. {$ifndef x86_64}
  838. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  839. {$endif not x86_64}
  840. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  841. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  842. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  843. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  844. {$ifdef x86_64}
  845. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  846. {$endif x86_64}
  847. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  848. (((p.opcode = A_FSTSW) or
  849. (p.opcode = A_FNSTSW)) and
  850. (p.oper[0]^.typ=top_reg) and
  851. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  852. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  853. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  854. (p.oper[0]^.reg=p.oper[1]^.reg) and
  855. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  856. end;
{ Returns true if p begins one of the recognised function-epilogue
  sequences. p is a value parameter, so skipping a leading NOP does not
  affect the caller's pointer. }
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  var
    hp2,hp3 : tai;
  begin
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      { plain "ret" }
      ((taicpu(p).opcode = A_RET) or
      { "leave; ret" }
      ((taicpu(p).opcode=A_LEAVE) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_RET,[S_NO])
      ) or
      { "lea x(%sp),%sp; ret" -- stack cleanup followed by return }
      (((taicpu(p).opcode=A_LEA) and
      MatchOpType(taicpu(p),top_ref,top_reg) and
      (taicpu(p).oper[0]^.ref^.base=NR_STACK_POINTER_REG) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
      ) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_RET,[S_NO])
      ) or
      { "mov %fp,%sp" or "lea x(%fp),%sp", then "pop %fp; ret" }
      ((((taicpu(p).opcode=A_MOV) and
      MatchOpType(taicpu(p),top_reg,top_reg) and
      (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
      ((taicpu(p).opcode=A_LEA) and
      MatchOpType(taicpu(p),top_ref,top_reg) and
      (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
      (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
      )
      ) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
      MatchOpType(taicpu(hp2),top_reg) and
      (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
      GetNextInstruction(hp2,hp3) and
      MatchInstruction(hp3,A_RET,[S_NO])
      )
      );
  end;
  897. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  898. begin
  899. isFoldableArithOp := False;
  900. case hp1.opcode of
  901. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  902. isFoldableArithOp :=
  903. ((taicpu(hp1).oper[0]^.typ = top_const) or
  904. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  905. (taicpu(hp1).oper[0]^.reg <> reg))) and
  906. (taicpu(hp1).oper[1]^.typ = top_reg) and
  907. (taicpu(hp1).oper[1]^.reg = reg);
  908. A_INC,A_DEC,A_NEG,A_NOT:
  909. isFoldableArithOp :=
  910. (taicpu(hp1).oper[0]^.typ = top_reg) and
  911. (taicpu(hp1).oper[0]^.reg = reg);
  912. else
  913. ;
  914. end;
  915. end;
  916. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  917. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  918. var
  919. hp2: tai;
  920. begin
  921. hp2 := p;
  922. repeat
  923. hp2 := tai(hp2.previous);
  924. if assigned(hp2) and
  925. (hp2.typ = ait_regalloc) and
  926. (tai_regalloc(hp2).ratype=ra_dealloc) and
  927. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  928. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  929. begin
  930. asml.remove(hp2);
  931. hp2.free;
  932. break;
  933. end;
  934. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  935. end;
  936. begin
  937. case current_procinfo.procdef.returndef.typ of
  938. arraydef,recorddef,pointerdef,
  939. stringdef,enumdef,procdef,objectdef,errordef,
  940. filedef,setdef,procvardef,
  941. classrefdef,forwarddef:
  942. DoRemoveLastDeallocForFuncRes(RS_EAX);
  943. orddef:
  944. if current_procinfo.procdef.returndef.size <> 0 then
  945. begin
  946. DoRemoveLastDeallocForFuncRes(RS_EAX);
  947. { for int64/qword }
  948. if current_procinfo.procdef.returndef.size = 8 then
  949. DoRemoveLastDeallocForFuncRes(RS_EDX);
  950. end;
  951. else
  952. ;
  953. end;
  954. end;
{ Pass-1 peephole for MOVAPS/MOVAPD: folds a copy-op-copy-back triple
  into the bare scalar operation when the temporary register dies. }
function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  var
    hp1,hp2 : tai;
  begin
    result:=false;
    { match: p copies reg->reg2, hp1 is a scalar arithmetic op of matching
      precision, hp2 copies reg2 back into reg }
    if MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,taicpu(p).opcode,[]) and
      OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
      MatchOpType(taicpu(hp2),top_reg,top_reg) and
      MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
      (((taicpu(p).opcode=A_MOVAPS) and
      ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
      (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
      ((taicpu(p).opcode=A_MOVAPD) and
      ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
      (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
      ) then
      { change
        movapX reg,reg2
        addsX/subsX/... reg3, reg2
        movapX reg2,reg
        to
        addsX/subsX/... reg3,reg
      }
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        { reg2 must not be live after the copy-back, otherwise it is needed }
        If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
          begin
            DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
              debug_op2str(taicpu(p).opcode)+' '+
              debug_op2str(taicpu(hp1).opcode)+' '+
              debug_op2str(taicpu(hp2).opcode)+') done',p);
            { we cannot eliminate the first move if
              the operations uses the same register for source and dest }
            if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
              begin
                asml.remove(p);
                p.Free;
              end;
            { retarget the arithmetic op at reg and drop the copy-back }
            taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
            asml.remove(hp2);
            hp2.Free;
            p:=hp1;
            result:=true;
          end;
      end
  end;
{ Pass-1 peephole for VMOVAPS/VMOVAPD with register operands: removes
  self-moves, collapses chained moves, and folds a move through an FMA. }
function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  var
    hp1,hp2 : tai;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) then
      begin
        { vmova* reg1,reg1
          =>
          <nop> }
        if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
          begin
            GetNextInstruction(p,hp1);
            asml.Remove(p);
            p.Free;
            p:=hp1;
            result:=true;
          end
        else if GetNextInstruction(p,hp1) then
          begin
            if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
              MatchOpType(taicpu(hp1),top_reg,top_reg) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
              begin
                { vmova* reg1,reg2
                  vmova* reg2,reg3
                  dealloc reg2
                  =>
                  vmova* reg1,reg3 }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                  begin
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    asml.Remove(hp1);
                    hp1.Free;
                    result:=true;
                  end
                { special case:
                  vmova* reg1,reg2
                  vmova* reg2,reg1
                  =>
                  vmova* reg1,reg2 }
                else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                  begin
                    asml.Remove(hp1);
                    hp1.Free;
                    result:=true;
                  end
              end
            else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
              { we mix single and double operations here because we assume that the compiler
                generates vmovapd only after double operations and vmovaps only after single operations }
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
              GetNextInstruction(hp1,hp2) and
              MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
              { NOTE(review): only hp2's destination is checked against reg1;
                hp2's source operand is not verified to be reg2 -- confirm
                the surrounding conditions guarantee this }
              MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
              begin
                { fold the copy into the FMA's accumulator operand and drop
                  both moves when reg2 dies at the final move }
                TransferUsedRegs(TmpUsedRegs);
                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
                  then
                  begin
                    taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                    asml.Remove(p);
                    p.Free;
                    asml.Remove(hp2);
                    hp2.Free;
                    p:=hp1;
                    { NOTE(review): result is not set to true here although
                      instructions were removed -- confirm this is intended }
                  end;
              end;
          end;
      end;
  end;
{ Pass-1 peephole for three-operand AVX arithmetic: retargets the
  operation's destination when its result is only copied away. }
function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    result:=false;
    { replace
      V<Op>X %mreg1,%mreg2,%mreg3
      VMovX %mreg3,%mreg4
      dealloc %mreg3
      by
      V<Op>X %mreg1,%mreg2,%mreg4
      ?
    }
    if GetNextInstruction(p,hp1) and
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
      MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
      (taicpu(hp1).oper[1]^.typ=top_reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { %mreg3 must die at the vmov, otherwise its value is still needed }
        if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
          ) then
          begin
            taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
  end;
  1115. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1116. var
  1117. hp1, hp2: tai;
  1118. GetNextInstruction_p: Boolean;
  1119. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  1120. NewSize: topsize;
  1121. begin
  1122. Result:=false;
  1123. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1124. { remove mov reg1,reg1? }
  1125. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  1126. then
  1127. begin
  1128. DebugMsg(SPeepholeOptimization + 'Mov2Nop done',p);
  1129. { take care of the register (de)allocs following p }
  1130. UpdateUsedRegs(tai(p.next));
  1131. asml.remove(p);
  1132. p.free;
  1133. p:=hp1;
  1134. Result:=true;
  1135. exit;
  1136. end;
  1137. if GetNextInstruction_p and
  1138. MatchInstruction(hp1,A_AND,[]) and
  1139. (taicpu(p).oper[1]^.typ = top_reg) and
  1140. MatchOpType(taicpu(hp1),top_const,top_reg) then
  1141. begin
  1142. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1143. begin
  1144. case taicpu(p).opsize of
  1145. S_L:
  1146. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1147. begin
  1148. { Optimize out:
  1149. mov x, %reg
  1150. and ffffffffh, %reg
  1151. }
  1152. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  1153. asml.remove(hp1);
  1154. hp1.free;
  1155. Result:=true;
  1156. exit;
  1157. end;
  1158. S_Q: { TODO: Confirm if this is even possible }
  1159. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1160. begin
  1161. { Optimize out:
  1162. mov x, %reg
  1163. and ffffffffffffffffh, %reg
  1164. }
  1165. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  1166. asml.remove(hp1);
  1167. hp1.free;
  1168. Result:=true;
  1169. exit;
  1170. end;
  1171. else
  1172. ;
  1173. end;
  1174. end
  1175. else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  1176. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  1177. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  1178. then
  1179. begin
  1180. InputVal := debug_operstr(taicpu(p).oper[0]^);
  1181. MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
  1182. case taicpu(p).opsize of
  1183. S_B:
  1184. if (taicpu(hp1).oper[0]^.val = $ff) then
  1185. begin
  1186. { Convert:
  1187. movb x, %regl movb x, %regl
  1188. andw ffh, %regw andl ffh, %regd
  1189. To:
  1190. movzbw x, %regd movzbl x, %regd
  1191. (Identical registers, just different sizes)
  1192. }
  1193. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  1194. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  1195. case taicpu(hp1).opsize of
  1196. S_W: NewSize := S_BW;
  1197. S_L: NewSize := S_BL;
  1198. {$ifdef x86_64}
  1199. S_Q: NewSize := S_BQ;
  1200. {$endif x86_64}
  1201. else
  1202. InternalError(2018011510);
  1203. end;
  1204. end
  1205. else
  1206. NewSize := S_NO;
  1207. S_W:
  1208. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1209. begin
  1210. { Convert:
  1211. movw x, %regw
  1212. andl ffffh, %regd
  1213. To:
  1214. movzwl x, %regd
  1215. (Identical registers, just different sizes)
  1216. }
  1217. RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  1218. RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  1219. case taicpu(hp1).opsize of
  1220. S_L: NewSize := S_WL;
  1221. {$ifdef x86_64}
  1222. S_Q: NewSize := S_WQ;
  1223. {$endif x86_64}
  1224. else
  1225. InternalError(2018011511);
  1226. end;
  1227. end
  1228. else
  1229. NewSize := S_NO;
  1230. else
  1231. NewSize := S_NO;
  1232. end;
  1233. if NewSize <> S_NO then
  1234. begin
  1235. PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
  1236. { The actual optimization }
  1237. taicpu(p).opcode := A_MOVZX;
  1238. taicpu(p).changeopsize(NewSize);
  1239. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  1240. { Safeguard if "and" is followed by a conditional command }
  1241. TransferUsedRegs(TmpUsedRegs);
  1242. UpdateUsedRegs(TmpUsedRegs,tai(p.next));
  1243. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
  1244. begin
  1245. { At this point, the "and" command is effectively equivalent to
  1246. "test %reg,%reg". This will be handled separately by the
  1247. Peephole Optimizer. [Kit] }
  1248. DebugMsg(SPeepholeOptimization + PreMessage +
  1249. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1250. end
  1251. else
  1252. begin
  1253. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
  1254. ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
  1255. asml.Remove(hp1);
  1256. hp1.Free;
  1257. end;
  1258. Result := True;
  1259. Exit;
  1260. end;
  1261. end;
  1262. end
  1263. else if GetNextInstruction_p and
  1264. MatchInstruction(hp1,A_MOV,[]) and
  1265. (taicpu(p).oper[1]^.typ = top_reg) and
  1266. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1267. begin
  1268. TransferUsedRegs(TmpUsedRegs);
  1269. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1270. { we have
  1271. mov x, %treg
  1272. mov %treg, y
  1273. }
  1274. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1275. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1276. { we've got
  1277. mov x, %treg
  1278. mov %treg, y
  1279. with %treg is not used after }
  1280. case taicpu(p).oper[0]^.typ Of
  1281. top_reg:
  1282. begin
  1283. { change
  1284. mov %reg, %treg
  1285. mov %treg, y
  1286. to
  1287. mov %reg, y
  1288. }
  1289. if taicpu(hp1).oper[1]^.typ=top_reg then
  1290. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1291. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1292. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
  1293. asml.remove(hp1);
  1294. hp1.free;
  1295. Result:=true;
  1296. Exit;
  1297. end;
  1298. top_const:
  1299. begin
  1300. { change
  1301. mov const, %treg
  1302. mov %treg, y
  1303. to
  1304. mov const, y
  1305. }
  1306. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  1307. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1308. begin
  1309. if taicpu(hp1).oper[1]^.typ=top_reg then
  1310. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1311. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1312. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  1313. asml.remove(hp1);
  1314. hp1.free;
  1315. Result:=true;
  1316. Exit;
  1317. end;
  1318. end;
  1319. top_ref:
  1320. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1321. begin
  1322. { change
  1323. mov mem, %treg
  1324. mov %treg, %reg
  1325. to
  1326. mov mem, %reg"
  1327. }
  1328. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1329. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  1330. asml.remove(hp1);
  1331. hp1.free;
  1332. Result:=true;
  1333. Exit;
  1334. end;
  1335. else
  1336. ;
  1337. end;
  1338. end
  1339. else
  1340. { Change
  1341. mov %reg1, %reg2
  1342. xxx %reg2, ???
  1343. to
  1344. mov %reg1, %reg2
  1345. xxx %reg1, ???
  1346. to avoid a write/read penalty
  1347. }
  1348. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1349. GetNextInstruction(p,hp1) and
  1350. (tai(hp1).typ = ait_instruction) and
  1351. (taicpu(hp1).ops >= 1) and
  1352. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1353. { we have
  1354. mov %reg1, %reg2
  1355. XXX %reg2, ???
  1356. }
  1357. begin
  1358. if ((taicpu(hp1).opcode = A_OR) or
  1359. (taicpu(hp1).opcode = A_AND) or
  1360. (taicpu(hp1).opcode = A_TEST)) and
  1361. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1362. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1363. { we have
  1364. mov %reg1, %reg2
  1365. test/or/and %reg2, %reg2
  1366. }
  1367. begin
  1368. TransferUsedRegs(TmpUsedRegs);
  1369. { reg1 will be used after the first instruction,
  1370. so update the allocation info }
  1371. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1372. if GetNextInstruction(hp1, hp2) and
  1373. (hp2.typ = ait_instruction) and
  1374. taicpu(hp2).is_jmp and
  1375. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1376. { change
  1377. mov %reg1, %reg2
  1378. test/or/and %reg2, %reg2
  1379. jxx
  1380. to
  1381. test %reg1, %reg1
  1382. jxx
  1383. }
  1384. begin
  1385. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1386. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1387. DebugMsg(SPeepholeOptimization + 'MovTestJxx2TestMov done',p);
  1388. asml.remove(p);
  1389. p.free;
  1390. p := hp1;
  1391. Exit;
  1392. end
  1393. else
  1394. { change
  1395. mov %reg1, %reg2
  1396. test/or/and %reg2, %reg2
  1397. to
  1398. mov %reg1, %reg2
  1399. test/or/and %reg1, %reg1
  1400. }
  1401. begin
  1402. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1403. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1404. DebugMsg(SPeepholeOptimization + 'MovTestJxx2MovTestJxx done',p);
  1405. end;
  1406. end
  1407. end
  1408. else
  1409. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1410. x >= RetOffset) as it doesn't do anything (it writes either to a
  1411. parameter or to the temporary storage room for the function
  1412. result)
  1413. }
  1414. if GetNextInstruction_p and
  1415. (tai(hp1).typ = ait_instruction) then
  1416. begin
  1417. if IsExitCode(hp1) and
  1418. MatchOpType(taicpu(p),top_reg,top_ref) and
  1419. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1420. not(assigned(current_procinfo.procdef.funcretsym) and
  1421. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1422. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1423. begin
  1424. asml.remove(p);
  1425. p.free;
  1426. p:=hp1;
  1427. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  1428. RemoveLastDeallocForFuncRes(p);
  1429. exit;
  1430. end
  1431. { change
  1432. mov reg1, mem1
  1433. test/cmp x, mem1
  1434. to
  1435. mov reg1, mem1
  1436. test/cmp x, reg1
  1437. }
  1438. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1439. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1440. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1441. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1442. begin
  1443. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1444. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  1445. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1446. end;
  1447. end;
  1448. { Next instruction is also a MOV ? }
  1449. if GetNextInstruction_p and
  1450. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1451. begin
  1452. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1453. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1454. { mov reg1, mem1 or mov mem1, reg1
  1455. mov mem2, reg2 mov reg2, mem2}
  1456. begin
  1457. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1458. { mov reg1, mem1 or mov mem1, reg1
  1459. mov mem2, reg1 mov reg2, mem1}
  1460. begin
  1461. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1462. { Removes the second statement from
  1463. mov reg1, mem1/reg2
  1464. mov mem1/reg2, reg1 }
  1465. begin
  1466. if taicpu(p).oper[0]^.typ=top_reg then
  1467. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1468. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  1469. asml.remove(hp1);
  1470. hp1.free;
  1471. Result:=true;
  1472. exit;
  1473. end
  1474. else
  1475. begin
  1476. TransferUsedRegs(TmpUsedRegs);
  1477. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1478. if (taicpu(p).oper[1]^.typ = top_ref) and
  1479. { mov reg1, mem1
  1480. mov mem2, reg1 }
  1481. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1482. GetNextInstruction(hp1, hp2) and
  1483. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1484. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1485. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1486. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1487. { change to
  1488. mov reg1, mem1 mov reg1, mem1
  1489. mov mem2, reg1 cmp reg1, mem2
  1490. cmp mem1, reg1
  1491. }
  1492. begin
  1493. asml.remove(hp2);
  1494. hp2.free;
  1495. taicpu(hp1).opcode := A_CMP;
  1496. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1497. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1498. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1499. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  1500. end;
  1501. end;
  1502. end
  1503. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1504. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1505. begin
  1506. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1507. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1508. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  1509. end
  1510. else
  1511. begin
  1512. TransferUsedRegs(TmpUsedRegs);
  1513. if GetNextInstruction(hp1, hp2) and
  1514. MatchOpType(taicpu(p),top_ref,top_reg) and
  1515. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1516. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1517. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1518. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1519. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1520. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1521. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1522. { mov mem1, %reg1
  1523. mov %reg1, mem2
  1524. mov mem2, reg2
  1525. to:
  1526. mov mem1, reg2
  1527. mov reg2, mem2}
  1528. begin
  1529. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1530. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  1531. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1532. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1533. asml.remove(hp2);
  1534. hp2.free;
  1535. end
  1536. {$ifdef i386}
  1537. { this is enabled for i386 only, as the rules to create the reg sets below
  1538. are too complicated for x86-64, so this makes this code too error prone
  1539. on x86-64
  1540. }
  1541. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1542. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1543. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1544. { mov mem1, reg1 mov mem1, reg1
  1545. mov reg1, mem2 mov reg1, mem2
  1546. mov mem2, reg2 mov mem2, reg1
  1547. to: to:
  1548. mov mem1, reg1 mov mem1, reg1
  1549. mov mem1, reg2 mov reg1, mem2
  1550. mov reg1, mem2
  1551. or (if mem1 depends on reg1
  1552. and/or if mem2 depends on reg2)
  1553. to:
  1554. mov mem1, reg1
  1555. mov reg1, mem2
  1556. mov reg1, reg2
  1557. }
  1558. begin
  1559. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1560. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1561. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1562. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1563. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1564. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1565. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1566. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1567. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1568. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1569. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1570. end
  1571. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1572. begin
  1573. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1574. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1575. end
  1576. else
  1577. begin
  1578. asml.remove(hp2);
  1579. hp2.free;
  1580. end
  1581. {$endif i386}
  1582. ;
  1583. end;
  1584. end
  1585. (* { movl [mem1],reg1
  1586. movl [mem1],reg2
  1587. to
  1588. movl [mem1],reg1
  1589. movl reg1,reg2
  1590. }
  1591. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1592. (taicpu(p).oper[1]^.typ = top_reg) and
  1593. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1594. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1595. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1596. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1597. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1598. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1599. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1600. else*)
  1601. { movl const1,[mem1]
  1602. movl [mem1],reg1
  1603. to
  1604. movl const1,reg1
  1605. movl reg1,[mem1]
  1606. }
  1607. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1608. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1609. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1610. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1611. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1612. begin
  1613. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1614. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1615. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1616. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1617. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1618. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  1619. end
  1620. {
  1621. mov* x,reg1
  1622. mov* y,reg1
  1623. to
  1624. mov* y,reg1
  1625. }
  1626. else if (taicpu(p).oper[1]^.typ=top_reg) and
  1627. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1628. not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
  1629. begin
  1630. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
  1631. { take care of the register (de)allocs following p }
  1632. UpdateUsedRegs(tai(p.next));
  1633. asml.remove(p);
  1634. p.free;
  1635. p:=hp1;
  1636. Result:=true;
  1637. exit;
  1638. end;
  1639. end
  1640. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1641. GetNextInstruction_p and
  1642. (hp1.typ = ait_instruction) and
  1643. GetNextInstruction(hp1, hp2) and
  1644. MatchInstruction(hp2,A_MOV,[]) and
  1645. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1646. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1647. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  1648. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1649. ) then
  1650. begin
  1651. if OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1652. (taicpu(hp2).oper[0]^.typ=top_reg) then
  1653. { change movsX/movzX reg/ref, reg2
  1654. add/sub/or/... reg3/$const, reg2
  1655. mov reg2 reg/ref
  1656. dealloc reg2
  1657. to
  1658. add/sub/or/... reg3/$const, reg/ref }
  1659. begin
  1660. TransferUsedRegs(TmpUsedRegs);
  1661. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1662. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1663. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1664. begin
  1665. { by example:
  1666. movswl %si,%eax movswl %si,%eax p
  1667. decl %eax addl %edx,%eax hp1
  1668. movw %ax,%si movw %ax,%si hp2
  1669. ->
  1670. movswl %si,%eax movswl %si,%eax p
  1671. decw %eax addw %edx,%eax hp1
  1672. movw %ax,%si movw %ax,%si hp2
  1673. }
  1674. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  1675. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  1676. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  1677. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize),p);
  1678. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1679. {
  1680. ->
  1681. movswl %si,%eax movswl %si,%eax p
  1682. decw %si addw %dx,%si hp1
  1683. movw %ax,%si movw %ax,%si hp2
  1684. }
  1685. case taicpu(hp1).ops of
  1686. 1:
  1687. begin
  1688. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1689. if taicpu(hp1).oper[0]^.typ=top_reg then
  1690. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1691. end;
  1692. 2:
  1693. begin
  1694. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1695. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1696. (taicpu(hp1).opcode<>A_SHL) and
  1697. (taicpu(hp1).opcode<>A_SHR) and
  1698. (taicpu(hp1).opcode<>A_SAR) then
  1699. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1700. end;
  1701. else
  1702. internalerror(2008042701);
  1703. end;
  1704. {
  1705. ->
  1706. decw %si addw %dx,%si p
  1707. }
  1708. asml.remove(p);
  1709. asml.remove(hp2);
  1710. p.Free;
  1711. hp2.Free;
  1712. p := hp1;
  1713. end;
  1714. end
  1715. else if MatchOpType(taicpu(hp2),top_reg,top_reg) and
  1716. not(SuperRegistersEqual(taicpu(hp1).oper[0]^.reg,taicpu(hp2).oper[1]^.reg)) and
  1717. ((topsize2memsize[taicpu(hp1).opsize]<= topsize2memsize[taicpu(hp2).opsize]) or
  1718. { opsize matters for these opcodes, we could probably work around this, but it is not worth the effort }
  1719. ((taicpu(hp1).opcode<>A_SHL) and (taicpu(hp1).opcode<>A_SHR) and (taicpu(hp1).opcode<>A_SAR))
  1720. )
  1721. {$ifdef i386}
  1722. { byte registers of esi, edi, ebp, esp are not available on i386 }
  1723. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  1724. and ((taicpu(hp2).opsize<>S_B) or not(getsupreg(taicpu(p).oper[0]^.reg) in [RS_ESI,RS_EDI,RS_EBP,RS_ESP]))
  1725. {$endif i386}
  1726. then
  1727. { change movsX/movzX reg/ref, reg2
  1728. add/sub/or/... regX/$const, reg2
  1729. mov reg2, reg3
  1730. dealloc reg2
  1731. to
  1732. movsX/movzX reg/ref, reg3
  1733. add/sub/or/... reg3/$const, reg3
  1734. }
  1735. begin
  1736. TransferUsedRegs(TmpUsedRegs);
  1737. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1738. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1739. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1740. begin
  1741. { by example:
  1742. movswl %si,%eax movswl %si,%eax p
  1743. decl %eax addl %edx,%eax hp1
  1744. movw %ax,%si movw %ax,%si hp2
  1745. ->
  1746. movswl %si,%eax movswl %si,%eax p
  1747. decw %eax addw %edx,%eax hp1
  1748. movw %ax,%si movw %ax,%si hp2
  1749. }
  1750. DebugMsg(SPeepholeOptimization + 'MovOpMov2MovOp ('+
  1751. debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+
  1752. debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+
  1753. debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize),p);
  1754. { limit size of constants as well to avoid assembler errors, but
  1755. check opsize to avoid overflow when left shifting the 1 }
  1756. if (taicpu(p).oper[0]^.typ=top_const) and (topsize2memsize[taicpu(hp2).opsize]<=4) then
  1757. taicpu(p).oper[0]^.val:=taicpu(p).oper[0]^.val and ((qword(1) shl (topsize2memsize[taicpu(hp2).opsize]*8))-1);
  1758. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1759. taicpu(p).changeopsize(taicpu(hp2).opsize);
  1760. if taicpu(p).oper[0]^.typ=top_reg then
  1761. setsubreg(taicpu(p).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1762. taicpu(p).loadoper(1, taicpu(hp2).oper[1]^);
  1763. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
  1764. {
  1765. ->
  1766. movswl %si,%eax movswl %si,%eax p
  1767. decw %si addw %dx,%si hp1
  1768. movw %ax,%si movw %ax,%si hp2
  1769. }
  1770. case taicpu(hp1).ops of
  1771. 1:
  1772. begin
  1773. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1774. if taicpu(hp1).oper[0]^.typ=top_reg then
  1775. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1776. end;
  1777. 2:
  1778. begin
  1779. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1780. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1781. (taicpu(hp1).opcode<>A_SHL) and
  1782. (taicpu(hp1).opcode<>A_SHR) and
  1783. (taicpu(hp1).opcode<>A_SAR) then
  1784. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1785. end;
  1786. else
  1787. internalerror(2018111801);
  1788. end;
  1789. {
  1790. ->
  1791. decw %si addw %dx,%si p
  1792. }
  1793. asml.remove(hp2);
  1794. hp2.Free;
  1795. end;
  1796. end;
  1797. end
  1798. else if GetNextInstruction_p and
  1799. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1800. GetNextInstruction(hp1, hp2) and
  1801. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1802. MatchOperand(Taicpu(p).oper[0]^,0) and
  1803. (Taicpu(p).oper[1]^.typ = top_reg) and
  1804. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1805. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1806. { mov reg1,0
  1807. bts reg1,operand1 --> mov reg1,operand2
  1808. or reg1,operand2 bts reg1,operand1}
  1809. begin
  1810. Taicpu(hp2).opcode:=A_MOV;
  1811. asml.remove(hp1);
  1812. insertllitem(hp2,hp2.next,hp1);
  1813. asml.remove(p);
  1814. p.free;
  1815. p:=hp1;
  1816. end
  1817. else if GetNextInstruction_p and
  1818. MatchInstruction(hp1,A_LEA,[S_L]) and
  1819. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1820. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1821. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1822. ) or
  1823. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1824. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1825. )
  1826. ) then
  1827. { mov reg1,ref
  1828. lea reg2,[reg1,reg2]
  1829. to
  1830. add reg2,ref}
  1831. begin
  1832. TransferUsedRegs(TmpUsedRegs);
  1833. { reg1 may not be used afterwards }
  1834. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1835. begin
  1836. Taicpu(hp1).opcode:=A_ADD;
  1837. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1838. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  1839. asml.remove(p);
  1840. p.free;
  1841. p:=hp1;
  1842. end;
  1843. end;
  1844. end;
{ Pass-1 peephole for MOVSX/MOVZX-style register/memory copies (movXX).
  Detects a movXX immediately followed by a second movXX of the same opcode
  and size whose operands mirror the first one, and removes the redundant
  second (and possibly the first) instruction.
  Returns True (and sets p to the surviving/next instruction) when the
  instruction stream was changed. }
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  var
    hp1 : tai;   { the instruction following p }
  begin
    Result:=false;
    { only the two-operand form is handled below }
    if taicpu(p).ops <> 2 then
      exit;
    { next instruction must be the same movXX opcode with the same opsize
      and also have two operands }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        { operand types must be mirrored: (reg,mem)/(mem,reg) pairs }
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
           (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          { movXX reg1, mem1     or     movXX mem1, reg1
            movXX mem2, reg2            movXX reg2, mem2 }
          begin
            { destination of hp1 equals source of p... }
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg1            movXX reg2, mem1 }
              begin
                { ...and source of hp1 equals destination of p, so hp1 just
                  copies the value back: it is redundant }
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1 }
                    { keep the source register marked allocated across the pair }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { If the intermediate destination register of p is not
                      used after hp1 either, the first copy is dead as well
                      and BOTH instructions can go (the pair becomes a nop);
                      otherwise only hp1 is removed below. }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        asml.remove(p);
                        p.free;
                        { continue optimizing after the removed pair }
                        GetNextInstruction(hp1,p);
                        DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                      end
                    else
                      DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                    { the second (redundant) movXX is always removed }
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
{ Pass-1 peephole for commutative SSE/AVX arithmetic (e.g. ADDSD/MULSS):
  folds a following register-to-register MOVAPS/MOVAPD back into the
  operation when the copy target is the operation's own source register.
  Returns True when the move was folded and removed. }
function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
  var
    hp1 : tai;   { the MOVAPS/MOVAPD following p }
  begin
    result:=false;
    { replace
        <Op>X    %mreg1,%mreg2  // Op in [ADD,MUL]
        MovX     %mreg2,%mreg1
        dealloc  %mreg2
      by
        <Op>X    %mreg2,%mreg1
      ?
    }
    if GetNextInstruction(p,hp1) and
      { we mix single and double operations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
      { the move must copy the op's destination back into the op's source }
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
      (taicpu(p).oper[0]^.typ=top_reg) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { only safe when the op's original destination register dies after
          the move (otherwise its value is still needed) }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            { swap the operation's operands so it writes the move's target
              directly, then drop the move }
            taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
  end;
{ Pass-1 peephole for LEA: strips useless segment prefixes, turns trivial
  LEAs into MOV/INC/DEC/ADD/SUB (or removes them entirely), and folds a
  following register-to-register MOV into the LEA's destination.
  Returns True when p was replaced or removed. }
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  var
    hp1 : tai;      { replacement or following instruction }
    l : ASizeInt;   { signed displacement taken from the reference }
  begin
    Result:=false;
    { removes seg register prefixes from LEA operations, as they
      don't do anything }
    taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
    { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
       (taicpu(p).oper[0]^.ref^.index = NR_NO) and
       { do not mess with leas accessing the stack pointer }
       (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
       (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
      begin
        { lea (%reg1),%reg2 with no offset -> plain register copy }
        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
           (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous,p.next, hp1);
            DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        { lea (%reg),%reg with no offset does nothing -> remove it }
        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            hp1:=taicpu(p.Next);
            DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
            asml.remove(p);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        { continue to use lea to adjust the stack pointer,
          it is the recommended way, but only if not optimizing for size }
        else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
           (cs_opt_size in current_settings.optimizerswitches) then
          with taicpu(p).oper[0]^.ref^ do
            { lea offset(%reg),%reg -> inc/dec/add/sub on %reg }
            if (base = taicpu(p).oper[1]^.reg) then
              begin
                l:=offset;
                if (l=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                  end
                else if (l=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                  end
                else
                  begin
                    { negative offset becomes SUB of the positive amount;
                      -2147483648 is excluded because it cannot be negated
                      in a signed 32 bit value }
                    if (l<0) and (l<>-2147483648) then
                      begin
                        taicpu(p).opcode:=A_SUB;
                        taicpu(p).loadConst(0,-l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                      end
                    else
                      begin
                        taicpu(p).opcode:=A_ADD;
                        taicpu(p).loadConst(0,l);
                        DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                      end;
                  end;
                Result:=true;
                exit;
              end;
      end;
    { lea x,%reg1; mov %reg1,%reg2 (reg1 dead afterwards)
      -> lea x,%reg2 }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOpType(Taicpu(hp1),top_reg,top_reg) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
      begin
        TransferUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
      end;
  end;
{ Helper for OptPass1Sub: merges the PRECEDING dec/sub/add (same opsize,
  same destination register) into the current "sub const,reg" at p by
  adjusting p's constant and deleting the earlier instruction.
  If the merged constant becomes 0 (add const,reg followed by
  sub const,reg), p itself is removed as well; in that case Result is True
  and p is moved to the previous instruction (or the next one if there is
  no previous), signalling the caller that the stream changed shape.
  NOTE(review): the function only returns True in the "constant became 0"
  case; plain merges return False even though they modified the list. }
function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  var
    hp1 : tai;   { the instruction before p }
  begin
    DoSubAddOpt := False;
    if GetLastInstruction(p, hp1) and
       (hp1.typ = ait_instruction) and
       (taicpu(hp1).opsize = taicpu(p).opsize) then
      case taicpu(hp1).opcode Of
        A_DEC:
          { dec reg; sub const,reg -> sub const+1,reg }
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB:
          { sub const1,reg; sub const2,reg -> sub const1+const2,reg }
          if MatchOpType(taicpu(hp1),top_const,top_reg) and
             MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        A_ADD:
          begin
            { add const1,reg; sub const2,reg -> sub const2-const1,reg }
            if MatchOpType(taicpu(hp1),top_const,top_reg) and
               MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
              begin
                taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                asml.remove(hp1);
                hp1.free;
                { the two cancelled out completely: drop the sub as well }
                if (taicpu(p).oper[0]^.val = 0) then
                  begin
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    { resume at the previous instruction so earlier peepholes
                      get another chance; fall back to the next one }
                    if not GetLastInstruction(hp1, p) then
                      p := hp1;
                    DoSubAddOpt := True;
                  end
              end;
          end;
        else
          ;
      end;
  end;
  2075. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  2076. {$ifdef i386}
  2077. var
  2078. hp1 : tai;
  2079. {$endif i386}
  2080. begin
  2081. Result:=false;
  2082. { * change "subl $2, %esp; pushw x" to "pushl x"}
  2083. { * change "sub/add const1, reg" or "dec reg" followed by
  2084. "sub const2, reg" to one "sub ..., reg" }
  2085. if MatchOpType(taicpu(p),top_const,top_reg) then
  2086. begin
  2087. {$ifdef i386}
  2088. if (taicpu(p).oper[0]^.val = 2) and
  2089. (taicpu(p).oper[1]^.reg = NR_ESP) and
  2090. { Don't do the sub/push optimization if the sub }
  2091. { comes from setting up the stack frame (JM) }
  2092. (not(GetLastInstruction(p,hp1)) or
  2093. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  2094. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  2095. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  2096. begin
  2097. hp1 := tai(p.next);
  2098. while Assigned(hp1) and
  2099. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  2100. not RegReadByInstruction(NR_ESP,hp1) and
  2101. not RegModifiedByInstruction(NR_ESP,hp1) do
  2102. hp1 := tai(hp1.next);
  2103. if Assigned(hp1) and
  2104. MatchInstruction(hp1,A_PUSH,[S_W]) then
  2105. begin
  2106. taicpu(hp1).changeopsize(S_L);
  2107. if taicpu(hp1).oper[0]^.typ=top_reg then
  2108. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  2109. hp1 := tai(p.next);
  2110. asml.remove(p);
  2111. p.free;
  2112. p := hp1;
  2113. Result:=true;
  2114. exit;
  2115. end;
  2116. end;
  2117. {$endif i386}
  2118. if DoSubAddOpt(p) then
  2119. Result:=true;
  2120. end;
  2121. end;
{ Pass-1 peephole for SHL/SAL with a constant shift count.
  For 32/64 bit shifts by 1..3, absorbs following add/sub/inc/dec on the
  same register into a single LEA using the shift as scale factor.
  On pre-Pentium II 32 bit targets it additionally rewrites lone shifts:
  "shl $1" -> "add reg,reg" and "shl $2/$3" -> scaled LEA.
  Returns False always (p may still be replaced in place).
  NOTE(review): Result is never set to True even when p is replaced —
  presumably callers detect the change via p itself; confirm against the
  pass driver. }
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  var
    TmpBool1,TmpBool2 : Boolean;   { TmpBool1: keep scanning; TmpBool2: something was folded }
    tmpref : treference;           { the LEA reference being assembled }
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_const,top_reg) and
       (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
       { LEA scale factors only go up to 8, i.e. a shift of at most 3 }
       (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        reference_reset(tmpref,2,[]);
        { shifted register becomes the scaled index of the LEA }
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        { greedily consume following add/sub/inc/dec instructions that
          target the shifted register, as long as no later instruction
          reads the flags they would have set }
        while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                 (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
               (((taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
              { LEA sets no flags, so the folded instruction's flags must
                not be consumed afterwards }
              (not GetNextInstruction(hp1,hp2) or
               not instrReadsFlags(hp2)) Do
          begin
            TmpBool1 := False;
            { add/sub with a constant: fold into the LEA displacement }
            if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              begin
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  else
                    internalerror(2019050536);
                end;
                asml.remove(hp1);
                hp1.free;
              end
            else
              { add with a register (only while the base slot is free),
                or inc/dec: fold into base register / displacement }
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                 (((taicpu(hp1).opcode = A_ADD) and
                   (TmpRef.base = NR_NO)) or
                  (taicpu(hp1).opcode = A_INC) or
                  (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                    else
                      internalerror(2019050535);
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end;
          end;
        { emit the combined instruction if anything was folded, or (on
          pre-PII 32 bit, when not optimizing for size) even for a lone
          small shift, where LEA/ADD is faster than SHL }
        if TmpBool2
{$ifndef x86_64}
           or
           ((current_settings.optimizecputype < cpu_Pentium2) and
            (taicpu(p).oper[0]^.val <= 3) and
            not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
          then
          begin
            { a lone shift by 1 is cheapest as "add reg,reg" }
            if not(TmpBool2) and
               (taicpu(p).oper[0]^.val = 1) then
              begin
                hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1 := taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$ifndef x86_64}
    else if (current_settings.optimizecputype < cpu_Pentium2) and
       MatchOpType(taicpu(p),top_const,top_reg) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and pairable in both U and V pipes on the Pentium
          (unlike shl, which is only pairable in the U pipe) }
        if taicpu(p).oper[0]^.val=1 then
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
        { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
          "shl $3, %reg" to "lea (,%reg,8), %reg }
        else if (taicpu(p).opsize = S_L) and
           (taicpu(p).oper[0]^.val<= 3) then
          begin
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
{$endif x86_64}
    ;
  end;
    { Folds a SETcc/TEST/Jcc triple into a single conditional jump.

      Pattern matched:
        set(C)  %reg          (p,   byte register)
        test    %reg,%reg     (hp1, S_B, both operands the same reg as p's)
        j(E/NE) label         (hp2)

      The TEST is always removed and hp2's condition is rewritten; the SETcc
      itself is only removed (and Result set) when its register is not read
      after hp2. Returns True only in that latter case. }
    function TX86AsmOptimizer.OptPass1SETcc(var p: tai): boolean;
      var
        hp1,hp2,next: tai; SetC, JumpC: TAsmCond;
      begin
        Result:=false;
        if MatchOpType(taicpu(p),top_reg) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1, A_TEST, [S_B]) and
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
          (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2, A_Jcc, []) then
          { Change from:             To:
            set(C) %reg              j(~C) label
            test   %reg,%reg
            je     label

            set(C) %reg              j(C)  label
            test   %reg,%reg
            jne    label
          }
          begin
            { remember p's successor before anything is unlinked, so register
              tracking can be advanced over it later }
            next := tai(p.Next);
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, next);
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
            { the TEST is redundant either way: the flags it produces are
              fully determined by the SETcc condition }
            asml.Remove(hp1);
            hp1.Free;
            JumpC := taicpu(hp2).condition;
            { je  label  -> jump when the SETcc condition is FALSE (~C)
              jne label  -> jump when the SETcc condition is TRUE  (C) }
            if conditions_equal(JumpC, C_E) then
              SetC := inverse_cond(taicpu(p).condition)
            else if conditions_equal(JumpC, C_NE) then
              SetC := taicpu(p).condition
            else
              InternalError(2018061400);
            if SetC = C_NONE then
              InternalError(2018061401);
            taicpu(hp2).SetCondition(SetC);
            { only drop the SETcc when its result register is dead after the
              jump; otherwise it must stay to materialise the boolean }
            if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs) then
              begin
                asml.Remove(p);
                UpdateUsedRegs(next);
                p.Free;
                Result := True;
                p := hp2;
              end;
            DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> Jcc',p);
          end;
      end;
    { Removes a redundant fstp/fld (or fistp/fild) pair on the same memory
      location when the stored value is immediately reloaded right before the
      function exits.  Only done for S_FX (extended precision): for smaller
      sizes the store would round the value, so eliminating the reload would
      change results (see the disabled branch below). }
    function TX86AsmOptimizer.OptPass1FSTP(var p: tai): boolean;
      { returns true if a "continue" should be done after this optimization }
      var
        hp1, hp2: tai;
      begin
        Result := false;
        if MatchOpType(taicpu(p),top_ref) and
          GetNextInstruction(p, hp1) and
          (hp1.typ = ait_instruction) and
          { fstp mem + fld mem, or fistp mem + fild mem }
          (((taicpu(hp1).opcode = A_FLD) and
            (taicpu(p).opcode = A_FSTP)) or
           ((taicpu(p).opcode = A_FISTP) and
            (taicpu(hp1).opcode = A_FILD))) and
          MatchOpType(taicpu(hp1),top_ref) and
          (taicpu(hp1).opsize = taicpu(p).opsize) and
          RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
          begin
            { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
            if (taicpu(p).opsize=S_FX) and
              GetNextInstruction(hp1, hp2) and
              (hp2.typ = ait_instruction) and
              IsExitCode(hp2) and
              { the slot must be a frame-pointer local that is NOT the
                function result location (the result must survive the exit) }
              (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
              not(assigned(current_procinfo.procdef.funcretsym) and
                 (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
              (taicpu(p).oper[0]^.ref^.index = NR_NO) then
              begin
                { both the store and the reload can go: the value stays on
                  the FPU stack across the exit code }
                asml.remove(p);
                asml.remove(hp1);
                p.free;
                hp1.free;
                p := hp2;
                RemoveLastDeallocForFuncRes(p);
                Result := true;
              end
            (* can't be done because the store operation rounds
            else
              { fst can't store an extended value! }
              if (taicpu(p).opsize <> S_FX) and
                 (taicpu(p).opsize <> S_IQ) then
                begin
                  if (taicpu(p).opcode = A_FSTP) then
                    taicpu(p).opcode := A_FST
                  else taicpu(p).opcode := A_FIST;
                  asml.remove(hp1);
                  hp1.free;
                end
            *)
          end;
      end;
    { Folds FLD followed by a popping x87 arithmetic op (fxxxp st,st1) into
      the non-popping form, and eliminates duplicate loads of the same memory
      operand.  Non-commutative operations are converted to their reversed
      counterparts (FSUBP<->FSUBR etc.) because the operand roles swap. }
    function TX86AsmOptimizer.OptPass1FLD(var p : tai) : boolean;
      var
        hp1, hp2: tai;
      begin
        result:=false;
        if MatchOpType(taicpu(p),top_reg) and
          GetNextInstruction(p, hp1) and
          (hp1.typ = Ait_Instruction) and
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (taicpu(hp1).oper[0]^.reg = NR_ST) and
          (taicpu(hp1).oper[1]^.reg = NR_ST1) then
          { change to
              fld      reg               fxxx reg,st
              fxxxp    st, st1 (hp1)
            Remark: non commutative operations must be reversed!
          }
          begin
            case taicpu(hp1).opcode Of
              A_FMULP,A_FADDP,
              A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                begin
                  { popping -> non-popping, with sub/div direction flipped }
                  case taicpu(hp1).opcode Of
                    A_FADDP: taicpu(hp1).opcode := A_FADD;
                    A_FMULP: taicpu(hp1).opcode := A_FMUL;
                    A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                    A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                    A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                    A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                    else
                      internalerror(2019050534);
                  end;
                  { operate directly on the original register, result in st }
                  taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
                  taicpu(hp1).oper[1]^.reg := NR_ST;
                  asml.remove(p);
                  p.free;
                  p := hp1;
                  Result:=true;
                  exit;
                end;
              else
                ;
            end;
          end
        else
          if MatchOpType(taicpu(p),top_ref) and
            GetNextInstruction(p, hp2) and
            (hp2.typ = Ait_Instruction) and
            MatchOpType(taicpu(hp2),top_reg,top_reg) and
            (taicpu(p).opsize in [S_FS, S_FL]) and
            (taicpu(hp2).oper[0]^.reg = NR_ST) and
            (taicpu(hp2).oper[1]^.reg = NR_ST1) then
            { hp1 is the instruction BEFORE p here }
            if GetLastInstruction(p, hp1) and
              MatchInstruction(hp1,A_FLD,A_FST,[taicpu(p).opsize]) and
              MatchOpType(taicpu(hp1),top_ref) and
              RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
              if ((taicpu(hp2).opcode = A_FMULP) or
                 (taicpu(hp2).opcode = A_FADDP)) then
                { change to
                    fld/fst   mem1  (hp1)       fld/fst   mem1
                    fld       mem1  (p)         fadd/
                    faddp/                      fmul     st, st
                    fmulp  st, st1 (hp2) }
                begin
                  asml.remove(p);
                  p.free;
                  p := hp1;
                  if (taicpu(hp2).opcode = A_FADDP) then
                    taicpu(hp2).opcode := A_FADD
                  else
                    taicpu(hp2).opcode := A_FMUL;
                  taicpu(hp2).oper[1]^.reg := NR_ST;
                end
              else
                { change to
                    fld/fst mem1 (hp1)   fld/fst mem1
                    fld     mem1 (p)     fld st}
                begin
                  { duplicate the top of stack instead of reloading memory }
                  taicpu(p).changeopsize(S_FL);
                  taicpu(p).loadreg(0,NR_ST);
                end
            else
              begin
                case taicpu(hp2).opcode Of
                  A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                    { change to
                        fld/fst  mem1    (hp1)      fld/fst    mem1
                        fld      mem2    (p)        fxxx       mem2
                        fxxxp    st, st1 (hp2)                      }
                    begin
                      case taicpu(hp2).opcode Of
                        A_FADDP: taicpu(p).opcode := A_FADD;
                        A_FMULP: taicpu(p).opcode := A_FMUL;
                        A_FSUBP: taicpu(p).opcode := A_FSUBR;
                        A_FSUBRP: taicpu(p).opcode := A_FSUB;
                        A_FDIVP: taicpu(p).opcode := A_FDIVR;
                        A_FDIVRP: taicpu(p).opcode := A_FDIV;
                        else
                          internalerror(2019050533);
                      end;
                      asml.remove(hp2);
                      hp2.free;
                    end
                  else
                    ;
                end
              end
      end;
    { Second-pass MOV optimizations:
        1) mov reg1,reg2 + movzx/sx reg2,reg3        -> movzx/sx reg1,reg3
        2) mov reg1,reg2 + mov/zx/sx (reg2,..),reg2  -> mov/zx/sx (reg1,..),reg2
        3) mov (ref),reg + arith/lea + mov reg,(ref) -> arith on (ref) directly
        4) (x86_64 only) 32-bit (x+y) shr 1 computed via a 64-bit add/lea+shr
           -> 32-bit add + rcr, avoiding the 64-bit temporary. }
    function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
      var
        hp1,hp2: tai;
{$ifdef x86_64}
        hp3: tai;
{$endif x86_64}
      begin
        Result:=false;
        if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p, hp1) and
{$ifdef x86_64}
          MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
{$else x86_64}
          MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          { mov reg1, reg2                mov      reg1, reg2
            movzx/sx reg2, reg3     to    movzx/sx reg1, reg3}
          begin
            taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
            DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);
            { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
              or unless supreg(reg3) = supreg(reg2)). [Kit] }
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
              not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
            then
              begin
                asml.remove(p);
                p.free;
                p := hp1;
                Result:=true;
              end;
            exit;
          end
        else if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p, hp1) and
{$ifdef x86_64}
          MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
{$else x86_64}
          MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
          MatchOpType(taicpu(hp1),top_ref,top_reg) and
          ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
           or
           (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
          ) and
          { the load must overwrite reg2 itself, so the copy becomes dead }
          (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
          { mov reg1, reg2
            mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
          begin
            if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
            if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
            DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.typ = top_ref) and
          GetNextInstruction(p,hp1) and
          (hp1.typ = ait_instruction) and
          { while the GetNextInstruction(hp1,hp2) call could be factored out,
            doing it separately in both branches allows to do the cheap checks
            with low probability earlier }
          ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[])
           ) or
           ((taicpu(hp1).opcode=A_LEA) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[]) and
            { the LEA must be expressible as "add something to reg": either
              base or index is reg (but not both), or reg plus an offset only }
            ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
              (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
             ) or
             (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
               taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
            ) and
            ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
           )
          ) and
          { the arith result must be what the final MOV stores back }
          MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
          (taicpu(hp2).oper[1]^.typ = top_ref) then
          begin
            TransferUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs,tai(p.next));
            UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
            if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
              { change   mov            (ref), reg
                         add/sub/or/... reg2/$const, reg
                         mov            reg, (ref)
                         # release reg
                to       add/sub/or/... reg2/$const, (ref)    }
              begin
                case taicpu(hp1).opcode of
                  A_INC,A_DEC,A_NOT,A_NEG :
                    taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                  A_LEA :
                    begin
                      { rewrite the LEA as an ADD of the other component
                        (register if present, otherwise the offset) }
                      taicpu(hp1).opcode:=A_ADD;
                      if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                      else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                      else
                        taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                      taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                      DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
                    end
                  else
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                end;
                asml.remove(p);
                asml.remove(hp2);
                p.free;
                hp2.free;
                p := hp1
              end;
            Exit;
{$ifdef x86_64}
          end
        else if (taicpu(p).opsize = S_L) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          (
            GetNextInstruction(p, hp1) and
            MatchInstruction(hp1, A_MOV,[]) and
            (taicpu(hp1).opsize = S_L) and
            (taicpu(hp1).oper[1]^.typ = top_reg)
          ) and (
            GetNextInstruction(hp1, hp2) and
            (tai(hp2).typ=ait_instruction) and
            (taicpu(hp2).opsize = S_Q) and
            (
              (
                { 64-bit ADD of the two destination registers, either order }
                MatchInstruction(hp2, A_ADD,[]) and
                (taicpu(hp2).opsize = S_Q) and
                (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
                (
                  (
                    (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                )
              ) or (
                { ... or the equivalent LEA (no offset, no scale) }
                MatchInstruction(hp2, A_LEA,[]) and
                (taicpu(hp2).oper[0]^.ref^.offset = 0) and
                (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
                (
                  (
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                ) and (
                  (
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                )
              )
            )
          ) and (
            GetNextInstruction(hp2, hp3) and
            MatchInstruction(hp3, A_SHR,[]) and
            (taicpu(hp3).opsize = S_Q) and
            (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp2).oper[1]^.typ = top_reg) and
            (taicpu(hp3).oper[0]^.val = 1) and
            (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
          ) then
          begin
            { Change   movl x,    reg1d             movl x,    reg1d
                       movl y,    reg2d             movl y,    reg2d
                       addq reg2q,reg1q     or      leaq (reg1q,reg2q),reg1q
                       shrq $1,   reg1q             shrq $1,   reg1q

              ( reg1d and reg2d can be switched around in the first two instructions )

              To       movl x,    reg1d
                       addl y,    reg1d
                       rcrl $1,   reg1d

              This corresponds to the common expression (x + y) shr 1, where
              x and y are Cardinals (replacing "shr 1" with "div 2" produces
              smaller code, but won't account for x + y causing an overflow). [Kit]
            }
            if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
              { Change first MOV command to have the same register as the final output }
              taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
            else
              taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
            { Change second MOV command to an ADD command. This is easier than
              converting the existing command because it means we don't have to
              touch 'y', which might be a complicated reference, and also the
              fact that the third command might either be ADD or LEA. [Kit] }
            taicpu(hp1).opcode := A_ADD;
            { Delete old ADD/LEA instruction }
            asml.remove(hp2);
            hp2.free;
            { Convert "shrq $1, reg1q" to "rcr $1, reg1d"; RCR shifts the
              ADD's carry back in as the top bit, preserving bit 32 of x+y }
            taicpu(hp3).opcode := A_RCR;
            taicpu(hp3).changeopsize(S_L);
            setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
{$endif x86_64}
          end;
      end;
  2673. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  2674. var
  2675. hp1 : tai;
  2676. begin
  2677. Result:=false;
  2678. if (taicpu(p).ops >= 2) and
  2679. ((taicpu(p).oper[0]^.typ = top_const) or
  2680. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  2681. (taicpu(p).oper[1]^.typ = top_reg) and
  2682. ((taicpu(p).ops = 2) or
  2683. ((taicpu(p).oper[2]^.typ = top_reg) and
  2684. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  2685. GetLastInstruction(p,hp1) and
  2686. MatchInstruction(hp1,A_MOV,[]) and
  2687. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  2688. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  2689. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  2690. begin
  2691. TransferUsedRegs(TmpUsedRegs);
  2692. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  2693. { change
  2694. mov reg1,reg2
  2695. imul y,reg2 to imul y,reg1,reg2 }
  2696. begin
  2697. taicpu(p).ops := 3;
  2698. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  2699. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  2700. DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
  2701. asml.remove(hp1);
  2702. hp1.free;
  2703. result:=true;
  2704. end;
  2705. end;
  2706. end;
  2707. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  2708. var
  2709. hp1 : tai;
  2710. begin
  2711. {
  2712. change
  2713. jmp .L1
  2714. ...
  2715. .L1:
  2716. ret
  2717. into
  2718. ret
  2719. }
  2720. result:=false;
  2721. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  2722. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  2723. begin
  2724. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  2725. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  2726. MatchInstruction(hp1,A_RET,[S_NO]) then
  2727. begin
  2728. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  2729. taicpu(p).opcode:=A_RET;
  2730. taicpu(p).is_jmp:=false;
  2731. taicpu(p).ops:=taicpu(hp1).ops;
  2732. case taicpu(hp1).ops of
  2733. 0:
  2734. taicpu(p).clearop(0);
  2735. 1:
  2736. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  2737. else
  2738. internalerror(2016041301);
  2739. end;
  2740. result:=true;
  2741. end;
  2742. end;
  2743. end;
  2744. function CanBeCMOV(p : tai) : boolean;
  2745. begin
  2746. CanBeCMOV:=assigned(p) and
  2747. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  2748. { we can't use cmov ref,reg because
  2749. ref could be nil and cmov still throws an exception
  2750. if ref=nil but the mov isn't done (FK)
  2751. or ((taicpu(p).oper[0]^.typ = top_ref) and
  2752. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  2753. }
  2754. MatchOpType(taicpu(p),top_reg,top_reg);
  2755. end;
    { Second-pass conditional-jump optimizations:
        1) jb/jnb over inc/dec          -> cmc+adc/sbb resp. adc/sbb with 0
        2) Jcc straight to the next label -> jump removed
        3) (CPUs with CMOV) a Jcc over a run of reg,reg MOVs, optionally with
           an else-branch (jmp/label diamond), is converted to CMOVcc
           sequences and the branches/labels are cleaned up. }
    function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
      var
        hp1,hp2,hp3,hp4,hpmov2: tai;
        carryadd_opcode : TAsmOp;
        l : Longint;
        condition : TAsmCond;
        symbol: TAsmSymbol;
      begin
        result:=false;
        symbol:=nil;
        if GetNextInstruction(p,hp1) then
          begin
            symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);
            if (hp1.typ=ait_instruction) and
              GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
              (Tasmlabel(symbol) = Tai_label(hp2).labsym) then
              { jb @@1                            cmc
                inc/dec operand           -->     adc/sbb operand,0
                @@1:
                ... and ...
                jnb @@1
                inc/dec operand           -->     adc/sbb operand,0
                @@1: }
              begin
                carryadd_opcode:=A_NONE;
                { carry set: skip means "do nothing", so complement the carry
                  and fold the inc/dec into an add/sub-with-carry of 0 }
                if Taicpu(p).condition in [C_NAE,C_B] then
                  begin
                    if Taicpu(hp1).opcode=A_INC then
                      carryadd_opcode:=A_ADC;
                    if Taicpu(hp1).opcode=A_DEC then
                      carryadd_opcode:=A_SBB;
                    if carryadd_opcode<>A_NONE then
                      begin
                        { turn the jump itself into CMC }
                        Taicpu(p).clearop(0);
                        Taicpu(p).ops:=0;
                        Taicpu(p).is_jmp:=false;
                        Taicpu(p).opcode:=A_CMC;
                        Taicpu(p).condition:=C_NONE;
                        Taicpu(hp1).ops:=2;
                        Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                        Taicpu(hp1).loadconst(0,0);
                        Taicpu(hp1).opcode:=carryadd_opcode;
                        result:=true;
                        exit;
                      end;
                  end;
                { carry clear: the jump can simply be dropped }
                if Taicpu(p).condition in [C_AE,C_NB] then
                  begin
                    if Taicpu(hp1).opcode=A_INC then
                      carryadd_opcode:=A_ADC;
                    if Taicpu(hp1).opcode=A_DEC then
                      carryadd_opcode:=A_SBB;
                    if carryadd_opcode<>A_NONE then
                      begin
                        asml.remove(p);
                        p.free;
                        Taicpu(hp1).ops:=2;
                        Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                        Taicpu(hp1).loadconst(0,0);
                        Taicpu(hp1).opcode:=carryadd_opcode;
                        p:=hp1;
                        result:=true;
                        exit;
                      end;
                  end;
              end;
            if ((hp1.typ = ait_label) and (symbol = tai_label(hp1).labsym))
              or ((hp1.typ = ait_align) and GetNextInstruction(hp1, hp2) and (hp2.typ = ait_label) and (symbol = tai_label(hp2).labsym)) then
              begin
                { If Jcc is immediately followed by the label that it's supposed to jump to, remove it }
                DebugMsg(SPeepholeOptimization + 'Removed conditional jump whose destination was immediately after it', p);
                UpdateUsedRegs(hp1);
                TAsmLabel(symbol).decrefs;
                { if the label refs. reach zero, remove any alignment before the label }
                if (hp1.typ = ait_align) then
                  begin
                    UpdateUsedRegs(hp2);
                    if (TAsmLabel(symbol).getrefs = 0) then
                      begin
                        asml.Remove(hp1);
                        hp1.Free;
                      end;
                    hp1 := hp2; { Set hp1 to the label }
                  end;
                asml.remove(p);
                p.free;
                if (TAsmLabel(symbol).getrefs = 0) then
                  begin
                    GetNextInstruction(hp1, p); { Instruction following the label }
                    asml.remove(hp1);
                    hp1.free;
                    UpdateUsedRegs(p);
                    Result := True;
                  end
                else
                  begin
                    { We don't need to set the result to True because we know hp1
                      is a label and won't trigger any optimisation routines. [Kit] }
                    p := hp1;
                  end;
                Exit;
              end;
          end;
{$ifndef i8086}
        if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
          begin
            { check for
                jCC   xxx
                <several movs>
             xxx:
            }
            l:=0;
            GetNextInstruction(p, hp1);
            { count the CMOV-convertible MOVs following the jump }
            while assigned(hp1) and
              CanBeCMOV(hp1) and
              { stop on labels }
              not(hp1.typ=ait_label) do
              begin
                inc(l);
                GetNextInstruction(hp1,hp1);
              end;
            if assigned(hp1) then
              begin
                if FindLabel(tasmlabel(symbol),hp1) then
                  begin
                    { simple skip: 1..4 MOVs directly followed by the target }
                    if (l<=4) and (l>0) then
                      begin
                        condition:=inverse_cond(taicpu(p).condition);
                        GetNextInstruction(p,hp1);
                        repeat
                          if not Assigned(hp1) then
                            InternalError(2018062900);
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          UpdateUsedRegs(hp1);
                          GetNextInstruction(hp1,hp1);
                        until not(CanBeCMOV(hp1));
                        { Don't decrement the reference count on the label yet, otherwise
                          GetNextInstruction might skip over the label if it drops to
                          zero. }
                        GetNextInstruction(hp1,hp2);
                        { if the label refs. reach zero, remove any alignment before the label }
                        if (hp1.typ = ait_align) and (hp2.typ = ait_label) then
                          begin
                            { Ref = 1 means it will drop to zero }
                            if (tasmlabel(symbol).getrefs=1) then
                              begin
                                asml.Remove(hp1);
                                hp1.Free;
                              end;
                          end
                        else
                          hp2 := hp1;
                        if not Assigned(hp2) then
                          InternalError(2018062910);
                        if (hp2.typ <> ait_label) then
                          begin
                            { There's something other than CMOVs here.  Move the original jump
                              to right before this point, then break out.

                              Originally this was part of the above internal error, but it got
                              triggered on the bootstrapping process sometimes. Investigate. [Kit] }
                            asml.remove(p);
                            asml.insertbefore(p, hp2);
                            DebugMsg('Jcc/CMOVcc drop-out', p);
                            UpdateUsedRegs(p);
                            Result := True;
                            Exit;
                          end;
                        { Now we can safely decrement the reference count }
                        tasmlabel(symbol).decrefs;
                        { Remove the original jump }
                        asml.Remove(p);
                        p.Free;
                        GetNextInstruction(hp2, p); { Instruction after the label }
                        { Remove the label if this is its final reference }
                        if (tasmlabel(symbol).getrefs=0) then
                          begin
                            asml.remove(hp2);
                            hp2.free;
                          end;
                        if Assigned(p) then
                          begin
                            UpdateUsedRegs(p);
                            result:=true;
                          end;
                        exit;
                      end;
                  end
                else
                  begin
                    { check further for
                        jCC   xxx
                        <several movs 1>
                        jmp   yyy
                xxx:
                        <several movs 2>
                yyy:
                    }
                    { hp2 points to jmp yyy }
                    hp2:=hp1;
                    { skip hp1 to xxx (or an align right before it) }
                    GetNextInstruction(hp1, hp1);
                    if assigned(hp2) and
                      assigned(hp1) and
                      (l<=3) and
                      (hp2.typ=ait_instruction) and
                      (taicpu(hp2).is_jmp) and
                      (taicpu(hp2).condition=C_None) and
                      { real label and jump, no further references to the
                        label are allowed }
                      (tasmlabel(symbol).getrefs=1) and
                      FindLabel(tasmlabel(symbol),hp1) then
                      begin
                        l:=0;
                        { skip hp1 to <several moves 2> }
                        if (hp1.typ = ait_align) then
                          GetNextInstruction(hp1, hp1);
                        GetNextInstruction(hp1, hpmov2);
                        hp1 := hpmov2;
                        { count the MOVs in the else-branch }
                        while assigned(hp1) and
                          CanBeCMOV(hp1) do
                          begin
                            inc(l);
                            GetNextInstruction(hp1, hp1);
                          end;
                        { hp1 points to yyy (or an align right before it) }
                        hp3 := hp1;
                        if assigned(hp1) and
                          FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                          begin
                            { then-branch MOVs get the inverted condition ... }
                            condition:=inverse_cond(taicpu(p).condition);
                            GetNextInstruction(p,hp1);
                            repeat
                              taicpu(hp1).opcode:=A_CMOVcc;
                              taicpu(hp1).condition:=condition;
                              UpdateUsedRegs(hp1);
                              GetNextInstruction(hp1,hp1);
                            until not(assigned(hp1)) or
                              not(CanBeCMOV(hp1));
                            { ... and else-branch MOVs the original one }
                            condition:=inverse_cond(condition);
                            hp1 := hpmov2;
                            { hp1 is now at <several movs 2> }
                            while Assigned(hp1) and CanBeCMOV(hp1) do
                              begin
                                taicpu(hp1).opcode:=A_CMOVcc;
                                taicpu(hp1).condition:=condition;
                                UpdateUsedRegs(hp1);
                                GetNextInstruction(hp1,hp1);
                              end;
                            hp1 := p;
                            { Get first instruction after label }
                            GetNextInstruction(hp3, p);
                            if assigned(p) and (hp3.typ = ait_align) then
                              GetNextInstruction(p, p);
                            { Don't dereference yet, as doing so will cause
                              GetNextInstruction to skip the label and
                              optional align marker. [Kit] }
                            GetNextInstruction(hp2, hp4);
                            { remove jCC }
                            asml.remove(hp1);
                            hp1.free;
                            { Remove label xxx (it will have a ref of zero due to the initial check }
                            if (hp4.typ = ait_align) then
                              begin
                                { Account for alignment as well }
                                GetNextInstruction(hp4, hp1);
                                asml.remove(hp1);
                                hp1.free;
                              end;
                            asml.remove(hp4);
                            hp4.free;
                            { Now we can safely decrement it }
                            tasmlabel(symbol).decrefs;
                            { remove jmp }
                            symbol := taicpu(hp2).oper[0]^.ref^.symbol;
                            asml.remove(hp2);
                            hp2.free;
                            { Remove label yyy (and the optional alignment) if its reference will fall to zero }
                            if tasmlabel(symbol).getrefs = 1 then
                              begin
                                if (hp3.typ = ait_align) then
                                  begin
                                    { Account for alignment as well }
                                    GetNextInstruction(hp3, hp1);
                                    asml.remove(hp1);
                                    hp1.free;
                                  end;
                                asml.remove(hp3);
                                hp3.free;
                                { As before, now we can safely decrement it }
                                tasmlabel(symbol).decrefs;
                              end;
                            if Assigned(p) then
                              begin
                                UpdateUsedRegs(p);
                                result:=true;
                              end;
                            exit;
                          end;
                      end;
                  end;
              end;
          end;
{$endif i8086}
      end;
  3061. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  3062. var
  3063. hp1,hp2: tai;
  3064. begin
  3065. result:=false;
  3066. if (taicpu(p).oper[1]^.typ = top_reg) and
  3067. GetNextInstruction(p,hp1) and
  3068. (hp1.typ = ait_instruction) and
  3069. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  3070. GetNextInstruction(hp1,hp2) and
  3071. MatchInstruction(hp2,A_MOV,[]) and
  3072. (taicpu(hp2).oper[0]^.typ = top_reg) and
  3073. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  3074. {$ifdef i386}
  3075. { not all registers have byte size sub registers on i386 }
  3076. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  3077. {$endif i386}
  3078. (((taicpu(hp1).ops=2) and
  3079. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  3080. ((taicpu(hp1).ops=1) and
  3081. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  3082. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  3083. begin
  3084. { change movsX/movzX reg/ref, reg2
  3085. add/sub/or/... reg3/$const, reg2
  3086. mov reg2 reg/ref
  3087. to add/sub/or/... reg3/$const, reg/ref }
  3088. { by example:
  3089. movswl %si,%eax movswl %si,%eax p
  3090. decl %eax addl %edx,%eax hp1
  3091. movw %ax,%si movw %ax,%si hp2
  3092. ->
  3093. movswl %si,%eax movswl %si,%eax p
  3094. decw %eax addw %edx,%eax hp1
  3095. movw %ax,%si movw %ax,%si hp2
  3096. }
  3097. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  3098. {
  3099. ->
  3100. movswl %si,%eax movswl %si,%eax p
  3101. decw %si addw %dx,%si hp1
  3102. movw %ax,%si movw %ax,%si hp2
  3103. }
  3104. case taicpu(hp1).ops of
  3105. 1:
  3106. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  3107. 2:
  3108. begin
  3109. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  3110. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  3111. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  3112. end;
  3113. else
  3114. internalerror(2008042701);
  3115. end;
  3116. {
  3117. ->
  3118. decw %si addw %dx,%si p
  3119. }
  3120. DebugMsg(SPeepholeOptimization + 'var3',p);
  3121. asml.remove(p);
  3122. asml.remove(hp2);
  3123. p.free;
  3124. hp2.free;
  3125. p:=hp1;
  3126. end
  3127. else if taicpu(p).opcode=A_MOVZX then
  3128. begin
  3129. { removes superfluous And's after movzx's }
  3130. if (taicpu(p).oper[1]^.typ = top_reg) and
  3131. GetNextInstruction(p, hp1) and
  3132. (tai(hp1).typ = ait_instruction) and
  3133. (taicpu(hp1).opcode = A_AND) and
  3134. (taicpu(hp1).oper[0]^.typ = top_const) and
  3135. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3136. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3137. begin
  3138. case taicpu(p).opsize Of
  3139. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  3140. if (taicpu(hp1).oper[0]^.val = $ff) then
  3141. begin
  3142. DebugMsg(SPeepholeOptimization + 'var4',p);
  3143. asml.remove(hp1);
  3144. hp1.free;
  3145. end;
  3146. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  3147. if (taicpu(hp1).oper[0]^.val = $ffff) then
  3148. begin
  3149. DebugMsg(SPeepholeOptimization + 'var5',p);
  3150. asml.remove(hp1);
  3151. hp1.free;
  3152. end;
  3153. {$ifdef x86_64}
  3154. S_LQ:
  3155. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  3156. begin
  3157. if (cs_asm_source in current_settings.globalswitches) then
  3158. asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
  3159. asml.remove(hp1);
  3160. hp1.Free;
  3161. end;
  3162. {$endif x86_64}
  3163. else
  3164. ;
  3165. end;
  3166. end;
  3167. { changes some movzx constructs to faster synonims (all examples
  3168. are given with eax/ax, but are also valid for other registers)}
  3169. if (taicpu(p).oper[1]^.typ = top_reg) then
  3170. if (taicpu(p).oper[0]^.typ = top_reg) then
  3171. case taicpu(p).opsize of
  3172. S_BW:
  3173. begin
  3174. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3175. not(cs_opt_size in current_settings.optimizerswitches) then
  3176. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  3177. begin
  3178. taicpu(p).opcode := A_AND;
  3179. taicpu(p).changeopsize(S_W);
  3180. taicpu(p).loadConst(0,$ff);
  3181. DebugMsg(SPeepholeOptimization + 'var7',p);
  3182. end
  3183. else if GetNextInstruction(p, hp1) and
  3184. (tai(hp1).typ = ait_instruction) and
  3185. (taicpu(hp1).opcode = A_AND) and
  3186. (taicpu(hp1).oper[0]^.typ = top_const) and
  3187. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3188. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3189. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  3190. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  3191. begin
  3192. DebugMsg(SPeepholeOptimization + 'var8',p);
  3193. taicpu(p).opcode := A_MOV;
  3194. taicpu(p).changeopsize(S_W);
  3195. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  3196. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3197. end;
  3198. end;
  3199. S_BL:
  3200. begin
  3201. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3202. not(cs_opt_size in current_settings.optimizerswitches) then
  3203. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  3204. begin
  3205. taicpu(p).opcode := A_AND;
  3206. taicpu(p).changeopsize(S_L);
  3207. taicpu(p).loadConst(0,$ff)
  3208. end
  3209. else if GetNextInstruction(p, hp1) and
  3210. (tai(hp1).typ = ait_instruction) and
  3211. (taicpu(hp1).opcode = A_AND) and
  3212. (taicpu(hp1).oper[0]^.typ = top_const) and
  3213. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3214. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3215. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  3216. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  3217. begin
  3218. DebugMsg(SPeepholeOptimization + 'var10',p);
  3219. taicpu(p).opcode := A_MOV;
  3220. taicpu(p).changeopsize(S_L);
  3221. { do not use R_SUBWHOLE
  3222. as movl %rdx,%eax
  3223. is invalid in assembler PM }
  3224. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  3225. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3226. end
  3227. end;
  3228. {$ifndef i8086}
  3229. S_WL:
  3230. begin
  3231. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  3232. not(cs_opt_size in current_settings.optimizerswitches) then
  3233. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  3234. begin
  3235. DebugMsg(SPeepholeOptimization + 'var11',p);
  3236. taicpu(p).opcode := A_AND;
  3237. taicpu(p).changeopsize(S_L);
  3238. taicpu(p).loadConst(0,$ffff);
  3239. end
  3240. else if GetNextInstruction(p, hp1) and
  3241. (tai(hp1).typ = ait_instruction) and
  3242. (taicpu(hp1).opcode = A_AND) and
  3243. (taicpu(hp1).oper[0]^.typ = top_const) and
  3244. (taicpu(hp1).oper[1]^.typ = top_reg) and
  3245. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3246. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  3247. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  3248. begin
  3249. DebugMsg(SPeepholeOptimization + 'var12',p);
  3250. taicpu(p).opcode := A_MOV;
  3251. taicpu(p).changeopsize(S_L);
  3252. { do not use R_SUBWHOLE
  3253. as movl %rdx,%eax
  3254. is invalid in assembler PM }
  3255. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  3256. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  3257. end;
  3258. end;
  3259. {$endif i8086}
  3260. else
  3261. ;
  3262. end
  3263. else if (taicpu(p).oper[0]^.typ = top_ref) then
  3264. begin
  3265. if GetNextInstruction(p, hp1) and
  3266. (tai(hp1).typ = ait_instruction) and
  3267. (taicpu(hp1).opcode = A_AND) and
  3268. MatchOpType(taicpu(hp1),top_const,top_reg) and
  3269. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  3270. begin
  3271. //taicpu(p).opcode := A_MOV;
  3272. case taicpu(p).opsize Of
  3273. S_BL:
  3274. begin
  3275. DebugMsg(SPeepholeOptimization + 'var13',p);
  3276. taicpu(hp1).changeopsize(S_L);
  3277. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3278. end;
  3279. S_WL:
  3280. begin
  3281. DebugMsg(SPeepholeOptimization + 'var14',p);
  3282. taicpu(hp1).changeopsize(S_L);
  3283. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  3284. end;
  3285. S_BW:
  3286. begin
  3287. DebugMsg(SPeepholeOptimization + 'var15',p);
  3288. taicpu(hp1).changeopsize(S_W);
  3289. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  3290. end;
  3291. {$ifdef x86_64}
  3292. S_BQ:
  3293. begin
  3294. DebugMsg(SPeepholeOptimization + 'var16',p);
  3295. taicpu(hp1).changeopsize(S_Q);
  3296. taicpu(hp1).loadConst(
  3297. 0, taicpu(hp1).oper[0]^.val and $ff);
  3298. end;
  3299. S_WQ:
  3300. begin
  3301. DebugMsg(SPeepholeOptimization + 'var17',p);
  3302. taicpu(hp1).changeopsize(S_Q);
  3303. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  3304. end;
  3305. S_LQ:
  3306. begin
  3307. DebugMsg(SPeepholeOptimization + 'var18',p);
  3308. taicpu(hp1).changeopsize(S_Q);
  3309. taicpu(hp1).loadConst(
  3310. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  3311. end;
  3312. {$endif x86_64}
  3313. else
  3314. Internalerror(2017050704)
  3315. end;
  3316. end;
  3317. end;
  3318. end;
  3319. end;
  3320. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  3321. var
  3322. hp1 : tai;
  3323. MaskLength : Cardinal;
  3324. begin
  3325. Result:=false;
  3326. if GetNextInstruction(p, hp1) then
  3327. begin
  3328. if MatchOpType(taicpu(p),top_const,top_reg) and
  3329. MatchInstruction(hp1,A_AND,[]) and
  3330. MatchOpType(taicpu(hp1),top_const,top_reg) and
  3331. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  3332. { the second register must contain the first one, so compare their subreg types }
  3333. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  3334. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  3335. { change
  3336. and const1, reg
  3337. and const2, reg
  3338. to
  3339. and (const1 and const2), reg
  3340. }
  3341. begin
  3342. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  3343. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  3344. asml.remove(p);
  3345. p.Free;
  3346. p:=hp1;
  3347. Result:=true;
  3348. exit;
  3349. end
  3350. else if MatchOpType(taicpu(p),top_const,top_reg) and
  3351. MatchInstruction(hp1,A_MOVZX,[]) and
  3352. (taicpu(hp1).oper[0]^.typ = top_reg) and
  3353. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  3354. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  3355. (((taicpu(p).opsize=S_W) and
  3356. (taicpu(hp1).opsize=S_BW)) or
  3357. ((taicpu(p).opsize=S_L) and
  3358. (taicpu(hp1).opsize in [S_WL,S_BL]))
  3359. {$ifdef x86_64}
  3360. or
  3361. ((taicpu(p).opsize=S_Q) and
  3362. (taicpu(hp1).opsize in [S_BQ,S_WQ]))
  3363. {$endif x86_64}
  3364. ) then
  3365. begin
  3366. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  3367. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  3368. ) or
  3369. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  3370. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  3371. then
  3372. begin
  3373. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  3374. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  3375. code that tests for the presence of AND 0xffffffff followed by MOVZX is
  3376. wasted, and is indictive of a compiler bug if it were triggered. [Kit]
  3377. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  3378. }
  3379. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  3380. asml.remove(hp1);
  3381. hp1.free;
  3382. Exit;
  3383. end;
  3384. end
  3385. else if MatchOpType(taicpu(p),top_const,top_reg) and
  3386. MatchInstruction(hp1,A_SHL,[]) and
  3387. MatchOpType(taicpu(hp1),top_const,top_reg) and
  3388. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  3389. begin
  3390. {$ifopt R+}
  3391. {$define RANGE_WAS_ON}
  3392. {$R-}
  3393. {$endif}
  3394. { get length of potential and mask }
  3395. MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
  3396. { really a mask? }
  3397. {$ifdef RANGE_WAS_ON}
  3398. {$R+}
  3399. {$endif}
  3400. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  3401. { unmasked part shifted out? }
  3402. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  3403. begin
  3404. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  3405. { take care of the register (de)allocs following p }
  3406. UpdateUsedRegs(tai(p.next));
  3407. asml.remove(p);
  3408. p.free;
  3409. p:=hp1;
  3410. Result:=true;
  3411. exit;
  3412. end;
  3413. end
  3414. else if MatchOpType(taicpu(p),top_const,top_reg) and
  3415. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  3416. (taicpu(hp1).oper[0]^.typ = top_reg) and
  3417. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  3418. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  3419. (((taicpu(p).opsize=S_W) and
  3420. (taicpu(hp1).opsize=S_BW)) or
  3421. ((taicpu(p).opsize=S_L) and
  3422. (taicpu(hp1).opsize in [S_WL,S_BL]))
  3423. {$ifdef x86_64}
  3424. or
  3425. ((taicpu(p).opsize=S_Q) and
  3426. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  3427. {$endif x86_64}
  3428. ) then
  3429. begin
  3430. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  3431. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  3432. ) or
  3433. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  3434. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  3435. {$ifdef x86_64}
  3436. or
  3437. (((taicpu(hp1).opsize)=S_LQ) and
  3438. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  3439. )
  3440. {$endif x86_64}
  3441. then
  3442. begin
  3443. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  3444. asml.remove(hp1);
  3445. hp1.free;
  3446. Exit;
  3447. end;
  3448. end
  3449. else if (taicpu(p).oper[1]^.typ = top_reg) and
  3450. (hp1.typ = ait_instruction) and
  3451. (taicpu(hp1).is_jmp) and
  3452. (taicpu(hp1).opcode<>A_JMP) and
  3453. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  3454. begin
  3455. { change
  3456. and x, reg
  3457. jxx
  3458. to
  3459. test x, reg
  3460. jxx
  3461. if reg is deallocated before the
  3462. jump, but only if it's a conditional jump (PFV)
  3463. }
  3464. taicpu(p).opcode := A_TEST;
  3465. Exit;
  3466. end;
  3467. end;
  3468. { Lone AND tests }
  3469. if MatchOpType(taicpu(p),top_const,top_reg) then
  3470. begin
  3471. {
  3472. - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
  3473. - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
  3474. - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
  3475. }
  3476. if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
  3477. ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
  3478. ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
  3479. begin
  3480. taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg)
  3481. end;
  3482. end;
  3483. end;
  3484. function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
  3485. begin
  3486. Result:=false;
  3487. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  3488. MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  3489. (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
  3490. begin
  3491. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
  3492. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
  3493. taicpu(p).opcode:=A_ADD;
  3494. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
  3495. result:=true;
  3496. end
  3497. else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
  3498. MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
  3499. (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
  3500. begin
  3501. taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
  3502. taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
  3503. taicpu(p).opcode:=A_ADD;
  3504. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
  3505. result:=true;
  3506. end;
  3507. end;
  3508. function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
  3509. var
  3510. Value, RegName: string;
  3511. begin
  3512. Result:=false;
  3513. if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
  3514. begin
  3515. case taicpu(p).oper[0]^.val of
  3516. 0:
  3517. { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
  3518. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  3519. begin
  3520. { change "mov $0,%reg" into "xor %reg,%reg" }
  3521. taicpu(p).opcode := A_XOR;
  3522. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  3523. Result := True;
  3524. end;
  3525. $1..$FFFFFFFF:
  3526. begin
  3527. { Code size reduction by J. Gareth "Kit" Moreton }
  3528. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  3529. case taicpu(p).opsize of
  3530. S_Q:
  3531. begin
  3532. RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
  3533. Value := debug_tostr(taicpu(p).oper[0]^.val);
  3534. { The actual optimization }
  3535. setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
  3536. taicpu(p).changeopsize(S_L);
  3537. DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
  3538. Result := True;
  3539. end;
  3540. else
  3541. ;
  3542. end;
  3543. end;
  3544. end;
  3545. end;
  3546. end;
  3547. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  3548. begin
  3549. Result:=false;
  3550. { change "cmp $0, %reg" to "test %reg, %reg" }
  3551. if MatchOpType(taicpu(p),top_const,top_reg) and
  3552. (taicpu(p).oper[0]^.val = 0) then
  3553. begin
  3554. taicpu(p).opcode := A_TEST;
  3555. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  3556. Result:=true;
  3557. end;
  3558. end;
{ Post-pass optimisation that removes a redundant TEST/OR self-compare.
  In the sequence
      and/or/xor/add/sub/shl/inc/dec/neg...  $x, %y
      test/or %y, %y   |   test $-1, %y      (x)
      setcc/jcc/cmovcc
  line (x) is removed because the arithmetic instruction already set the
  flags the condition reads; for ADD/SUB and the shifts this is restricted
  to Z/NZ/E/NE, where carry and overflow do not matter.  INC/DEC are first
  rewritten to ADD/SUB $1, since INC/DEC leave the carry flag untouched.
  When nothing can be removed, "test $-1,%reg" is canonicalised to
  "test %reg,%reg".  Result is True when p was removed. }
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;
    hp1,hp2 : tai;
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y  |  test $-1, %y    (x)
      j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
               ((taicpu(hp1).opcode <> A_ADD) and
                (taicpu(hp1).opcode <> A_SUB))) then
              begin
                { drop the redundant test/or and continue at the next instruction }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for }
              { shifts by a (nonzero) constant }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                case taicpu(hp1).opcode of
                  A_DEC, A_INC:
                    { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                    begin
                      case taicpu(hp1).opcode Of
                        A_DEC: taicpu(hp1).opcode := A_SUB;
                        A_INC: taicpu(hp1).opcode := A_ADD;
                        else
                          ;
                      end;
                      { turn the one-operand form into "add/sub $1,%y" }
                      taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                      taicpu(hp1).loadConst(0,1);
                      taicpu(hp1).ops:=2;
                    end;
                  else
                    ;
                end;
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end
      else
        { change "test $-1,%reg" into "test %reg,%reg" }
        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end { case }
    { change "test $-1,%reg" into "test %reg,%reg" }
    else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  end;
{ Post-pass optimisation of CALL:
    - (i386 only, pre-PentiumII CPUs, non-PIC)
        call x; jmp y  ->  push y; jmp x
    - (optimisation level 4, non-PIC)
        call x; ret    ->  jmp x    (tail call; destroys stack back traces)
  Result is True when an instruction was removed. }
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1 : tai;
{$ifndef x86_64}
    hp2 : taicpu;
{$endif x86_64}
  begin
    Result:=false;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_JMP,[S_NO]) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { push the jump target as the new return address, then jump to the
          callee in place of calling it }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end
    else
{$endif x86_64}
    { replace
        call procname
        ret
      by
        jmp procname

      this should never hurt except when pic is used, not sure
      how to handle it then

      but do it only on level 4 because it destroys stack back traces
    }
    if (cs_opt_level4 in current_settings.optimizerswitches) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_RET,[S_NO]) and
      (taicpu(hp1).ops=0) then
      begin
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end;
  end;
  3704. {$ifdef x86_64}
{ Post-pass optimisation (x86_64): shrink MOVZBQ/MOVZWQ to MOVZBL/MOVZWL.
  Writing a 32-bit destination implicitly zeroes the upper 32 bits, so the
  64-bit form is redundant; using the 32-bit form removes the REX prefix
  when the destination is one of the eight legacy registers.
  NOTE(review): Result is never set to True even when the instruction is
  rewritten (unlike PostPeepholeOptMov) - confirm callers do not rely on
  the return value. }
function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
  var
    PreMessage: string;
  begin
    Result := False;
    { Code size reduction by J. Gareth "Kit" Moreton }
    { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
    if (taicpu(p).opsize in [S_BQ, S_WQ]) and
      (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
      then
      begin
        { Has 64-bit register name and opcode suffix }
        PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
        { The actual optimization }
        setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
        if taicpu(p).opsize = S_BQ then
          taicpu(p).changeopsize(S_BL)
        else
          taicpu(p).changeopsize(S_WL);
        DebugMsg(SPeepholeOptimization + PreMessage +
          debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
      end;
  end;
{ Post-pass optimisation (x86_64): shrink "xorq %reg,%reg" to
  "xorl %reg,%reg" for the eight legacy registers.  The 32-bit write
  zeroes the whole 64-bit register, and the 32-bit form drops the REX
  prefix.  Only the self-xor (register-zeroing) idiom is handled.
  NOTE(review): Result is never set to True even when the rewrite fires -
  confirm callers do not rely on the return value. }
function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  var
    PreMessage, RegName: string;
  begin
    { Code size reduction by J. Gareth "Kit" Moreton }
    { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
      as this removes the REX prefix }
    Result := False;
    if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
      Exit;
    if taicpu(p).oper[0]^.typ <> top_reg then
      { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
      InternalError(2018011500);
    case taicpu(p).opsize of
      S_Q:
        begin
          if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
            begin
              RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
              PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';
              { The actual optimization }
              setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
              setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
              taicpu(p).changeopsize(S_L);
              RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
              DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
            end;
        end;
      else
        ;
    end;
  end;
  3760. {$endif}
  3761. procedure TX86AsmOptimizer.OptReferences;
  3762. var
  3763. p: tai;
  3764. i: Integer;
  3765. begin
  3766. p := BlockStart;
  3767. while (p <> BlockEnd) Do
  3768. begin
  3769. if p.typ=ait_instruction then
  3770. begin
  3771. for i:=0 to taicpu(p).ops-1 do
  3772. if taicpu(p).oper[i]^.typ=top_ref then
  3773. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  3774. end;
  3775. p:=tai(p.next);
  3776. end;
  3777. end;
  3778. end.