{
    Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe

    This unit contains the peephole optimizer.

This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit aoptx86;

{$i fpcdefs.inc}

{ $define DEBUG_AOPTCPU}

  interface

    uses
      globtype,
      cpubase,
      aasmtai,aasmcpu,
      cgbase,cgutils,
      aopt,aoptobj;

    type
      TX86AsmOptimizer = class(TAsmOptimizer)
        function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
        { forwards to RegReadByInstruction }
        function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
        { returns true if hp is an instruction that reads reg, either through an
          explicit operand, through a memory reference using reg, or implicitly
          (e.g. EAX/EDX for MUL/DIV, individual flags for conditional
          instructions); returns false if hp is not an instruction and true
          for every CALL }
        function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
      protected
        { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
        function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
        { checks whether reading the value in reg1 depends on the value of reg2. This
          is very similar to SuperRegistersEqual, except it takes into account that
          R_SUBH and R_SUBL are independent (e.g. reading from AL does not
          depend on the value in AH). }
        function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
        { if DEBUG_AOPTCPU is defined, inserts s as an assembler comment in
          front of p; otherwise does nothing }
        procedure DebugMsg(const s : string; p : tai);inline;

        class function IsExitCode(p : tai) : boolean;
        class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
        procedure RemoveLastDeallocForFuncRes(p : tai);

        function DoSubAddOpt(var p : tai) : Boolean;

        function PrePeepholeOptSxx(var p : tai) : boolean;

        function OptPass1AND(var p : tai) : boolean;
        function OptPass1VMOVAP(var p : tai) : boolean;
        function OptPass1VOP(var p : tai) : boolean;
        function OptPass1MOV(var p : tai) : boolean;
        function OptPass1Movx(var p : tai) : boolean;
        function OptPass1MOVAP(var p : tai) : boolean;
        function OptPass1MOVXX(var p : tai) : boolean;
        function OptPass1OP(var p : tai) : boolean;
        function OptPass1LEA(var p : tai) : boolean;
        function OptPass1Sub(var p : tai) : boolean;
        function OptPass1SHLSAL(var p : tai) : boolean;
        function OptPass1SETcc(var p: tai): boolean;

        function OptPass2MOV(var p : tai) : boolean;
        function OptPass2Imul(var p : tai) : boolean;
        function OptPass2Jmp(var p : tai) : boolean;
        function OptPass2Jcc(var p : tai) : boolean;

        function PostPeepholeOptMov(var p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
        function PostPeepholeOptMovzx(var p : tai) : Boolean;
        function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
        function PostPeepholeOptCmp(var p : tai) : Boolean;
        function PostPeepholeOptTestOr(var p : tai) : Boolean;
        function PostPeepholeOptCall(var p : tai) : Boolean;
        function PostPeepholeOptLea(var p : tai) : Boolean;

        procedure OptReferences;
      end;

    { MatchInstruction: returns true if instr is an instruction (ait_instruction)
      whose opcode equals (one of) the given opcode(s) and whose operand size is
      contained in opsize; an empty opsize set accepts every operand size }
    function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

    { returns true if oper is a register operand holding exactly reg }
    function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    { returns true if oper is a constant operand with value a }
    function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    { returns true if both operands have the same type and the same constant
      value, register or (as decided by RefsEqual) reference; operands of any
      other type that have matching types trigger internalerror 2013102801 }
    function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

    { returns true if offset, segment, base, index, scalefactor, symbol,
      refaddr and relsymbol of both references are equal }
    function RefsEqual(const r1, r2: treference): boolean;

    { returns true, if ref has no offset, no segment, no symbol/relsymbol, a
      scalefactor of 0 or 1 and uses the registers passed as base and index;
      passing NR_INVALID for base or index skips the check of that register }
    function MatchReference(const ref : treference;base,index : TRegister) : Boolean;

    { returns true, if ref is a reference using only the registers passed as base and index
      and having an offset (the offset itself is not checked, so any value,
      including zero, is accepted) }
    function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;

{$ifdef DEBUG_AOPTCPU}
  const
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
  { Empty strings help the optimizer to remove string concatenations that won't
    ever appear to the user on release builds.
[Kit] }  const    SPeepholeOptimization = '';{$endif DEBUG_AOPTCPU}  implementation    uses      cutils,verbose,      globals,      cpuinfo,      procinfo,      aasmbase,      aoptutils,      symconst,symsym,      cgx86,      itcpugas;    function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;      begin        result :=          (instr.typ = ait_instruction) and          (taicpu(instr).opcode = op) and          ((opsize = []) or (taicpu(instr).opsize in opsize));      end;    function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;      begin        result :=          (instr.typ = ait_instruction) and          ((taicpu(instr).opcode = op1) or           (taicpu(instr).opcode = op2)          ) and          ((opsize = []) or (taicpu(instr).opsize in opsize));      end;    function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;      begin        result :=          (instr.typ = ait_instruction) and          ((taicpu(instr).opcode = op1) or           (taicpu(instr).opcode = op2) or           (taicpu(instr).opcode = op3)          ) and          ((opsize = []) or (taicpu(instr).opsize in opsize));      end;    function MatchInstruction(const instr : tai;const ops : array of TAsmOp;     const opsize : topsizes) : boolean;      var        op : TAsmOp;      begin        result:=false;        for op in ops do          begin            if (instr.typ = ait_instruction) and               (taicpu(instr).opcode = op) and               ((opsize = []) or (taicpu(instr).opsize in opsize)) then               begin                 result:=true;                 exit;               end;          end;      end;    function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;      begin        result := (oper.typ = top_reg) and (oper.reg = reg);      end;    function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;      
begin        result := (oper.typ = top_const) and (oper.val = a);      end;    function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;      begin        result := oper1.typ = oper2.typ;        if result then          case oper1.typ of            top_const:              Result:=oper1.val = oper2.val;            top_reg:              Result:=oper1.reg = oper2.reg;            top_ref:              Result:=RefsEqual(oper1.ref^, oper2.ref^);            else              internalerror(2013102801);          end      end;    function RefsEqual(const r1, r2: treference): boolean;      begin        RefsEqual :=          (r1.offset = r2.offset) and          (r1.segment = r2.segment) and (r1.base = r2.base) and          (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and          (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and          (r1.relsymbol = r2.relsymbol);      end;    function MatchReference(const ref : treference;base,index : TRegister) : Boolean;      begin       Result:=(ref.offset=0) and         (ref.scalefactor in [0,1]) and         (ref.segment=NR_NO) and         (ref.symbol=nil) and         (ref.relsymbol=nil) and         ((base=NR_INVALID) or          (ref.base=base)) and         ((index=NR_INVALID) or          (ref.index=index));      end;    function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;      begin       Result:=(ref.scalefactor in [0,1]) and         (ref.segment=NR_NO) and         (ref.symbol=nil) and         (ref.relsymbol=nil) and         ((base=NR_INVALID) or          (ref.base=base)) and         ((index=NR_INVALID) or          (ref.index=index));      end;    function InstrReadsFlags(p: tai): boolean;      var        l: longint;      begin        InstrReadsFlags := true;        case p.typ of          ait_instruction:            if InsProp[taicpu(p).opcode].Ch*               [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,         
       Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,                Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then              exit;          ait_label:            exit;        end;        InstrReadsFlags := false;      end;  function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;    begin      Result:=RegReadByInstruction(reg,hp);    end;  function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;    var      p: taicpu;      opcount: longint;    begin      RegReadByInstruction := false;      if hp.typ <> ait_instruction then        exit;      p := taicpu(hp);      case p.opcode of        A_CALL:          regreadbyinstruction := true;        A_IMUL:          case p.ops of            1:              regReadByInstruction := RegInOp(reg,p.oper[0]^) or                 (                  ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and                  ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))                 );            2,3:              regReadByInstruction :=                reginop(reg,p.oper[0]^) or                reginop(reg,p.oper[1]^);          end;        A_MUL:          begin            regReadByInstruction := RegInOp(reg,p.oper[0]^) or               (                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))               );          end;        A_IDIV,A_DIV:          begin            regReadByInstruction := RegInOp(reg,p.oper[0]^) or               (                 (getregtype(reg)=R_INTREGISTER) and                 (                   (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))                 )               );          end;        else          begin            if (p.opcode=A_LEA) and is_segment_reg(reg) then              begin                RegReadByInstruction := false;                exit;              
end;            for opcount := 0 to p.ops-1 do              if (p.oper[opCount]^.typ = top_ref) and                 RegInRef(reg,p.oper[opcount]^.ref^) then                begin                  RegReadByInstruction := true;                  exit                end;            { special handling for SSE MOVSD }            if (p.opcode=A_MOVSD) and (p.ops>0) then              begin                if p.ops<>2 then                  internalerror(2017042702);                regReadByInstruction := reginop(reg,p.oper[0]^) or                  (                   (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)                  );                exit;              end;            with insprop[p.opcode] do              begin                if getregtype(reg)=R_INTREGISTER then                  begin                    case getsupreg(reg) of                      RS_EAX:                        if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then                          begin                            RegReadByInstruction := true;                            exit                          end;                      RS_ECX:                        if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then                          begin                            RegReadByInstruction := true;                            exit                          end;                      RS_EDX:                        if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then                          begin                            RegReadByInstruction := true;                            exit                          end;                      RS_EBX:                        if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then                          begin                            RegReadByInstruction := true;                            exit                          end;                      RS_ESP:                        if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then                          begin                     
       RegReadByInstruction := true;                            exit                          end;                      RS_EBP:                        if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then                          begin                            RegReadByInstruction := true;                            exit                          end;                      RS_ESI:                        if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then                          begin                            RegReadByInstruction := true;                            exit                          end;                      RS_EDI:                        if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then                          begin                            RegReadByInstruction := true;                            exit                          end;                    end;                  end;                if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then                  begin                    if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then                      begin                        case p.condition of                          C_A,C_NBE,       { CF=0 and ZF=0  }                          C_BE,C_NA:       { CF=1 or ZF=1   }                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];                          C_AE,C_NB,C_NC,  { CF=0           }                          C_B,C_NAE,C_C:   { CF=1           }                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];                          C_NE,C_NZ,       { ZF=0           }                          C_E,C_Z:         { ZF=1           }                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];                          C_G,C_NLE,       { ZF=0 and SF=OF }                          C_LE,C_NG:       { ZF=1 or SF<>OF }                            RegReadByInstruction:=getsubreg(reg) in 
[R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];                          C_GE,C_NL,       { SF=OF          }                          C_L,C_NGE:       { SF<>OF         }                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];                          C_NO,            { OF=0           }                          C_O:             { OF=1           }                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];                          C_NP,C_PO,       { PF=0           }                          C_P,C_PE:        { PF=1           }                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];                          C_NS,            { SF=0           }                          C_S:             { SF=1           }                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];                          else                            internalerror(2017042701);                        end;                        if RegReadByInstruction then                          exit;                      end;                    case getsubreg(reg) of                      R_SUBW,R_SUBD,R_SUBQ:                        RegReadByInstruction :=                          [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,                           Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,                           Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];                      R_SUBFLAGCARRY:                        RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];                      R_SUBFLAGPARITY:                        RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];                      R_SUBFLAGAUXILIARY:                        
RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];                      R_SUBFLAGZERO:                        RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];                      R_SUBFLAGSIGN:                        RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];                      R_SUBFLAGOVERFLOW:                        RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];                      R_SUBFLAGINTERRUPT:                        RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];                      R_SUBFLAGDIRECTION:                        RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];                      else                        internalerror(2017042601);                    end;                    exit;                  end;                if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and                   (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and                   (p.oper[0]^.reg=p.oper[1]^.reg) then                  exit;                if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then                  begin                    RegReadByInstruction := true;                    exit                  end;                if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then                  begin                    RegReadByInstruction := true;                    exit                  end;                if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then                  begin                    RegReadByInstruction := true;                    exit                  end;                if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then                  begin                    RegReadByInstruction := true;                    exit                  end;              end;          end;      end;    
end;


{$ifdef DEBUG_AOPTCPU}
    { Inserts the text s as an assembler comment in front of p }
    procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;


    { Decimal text of an integer constant, for debug messages }
    function debug_tostr(i: tcgint): string; inline;
      begin
        Result := tostr(i);
      end;


    { Register name prefixed with a percent sign, for debug messages }
    function debug_regname(r: TRegister): string; inline;
      begin
        Result := '%' + std_regname(r);
      end;


    { Debug output function - builds a textual form of an operand:
        constants  -> $value
        registers  -> %name
        references -> offset(base,index,scale), where a zero offset, an
                      unset base/index and a scale factor <= 1 are omitted
      Any other operand type yields the text [UNKNOWN] }
    function debug_operstr(oper: TOper): string;
      var
        BaseValid, IndexValid: Boolean;
      begin
        if oper.typ = top_const then
          Result := '$' + debug_tostr(oper.val)
        else if oper.typ = top_reg then
          Result := debug_regname(oper.reg)
        else if oper.typ = top_ref then
          begin
            BaseValid := (oper.ref^.base <> NR_INVALID) and (oper.ref^.base <> NR_NO);
            IndexValid := (oper.ref^.index <> NR_INVALID) and (oper.ref^.index <> NR_NO);

            { optional displacement, then the opening parenthesis }
            Result := '(';
            if oper.ref^.offset <> 0 then
              Result := debug_tostr(oper.ref^.offset) + Result;

            if BaseValid then
              Result := Result + debug_regname(oper.ref^.base);

            if IndexValid then
              begin
                { the separating comma is only written when a base precedes the index }
                if BaseValid then
                  Result := Result + ',';
                Result := Result + debug_regname(oper.ref^.index);
              end;

            if oper.ref^.scalefactor > 1 then
              Result := Result + ',' + debug_tostr(oper.ref^.scalefactor);
            Result := Result + ')';
          end
        else
          Result := '[UNKNOWN]';
      end;


    { Mnemonic of an opcode, taken from std_op2str }
    function debug_op2str(opcode: tasmop): string; inline;
      begin
        Result := std_op2str[opcode];
      end;


    { Size suffix of an operand size, taken from gas_opsize2str }
    function debug_opsize2str(opsize: topsize): string; inline;
      begin
        Result := gas_opsize2str[opsize];
      end;

{$else DEBUG_AOPTCPU}
    { Without DEBUG_AOPTCPU the debug helpers are empty stubs: DebugMsg
      does nothing and all string helpers return an empty string }
    procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
      begin
      end;


    function debug_tostr(i: tcgint): string; inline;
      begin
        Result := '';
      end;


    function debug_regname(r: TRegister): string; inline;
      begin
        Result := '';
      end;


    function debug_operstr(oper: TOper): string; inline;
      begin
        Result := '';
      end;


    function debug_op2str(opcode: tasmop): string; inline;
      begin
        Result := '';
      end;


    function debug_opsize2str(opsize: topsize): string; inline;
      begin
        Result := '';
      end;
{$endif DEBUG_AOPTCPU}


    { Returns true if a write to reg1 replaces every bit of reg2.
      Registers with different super registers never overlap (false);
      non-integer registers sharing a super register always do (true).
      An unexpected integer subregister raises internal error 2017042801 }
    function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
      begin
        Result:=false;
        if SuperRegistersEqual(reg1,reg2) then
          begin
            if getregtype(reg1)<>R_INTREGISTER then
              { super registers are equal, nothing else to distinguish }
              Result:=true
            else
              case getsubreg(reg1) of
                { Writing the low byte leaves the high byte untouched, and
                  for a 16 bit or wider reg2 the upper bits survive as well:
                    reg2 := (reg2 and $ffffff00) or byte(reg1);
                  so only the low byte itself is fully replaced }
                R_SUBL:
                  Result:=getsubreg(reg2)=R_SUBL;
                { Writing the high byte leaves the low byte untouched; for a
                  16 bit or wider reg2 it amounts to
                    reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
                R_SUBH:
                  Result:=getsubreg(reg2)=R_SUBH;
                { A 16 bit write keeps the upper 16 bits of a 32 bit or
                  wider reg2:
                    reg2 := (reg2 and $ffff0000) or word(reg1); }
                R_SUBW:
                  Result:=getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
                { A 32 bit write replaces every other subregister, because it
                  also clears the upper 32 bits of the 64 bit register on
                  x86_64 }
                R_SUBD,
                R_SUBQ:
                  Result:=true;
                else
                  internalerror(2017042801);
              end;
          end;
      end;


    { Returns true if reading reg1 observes at least one bit of reg2.
      Only the two byte halves (low byte versus high byte) of the same
      integer super register are independent of each other.
      An unexpected integer subregister raises internal error 2017042802 }
    function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
      begin
        Result:=false;
        if SuperRegistersEqual(reg1,reg2) then
          begin
            if getregtype(reg1)<>R_INTREGISTER then
              { super registers are equal, nothing else to distinguish }
              Result:=true
            else
              case getsubreg(reg1) of
                R_SUBL:
                  Result:=getsubreg(reg2)<>R_SUBH;
                R_SUBH:
                  Result:=getsubreg(reg2)<>R_SUBL;
                R_SUBW,
                R_SUBD,
                R_SUBQ:
                  Result:=true;
                else
                  internalerror(2017042802);
              end;
          end;
      end;


    { Rewrites the sequence
        shr/sar const1, x
        shl     const2, x
      into "sar/and", "and/shl" or just "and", depending on const1 and const2.
      The two instruction forms are skipped when optimizing for size.
      The function result is always false.
      NOTE(review): p is presumably already known to be a shr/sar
      instruction; this routine itself does not check its opcode }
    function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;

      { Loads into operand 0 of ins the constant that has the lowest "bits"
        bits cleared and all remaining bits of the operand size of p set.
        errnr is the internal error raised for an unsupported operand size }
      procedure LoadLowBitsClearedMask(ins: taicpu; bits: TCGInt; errnr: longint);
        var
          lowmask : TCGInt;
        begin
          lowmask := (1 shl bits) - 1;
          case taicpu(p).opsize Of
            S_B: ins.loadConst(0,lowmask Xor $ff);
            S_W: ins.loadConst(0,lowmask Xor $ffff);
            S_L: ins.loadConst(0,lowmask Xor tcgint($ffffffff));
            S_Q: ins.loadConst(0,lowmask Xor tcgint($ffffffffffffffff));
            else
              Internalerror(errnr)
          end;
        end;

      var
        hp1 : tai;
        ShiftRight, ShiftLeft : TCGInt;
      begin
        result:=false;

        { both shifts must use constant counts, the same operand size and
          the same destination operand }
        if not(GetNextInstruction(p, hp1) and
               MatchInstruction(hp1,A_SHL,[]) and
               (taicpu(p).oper[0]^.typ = top_const) and
               (taicpu(hp1).oper[0]^.typ = top_const) and
               (taicpu(hp1).opsize = taicpu(p).opsize) and
               (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
               OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^)) then
          exit;

        ShiftRight := taicpu(p).oper[0]^.val;
        ShiftLeft := taicpu(hp1).oper[0]^.val;

        if ShiftRight = ShiftLeft then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 = const2: a single "and" is enough }
            taicpu(p).opcode := A_AND;
            LoadLowBitsClearedMask(taicpu(p), ShiftRight, 2017050701);
            asml.remove(hp1);
            hp1.free;
          end
        else if not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            if ShiftRight > ShiftLeft then
              begin
                { shr/sar const1, %reg
                  shl     const2, %reg
                  with const1 > const2: shift right by the difference,
                  then mask }
                taicpu(p).loadConst(0,ShiftRight-ShiftLeft);
                taicpu(hp1).opcode := A_AND;
                LoadLowBitsClearedMask(taicpu(hp1), ShiftLeft, 2017050703);
              end
            else
              begin
                { shr/sar const1, %reg
                  shl     const2, %reg
                  with const1 < const2: mask, then shift left by the
                  difference }
                taicpu(hp1).loadConst(0,ShiftLeft-ShiftRight);
                taicpu(p).opcode := A_AND;
                LoadLowBitsClearedMask(taicpu(p), ShiftRight, 2017050702);
              end;
          end;
      end;


    { Reports whether instruction hp gives reg a new value that does not
      depend on the previous contents of reg. For the flags register the
      decision is taken from the change set in insprop of the opcode; a
      whole-flags query requires all six arithmetic flags to be written.
      Returns false if hp is nil or not an instruction }
    function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
      var
        p: taicpu;
      begin
        if not assigned(hp) or
           (hp.typ <> ait_instruction) then
         begin
           Result := false;
           exit;
         end;
        p := taicpu(hp);
        if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
          with insprop[p.opcode] do
            begin
              case getsubreg(reg) of
                R_SUBW,R_SUBD,R_SUBQ:
                  Result:=
                    RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                    RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                    RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                    RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                    RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                    RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
                R_SUBFLAGCARRY:
                  Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGPARITY:
                  Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGAUXILIARY:
Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGZERO:
                  Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGSIGN:
                  Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGOVERFLOW:
                  Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGINTERRUPT:
                  Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGDIRECTION:
                  Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
                else
                  { unknown flag subregister; no console output here, the
                    internal error alone reports the problem }
                  internalerror(2017050501);
              end;
              exit;
            end;

        { Non-flag registers: the instruction must overwrite reg completely
          and none of its inputs may depend on the old value of reg }
        Result :=
          { plain two operand loads/moves into a register }
          (((p.opcode = A_MOV) or
            (p.opcode = A_MOVZX) or
            (p.opcode = A_MOVSX) or
            (p.opcode = A_LEA) or
            (p.opcode = A_VMOVSS) or
            (p.opcode = A_VMOVSD) or
            (p.opcode = A_VMOVAPD) or
            (p.opcode = A_VMOVAPS) or
            (p.opcode = A_VMOVQ) or
            (p.opcode = A_MOVSS) or
            (p.opcode = A_MOVSD) or
            (p.opcode = A_MOVQ) or
            (p.opcode = A_MOVAPD) or
            (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
            (p.opcode = A_LDS) or
            (p.opcode = A_LES) or
{$endif not x86_64}
            (p.opcode = A_LFS) or
            (p.opcode = A_LGS) or
            (p.opcode = A_LSS)) and
           (p.ops=2) and  { A_MOVSD can have zero operands, so this check is needed }
           (p.oper[1]^.typ = top_reg) and
           (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
           ((p.oper[0]^.typ = top_const) or
            ((p.oper[0]^.typ = top_reg) and
             not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
            ((p.oper[0]^.typ = top_ref) and
             not RegInRef(reg,p.oper[0]^.ref^)))) or
          { pop can also take a memory operand; the register field may only
            be inspected when the operand really is a register, as is done
            for setcc and fstsw below }
          ((p.opcode = A_POP) and
           (p.oper[0]^.typ=top_reg) and
           (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
          { three operand imul: destination is operand 2, source operand 1 }
          ((p.opcode = A_IMUL) and
           (p.ops=3) and
           (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
           (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
            ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
          { one operand imul/mul: implicit accumulator source, result in
            ax respectively the data register of the operand size }
          ((((p.opcode = A_IMUL) or
             (p.opcode = A_MUL)) and
            (p.ops=1)) and
           (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
            ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
           (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
            ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
            ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
         or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
           )) or
          { sign extensions into the data register / accumulator }
          ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
          ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
          ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          { far pointer loads also set their segment register }
{$ifndef x86_64}
          ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
          ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
          ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
          ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
          ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
          { instructions with an implicit destination register }
{$ifndef x86_64}
          ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
          ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
          ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
          ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
          ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
          ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
          ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
          (((p.opcode = A_FSTSW) or
            (p.opcode = A_FNSTSW)) and
           (p.oper[0]^.typ=top_reg) and
           Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
          { xor/sub/sbb of a register with itself: the result does not
            depend on the old register value }
          (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
           (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
           (p.oper[0]^.reg=p.oper[1]^.reg) and
           Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
      end;


    class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
      var
        hp2,hp3 : tai;
      begin
        { some x86-64 issue a NOP before the real exit code }
        if MatchInstruction(p,A_NOP,[]) then
          GetNextInstruction(p,p);
        result:=assigned(p) and (p.typ=ait_instruction) and
        ((taicpu(p).opcode = A_RET) or
         ((taicpu(p).opcode=A_LEAVE) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_RET,[S_NO])
         ) or
         ((((taicpu(p).opcode=A_MOV) and
           MatchOpType(taicpu(p),top_reg,top_reg) and
           (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
           (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
           ((taicpu(p).opcode=A_LEA) and
           MatchOpType(taicpu(p),top_ref,top_reg) and
(taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
           (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
           )
          ) and
          GetNextInstruction(p,hp2) and
          MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
          MatchOpType(taicpu(hp2),top_reg) and
          (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
          GetNextInstruction(hp2,hp3) and
          MatchInstruction(hp3,A_RET,[S_NO])
         )
        );
      end;


    { Returns true if hp1 is an arithmetic/logic instruction whose
      destination is the register reg:
        - add/sub/or/xor/and/shl/shr/sar with reg as second operand and a
          constant or a register other than reg as first operand
        - inc/dec/neg/not applied to reg
      Every other opcode yields false }
    class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
      begin
        Result := False;
        case hp1.opcode of
          A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
            if (hp1.oper[1]^.typ = top_reg) and
               (hp1.oper[1]^.reg = reg) then
              Result :=
                (hp1.oper[0]^.typ = top_const) or
                ((hp1.oper[0]^.typ = top_reg) and
                 (hp1.oper[0]^.reg <> reg));
          A_INC,A_DEC,A_NEG,A_NOT:
            Result :=
              (hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg = reg);
        end;
      end;


    { Removes the last register deallocation before p of the integer
      register(s) selected from the return type of the current procedure }
    procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

      { Walks backwards from p and deletes the first ra_dealloc entry found
        for the integer super register supreg. The walk ends at the start
        of the list or at the first entry for which regInInstruction
        reports the register }
      procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
        var
          cur: tai;
        begin
          cur := tai(p.previous);
          while assigned(cur) do
            begin
              if (cur.typ = ait_regalloc) and
                 (tai_regalloc(cur).ratype=ra_dealloc) and
                 (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
                 (getsupreg(tai_regalloc(cur).reg) = supreg) then
                begin
                  asml.remove(cur);
                  cur.free;
                  break;
                end;
              if regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),cur) then
                break;
              cur := tai(cur.previous);
            end;
        end;

      begin
          case
current_procinfo.procdef.returndef.typ of
            arraydef,recorddef,pointerdef,
               stringdef,enumdef,procdef,objectdef,errordef,
               filedef,setdef,procvardef,
               classrefdef,forwarddef:
              DoRemoveLastDeallocForFuncRes(RS_EAX);
            orddef:
              if current_procinfo.procdef.returndef.size <> 0 then
                begin
                  DoRemoveLastDeallocForFuncRes(RS_EAX);
                  { for int64/qword }
                  if current_procinfo.procdef.returndef.size = 8 then
                    DoRemoveLastDeallocForFuncRes(RS_EDX);
                end;
          end;
      end;


    { Peephole optimization for movaps/movapd with two register operands.
      Folds a move / scalar arithmetic / move-back triple into the
      arithmetic instruction alone when the temporary register is not used
      afterwards. Returns true and lets p point to the arithmetic
      instruction if the sequence was changed }
    function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1,hp2 : tai;
      begin
        result:=false;
        if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p, hp1) and
          (hp1.typ = ait_instruction) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2,taicpu(p).opcode,[]) and
          OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
          MatchOpType(taicpu(hp2),top_reg,top_reg) and
          MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
          (((taicpu(p).opcode=A_MOVAPS) and
            ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
             (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
           ((taicpu(p).opcode=A_MOVAPD) and
            ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
             (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
          ) then
          { change
                     movapX    reg,reg2
                     addsX/subsX/... reg3, reg2
                     movapX    reg2,reg
            to
                     addsX/subsX/... reg3,reg
          }
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
            If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
              begin
                DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
                      debug_op2str(taicpu(p).opcode)+' '+
                      debug_op2str(taicpu(hp1).opcode)+' '+
                      debug_op2str(taicpu(hp2).opcode)+') done',p);
                { we cannot eliminate the first move if
                  the operations uses the same register for source and dest }
                if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                  begin
                    asml.remove(p);
                    p.Free;
                  end;
                taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                asml.remove(hp2);
                hp2.Free;
                p:=hp1;
                result:=true;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end
        end;


    { Peephole optimization for vmovaps/vmovapd with two register operands:
        - removes a move of a register onto itself
        - merges two consecutive moves through a temporary register
        - folds a move / fused multiply-add / move-back triple
      Every CopyUsedRegs is paired with a ReleaseUsedRegs so that the
      temporary register usage information is not leaked.
      Returns true if a move was removed or merged.
      NOTE(review): the fused multiply-add case changes p but leaves the
      result at false; kept as found, confirm whether this is intended }
    function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1,hp2 : tai;
      begin
        result:=false;
        if MatchOpType(taicpu(p),top_reg,top_reg) then
          begin
            { vmova* reg1,reg1
              =>
              <nop> }
            if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
              begin
                GetNextInstruction(p,hp1);
                asml.Remove(p);
                p.Free;
                p:=hp1;
                result:=true;
              end
            else if GetNextInstruction(p,hp1) then
              begin
                if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                  MatchOpType(taicpu(hp1),top_reg,top_reg) and
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                  begin
                    { vmova* reg1,reg2
                      vmova* reg2,reg3
                      dealloc reg2
                      =>
                      vmova* reg1,reg3 }
                    CopyUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                      begin
                        taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                        asml.Remove(hp1);
                        hp1.Free;
                        result:=true;
                      end
                    { special case:
                      vmova* reg1,reg2
                      vmova* reg2,reg1
                      =>
                      vmova* reg1,reg2 }
                    else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                      begin
                        asml.Remove(hp1);
                        hp1.Free;
                        result:=true;
                      end;
                    { the copy is no longer needed on any of the paths above }
                    ReleaseUsedRegs(TmpUsedRegs);
                  end
                else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                  { we mix single and double opperations here because we assume that the compiler
                    generates vmovapd only after double operations and vmovaps only after single operations }
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                  GetNextInstruction(hp1,hp2) and
                  MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                  MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                  begin
                    CopyUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
                     then
                      begin
                        taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                        asml.Remove(p);
                        p.Free;
                        asml.Remove(hp2);
                        hp2.Free;
                        p:=hp1;
                      end;
                    ReleaseUsedRegs(TmpUsedRegs);
                  end;
              end;
          end;
      end;


    { Peephole optimization for three operand vector instructions: lets the
      instruction write directly to the destination of a following
      vmovaps/vmovapd when its own destination register is not used
      afterwards. Returns true if the move was removed.
      NOTE(review): operand 2 of p is accessed unconditionally, so p is
      presumably always a three operand instruction here; verify against
      the caller }
    function TX86AsmOptimizer.OptPass1VOP(var p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1 : tai;
      begin
        result:=false;
        { replace
            V<Op>X   %mreg1,%mreg2,%mreg3
            VMovX    %mreg3,%mreg4
            dealloc  %mreg3

            by
            V<Op>X   %mreg1,%mreg2,%mreg4
          ?
        }
        if GetNextInstruction(p,hp1) and
          { we mix single and double operations here because we assume that the compiler
            generates vmovapd only after double operations and vmovaps only after single operations }
          MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
          MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
          (taicpu(hp1).oper[1]^.typ=top_reg) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
             ) then
              begin
                taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
                DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
                asml.Remove(hp1);
                hp1.Free;
                result:=true;
              end;
            { release the copy made above, whether or not the move was folded }
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;


    function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
      var
        hp1, hp2: tai;
        TmpUsedRegs : TAllUsedRegs;
        GetNextInstruction_p: Boolean;
        PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
        NewSize: topsize;
      begin
        Result:=false;

        GetNextInstruction_p:=GetNextInstruction(p, hp1);

        {  remove mov reg1,reg1? 
}        if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)        then          begin            DebugMsg(SPeepholeOptimization + 'Mov2Nop done',p);            { take care of the register (de)allocs following p }            UpdateUsedRegs(tai(p.next));            asml.remove(p);            p.free;            p:=hp1;            Result:=true;            exit;          end;        if GetNextInstruction_p and          MatchInstruction(hp1,A_AND,[]) and          (taicpu(p).oper[1]^.typ = top_reg) and          MatchOpType(taicpu(hp1),top_const,top_reg) then          begin            if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then              begin                case taicpu(p).opsize of                  S_L:                    if (taicpu(hp1).oper[0]^.val = $ffffffff) then                      begin                        { Optimize out:                            mov x, %reg                            and ffffffffh, %reg                        }                        DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);                        asml.remove(hp1);                        hp1.free;                        Result:=true;                        exit;                      end;                  S_Q: { TODO: Confirm if this is even possible }                    if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then                      begin                        { Optimize out:                            mov x, %reg                            and ffffffffffffffffh, %reg                        }                        DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);                        asml.remove(hp1);                        hp1.free;                        Result:=true;                        exit;                      end;                  end;              end            else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and              (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only 
supports registers and memory, not immediates (use MOV for that!) }              (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))              then              begin                InputVal := debug_operstr(taicpu(p).oper[0]^);                MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);                case taicpu(p).opsize of                  S_B:                    if (taicpu(hp1).oper[0]^.val = $ff) then                      begin                        { Convert:                            movb x, %regl        movb x, %regl                            andw ffh, %regw      andl ffh, %regd                          To:                            movzbw x, %regd      movzbl x, %regd                          (Identical registers, just different sizes)                        }                        RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }                        RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }                        case taicpu(hp1).opsize of                          S_W: NewSize := S_BW;                          S_L: NewSize := S_BL;{$ifdef x86_64}                          S_Q: NewSize := S_BQ;{$endif x86_64}                          else                            InternalError(2018011510);                        end;                      end                    else                      NewSize := S_NO;                  S_W:                    if (taicpu(hp1).oper[0]^.val = $ffff) then                      begin                        { Convert:                            movw x, %regw                            andl ffffh, %regd                          To:                            movzwl x, %regd                          (Identical registers, just different sizes)                        }                        RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }                        RegName2 := 
debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }                        case taicpu(hp1).opsize of                          S_L: NewSize := S_WL;{$ifdef x86_64}                          S_Q: NewSize := S_WQ;{$endif x86_64}                          else                            InternalError(2018011511);                        end;                      end                    else                      NewSize := S_NO;                  else                    NewSize := S_NO;                end;                if NewSize <> S_NO then                  begin                    PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;                    { The actual optimization }                    taicpu(p).opcode := A_MOVZX;                    taicpu(p).changeopsize(NewSize);                    taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;                    { Safeguard if "and" is followed by a conditional command }                    CopyUsedRegs(TmpUsedRegs);                    UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));                    if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, tai(hp1.next), TmpUsedRegs)) then                      begin                        { At this point, the "and" command is effectively equivalent to                          "test %reg,%reg". This will be handled separately by the                          Peephole Optimizer. 
[Kit] }                        DebugMsg(SPeepholeOptimization + PreMessage +                          ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);                      end                    else                      begin                        DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +                          ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);                        asml.Remove(hp1);                        hp1.Free;                      end;                    Result := True;                    ReleaseUsedRegs(TmpUsedRegs);                    Exit;                  end;              end;          end        else if GetNextInstruction_p and          MatchInstruction(hp1,A_MOV,[]) and          (taicpu(p).oper[1]^.typ = top_reg) and          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then          begin            CopyUsedRegs(TmpUsedRegs);            UpdateUsedRegs(TmpUsedRegs, tai(p.Next));            { we have                mov x, %treg                mov %treg, y            }            if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and               not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then              { we've got                mov x, %treg                mov %treg, y                with %treg is not used after }              case taicpu(p).oper[0]^.typ Of                top_reg:                  begin                    { change                        mov %reg, %treg                        mov %treg, y                        to                        mov %reg, y                    }                    if taicpu(hp1).oper[1]^.typ=top_reg then                      AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);                    taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);                    DebugMsg(SPeepholeOptimization + 
'MovMov2Mov 2 done',p);                    asml.remove(hp1);                    hp1.free;                    ReleaseUsedRegs(TmpUsedRegs);                    Result:=true;                    Exit;                  end;                top_const:                  begin                    { change                        mov const, %treg                        mov %treg, y                        to                        mov const, y                    }                    if (taicpu(hp1).oper[1]^.typ=top_reg) or                      ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then                      begin                        if taicpu(hp1).oper[1]^.typ=top_reg then                          AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);                        taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);                        DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);                        asml.remove(hp1);                        hp1.free;                        ReleaseUsedRegs(TmpUsedRegs);                        Result:=true;                        Exit;                      end;                  end;                top_ref:                  if (taicpu(hp1).oper[1]^.typ = top_reg) then                    begin                      { change                           mov mem, %treg                           mov %treg, %reg                           to                           mov mem, %reg"                      }                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);                      DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);                      asml.remove(hp1);                      hp1.free;                      ReleaseUsedRegs(TmpUsedRegs);                      Result:=true;                      Exit;                    end;              end;            ReleaseUsedRegs(TmpUsedRegs);          end        else          { Change             mov %reg1, %reg2             
xxx %reg2, ???             to             mov %reg1, %reg2             xxx %reg1, ???             to avoid a write/read penalty          }          if MatchOpType(taicpu(p),top_reg,top_reg) and             GetNextInstruction(p,hp1) and             (tai(hp1).typ = ait_instruction) and             (taicpu(hp1).ops >= 1) and             MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then            { we have              mov %reg1, %reg2              XXX %reg2, ???            }            begin              if ((taicpu(hp1).opcode = A_OR) or                  (taicpu(hp1).opcode = A_AND) or                  (taicpu(hp1).opcode = A_TEST)) and                 (taicpu(hp1).oper[1]^.typ = top_reg) and                 (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then                {  we have                   mov %reg1, %reg2                   test/or/and %reg2, %reg2                }                begin                  CopyUsedRegs(TmpUsedRegs);                  { reg1 will be used after the first instruction,                    so update the allocation info                  }                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);                  if GetNextInstruction(hp1, hp2) and                     (hp2.typ = ait_instruction) and                     taicpu(hp2).is_jmp and                     not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then                      { change                        mov %reg1, %reg2                        test/or/and %reg2, %reg2                        jxx                        to                        test %reg1, %reg1                        jxx                      }                      begin                        taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);                        taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);                        DebugMsg(SPeepholeOptimization + 'MovTestJxx2TestMov done',p);                        asml.remove(p);                        
p.free;                        p := hp1;                        ReleaseUsedRegs(TmpUsedRegs);                        Exit;                      end                    else                      { change                        mov %reg1, %reg2                        test/or/and %reg2, %reg2                        to                        mov %reg1, %reg2                        test/or/and %reg1, %reg1                        }                      begin                        taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);                        taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);                        DebugMsg(SPeepholeOptimization + 'MovTestJxx2MovTestJxx done',p);                      end;                  ReleaseUsedRegs(TmpUsedRegs);                end            end        else          { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with            x >= RetOffset) as it doesn't do anything (it writes either to a            parameter or to the temporary storage room for the function            result)          }          if GetNextInstruction_p and            (tai(hp1).typ = ait_instruction) then            begin              if IsExitCode(hp1) and                MatchOpType(taicpu(p),top_reg,top_ref) and                (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and                not(assigned(current_procinfo.procdef.funcretsym) and                   (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and                (taicpu(p).oper[1]^.ref^.index = NR_NO) then                begin                  asml.remove(p);                  p.free;                  p:=hp1;                  DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);                  RemoveLastDeallocForFuncRes(p);                  exit;                end              { change                  mov reg1, mem1                  test/cmp x, mem1                  to 
                 mov reg1, mem1                  test/cmp x, reg1              }              else if MatchOpType(taicpu(p),top_reg,top_ref) and                  MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and                  (taicpu(hp1).oper[1]^.typ = top_ref) and                   RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then                  begin                    taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);                    DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);                    AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);                  end;            end;        { Next instruction is also a MOV ? }        if GetNextInstruction_p and          MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then          begin            if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and               (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then                {  mov reg1, mem1     or     mov mem1, reg1                   mov mem2, reg2            mov reg2, mem2}              begin                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then                  { mov reg1, mem1     or     mov mem1, reg1                    mov mem2, reg1            mov reg2, mem1}                  begin                    if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then                      { Removes the second statement from                        mov reg1, mem1/reg2                        mov mem1/reg2, reg1 }                      begin                        if taicpu(p).oper[0]^.typ=top_reg then                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);                        DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);                        asml.remove(hp1);                        hp1.free;                        Result:=true;                        exit;                      end                    else                      begin                  
      CopyUsedRegs(TmpUsedRegs);                        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));                        if (taicpu(p).oper[1]^.typ = top_ref) and                          { mov reg1, mem1                            mov mem2, reg1 }                           (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and                           GetNextInstruction(hp1, hp2) and                           MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and                           OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and                           OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and                           not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then                           { change                   to                             mov reg1, mem1           mov reg1, mem1                             mov mem2, reg1           cmp reg1, mem2                             cmp mem1, reg1                           }                          begin                            asml.remove(hp2);                            hp2.free;                            taicpu(hp1).opcode := A_CMP;                            taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);                            taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);                            AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);                            DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);                          end;                        ReleaseUsedRegs(TmpUsedRegs);                      end;                  end                else if (taicpu(p).oper[1]^.typ=top_ref) and                  OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then                  begin                    AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);                    taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);                    DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);                
  end                else                  begin                    CopyUsedRegs(TmpUsedRegs);                    if GetNextInstruction(hp1, hp2) and                      MatchOpType(taicpu(p),top_ref,top_reg) and                      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and                      (taicpu(hp1).oper[1]^.typ = top_ref) and                      MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and                      MatchOpType(taicpu(hp2),top_ref,top_reg) and                      RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^)  then                      if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and                         not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then                        {   mov mem1, %reg1                            mov %reg1, mem2                            mov mem2, reg2                         to:                            mov mem1, reg2                            mov reg2, mem2}                        begin                          AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);                          DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);                          taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);                          taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);                          asml.remove(hp2);                          hp2.free;                        end{$ifdef i386}                      { this is enabled for i386 only, as the rules to create the reg sets below                        are too complicated for x86-64, so this makes this code too error prone                        on x86-64                      }                      else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and                        not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and                        not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then           
             {   mov mem1, reg1         mov mem1, reg1                            mov reg1, mem2         mov reg1, mem2                            mov mem2, reg2         mov mem2, reg1                         to:                    to:                            mov mem1, reg1         mov mem1, reg1                            mov mem1, reg2         mov reg1, mem2                            mov reg1, mem2                         or (if mem1 depends on reg1                      and/or if mem2 depends on reg2)                         to:                             mov mem1, reg1                             mov reg1, mem2                             mov reg1, reg2                        }                        begin                          taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);                          taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);                          taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);                          taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);                          if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and                             (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then                            AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);                          if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and                             (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then                            AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);                        end                      else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then                        begin                          taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);                        end      
                else                        begin                          asml.remove(hp2);                          hp2.free;                        end{$endif i386}                        ;                    ReleaseUsedRegs(TmpUsedRegs);                  end;              end(*          { movl [mem1],reg1              movl [mem1],reg2              to              movl [mem1],reg1              movl reg1,reg2             }             else if (taicpu(p).oper[0]^.typ = top_ref) and                (taicpu(p).oper[1]^.typ = top_reg) and                (taicpu(hp1).oper[0]^.typ = top_ref) and                (taicpu(hp1).oper[1]^.typ = top_reg) and                (taicpu(p).opsize = taicpu(hp1).opsize) and                RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and                (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and                (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then                taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)              else*)            {   movl const1,[mem1]                movl [mem1],reg1                to                movl const1,reg1                movl reg1,[mem1]            }            else if MatchOpType(Taicpu(p),top_const,top_ref) and                 MatchOpType(Taicpu(hp1),top_ref,top_reg) and                 (taicpu(p).opsize = taicpu(hp1).opsize) and                 RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and                 not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then              begin                AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);                taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);                taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);                taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);                taicpu(hp1).fileinfo := taicpu(p).fileinfo;                DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);              end            {        
      mov*  x,reg1              mov*  y,reg1              to              mov*  y,reg1            }            else if (taicpu(p).oper[1]^.typ=top_reg) and              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and              not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then              begin                DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);                { take care of the register (de)allocs following p }                UpdateUsedRegs(tai(p.next));                asml.remove(p);                p.free;                p:=hp1;                Result:=true;                exit;              end;          end        else if (taicpu(p).oper[1]^.typ = top_reg) and          GetNextInstruction_p and          (hp1.typ = ait_instruction) and          GetNextInstruction(hp1, hp2) and          MatchInstruction(hp2,A_MOV,[]) and          OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and          (taicpu(hp2).oper[0]^.typ=top_reg) and          (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and          (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or           ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and            IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))          ) then          { change   movsX/movzX    reg/ref, reg2                     add/sub/or/... reg3/$const, reg2                     mov            reg2 reg/ref            to       add/sub/or/... 
reg3/$const, reg/ref      }          begin            CopyUsedRegs(TmpUsedRegs);            UpdateUsedRegs(TmpUsedRegs, tai(p.next));            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));            If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then              begin                { by example:                    movswl  %si,%eax        movswl  %si,%eax      p                    decl    %eax            addl    %edx,%eax     hp1                    movw    %ax,%si         movw    %ax,%si       hp2                  ->                    movswl  %si,%eax        movswl  %si,%eax      p                    decw    %eax            addw    %edx,%eax     hp1                    movw    %ax,%si         movw    %ax,%si       hp2                }                DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+                      debug_op2str(taicpu(p).opcode)+debug_opsize2str(taicpu(p).opsize)+' '+                      debug_op2str(taicpu(hp1).opcode)+debug_opsize2str(taicpu(hp1).opsize)+' '+                      debug_op2str(taicpu(hp2).opcode)+debug_opsize2str(taicpu(hp2).opsize),p);                taicpu(hp1).changeopsize(taicpu(hp2).opsize);                {                  ->                    movswl  %si,%eax        movswl  %si,%eax      p                    decw    %si             addw    %dx,%si       hp1                    movw    %ax,%si         movw    %ax,%si       hp2                }                case taicpu(hp1).ops of                  1:                    begin                      taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);                      if taicpu(hp1).oper[0]^.typ=top_reg then                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));                    end;                  2:                    begin                      taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);                      if (taicpu(hp1).oper[0]^.typ=top_reg) and                        
(taicpu(hp1).opcode<>A_SHL) and                        (taicpu(hp1).opcode<>A_SHR) and                        (taicpu(hp1).opcode<>A_SAR) then                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));                    end;                  else                    internalerror(2008042701);                end;                {                  ->                    decw    %si             addw    %dx,%si       p                }                asml.remove(p);                asml.remove(hp2);                p.Free;                hp2.Free;                p := hp1;              end;            ReleaseUsedRegs(TmpUsedRegs);          end        else if GetNextInstruction_p and          MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and          GetNextInstruction(hp1, hp2) and          MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and          MatchOperand(Taicpu(p).oper[0]^,0) and          (Taicpu(p).oper[1]^.typ = top_reg) and          MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and          MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then          { mov reg1,0            bts reg1,operand1             -->      mov reg1,operand2            or  reg1,operand2                      bts reg1,operand1}          begin            Taicpu(hp2).opcode:=A_MOV;            asml.remove(hp1);            insertllitem(hp2,hp2.next,hp1);            asml.remove(p);            p.free;            p:=hp1;          end        else if GetNextInstruction_p and           MatchInstruction(hp1,A_LEA,[S_L]) and           MatchOpType(Taicpu(p),top_ref,top_reg) and           ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and             (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)            ) or            (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and             (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)            
)
           ) then
           { mov reg1,ref
             lea reg2,[reg1,reg2]
             to
             add reg2,ref}
          begin
            CopyUsedRegs(TmpUsedRegs);
            { reg1 may not be used afterwards }
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
              begin
                Taicpu(hp1).opcode:=A_ADD;
                Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
                DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
                asml.remove(p);
                p.free;
                p:=hp1;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;


    { Pass-1 peephole handler for two-operand "movXX"-style instructions.

      Inspects the instruction that follows p. When it has the same opcode
      and operand size as p and is the exact mirror image of p, i.e.

          movXX  op1, op2        (p)
          movXX  op2, op1        (next instruction)

      the second instruction is redundant and is removed. When op2 is in
      addition a register that RegUsedAfterInstruction reports as unused
      after the second instruction, the first instruction is removed as well
      and p is set to whatever GetNextInstruction yields after the pair.

      p       : in/out, the instruction being optimized; may be replaced.
      Result  : True if the instruction list was modified, False otherwise. }
    function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
      var
        Follower : tai;
        DropFirst : boolean;
      begin
        Result:=false;

        { only plain two-operand forms are handled }
        if taicpu(p).ops <> 2 then
          exit;

        { the next instruction must be the same two-operand opcode with the
          same operand size }
        if not GetNextInstruction(p,Follower) then
          exit;
        if not MatchInstruction(Follower,taicpu(p).opcode,[taicpu(p).opsize]) then
          exit;
        if taicpu(Follower).ops <> 2 then
          exit;

        { the operand kinds have to be swapped between both instructions:
            movXX reg1, mem1     or     movXX mem1, reg1
            movXX mem2, reg2            movXX reg2, mem2 }
        if (taicpu(Follower).oper[0]^.typ <> taicpu(p).oper[1]^.typ) or
           (taicpu(Follower).oper[1]^.typ <> taicpu(p).oper[0]^.typ) then
          exit;

        { ... and the operands themselves have to be swapped as well:
            movXX op1, op2
            movXX op2, op1 }
        if not OpsEqual(taicpu(Follower).oper[1]^,taicpu(p).oper[0]^) then
          exit;
        if not OpsEqual(taicpu(Follower).oper[0]^,taicpu(p).oper[1]^) then
          exit;

        { the source register of p now has to stay allocated up to the
          second instruction }
        if taicpu(p).oper[0]^.typ=top_reg then
          AllocRegBetween(taicpu(p).oper[0]^.reg,p,Follower,usedregs);

        { if the destination of p is a register that is not used after the
          pair, the first instruction can be dropped too }
        DropFirst:=(taicpu(p).oper[1]^.typ=top_reg) and
          not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,Follower,UsedRegs));
        if DropFirst then
          begin
            asml.remove(p);
            p.free;
            GetNextInstruction(Follower,p);
            DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
          end
        else
          DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);

        { in both cases the mirrored second instruction goes away }
        asml.remove(Follower);
        Follower.free;
        Result:=true;
      end;


    function TX86AsmOptimizer.OptPass1OP(var p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1 : tai;
      begin
        result:=false;
        { replace
            <Op>X    %mreg1,%mreg2  // Op in [ADD,MUL]
            MovX     %mreg2,%mreg1
            dealloc  %mreg2

            by
            <Op>X    %mreg2,%mreg1
          ?
}
        if GetNextInstruction(p,hp1) and
          { we mix single and double operations here because we assume that the compiler
            generates vmovapd only after double operations and vmovaps only after single operations }
          MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
          MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
          (taicpu(p).oper[0]^.typ=top_reg) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
              begin
                taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
                taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
                asml.Remove(hp1);
                hp1.Free;
                result:=true;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;


    { Pass-1 peephole handler for LEA.

      The segment register of the reference is always cleared first. After
      that the following rewrites are attempted, in this order:

        lea (%reg1), %reg2              ->  mov %reg1, %reg2    (Lea2Mov)
        lea (%reg), %reg                ->  removed             (Lea2Nop)
        lea 1(%reg), %reg               ->  inc %reg            (Lea2Inc, if UseIncDec)
        lea -1(%reg), %reg              ->  dec %reg            (Lea2Dec, if UseIncDec)
        lea -n(%reg), %reg              ->  sub $n, %reg        (Lea2Sub)
        lea n(%reg), %reg               ->  add $n, %reg        (Lea2Add)
        lea x, %reg1; mov %reg1, %reg2  ->  lea x, %reg2        (LeaMov2Lea,
                                            only if %reg1 is unused afterwards)

      The first six apply only to references with a base register, no index
      register and no symbol whose destination is not the stack pointer.

      NOTE(review): LEA leaves the flags untouched while inc/dec/add/sub
      modify them, and nothing in this function checks whether the flags are
      live - confirm that this is guaranteed elsewhere.

      p       : in/out, the LEA instruction; may be replaced or removed.
      Result  : True if the instruction list was modified, False otherwise. }
    function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
      var
        Other : tai;
        Displacement : ASizeInt;
        RegsAfter : TAllUsedRegs;
      begin
        Result:=false;
        { a segment register prefix does nothing on a LEA, so drop it }
        taicpu(p).oper[0]^.ref^.Segment:=NR_NO;

        if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
           (taicpu(p).oper[0]^.ref^.index = NR_NO) and
           { do not touch LEAs that write the stack pointer }
           (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
           (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
          begin
            if taicpu(p).oper[0]^.ref^.offset = 0 then
              begin
                if taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg then
                  begin
                    { "lea (%reg1), %reg2" becomes "mov %reg1, %reg2"; the new
                      instruction takes the place of p in the list }
                    Other:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
                      taicpu(p).oper[1]^.reg);
                    InsertLLItem(p.previous,p.next, Other);
                    DebugMsg(SPeepholeOptimization + 'Lea2Mov done',Other);
                  end
                else
                  begin
                    { "lea (%reg), %reg" has no effect: unlink it and go on
                      with its successor }
                    Other:=taicpu(p.Next);
                    DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
                    asml.remove(p);
                  end;
                p.free;
                p:=Other;
                Result:=true;
                exit;
              end;

            { keep using lea to adjust the stack pointer, which is the
              recommended way, unless optimizing for size }
            if ((taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
                (cs_opt_size in current_settings.optimizerswitches)) and
               (taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
              begin
                { "lea n(%reg), %reg": read the offset before operand 0 is
                  overwritten below }
                Displacement:=taicpu(p).oper[0]^.ref^.offset;
                if (Displacement=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                  end
                else if (Displacement=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                  end
                { -2147483648 is excluded from the sub form, presumably
                  because its negation does not fit a 32 bit immediate }
                else if (Displacement<0) and (Displacement<>-2147483648) then
                  begin
                    taicpu(p).opcode:=A_SUB;
                    taicpu(p).loadConst(0,-Displacement);
                    DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                  end
                else
                  begin
                    taicpu(p).opcode:=A_ADD;
                    taicpu(p).loadConst(0,Displacement);
                    DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                  end;
                Result:=true;
                exit;
              end;
          end;

        { lea x, %reg1
          mov %reg1, %reg2
          to
          lea x, %reg2      (if %reg1 is not used afterwards) }
        if GetNextInstruction(p,Other) and
          MatchInstruction(Other,A_MOV,[taicpu(p).opsize]) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(Other).oper[0]^) and
          MatchOpType(Taicpu(Other),top_reg,top_reg) and
          (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
          begin
            CopyUsedRegs(RegsAfter);
            UpdateUsedRegs(RegsAfter, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,Other,RegsAfter)) then
              begin
                taicpu(p).loadoper(1,taicpu(Other).oper[1]^);
                DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
                asml.Remove(Other);
                Other.Free;
                result:=true;
              end;
            ReleaseUsedRegs(RegsAfter);
          end;
      end;


    function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
      var
        hp1 : tai;
      begin
        DoSubAddOpt := False;
        if GetLastInstruction(p, hp1) and
           (hp1.typ = ait_instruction) and
           (taicpu(hp1).opsize = taicpu(p).opsize) then
          case taicpu(hp1).opcode Of
            A_DEC:
              if (taicpu(hp1).oper[0]^.typ = top_reg) and
                MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                begin
                  taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
                  asml.remove(hp1);
                  hp1.free;
                end;
             A_SUB:
               if MatchOpType(taicpu(hp1),top_const,top_reg) and
MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
                 begin
                   taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
                   asml.remove(hp1);
                   hp1.free;
                 end;
             A_ADD:
               if MatchOpType(taicpu(hp1),top_const,top_reg) and
                 MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
                 begin
                   taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                   asml.remove(hp1);
                   hp1.free;
                   if (taicpu(p).oper[0]^.val = 0) then
                     begin
                       hp1 := tai(p.next);
                       asml.remove(p);
                       p.free;
                       if not GetLastInstruction(hp1, p) then
                         p := hp1;
                       DoSubAddOpt := True;
                     end
                 end;
           end;
      end;


    { Pass-1 peephole handler for "sub const, reg".

      Two rewrites are attempted:

      * i386 only: "subl $2, %esp" followed (with no instruction in between
        that reads or modifies %esp) by "pushw x" is turned into "pushl x"
        and the sub is removed. This is skipped when the sub directly follows
        "movl %esp, %ebp", i.e. when it belongs to the stack frame setup.

      * "sub/add const1, reg" or "dec reg" directly before p is folded into
        p by DoSubAddOpt.

      p       : in/out, the SUB instruction; may be replaced or removed.
      Result  : True if p itself was removed or replaced, False otherwise
                (DoSubAddOpt may still have folded a preceding instruction
                into p without reporting a change). }
    function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
      var
        hp1 : tai;
      begin
        Result:=false;
        { * change "subl $2, %esp; pushw x" to "pushl x"}
        { * change "sub/add const1, reg" or "dec reg" followed by
            "sub const2, reg" to one "sub ..., reg" }
        if MatchOpType(taicpu(p),top_const,top_reg) then
          begin
{$ifdef i386}
            if (taicpu(p).oper[0]^.val = 2) and
               (taicpu(p).oper[1]^.reg = NR_ESP) and
               { Don't do the sub/push optimization if the sub }
               { comes from setting up the stack frame (JM),   }
               { i.e. if it is preceded by "movl %esp,%ebp":   }
               { source (operand 0) is %esp and destination    }
               { (operand 1) is %ebp. Both used to be tested   }
               { against operand 0, which can never match.     }
               (not(GetLastInstruction(p,hp1)) or
               not(MatchInstruction(hp1,A_MOV,[S_L]) and
                 MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
                 MatchOperand(taicpu(hp1).oper[1]^,NR_EBP))) then
              begin
                { look for the first following item that touches %esp }
                hp1 := tai(p.next);
                while Assigned(hp1) and
                      (tai(hp1).typ in [ait_instruction]+SkipInstr) and
                      not RegReadByInstruction(NR_ESP,hp1) and
                      not RegModifiedByInstruction(NR_ESP,hp1) do
                  hp1 := tai(hp1.next);
                if Assigned(hp1) and
                  MatchInstruction(hp1,A_PUSH,[S_W]) then
                  begin
                    { widen the 16 bit push to 32 bit; it then also covers
                      the 2 bytes the sub reserved }
                    taicpu(hp1).changeopsize(S_L);
                    if taicpu(hp1).oper[0]^.typ=top_reg then
                      setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    p := hp1;
                    Result:=true;
                    exit;
                  end;
              end;
{$endif i386}
            if DoSubAddOpt(p) then
              Result:=true;
          end;
      end;


    function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
      var
        TmpBool1,TmpBool2 : Boolean;
        tmpref : treference;
        hp1,hp2: tai;
      begin
        Result:=false;
        if MatchOpType(taicpu(p),top_const,top_reg) and
           (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
           (taicpu(p).oper[0]^.val <= 3) then
          { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
          begin
            { should we check the next instruction? }
            TmpBool1 := True;
            { have we found an add/sub which could be
              integrated in the lea?
}
            TmpBool2 := False;
            { start with the reference (,reg,2^count); base and offset are
              filled in below from the instructions that get folded }
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            { Consume following add/sub/inc/dec instructions whose destination
              is the shifted register, as long as the instruction after them
              does not read the flags. }
            while TmpBool1 and
                  GetNextInstruction(p, hp1) and
                  (tai(hp1).typ = ait_instruction) and
                  ((((taicpu(hp1).opcode = A_ADD) or
                     (taicpu(hp1).opcode = A_SUB)) and
                    (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
                   (((taicpu(hp1).opcode = A_INC) or
                     (taicpu(hp1).opcode = A_DEC)) and
                    (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                    (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
                  (not GetNextInstruction(hp1,hp2) or
                   not instrReadsFlags(hp2)) Do
              begin
                { stays false, ending the loop, unless hp1 is folded below }
                TmpBool1 := False;
                if (taicpu(hp1).oper[0]^.typ = Top_Const) then
                  begin
                    { add/sub of a constant becomes part of the lea offset }
                    TmpBool1 := True;
                    TmpBool2 := True;
                    case taicpu(hp1).opcode of
                      A_ADD:
                        inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                      A_SUB:
                        dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                    end;
                    asml.remove(hp1);
                    hp1.free;
                  end
                else
                  if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                     (((taicpu(hp1).opcode = A_ADD) and
                       (TmpRef.base = NR_NO) and
                       { "add %reg,%reg" on the shifted register doubles the
                         already shifted value; a lea would use the unshifted
                         value as base and compute (1+scale)*x instead, so
                         that form must not be folded }
                       (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg)) or
                      (taicpu(hp1).opcode = A_INC) or
                      (taicpu(hp1).opcode = A_DEC)) then
                    begin
                      TmpBool1 := True;
                      TmpBool2 := True;
                      case taicpu(hp1).opcode of
                        A_ADD:
                          { the added register becomes the lea base }
                          TmpRef.base := taicpu(hp1).oper[0]^.reg;
                        A_INC:
                          inc(TmpRef.offset);
                        A_DEC:
                          dec(TmpRef.offset);
                      end;
                      asml.remove(hp1);
                      hp1.free;
                    end;
              end;
            { Replace the shift when something was folded; on non-x86_64
              targets also when optimising for a CPU older than the Pentium II
              and not for size. }
            if TmpBool2
{$ifndef x86_64}
               or
               ((current_settings.optimizecputype < cpu_Pentium2) and
               (taicpu(p).oper[0]^.val <= 3) and
               not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
              then
              begin
                if not(TmpBool2) and
                    (taicpu(p).oper[0]^.val = 1) then
                  begin
                    { plain "shl $1,%reg" becomes "add %reg,%reg" }
                    hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                      taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
                  end
                else
                  hp1 := taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                              taicpu(p).oper[1]^.reg);
                { NOTE(review): p is replaced here but Result is not set to
                  true - confirm the caller does not depend on it }
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end;
          end
{$ifndef x86_64}
        else if (current_settings.optimizecputype < cpu_Pentium2) and
          MatchOpType(taicpu(p),top_const,top_reg) then
          begin
            { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
              but faster on a 486, and pairable in both U and V pipes on the Pentium
              (unlike shl, which is only pairable in the U pipe) }
            if taicpu(p).oper[0]^.val=1 then
                begin
                  hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                            taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                  InsertLLItem(p.previous, p.next, hp1);
                  p.free;
                  p := hp1;
                end
           { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
             "shl $3, %reg" to "lea (,%reg,8), %reg" }
        else if (taicpu(p).opsize = S_L) and
                   (taicpu(p).oper[0]^.val<= 3) then
             begin
               reference_reset(tmpref,2,[]);
               TmpRef.index := taicpu(p).oper[1]^.reg;
               TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
               hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
               InsertLLItem(p.previous, p.next, hp1);
               p.free;
               p := hp1;
             end;
          end
{$endif x86_64}
          ;
      end;


    function TX86AsmOptimizer.OptPass1SETcc(var p: tai): boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1,hp2,next: tai; SetC, JumpC: TAsmCond;
      begin
        Result:=false;

        if MatchOpType(taicpu(p),top_reg) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1, A_TEST, [S_B]) and
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
          (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2, A_Jcc, []) then
          { Change from:             To:

            set(C) %reg              j(~C) label
            test   %reg,%reg
            je     label


            set(C) %reg              j(C)  label
            test   %reg,%reg
            jne    label
          }
          begin
            next := tai(p.Next);

            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, next);
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
            UpdateUsedRegs(TmpUsedRegs, tai(hp2.next));

            asml.Remove(hp1);
            hp1.Free;

            JumpC := taicpu(hp2).condition;

            if conditions_equal(JumpC, C_E) then
              SetC := inverse_cond(taicpu(p).condition)
            else if conditions_equal(JumpC, C_NE) then
              SetC := taicpu(p).condition
            else
              InternalError(2018061400);

            if SetC = C_NONE then
            InternalError(2018061401);

            taicpu(hp2).SetCondition(SetC);

            if not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs) then
              begin
                asml.Remove(p);
                UpdateUsedRegs(next);
                p.Free;
                Result := True;
                p := hp2;
              end;

            ReleaseUsedRegs(TmpUsedRegs);

            DebugMsg(SPeepholeOptimization + 'SETcc/TEST/Jcc -> Jcc',p);
          end;
      end;


    { Pass 2 peephole optimisation for the MOV instruction at p.

      The following patterns are tried in order; only the first whose
      condition matches is processed:
        1. mov reg1,reg2; movzx/sx reg2,reg3
             -> the extension reads reg1; the MOV is removed when reg2 and
                reg3 share a super register or reg2 is not used afterwards.
        2. mov reg1,reg2; mov/zx/sx (reg2,..),reg2
             -> the reference uses reg1 and the MOV is removed.
        3. mov (ref),reg; <foldable arithmetic or lea on reg>; mov reg,(ref)
             -> the operation is applied to (ref) directly when reg is not
                used afterwards.
        4. x86_64 only: movl x,r1d; movl y,r2d; addq/leaq; shrq $1
             -> movl; addl; rcrl $1.

      p may be replaced by the instruction following it when the MOV is
      removed. Result is set to true only in patterns 1 and 2. }
    function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
      var
       TmpUsedRegs : TAllUsedRegs;
       hp1,hp2,hp3: tai;
      begin
        Result:=false;
        if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p, hp1) and
{$ifdef x86_64}
          MatchInstruction(hp1,A_MOVZX,A_MOVSX,A_MOVSXD,[]) and
{$else x86_64}
          MatchInstruction(hp1,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
          MatchOpType(taicpu(hp1),top_reg,top_reg) and
          (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          { mov reg1, reg2                mov reg1, reg2
            movzx/sx reg2, reg3      to   movzx/sx reg1, reg3}
          begin
            taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
            DebugMsg(SPeepholeOptimization + 'mov %reg1,%reg2; movzx/sx %reg2,%reg3 -> mov %reg1,%reg2;movzx/sx %reg1,%reg3',p);

            { Don't remove the MOV command without first checking that reg2 isn't used afterwards,
              or unless supreg(reg3) = supreg(reg2)). [Kit] }
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));

            if (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) or
              not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)
            then
              begin
                asml.remove(p);
                p.free;
                p := hp1;
                Result:=true;
              end;

            ReleaseUsedRegs(TmpUsedRegs);
            exit;
          end
        else if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p, hp1) and
{$ifdef x86_64}
          MatchInstruction(hp1,[A_MOV,A_MOVZX,A_MOVSX,A_MOVSXD],[]) and
{$else x86_64}
          MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
{$endif x86_64}
          MatchOpType(taicpu(hp1),top_ref,top_reg) and
          ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
           or
           (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
            ) and
          (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
          { mov reg1, reg2
            mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
          begin
            if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
            if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
            DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.typ = top_ref) and
          GetNextInstruction(p,hp1) and
          (hp1.typ = ait_instruction) and
          { while the GetNextInstruction(hp1,hp2) call could be factored out,
            doing it separately in both branches allows to do the cheap checks
            with low probability earlier }
          ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[])
           ) or
           ((taicpu(hp1).opcode=A_LEA) and
             GetNextInstruction(hp1,hp2) and
             MatchInstruction(hp2,A_MOV,[]) and
            ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
             (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
              ) or
             (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
             (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
             (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
            ) and
            ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
           )
          ) and
          MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
          (taicpu(hp2).oper[1]^.typ = top_ref) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs,tai(p.next));
            UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
            if (RefsEqual(taicpu(hp2).oper[1]^.ref^,taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,TmpUsedRegs))) then
              { change   mov            (ref), reg
                         add/sub/or/... reg2/$const, reg
                         mov            reg, (ref)
                         # release reg
                to       add/sub/or/... reg2/$const, (ref)    }
              begin
                case taicpu(hp1).opcode of
                  A_INC,A_DEC,A_NOT,A_NEG :
                    taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                  A_LEA :
                    begin
                      { the lea becomes an add whose source is the other
                        register of the reference or, failing that, its
                        offset }
                      taicpu(hp1).opcode:=A_ADD;
                      if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                      else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                      else
                        taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                      taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                      DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
                    end
                  else
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                end;
                asml.remove(p);
                asml.remove(hp2);
                p.free;
                hp2.free;
                p := hp1
              end;
            ReleaseUsedRegs(TmpUsedRegs);
            Exit;
{$ifdef x86_64}
          end
        else if (taicpu(p).opsize = S_L) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          (
            GetNextInstruction(p, hp1) and
            MatchInstruction(hp1, A_MOV,[]) and
            (taicpu(hp1).opsize = S_L) and
            (taicpu(hp1).oper[1]^.typ = top_reg)
          ) and (
            GetNextInstruction(hp1, hp2) and
            (tai(hp2).typ=ait_instruction) and
            (taicpu(hp2).opsize = S_Q) and
            (
              (
                MatchInstruction(hp2, A_ADD,[]) and
                (taicpu(hp2).opsize = S_Q) and
                (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
                (
                  (
                    (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[0]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                )
              ) or (
                MatchInstruction(hp2, A_LEA,[]) and
                (taicpu(hp2).oper[0]^.ref^.offset = 0) and
                (taicpu(hp2).oper[0]^.ref^.scalefactor <= 1) and
                (
                  (
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(p).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.base) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
                    (getsupreg(taicpu(hp2).oper[0]^.ref^.index) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                ) and (
                  (
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
                  ) or (
                    (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg))
                  )
                )
              )
            )
          ) and (
            GetNextInstruction(hp2, hp3) and
            MatchInstruction(hp3, A_SHR,[]) and
            (taicpu(hp3).opsize = S_Q) and
            { the SHR destination itself must be a register before its reg
              field is compared below; this used to test hp2 by mistake }
            (taicpu(hp3).oper[0]^.typ = top_const) and (taicpu(hp3).oper[1]^.typ = top_reg) and
            (taicpu(hp3).oper[0]^.val = 1) and
            (taicpu(hp3).oper[1]^.reg = taicpu(hp2).oper[1]^.reg)
          ) then
          begin
            { Change   movl    x,    reg1d         movl    x,    reg1d
                       movl    y,    reg2d         movl    y,    reg2d
                       addq    reg2q,reg1q   or    leaq    (reg1q,reg2q),reg1q
                       shrq    $1,   reg1q         shrq    $1,   reg1q

            ( reg1d and reg2d can be switched around in the first two instructions )

              To       movl    x,    reg1d
                       addl    y,    reg1d
                       rcrl    $1,   reg1d

              This corresponds to the common expression (x + y) shr 1, where
              x and y are Cardinals (replacing "shr 1" with "div 2" produces
              smaller code, but won't account for x + y causing an overflow). [Kit]
            }

            if (getsupreg(taicpu(hp2).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) then
              { Change first MOV command to have the same register as the final output }
              taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg
            else
              taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;

            { Change second MOV command to an ADD command. This is easier than
              converting the existing command because it means we don't have to
              touch 'y', which might be a complicated reference, and also the
              fact that the third command might either be ADD or LEA. 
[Kit] }
            taicpu(hp1).opcode := A_ADD;

            { Delete old ADD/LEA instruction }
            asml.remove(hp2);
            hp2.free;

            { Convert "shrq $1, reg1q" to "rcr $1, reg1d" }
            taicpu(hp3).opcode := A_RCR;
            taicpu(hp3).changeopsize(S_L);
            setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
{$endif x86_64}
          end;
      end;


    { Pass 2 peephole optimisation for the IMUL instruction at p.

      Folds a preceding register-to-register MOV into the multiplication:
          mov reg1,reg2
          imul y,reg2      ->   imul y,reg1,reg2
      where y is a constant or a full-address reference. Returns true when
      the MOV was removed. }
    function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
      var
        ScratchRegs : TAllUsedRegs;
        PrevMov : tai;
        MulInstr : taicpu;
      begin
        Result:=false;
        MulInstr:=taicpu(p);

        { two or three operand form only }
        if MulInstr.ops < 2 then
          exit;
        { the multiplier is a constant or a full-address reference }
        if not((MulInstr.oper[0]^.typ = top_const) or
               ((MulInstr.oper[0]^.typ = top_ref) and (MulInstr.oper[0]^.ref^.refaddr=addr_full))) then
          exit;
        if MulInstr.oper[1]^.typ <> top_reg then
          exit;
        { a three operand form must use the same register as source and destination }
        if (MulInstr.ops <> 2) and
           not((MulInstr.oper[2]^.typ = top_reg) and
               (MulInstr.oper[2]^.reg = MulInstr.oper[1]^.reg)) then
          exit;

        { the previous instruction has to be "mov reg1,reg2" writing the
          register that is multiplied }
        if not GetLastInstruction(p,PrevMov) then
          exit;
        if not MatchInstruction(PrevMov,A_MOV,[]) then
          exit;
        if not MatchOpType(taicpu(PrevMov),top_reg,top_reg) then
          exit;
        if not((taicpu(PrevMov).oper[1]^.reg = MulInstr.oper[1]^.reg) or
               ((taicpu(PrevMov).opsize=S_L) and (MulInstr.opsize=S_Q) and
                SuperRegistersEqual(taicpu(PrevMov).oper[1]^.reg,MulInstr.oper[1]^.reg))) then
          exit;

        CopyUsedRegs(ScratchRegs);
        if not(RegUsedAfterInstruction(MulInstr.oper[1]^.reg,p,ScratchRegs)) then
          begin
            { rewrite to the three operand form and drop the MOV }
            MulInstr.ops := 3;
            MulInstr.loadreg(1,taicpu(PrevMov).oper[0]^.reg);
            MulInstr.loadreg(2,taicpu(PrevMov).oper[1]^.reg);
            DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
            asml.remove(PrevMov);
            PrevMov.free;
            Result:=true;
          end;
        ReleaseUsedRegs(ScratchRegs);
      end;


    { Pass 2 peephole optimisation for the JMP instruction at p.

      An unconditional jump to a label that is directly followed by a RET
      is turned into that RET:
             jmp .L1
             ...
         .L1:
             ret
      becomes
             ret
      The reference count of the label is decreased. Returns true when the
      jump was rewritten. }
    function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
      var
        DestInstr : tai;
        JmpInstr : taicpu;
      begin
        Result:=false;
        JmpInstr:=taicpu(p);

        { only jumps to a plain symbol without base or index register }
        if (JmpInstr.oper[0]^.typ<>top_ref) or
           (JmpInstr.oper[0]^.ref^.refaddr<>addr_full) or
           (JmpInstr.oper[0]^.ref^.base<>NR_NO) or
           (JmpInstr.oper[0]^.ref^.index<>NR_NO) then
          exit;

        DestInstr:=getlabelwithsym(tasmlabel(JmpInstr.oper[0]^.ref^.symbol));
        if (JmpInstr.condition<>C_None) or not assigned(DestInstr) then
          exit;
        { move past the label(s) to the first entry behind them }
        if not SkipLabels(DestInstr,DestInstr) then
          exit;
        if not MatchInstruction(DestInstr,A_RET,[S_NO]) then
          exit;

        tasmlabel(JmpInstr.oper[0]^.ref^.symbol).decrefs;
        JmpInstr.opcode:=A_RET;
        JmpInstr.is_jmp:=false;
        { take over the operand of the RET, if it has one }
        JmpInstr.ops:=taicpu(DestInstr).ops;
        case taicpu(DestInstr).ops of
          0:
            JmpInstr.clearop(0);
          1:
            JmpInstr.loadconst(0,taicpu(DestInstr).oper[0]^.val);
          else
            internalerror(2016041301);
        end;
        Result:=true;
      end;


    { Returns true when p is assigned and is a 16, 32 or 64 bit MOV from a
      register to a register.

      A MOV with a memory source is deliberately not accepted: cmov ref,reg
      still raises an exception for ref=nil even when the move is not
      performed (FK). }
    function CanBeCMOV(p : tai) : boolean;
      begin
         Result:=false;
         if not assigned(p) then
           exit;
         if not MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) then
           exit;
         Result:=MatchOpType(taicpu(p),top_reg,top_reg);
      end;


    function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
      var
        hp1,hp2,hp3,hp4,hpmov2: tai;
        carryadd_opcode : TAsmOp;
        l : Longint;
        condition : TAsmCond;
        symbol: TAsmSymbol;
      begin
        result:=false;
        symbol:=nil;
        if GetNextInstruction(p,hp1) then
      begin            symbol := TAsmLabel(taicpu(p).oper[0]^.ref^.symbol);            if (hp1.typ=ait_instruction) and               GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and               (Tasmlabel(symbol) = Tai_label(hp2).labsym) then                 { jb @@1                            cmc                   inc/dec operand           -->     adc/sbb operand,0                   @@1:                   ... and ...                   jnb @@1                   inc/dec operand           -->     adc/sbb operand,0                   @@1: }              begin                carryadd_opcode:=A_NONE;                if Taicpu(p).condition in [C_NAE,C_B] then                  begin                    if Taicpu(hp1).opcode=A_INC then                      carryadd_opcode:=A_ADC;                    if Taicpu(hp1).opcode=A_DEC then                      carryadd_opcode:=A_SBB;                    if carryadd_opcode<>A_NONE then                      begin                        Taicpu(p).clearop(0);                        Taicpu(p).ops:=0;                        Taicpu(p).is_jmp:=false;                        Taicpu(p).opcode:=A_CMC;                        Taicpu(p).condition:=C_NONE;                        Taicpu(hp1).ops:=2;                        Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);                        Taicpu(hp1).loadconst(0,0);                        Taicpu(hp1).opcode:=carryadd_opcode;                        result:=true;                        exit;                      end;                  end;                if Taicpu(p).condition in [C_AE,C_NB] then                  begin                    if Taicpu(hp1).opcode=A_INC then                      carryadd_opcode:=A_ADC;                    if Taicpu(hp1).opcode=A_DEC then                      carryadd_opcode:=A_SBB;                    if carryadd_opcode<>A_NONE then                      begin                        asml.remove(p);                        p.free;                        
Taicpu(hp1).ops:=2;                        Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);                        Taicpu(hp1).loadconst(0,0);                        Taicpu(hp1).opcode:=carryadd_opcode;                        p:=hp1;                        result:=true;                        exit;                      end;                  end;              end;            if ((hp1.typ = ait_label) and (symbol = tai_label(hp1).labsym))                or ((hp1.typ = ait_align) and GetNextInstruction(hp1, hp2) and (hp2.typ = ait_label) and (symbol = tai_label(hp2).labsym)) then              begin                { If Jcc is immediately followed by the label that it's supposed to jump to, remove it }                DebugMsg(SPeepholeOptimization + 'Removed conditional jump whose destination was immediately after it', p);                UpdateUsedRegs(hp1);                TAsmLabel(symbol).decrefs;                { if the label refs. reach zero, remove any alignment before the label }                if (hp1.typ = ait_align) then                  begin                    UpdateUsedRegs(hp2);                    if (TAsmLabel(symbol).getrefs = 0) then                    begin                      asml.Remove(hp1);                      hp1.Free;                    end;                    hp1 := hp2; { Set hp1 to the label }                  end;                asml.remove(p);                p.free;                if (TAsmLabel(symbol).getrefs = 0) then                  begin                    GetNextInstruction(hp1, p); { Instruction following the label }                    asml.remove(hp1);                    hp1.free;                    UpdateUsedRegs(p);                    Result := True;                  end                else                  begin                    { We don't need to set the result to True because we know hp1                      is a label and won't trigger any optimisation routines. 
[Kit] }                    p := hp1;                  end;                Exit;              end;          end;{$ifndef i8086}        if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then          begin             { check for                    jCC   xxx                    <several movs>                 xxx:             }             l:=0;             GetNextInstruction(p, hp1);             while assigned(hp1) and               CanBeCMOV(hp1) and               { stop on labels }               not(hp1.typ=ait_label) do               begin                  inc(l);                  GetNextInstruction(hp1,hp1);               end;             if assigned(hp1) then               begin                  if FindLabel(tasmlabel(symbol),hp1) then                    begin                      if (l<=4) and (l>0) then                        begin                          condition:=inverse_cond(taicpu(p).condition);                          GetNextInstruction(p,hp1);                          repeat                            if not Assigned(hp1) then                              InternalError(2018062900);                            taicpu(hp1).opcode:=A_CMOVcc;                            taicpu(hp1).condition:=condition;                            UpdateUsedRegs(hp1);                            GetNextInstruction(hp1,hp1);                          until not(CanBeCMOV(hp1));                          { Don't decrement the reference count on the label yet, otherwise                            GetNextInstruction might skip over the label if it drops to                            zero. }                          GetNextInstruction(hp1,hp2);                          { if the label refs. 
reach zero, remove any alignment before the label }                          if (hp1.typ = ait_align) and (hp2.typ = ait_label) then                            begin                              { Ref = 1 means it will drop to zero }                              if (tasmlabel(symbol).getrefs=1) then                                begin                                  asml.Remove(hp1);                                  hp1.Free;                                end;                            end                          else                            hp2 := hp1;                          if not Assigned(hp2) then                            InternalError(2018062910);                          if (hp2.typ <> ait_label) then                            begin                              { There's something other than CMOVs here.  Move the original jump                                to right before this point, then break out.                                Originally this was part of the above internal error, but it got                                triggered on the bootstrapping process sometimes. Investigate. 
[Kit] }                              asml.remove(p);                              asml.insertbefore(p, hp2);                              DebugMsg('Jcc/CMOVcc drop-out', p);                              UpdateUsedRegs(p);                              Result := True;                              Exit;                            end;                          { Now we can safely decrement the reference count }                          tasmlabel(symbol).decrefs;                          { Remove the original jump }                          asml.Remove(p);                          p.Free;                          GetNextInstruction(hp2, p); { Instruction after the label }                          { Remove the label if this is its final reference }                          if (tasmlabel(symbol).getrefs=0) then                            begin                              asml.remove(hp2);                              hp2.free;                            end;                          if Assigned(p) then                            begin                              UpdateUsedRegs(p);                              result:=true;                            end;                          exit;                        end;                    end                  else                    begin                       { check further for                              jCC   xxx                              <several movs 1>                              jmp   yyy                      xxx:                              <several movs 2>                      yyy:                       }                      { hp2 points to jmp yyy }                      hp2:=hp1;                      { skip hp1 to xxx (or an align right before it) }                      GetNextInstruction(hp1, hp1);                      if assigned(hp2) and                        assigned(hp1) and                        (l<=3) and                        (hp2.typ=ait_instruction) and                        (taicpu(hp2).is_jmp) 
and                        (taicpu(hp2).condition=C_None) and                        { real label and jump, no further references to the                          label are allowed }                        (tasmlabel(symbol).getrefs=1) and                        FindLabel(tasmlabel(symbol),hp1) then                         begin                           l:=0;                           { skip hp1 to <several moves 2> }                           if (hp1.typ = ait_align) then                             GetNextInstruction(hp1, hp1);                           GetNextInstruction(hp1, hpmov2);                           hp1 := hpmov2;                           while assigned(hp1) and                             CanBeCMOV(hp1) do                             begin                               inc(l);                               GetNextInstruction(hp1, hp1);                             end;                           { hp1 points to yyy (or an align right before it) }                           hp3 := hp1;                           if assigned(hp1) and                             FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then                             begin                                condition:=inverse_cond(taicpu(p).condition);                                GetNextInstruction(p,hp1);                                repeat                                  taicpu(hp1).opcode:=A_CMOVcc;                                  taicpu(hp1).condition:=condition;                                  UpdateUsedRegs(hp1);                                  GetNextInstruction(hp1,hp1);                                until not(assigned(hp1)) or                                  not(CanBeCMOV(hp1));                                condition:=inverse_cond(condition);                                hp1 := hpmov2;                                { hp1 is now at <several movs 2> }                                while Assigned(hp1) and CanBeCMOV(hp1) do                               
   begin                                    taicpu(hp1).opcode:=A_CMOVcc;                                    taicpu(hp1).condition:=condition;                                    UpdateUsedRegs(hp1);                                    GetNextInstruction(hp1,hp1);                                  end;                                hp1 := p;                                { Get first instruction after label }                                GetNextInstruction(hp3, p);                                if assigned(p) and (hp3.typ = ait_align) then                                  GetNextInstruction(p, p);                                { Don't dereference yet, as doing so will cause                                  GetNextInstruction to skip the label and                                  optional align marker. [Kit] }                                GetNextInstruction(hp2, hp4);                                { remove jCC }                                asml.remove(hp1);                                hp1.free;                                { Remove label xxx (it will have a ref of zero due to the initial check }                                if (hp4.typ = ait_align) then                                  begin                                    { Account for alignment as well }                                    GetNextInstruction(hp4, hp1);                                    asml.remove(hp1);                                    hp1.free;                                  end;                                asml.remove(hp4);                                hp4.free;                                { Now we can safely decrement it }                                tasmlabel(symbol).decrefs;                                { remove jmp }                                symbol := taicpu(hp2).oper[0]^.ref^.symbol;                                asml.remove(hp2);                                hp2.free;                                { Remove label yyy (and the optional alignment) 
if its reference will fall to zero }
                               if tasmlabel(symbol).getrefs = 1 then
                                 begin
                                   if (hp3.typ = ait_align) then
                                     begin
                                       { Account for alignment as well }
                                       GetNextInstruction(hp3, hp1);
                                       asml.remove(hp1);
                                       hp1.free;
                                     end;
                                   asml.remove(hp3);
                                   hp3.free;
                                   { As before, now we can safely decrement it }
                                   tasmlabel(symbol).decrefs;
                                 end;
                               if Assigned(p) then
                                 begin
                                   UpdateUsedRegs(p);
                                   result:=true;
                                 end;
                               exit;
                            end;
                        end;
                   end;
              end;
         end;
{$endif i8086}
     end;


    { Pass 1 peephole optimisations for the instruction at p (presumably a
      MOVSX/MOVZX; the dispatching caller is not visible here - confirm).

      Three groups of rewrites are attempted:
        1. "movX src,reg2; arith x,reg2; mov reg2,src" is folded into a single
           arithmetic instruction operating directly on src (var3).
        2. For MOVZX: an immediately following AND with the full mask of the
           source size is removed (var4..var6).
        3. For MOVZX: the instruction is turned into an AND (same register) or
           into a plain MOV whose following AND constant is narrowed
           (var7..var18).

      p is a var parameter: in case 1 it is replaced by the arithmetic
      instruction.  Note that Result is only ever assigned false here, even
      when the instruction list has been modified. }
    function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
      var
        hp1,hp2: tai;
      begin
        result:=false;
        if (taicpu(p).oper[1]^.typ = top_reg) and
           GetNextInstruction(p,hp1) and
           (hp1.typ = ait_instruction) and
           IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
           GetNextInstruction(hp1,hp2) and
           MatchInstruction(hp2,A_MOV,[]) and
           (taicpu(hp2).oper[0]^.typ = top_reg) and
           OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
{$ifdef i386}
           { not all registers have byte size sub registers on i386 }
           ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
           (((taicpu(hp1).ops=2) and
             (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
             ((taicpu(hp1).ops=1) and
             (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
           not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
          begin
            { change   movsX/movzX    reg/ref, reg2
                       add/sub/or/... reg3/$const, reg2
                       mov            reg2 reg/ref
              to       add/sub/or/... reg3/$const, reg/ref      }

            { by example:
                movswl  %si,%eax        movswl  %si,%eax      p
                decl    %eax            addl    %edx,%eax     hp1
                movw    %ax,%si         movw    %ax,%si       hp2
              ->
                movswl  %si,%eax        movswl  %si,%eax      p
                decw    %eax            addw    %edx,%eax     hp1
                movw    %ax,%si         movw    %ax,%si       hp2
            }
            taicpu(hp1).changeopsize(taicpu(hp2).opsize);
            {
              ->
                movswl  %si,%eax        movswl  %si,%eax      p
                decw    %si             addw    %dx,%si       hp1
                movw    %ax,%si         movw    %ax,%si       hp2
            }
            case taicpu(hp1).ops of
              1:
               taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
              2:
                begin
                  taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
                  { a register source must be narrowed to the size of the store }
                  if (taicpu(hp1).oper[0]^.typ = top_reg) then
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              else
                internalerror(2008042701);
            end;
            {
              ->
                decw    %si             addw    %dx,%si       p
            }
            DebugMsg(SPeepholeOptimization + 'var3',p);
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p:=hp1;
          end
        else if taicpu(p).opcode=A_MOVZX then
          begin
            { removes superfluous And's after movzx's }
            if (taicpu(p).oper[1]^.typ = top_reg) and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              (taicpu(hp1).opcode = A_AND) and
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[1]^.typ = top_reg) and
              (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
              begin
                case taicpu(p).opsize Of
                  S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                    if (taicpu(hp1).oper[0]^.val = $ff) then
                      begin
                        DebugMsg(SPeepholeOptimization + 'var4',p);
                        asml.remove(hp1);
                        hp1.free;
                      end;
                    S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                      if (taicpu(hp1).oper[0]^.val = $ffff) then
                        begin
                          DebugMsg(SPeepholeOptimization + 'var5',p);
                          asml.remove(hp1);
                          hp1.free;
                        end;
{$ifdef x86_64}
                    S_LQ:
                      if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                        begin
                          { unlike the other cases this comment is emitted
                            directly instead of through DebugMsg }
                          if (cs_asm_source in current_settings.globalswitches) then
                            asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
                          asml.remove(hp1);
                          hp1.Free;
                        end;
{$endif x86_64}
                    end;
              end;
            { changes some movzx constructs to faster synonyms (all examples
              are given with eax/ax, but are also valid for other registers)}
            if (taicpu(p).oper[1]^.typ = top_reg) then
              if (taicpu(p).oper[0]^.typ = top_reg) then
                case taicpu(p).opsize of
                  S_BW:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                        begin
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_W);
                          taicpu(p).loadConst(0,$ff);
                          DebugMsg(SPeepholeOptimization + 'var7',p);
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                      { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                        to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var8',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_W);
                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                    end;
                  S_BL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                        begin
                          { report this rewrite like every other one in the
                            var3..var18 series }
                          DebugMsg(SPeepholeOptimization + 'var9',p);
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ff)
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                          to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var10',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          { do not use R_SUBWHOLE
                            as movl %rdx,%eax
                            is invalid in assembler PM }
                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end
                    end;
{$ifndef i8086}
                  S_WL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                        not(cs_opt_size in current_settings.optimizerswitches) then
                        { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                        begin
                          DebugMsg(SPeepholeOptimization + 'var11',p);
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ffff);
                        end
                      else if GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                          to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                        begin
                          DebugMsg(SPeepholeOptimization + 'var12',p);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          { do not use R_SUBWHOLE
                            as movl %rdx,%eax
                            is invalid in assembler PM }
                          setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        end;
                    end;
{$endif i8086}
                  end
                else if (taicpu(p).oper[0]^.typ = top_ref) then
                  begin
                    { movzx from memory followed by an AND on the destination:
                      turn the movzx into a plain mov and narrow the AND
                      constant to the original source width }
                    if GetNextInstruction(p, hp1) and
                      (tai(hp1).typ = ait_instruction) and
                      (taicpu(hp1).opcode = A_AND) and
                      MatchOpType(taicpu(hp1),top_const,top_reg) and
                      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                      begin
                        taicpu(p).opcode := A_MOV;
                        case taicpu(p).opsize Of
                          S_BL:
                            begin
                              DebugMsg(SPeepholeOptimization + 'var13',p);
                              taicpu(p).changeopsize(S_L);
                              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                            end;
                          S_WL:
                            begin
                              DebugMsg(SPeepholeOptimization + 'var14',p);
                              taicpu(p).changeopsize(S_L);
                              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                            end;
                          S_BW:
                            begin
                              DebugMsg(SPeepholeOptimization + 'var15',p);
                              taicpu(p).changeopsize(S_W);
                              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                            end;
{$ifdef x86_64}
                          S_BQ:
                            begin
                              DebugMsg(SPeepholeOptimization + 'var16',p);
                              taicpu(p).changeopsize(S_Q);
                              taicpu(hp1).loadConst(
                                0, taicpu(hp1).oper[0]^.val and $ff);
                            end;
                          S_WQ:
                            begin
                              DebugMsg(SPeepholeOptimization + 'var17',p);
                              taicpu(p).changeopsize(S_Q);
                              taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
                            end;
                          S_LQ:
                            begin
                              DebugMsg(SPeepholeOptimization + 'var18',p);
                              taicpu(p).changeopsize(S_Q);
                              taicpu(hp1).loadConst(
                                0, taicpu(hp1).oper[0]^.val and $ffffffff);
                            end;
{$endif x86_64}
                          else
                            Internalerror(2017050704)
                        end;
                      end;
                  end;
          end;
      end;


    { Pass 1 peephole optimisations for the instruction at p (presumably an
      AND; the dispatching caller is not visible here - confirm).

      Looks at the following instruction hp1 and tries, in this order:
        - and;and        -> one and with the combined constant (Result=true)
        - and;movzx      -> and, when the mask already fits the movzx source
        - and;shl        -> shl, when the masked-off bits are shifted out
                            (Result=true)
        - and;movsx(d)   -> and, when the mask clears the sign bit
        - and;jcc        -> test;jcc when the register is not in UsedRegs
      Finally an AND with an all-ones constant of the operand size is
      rewritten as "and reg,reg".

      p is a var parameter and is replaced by hp1 when p itself is removed.
      Result is true only in the two cases marked above. }
    function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
      var
        hp1 : tai;
        MaskLength : Cardinal;
      begin
        Result:=false;

        if GetNextInstruction(p, hp1) then
          begin
            if MatchOpType(taicpu(p),top_const,top_reg) and
              MatchInstruction(hp1,A_AND,[]) and
              MatchOpType(taicpu(hp1),top_const,top_reg) and
              (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
              { the second register must contain the first one, so compare their subreg types }
              (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
              (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
              { change
                  and const1, reg
                  and const2, reg
                to
                  and (const1 and const2), reg
              }
              begin
                taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
                DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
                asml.remove(p);
                p.Free;
                p:=hp1;
                Result:=true;
                exit;
              end
            else if MatchOpType(taicpu(p),top_const,top_reg) and
              MatchInstruction(hp1,A_MOVZX,[]) and
              (taicpu(hp1).oper[0]^.typ = top_reg) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
              (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
               (((taicpu(p).opsize=S_W) and
                 (taicpu(hp1).opsize=S_BW)) or
                ((taicpu(p).opsize=S_L) and
                 (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
                  or
                 ((taicpu(p).opsize=S_Q) and
                  (taicpu(hp1).opsize in [S_BQ,S_WQ]))
{$endif x86_64}
                ) then
                  begin
                    { the movzx is redundant when the mask has no bits set
                      above the movzx source width }
                    if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                        ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
                         ) or
                       (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                        ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
                    then
                      begin
                        { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
                          32-bit register to a 64-bit register, or even a version called MOVZXD, so
                          code that tests for the presence of AND 0xffffffff followed by MOVZX is
                          wasted, and is indicative of a compiler bug if it were triggered. [Kit]

                          NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
                        }
                        DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
                        asml.remove(hp1);
                        hp1.free;
                        Exit;
                      end;
                  end
            else if MatchOpType(taicpu(p),top_const,top_reg) and
              MatchInstruction(hp1,A_SHL,[]) and
              MatchOpType(taicpu(hp1),top_const,top_reg) and
              (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
              begin
{$ifopt R+}
{$define RANGE_WAS_ON}
{$R-}
{$endif}
                { get length of potential and mask }
                { NOTE(review): this evaluates to (bit width of val) minus
                  (index of the highest set bit) minus 1, which looks like the
                  number of leading zero bits rather than the number of mask
                  bits; confirm whether BsrQWord(...)+1 was intended before
                  changing it, since the checks below depend on it }
                MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;

                { really a mask? }
{$ifdef RANGE_WAS_ON}
{$R+}
{$endif}
                if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
                  { unmasked part shifted out? }
                  ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
                    { take care of the register (de)allocs following p }
                    UpdateUsedRegs(tai(p.next));
                    asml.remove(p);
                    p.free;
                    p:=hp1;
                    Result:=true;
                    exit;
                  end;
              end
            else if MatchOpType(taicpu(p),top_const,top_reg) and
              MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
              (taicpu(hp1).oper[0]^.typ = top_reg) and
              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
              (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
               (((taicpu(p).opsize=S_W) and
                 (taicpu(hp1).opsize=S_BW)) or
                ((taicpu(p).opsize=S_L) and
                 (taicpu(hp1).opsize in [S_WL,S_BL]))
{$ifdef x86_64}
                 or
                 ((taicpu(p).opsize=S_Q) and
                 (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
{$endif x86_64}
                ) then
                  begin
                    { the movsx is redundant when the mask leaves the sign
                      bit of the movsx source (and everything above) clear }
                    if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
                        ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
                         ) or
                       (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
                        ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
{$ifdef x86_64}
                       or
                       (((taicpu(hp1).opsize)=S_LQ) and
                        ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
                       )
{$endif x86_64}
                       then
                       begin
                         DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
                         asml.remove(hp1);
                         hp1.free;
                         Exit;
                       end;
                  end
            else if (taicpu(p).oper[1]^.typ = top_reg) and
              (hp1.typ = ait_instruction) and
              (taicpu(hp1).is_jmp) and
              (taicpu(hp1).opcode<>A_JMP) and
              not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
              begin
                { change
                    and x, reg
                    jxx
                  to
                    test x, reg
                    jxx
                  if reg is deallocated before the
                  jump, but only if it's a conditional jump (PFV)
                }
                taicpu(p).opcode := A_TEST;
                Exit;
              end;
          end;

        { Lone AND tests }
        if MatchOpType(taicpu(p),top_const,top_reg) then
          begin
            {
              - Convert and $0xFF,reg to and reg,reg if reg is 8-bit
              - Convert and $0xFFFF,reg to and reg,reg if reg is 16-bit
              - Convert and $0xFFFFFFFF,reg to and reg,reg if reg is 32-bit
            }
            if ((taicpu(p).oper[0]^.val = $FF) and (taicpu(p).opsize = S_B)) or
              ((taicpu(p).oper[0]^.val = $FFFF) and (taicpu(p).opsize = S_W)) or
              ((taicpu(p).oper[0]^.val = $FFFFFFFF) and (taicpu(p).opsize = S_L)) then
              begin
                taicpu(p).loadreg(0, taicpu(p).oper[1]^.reg)
              end;
          end;
      end;


    { Post peephole optimisation for the instruction at p (presumably a LEA).

      When the flags register is not in UsedRegs and the reference matches
      (dest,index) or (base,dest) as judged by MatchReference, the instruction
      is rewritten in place as "add <other register>,dest".
      Returns true when the rewrite was done. }
    function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
      begin
        Result:=false;
        if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
          MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
          (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
          begin
            { destination is the base: add the index to it.  Operand 1 is
              loaded first because loading operand 0 replaces the reference }
            taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
            taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
            taicpu(p).opcode:=A_ADD;
            DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
            result:=true;
          end
        else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
          MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
          (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
          begin
            { destination is the index: add the base to it }
            taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
            taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
            taicpu(p).opcode:=A_ADD;
            DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
            result:=true;
          end;
      end;


    { Post peephole optimisation for the instruction at p (presumably a MOV).

      Handles "mov $const,%reg" only:
        - const = 0 becomes "xor %reg,%reg" unless the flags are in UsedRegs
        - const in $1..$FFFFFFFF with a 64-bit operand size is narrowed to
          the 32-bit form of the register
      Returns true when the instruction was changed. }
    function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
      var
        Value, RegName: string;
      begin
        Result:=false;
        if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
          begin
            case taicpu(p).oper[0]^.val of
            0:
              { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
              if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
                begin
                  { change "mov $0,%reg" into "xor %reg,%reg" }
                  taicpu(p).opcode := A_XOR;
                  taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                  Result := True;
                end;
            $1..$FFFFFFFF:
              begin
                { Code size reduction by J. Gareth "Kit" Moreton }
                { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
                case taicpu(p).opsize of
                S_Q:
                  begin
                    { capture the names before the operand is modified so the
                      debug message can show both forms }
                    RegName := debug_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                    Value := debug_tostr(taicpu(p).oper[0]^.val);

                    { The actual optimization }
                    setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                    taicpu(p).changeopsize(S_L);

                    DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',' + RegName + ' -> movl $' + Value + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                    Result := True;
                  end;
                end;
              end;
            end;
          end;
      end;


    { Post peephole optimisation for the instruction at p (presumably a CMP):
      rewrites a comparison of a register against the constant 0 as a TEST of
      the register against itself.  Returns true when the rewrite was done. }
    function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
      begin
        Result:=false;
        { change "cmp $0, %reg" to "test %reg, %reg" }
        if MatchOpType(taicpu(p),top_const,top_reg) and
           (taicpu(p).oper[0]^.val = 0) then
          begin
            taicpu(p).opcode := A_TEST;
            taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
            Result:=true;
          end;
      end;


    { Post peephole optimisation for the instruction at p (presumably a
      TEST or OR).

      If p only re-derives flags that the preceding arithmetic instruction
      (hp1) already produced for the same operand, and the following
      instruction (hp2) is a SETcc/Jcc/CMOVcc with a suitable condition, p is
      removed and replaced by its successor (Result=true).  Otherwise a
      "test $-1,%reg" is rewritten as "test %reg,%reg" (Result stays false). }
    function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
      var
        IsTestConstX : Boolean;
        hp1,hp2 : tai;
      begin
        Result:=false;
        { removes the line marked with (x) from the sequence
          and/or/xor/add/sub/... $x, %y
          test/or %y, %y  | test $-1, %y    (x)
          j(n)z _Label
             as the first instruction already adjusts the ZF
             %y operand may also be a reference }
        IsTestConstX:=(taicpu(p).opcode=A_TEST) and
          MatchOperand(taicpu(p).oper[0]^,-1);
        if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
           GetLastInstruction(p, hp1) and
           (tai(hp1).typ = ait_instruction) and
           GetNextInstruction(p,hp2) and
           MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
          case taicpu(hp1).opcode Of
            A_ADD, A_SUB, A_OR, A_XOR, A_AND:
              begin
                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                  { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                  { and in case of carry for A(E)/B(E)/C/NC                  }
                   ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                    ((taicpu(hp1).opcode <> A_ADD) and
                     (taicpu(hp1).opcode <> A_SUB))) then
                  begin
                    { hp1 is reused here as "successor of p" }
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    p := tai(hp1);
                    Result:=true;
                  end;
              end;
            A_SHL, A_SAL, A_SHR, A_SAR:
              begin
                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                  { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                  { therefore, it's only safe to do this optimization for     }
                  { shifts by a (nonzero) constant                            }
                   (taicpu(hp1).oper[0]^.typ = top_const) and
                   (taicpu(hp1).oper[0]^.val <> 0) and
                  { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                  { and in case of carry for A(E)/B(E)/C/NC                  }
                   (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                  begin
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    p := tai(hp1);
                    Result:=true;
                  end;
              end;
            A_DEC, A_INC, A_NEG:
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                  { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                  { and in case of carry for A(E)/B(E)/C/NC                  }
                  (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                  begin
                    case taicpu(hp1).opcode Of
                      A_DEC, A_INC:
                        { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                        begin
                          case taicpu(hp1).opcode Of
                            A_DEC: taicpu(hp1).opcode := A_SUB;
                            A_INC: taicpu(hp1).opcode := A_ADD;
                          end;
                          { the single operand becomes the destination and
                            the constant 1 becomes the source }
                          taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                          taicpu(hp1).loadConst(0,1);
                          taicpu(hp1).ops:=2;
                        end
                      end;
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    p := tai(hp1);
                    Result:=true;
                  end;
              end
          else
            { change "test  $-1,%reg" into "test %reg,%reg" }
            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
          end { case }
        { change "test  $-1,%reg" into "test %reg,%reg" }
        else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end;


    { Post peephole optimisation for the instruction at p (presumably a CALL).

      On non-x86_64 targets optimising for a CPU older than cpu_Pentium2,
      "call x; jmp y" becomes "push y; jmp x".  Otherwise, at optimisation
      level 4, "call x; ret" becomes "jmp x".  Neither rewrite is done when
      PIC code is being generated.  Returns true when a rewrite was done. }
    function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
      var
        hp1 : tai;
        hp2 : taicpu;
      begin
        Result:=false;
{$ifndef x86_64}
        { don't do this on modern CPUs, this really hurts them due to
          broken call/ret pairing }
        if (current_settings.optimizecputype < cpu_Pentium2) and
           not(cs_create_pic in current_settings.moduleswitches) and
           GetNextInstruction(p, hp1) and
           MatchInstruction(hp1,A_JMP,[S_NO]) and
           MatchOpType(taicpu(hp1),top_ref) and
           (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
          begin
            { push the jump target so that it acts as the return address of
              the procedure that is now jumped to }
            hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
            InsertLLItem(p.previous, p, hp2);
            taicpu(p).opcode := A_JMP;
            taicpu(p).is_jmp := true;
            asml.remove(hp1);
            hp1.free;
            Result:=true;
          end
        else
{$endif x86_64}
        { replace
            call   procname
            ret
          by
            jmp    procname

          this should never hurt except when pic is used, not sure
          how to handle it then

          but do it only on level 4 because it destroys stack back traces
        }
        if (cs_opt_level4 in current_settings.optimizerswitches) and
          not(cs_create_pic in current_settings.moduleswitches) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_RET,[S_NO]) and
          (taicpu(hp1).ops=0) then
          begin
            taicpu(p).opcode := A_JMP;
            taicpu(p).is_jmp := true;
            asml.remove(hp1);
            hp1.free;
            Result:=true;
          end;
      end;


{$ifdef x86_64}
    { Post peephole optimisation for the instruction at p (presumably a
      MOVZX): narrows a byte/word to 64-bit zero extension to the equivalent
      extension to 32 bits when the destination is one of the eight listed
      registers.  The instruction is changed in place; Result is always
      false. }
    function TX86AsmOptimizer.PostPeepholeOptMovzx(var p : tai) : Boolean;
      var
        PreMessage: string;
      begin
        Result := False;
        { Code size reduction by J. Gareth "Kit" Moreton }
        { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
        if (taicpu(p).opsize in [S_BQ, S_WQ]) and
          (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
        then
          begin
            { Has 64-bit register name and opcode suffix }
            PreMessage := 'movz' + debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' -> movz';

            { The actual optimization }
            setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
            if taicpu(p).opsize = S_BQ then
              taicpu(p).changeopsize(S_BL)
            else
              taicpu(p).changeopsize(S_WL);

            DebugMsg(SPeepholeOptimization + PreMessage +
              debug_opsize2str(taicpu(p).opsize) + ' ' + debug_operstr(taicpu(p).oper[0]^) + ',' + debug_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
          end;
      end;


    { Post peephole optimisation for the instruction at p (presumably an
      XOR): narrows a 64-bit "xor %reg,%reg" to its 32-bit form when the
      register is one of the eight listed registers.  Instructions whose two
      operands differ are left alone.  The instruction is changed in place;
      Result is always false. }
    function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
      var
        PreMessage, RegName: string;
      begin
        { Code size reduction by J. Gareth "Kit" Moreton }
        { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
          as this removes the REX prefix }

        Result := False;
        if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
          Exit;

        if taicpu(p).oper[0]^.typ <> top_reg then
          { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
          InternalError(2018011500);

        case taicpu(p).opsize of
        S_Q:
          if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
            begin
              RegName := debug_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
              PreMessage := 'xorq ' + RegName + ',' + RegName + ' -> xorl ';

              { The actual optimization }
              setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
              setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
              taicpu(p).changeopsize(S_L);

              RegName := debug_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }

              DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',' + RegName + ' (removes REX prefix)', p);
            end;
        end;
      end;
{$endif}


    { Walks every entry from BlockStart up to (not including) BlockEnd and
      calls optimize_ref on each reference operand of each instruction. }
    procedure TX86AsmOptimizer.OptReferences;
      var
        p: tai;
        i: Integer;
      begin
        p := BlockStart;
        while (p <> BlockEnd) Do
          begin
            if p.typ=ait_instruction then
              begin
                for i:=0 to taicpu(p).ops-1 do
                  if taicpu(p).oper[i]^.typ=top_ref then
                    optimize_ref(taicpu(p).oper[i]^.ref^,false);
              end;
            p:=tai(p.next);
          end;
      end;

end.
 |