{
    Copyright (c) 2000-2002 by Florian Klaempfl

    Common code generation for add nodes on the i386 and x86

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit nx86add;

{$i fpcdefs.inc}

  interface

    uses
      symtype,
      cgbase,
      cpubase,
      node,nadd,ncgadd;

    type
      { Add node for the x86 family, derived from the generic code
        generator add node (tcgaddnode).  The protected section holds the
        helpers shared by the second_* code generation methods, the public
        section the overridden entry points. }
      tx86addnode = class(tcgaddnode)
      protected
        { condition flags matching the node type of an integer comparison }
        function  getresflags(unsigned : boolean) : tresflags;
        { condition flags matching the node type of a floating point comparison }
        function  getfpuresflags : tresflags;
        procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
        procedure force_left_and_right_fpureg;
        procedure prepare_x87_locations(out refnode: tnode);
        procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize;AllocFlags:boolean);
        procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

        procedure second_cmpfloatvector;

        procedure second_addfloatsse;
        procedure second_addfloatavx;
      public
        function pass_1 : tnode;override;
        function simplify(forinline : boolean) : tnode; override;
        function use_fma : boolean;override;
        procedure second_addfloat;override;
{$ifndef i8086}
        procedure second_addsmallset;override;
        procedure
second_addsmallsetelement;override;
{$endif not i8086}
        procedure second_add64bit;override;
        procedure second_cmpfloat;override;
        procedure second_cmpsmallset;override;
        procedure second_cmp64bit;override;
        procedure second_cmpordinal;override;
        procedure second_addordinal;override;
        procedure second_addboolean;override;
{$ifdef SUPPORT_MMX}
        procedure second_opmmx;override;
{$endif SUPPORT_MMX}
        procedure second_opvector;override;
      end;


  implementation

    uses
      globtype,globals,
      verbose,cutils,compinnr,
      cpuinfo,
      aasmbase,aasmdata,aasmcpu,
      symconst,symdef,
      cgobj,hlcgobj,cgx86,cga,cgutils,
      tgobj,ncgutil,nutils,
      ncon,nset,ninl,ncnv,ncal,nmat,
      defutil,defcmp,constexp,
      pass_1,pass_2,htypechk;

{ Range check must be disabled explicitly as the code serves
  on three different architecture sizes }
{$R-}

{*****************************************************************************
                                  Helpers
*****************************************************************************}

    { Emits the instruction(s) for "op" with left.location.register as the
      destination; the caller must already have forced left into a register.

        op         - instruction to emit
        opsize     - operand size, also used to pick the instruction suffix
        unsigned   - selects the overflow test: F_AE when set, F_NO otherwise
        extra_not  - one operand has to be complemented (NOT) before "op"
        mboverflow - the operation may overflow; together with
                     needoverflowcheck this makes the routine emit a
                     conditional jump around a call to FPC_OVERFLOW

      For a constant right operand several cheaper encodings are tried first
      (TEST for a comparison with zero, INC/DEC, SHL, LEA).  All of them that
      replace an arithmetic instruction are only taken when no overflow check
      is requested. }
    procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
      var
        power : longint;
        hl4   : tasmlabel;
        r     : Tregister;
        href  : treference;
        overflowcheck: boolean;
        comparison: boolean;
      begin
        overflowcheck:=needoverflowcheck;
        { operations whose result lives in the flags }
        comparison:=
          (op=A_CMP) or (op=A_TEST) or (op=A_BT) or is_boolean(resultdef);

        { at this point, left.location.loc should be LOC_REGISTER }
        if right.location.loc=LOC_REGISTER then
         begin
           { right.location is a LOC_REGISTER }
           { when swapped another result register }
           if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
               emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
           else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { these operations are the ones for which the operand order is
                exchanged before emitting }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              if comparison then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
         end
        else
         begin
           { right.location is not a LOC_REGISTER }
           if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, operate into it and
                copy the result back into the left register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              if comparison then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
           else
            begin
               { Optimizations when right.location is a constant value }
               if (op=A_CMP) and
                  (nodetype in [equaln,unequaln]) and
                  (right.location.loc=LOC_CONSTANT) and
                  (right.location.value=0) then
                 begin
                   { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                     spilling, while 'test %reg,%reg' still requires loading into register.
                     If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                     peephole optimizer (this optimization is currently available only for i386). }
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
                   emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                   emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
                 end
               else
                 if (op=A_ADD) and
                    (right.location.loc=LOC_CONSTANT) and
                    (right.location.value=1) and
                    not overflowcheck and
                    UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
               else
                 if (op=A_SUB) and
                    (right.location.loc=LOC_CONSTANT) and
                    (right.location.value=1) and
                    not overflowcheck and
                    UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
               else
                 { multiplication by a power of two becomes a shift.  A shift
                   does not leave the flags a real multiplication would, and
                   this path allocates no flags, so it may only be taken when
                   no overflow check follows - the same rule as for the other
                   constant shortcuts in this chain }
                 if (op=A_IMUL) and
                    (right.location.loc=LOC_CONSTANT) and
                    (ispowerof2(int64(right.location.value),power)) and
                    not overflowcheck then
                  begin
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
                { multiplication by (power of two)+1 with a scale of 2, 4 or 8:
                  reg+reg*scale computed by a single LEA into a new register }
                else if (op=A_IMUL) and
                    (right.location.loc=LOC_CONSTANT) and
                    (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                    (power in [1..3]) and
                    not overflowcheck then
                  begin
                    reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
                    href.index:=left.location.register;
                    href.scalefactor:=int64(right.location.value)-1;
                    left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                  end
               else
                 begin
                   if extra_not then
                     begin
                        { complement a copy of right in a scratch register and
                          AND it into left }
                        r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                        emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                        if comparison or (mboverflow and overflowcheck) then
                          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                        emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                     end
                   else
                     emit_op_right_left(op,opsize,comparison or (mboverflow and overflowcheck));
                 end;
            end;
         end;

        { only in case of overflow operations }
        { produce overflow code }
        { we must put it here directly, because sign of operation }
        { is in unsigned VAR!!                                   }
        if mboverflow then
         begin
           if overflowcheck then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the runtime error call when no overflow happened }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);

              if not comparison then
                cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
         end;
      end;


    procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      begin
        { left location is not a register? }
        if (left.location.loc<>LOC_REGISTER) then
         begin
           { if right is register then we can swap the locations }
           if (not noswap) and
              (right.location.loc=LOC_REGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
           else if (not noswap) and
              (right.location.loc=LOC_CREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              { maybe we can reuse a constant register when the
                operation is a comparison that doesn't change the
                value of the register }
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
              location:=left.location;
            end
           else
            begin
              { maybe we can reuse a constant register when the
                operation is a comparison that doesn't change the
                value of the register }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,(nodetype in
[ltn,lten,gtn,gten,equaln,unequaln]));
            end;
          end;
        { operands whose size differs from the operation size are reloaded
          into a register of the operation type; constants are left alone }
        if (right.location.loc<>LOC_CONSTANT) and
           (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
        if (left.location.loc<>LOC_CONSTANT) and
           (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
       end;


    { Loads both operands into FPU registers.  Right is loaded first when it
      is not in an FPU register yet, then left.  When left already was in an
      FPU register the operands end up in the wrong order and the node is
      marked as swapped instead. }
    procedure tx86addnode.force_left_and_right_fpureg;
      begin
        { step 1: right operand }
        if (right.location.loc<>LOC_FPUREGISTER) then
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

        { step 2: left operand; it is either loaded now, or it already was
          on the stack, in which case the order is reversed => swap }
        if (left.location.loc=LOC_FPUREGISTER) then
          toggleflag(nf_swapped)
        else
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
      end;


    { Makes sides suitable for executing an x87 instruction:
      if either side is OS_F32/OS_F64-sized LOC_REFERENCE, it is returned in 'refnode'
      everything else is loaded to FPU stack.
}
    procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
      begin
        refnode:=nil;

        { later on, no mm registers are allowed, so transfer everything to memory here
          below it is loaded into an fpu register if needed }
        if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

        if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

        { dispatch on the number of operands that already are FPU registers }
        case ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER) of
          0:
            begin
              { right is always loaded; left must be a memory reference and
                is either returned as refnode (single/double sized) or
                loaded as well }
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
              if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                InternalError(2013090803);
              if (left.location.size in [OS_F32,OS_F64]) then
                begin
                  refnode:=left;
                  toggleflag(nf_swapped);
                end
              else
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
            end;
          1:
            begin   { if left is on the stack then swap. }
              { the operand that is not an FPU register is the candidate
                for the memory operand }
              if (left.location.loc=LOC_FPUREGISTER) then
                refnode:=right
              else
                refnode:=left;
              if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                InternalError(2013090801);
              if not (refnode.location.size in [OS_F32,OS_F64]) then
                begin
                  { not usable as memory operand: load it and report no refnode }
                  hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                  if (refnode=right) then
                    toggleflag(nf_swapped);
                  refnode:=nil;
                end
              else
                begin
                  if (refnode=left) then
                    toggleflag(nf_swapped);
                end;
            end;
          2: { fpu operands are always in the wrong order on the stack }
            toggleflag(nf_swapped);
        else
          InternalError(2013090802);
        end;
      end;


    { Emits "op" with the right operand as source and left.location.register
      as destination.  Right may be a register, a memory reference or a
      constant; subset locations are loaded into a register first.  When
      AllocFlags is set, NR_DEFAULTFLAGS is allocated immediately before the
      instruction is emitted. }
    procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize;AllocFlags:boolean);
{$ifdef x86_64}
      var
        tmpreg : tregister;
{$endif x86_64}
      begin
        if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
        { left must be a register }
        case right.location.loc of
          LOC_REGISTER,
          LOC_CREGISTER :
            begin
              if AllocFlags then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
            end;
          LOC_REFERENCE,
          LOC_CREFERENCE :
            begin
              { the reference is simplified before the flags are allocated }
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              if AllocFlags then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
            end;
          LOC_CONSTANT :
            begin
{$ifdef x86_64}
              { x86_64 only supports signed 32 bits constants directly }
              if (opsize in [OS_S64,OS_64]) and
                 ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
                begin
                  { constant does not fit: go through a temporary register }
                  tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
                  if AllocFlags then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
                end
              else
{$endif x86_64}
                begin
                  if AllocFlags then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
                end;
            end;
          else
            internalerror(200203232);
        end;
      end;


    { Returns the condition flags that correspond to the comparison given by
      nodetype.  equaln/unequaln map to F_E/F_NE; for the ordering
      comparisons the signed (L/LE/G/GE) or unsigned (B/BE/A/AE) set is
      chosen by "unsigned", and the condition is mirrored when the operands
      have been swapped (nf_swapped).  Any other node type is an internal
      error. }
    function tx86addnode.getresflags(unsigned : boolean) : tresflags;
      begin
         case nodetype of
           equaln : getresflags:=F_E;
           unequaln : getresflags:=F_NE;
          else
           if not(unsigned) then
             begin
                { signed comparison }
                if nf_swapped in flags then
                  case nodetype of
                     ltn : getresflags:=F_G;
                     lten : getresflags:=F_GE;
                     gtn : getresflags:=F_L;
                     gten : getresflags:=F_LE;
                     else
                       internalerror(2013120105);
                  end
                else
                  case nodetype of
                     ltn :
getresflags:=F_L;
                     lten : getresflags:=F_LE;
                     gtn : getresflags:=F_G;
                     gten : getresflags:=F_GE;
                     else
                       internalerror(2013120106);
                  end;
             end
           else
             begin
                { unsigned comparison }
                if nf_swapped in flags then
                  case nodetype of
                     ltn : getresflags:=F_A;
                     lten : getresflags:=F_AE;
                     gtn : getresflags:=F_B;
                     gten : getresflags:=F_BE;
                     else
                       internalerror(2013120107);
                  end
                else
                  case nodetype of
                     ltn : getresflags:=F_B;
                     lten : getresflags:=F_BE;
                     gtn : getresflags:=F_A;
                     gten : getresflags:=F_AE;
                     else
                       internalerror(2013120108);
                  end;
             end;
         end;
      end;


    { Floating point counterpart of getresflags: maps nodetype to one of the
      F_F* conditions.  Equality tests do not depend on the operand order;
      the ordering tests are mirrored when nf_swapped is set.  Any other
      node type raises an internal error. }
    function tx86addnode.getfpuresflags : tresflags;
      begin
        case nodetype of
          equaln :
            result:=F_FE;
          unequaln :
            result:=F_FNE;
          else
            if nf_swapped in flags then
              case nodetype of
                ltn : result:=F_FA;
                lten : result:=F_FAE;
                gtn : result:=F_FB;
                gten : result:=F_FBE;
                else
                  internalerror(2014031402);
              end
            else
              case nodetype of
                ltn : result:=F_FB;
                lten : result:=F_FBE;
                gtn : result:=F_FA;
                gten : result:=F_FAE;
                else
                  internalerror(2014031403);
              end;
        end;
      end;


{*****************************************************************************
                                AddSmallSet
*****************************************************************************}

{$ifndef i8086}
    procedure tx86addnode.second_addsmallset;
      var
        setbase : aint;
   opdef  : tdef;        opsize : TCGSize;        op     : TAsmOp;        extra_not,        noswap : boolean;        all_member_optimization:boolean;      begin        pass_left_right;        noswap:=false;        extra_not:=false;        all_member_optimization:=false;        opdef:=resultdef;        opsize:=int_cgsize(opdef.size);        if (left.resultdef.typ=setdef) then          setbase:=tsetdef(left.resultdef).setbase        else          setbase:=tsetdef(right.resultdef).setbase;        case nodetype of          addn :            begin              { adding elements is not commutative }              if (nf_swapped in flags) and (left.nodetype=setelementn) then               swapleftright;              { are we adding set elements ? }              if right.nodetype=setelementn then               begin                 { no range support for smallsets! }                 if assigned(tsetelementnode(right).right) then                   internalerror(43244);                 { btsb isn't supported }                 if opsize=OS_8 then                   begin                     opsize:=OS_32;                     opdef:=u32inttype;                   end;                 { bts requires both elements to be registers }                 hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);                 hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);                 register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);                 op:=A_BTS;                 noswap:=true;               end              else               op:=A_OR;            end;          symdifn :            op:=A_XOR;          muln :            op:=A_AND;          subn :            begin              op:=A_AND;              if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or                  ((nf_swapped in flags) and 
(right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then                all_member_optimization:=true;              if (not(nf_swapped in flags)) and                 (right.location.loc=LOC_CONSTANT) then                right.location.value := not(right.location.value)              else if (nf_swapped in flags) and                      (left.location.loc=LOC_CONSTANT) then                left.location.value := not(left.location.value)              else                extra_not:=true;            end;          xorn :            op:=A_XOR;          orn :            op:=A_OR;          andn :            op:=A_AND;          else            internalerror(2003042215);        end;        if all_member_optimization then          begin            {A set expression [0..31]-x can be implemented with a simple NOT.}            if nf_swapped in flags then              begin                { newly swapped also set swapped flag }                location_swap(left.location,right.location);                toggleflag(nf_swapped);              end;            hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);            emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);            location:=right.location;          end        else          begin            { can we use the BMI1 instruction andn? 
}            if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and              (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then              begin                location_reset(location,LOC_REGISTER,left.location.size);                location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);                if nf_swapped in flags then                  begin                    location_swap(left.location,right.location);                    toggleflag(nf_swapped);                  end;                hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);                if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then                  hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);                case left.location.loc of                  LOC_CREGISTER,LOC_REGISTER:                    emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);                  LOC_CREFERENCE,LOC_REFERENCE:                    emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);                  else                    Internalerror(2018040201);                end;              end            else              begin                { left must be a register }                left_must_be_reg(opdef,opsize,noswap);                emit_generic_code(op,opsize,true,extra_not,false);                location_freetemp(current_asmdata.CurrAsmList,right.location);                { left is always a register and contains the result }                location:=left.location;              end;          end;        { fix the changed opsize we did above because of the missing btsb }        if opsize<>int_cgsize(resultdef.size) then          
hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
      end;


    { Emits code for adding a single element to a small set (nodetype addn,
      element node on the right).

      Sets whose result is one byte in size are delegated to the inherited
      implementation.  Otherwise:
        - constant element: OR the mask "1 shl (value-setbase)" into the set;
        - variable element: copy the set into the result register and set the
          bit with BTS, after register_maybe_adjust_setbase has been applied
          to the element register.

      Raises internalerror(2022090502) if the node is not an addn and
      internalerror(2022090501) if the element node carries a range. }
    procedure tx86addnode.second_addsmallsetelement;
      var
        base_ofs, elem_bit: aint;
      begin
        { byte-sized results take the inherited path }
        if resultdef.size=1 then
          begin
            inherited second_addsmallsetelement;
            exit;
          end;

        if nodetype<>addn then
          internalerror(2022090502);
        { ranges are not supported for small sets }
        if assigned(tsetelementnode(right).right) then
          internalerror(2022090501);

        pass_left_right;
        { the setelementn has to end up as the right operand }
        if (nf_swapped in flags) and
           (left.nodetype=setelementn) then
          swapleftright;

        force_reg_left_right(false,false);
        set_result_location_reg;
        base_ofs:=tsetdef(left.resultdef).setbase;

        if (right.location.loc <> LOC_CONSTANT) then
          begin
            { element only known at run time: result := set, then BTS element }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,resultdef,true);
            register_maybe_adjust_setbase(current_asmdata.CurrAsmList,resultdef,right.location,base_ofs);
            if left.location.loc = LOC_CONSTANT then
              hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                  left.location.value,location.register)
            else
              hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,
                  left.location.register,location.register);
            emit_reg_reg(A_BTS,TCGSize2Opsize[def_cgsize(resultdef)],right.location.register,location.register);
          end
        else
          begin
            { element is a constant: a single OR with the precomputed bit mask }
            elem_bit:=aint(1 shl (right.location.value-base_ofs));
            hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_OR,resultdef,
              elem_bit,left.location.register,location.register);
          end;
      end;
{$endif not 
i8086}


    { Emits code for comparing two small sets (=, <>, <=, >=).

      For lten/gten the operands are arranged by swapleftright depending on
      nf_swapped, left is forced into a register, an AND of the operands is
      emitted via emit_op_right_left and the node type is rewritten to equaln
      so that the final CMP yields an "equal" test.
      The result is returned in the CPU flags (LOC_FLAGS).

      Raises internalerror(2003042204) for any other node type. }
    procedure tx86addnode.second_cmpsmallset;
      var
        opdef  : tdef;
        opsize : TCGSize;
        op     : TAsmOp;
      begin
        pass_left_right;
        opdef:=left.resultdef;
        opsize:=int_cgsize(opdef.size);
        case nodetype of
          equaln,
          unequaln :
            op:=A_CMP;
          lten,gten:
            begin
              if (not(nf_swapped in flags) and (nodetype = lten)) or
                 ((nf_swapped in flags) and (nodetype = gten)) then
                swapleftright;
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
              emit_op_right_left(A_AND,opsize,False);
              op:=A_CMP;
              { warning: ugly hack, we need a JE so change the node to equaln }
              nodetype:=equaln;
            end;
          else
            internalerror(2003042204);
        end;
        { left must be a register }
        left_must_be_reg(opdef,opsize,false);
        emit_generic_code(op,opsize,true,false,false);
        location_freetemp(current_asmdata.CurrAsmList,right.location);
        location_freetemp(current_asmdata.CurrAsmList,left.location);
        location_reset(location,LOC_FLAGS,OS_NO);
        location.resflags:=getresflags(true);
      end;


{*****************************************************************************
                                AddMMX
*****************************************************************************}

{$ifdef SUPPORT_MMX}
    { Emits code for an MMX vector operation (addn, subn, muln, xorn, orn,
      andn) on two MMX operands; the result ends up in an MMX register.

      The opcode is chosen from the MMX element type of the left operand and,
      for add/sub, from the cs_mmx_saturation local switch.  If no opcode
      exists for the operator/element-type combination, op stays A_NOP and
      internalerror(201408201) is raised.

      Subtraction is not commutative, therefore a swapped subn is computed
      into the register holding the (current) right operand instead of the
      left one. }
    procedure tx86addnode.second_opmmx;
      var
        op         : TAsmOp;
        cmpop      : boolean;
        mmxbase    : tmmxtype;
        hreg,
        hregister  : tregister;
      begin
        pass_left_right;

        { cmpop is never set to true in this procedure, so the left temp is
          never released at the end }
        cmpop:=false;
        op:=A_NOP;

        mmxbase:=mmx_type(left.resultdef);
        location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
        case nodetype of
          addn :
            begin
              if (cs_mmx_saturation in current_settings.localswitches) then
                begin
                   case mmxbase of
                      mmxs8bit:
                        op:=A_PADDSB;
                      mmxu8bit:
                        op:=A_PADDUSB;
                      mmxs16bit,mmxfixed16:
                        op:=A_PADDSW;
                      mmxu16bit:
                        op:=A_PADDUSW;
                      else
                       ;
                   end;
                end
              else
                begin
                   case mmxbase of
                      mmxs8bit,mmxu8bit:
                        op:=A_PADDB;
                      mmxs16bit,mmxu16bit,mmxfixed16:
                        op:=A_PADDW;
                      mmxs32bit,mmxu32bit:
                        op:=A_PADDD;
                      else
                       ;
                   end;
                end;
            end;
          muln :
            begin
               case mmxbase of
                  mmxs16bit,mmxu16bit:
                    op:=A_PMULLW;
                  mmxfixed16:
                    op:=A_PMULHW;
                  else
                   ;
               end;
            end;
          subn :
            begin
              if (cs_mmx_saturation in current_settings.localswitches) then
                begin
                   case mmxbase of
                      mmxs8bit:
                        op:=A_PSUBSB;
                      mmxu8bit:
                        op:=A_PSUBUSB;
                      mmxs16bit,mmxfixed16:
                        { signed saturating subtract on 16 bit words; this
                          used to select the byte variant PSUBSB, which is
                          inconsistent with PADDSW used for addn above }
                        op:=A_PSUBSW;
                      mmxu16bit:
                        op:=A_PSUBUSW;
                      else
                       ;
                   end;
                end
              else
                begin
                   case mmxbase of
                      mmxs8bit,mmxu8bit:
                        op:=A_PSUBB;
                      mmxs16bit,mmxu16bit,mmxfixed16:
                        op:=A_PSUBW;
                      mmxs32bit,mmxu32bit:
                        op:=A_PSUBD;
                      else
                       ;
                   end;
                end;
            end;
          xorn:
            op:=A_PXOR;
          orn:
            op:=A_POR;
          andn:
            op:=A_PAND;
          else
            internalerror(2003042214);
        end;

        if op = A_NOP then
          internalerror(201408201);

        { left and right no register?  }
        { then one must be demanded    }
        if (left.location.loc<>LOC_MMXREGISTER) then
         begin
           if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
           else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
               begin
                 hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                 emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
               end
              else
               begin
                 if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  internalerror(200203245);

                 hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                 emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
               end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
         end;

        { at this point, left.location.loc should be LOC_MMXREGISTER }
        if right.location.loc<>LOC_MMXREGISTER then
         begin
           if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
               begin
                 emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                 emit_reg_reg(op,S_NO,left.location.register,hreg);
               end
              else
               begin
                 { it is the right operand that is read through its reference
                   below; left is already an MMX register here, so checking
                   left (as was done before) always raised the internal error }
                 if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  internalerror(2002032412);
                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                 emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                 emit_reg_reg(op,S_NO,left.location.register,hreg);
               end;
               location.register:=hreg;
            end
           else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
               begin
                 if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  internalerror(200203246);
                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                 emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
               end;
              location.register:=left.location.register;
            end;
          end
        else
          begin
            { right.location=LOC_MMXREGISTER }
            if (nodetype=subn) and (nf_swapped in flags) then
             begin
               { result is computed into right's register; swapping the
                 locations makes left.location describe the result }
               emit_reg_reg(op,S_NO,left.location.register,right.location.register);
               location_swap(left.location,right.location);
               toggleflag(nf_swapped);
             end
            else
             begin
               emit_reg_reg(op,S_NO,right.location.register,left.location.register);
             end;
            location.register:=left.location.register;
          end;

        location_freetemp(current_asmdata.CurrAsmList,right.location);
        if cmpop then
          location_freetemp(current_asmdata.CurrAsmList,left.location);
      end;
{$endif 
SUPPORT_MMX}


{*****************************************************************************
                                AddFloat
*****************************************************************************}

    { Emits two-operand scalar floating point code for addn, subn, muln and
      slashn; the result location is reset to LOC_MMREGISTER.

      Special case: when the fpu type is at least fpu_sse3, the result uses
      the vector fpu and the node is "sqr(a) + sqr(b)" or "sqr(a) - sqr(b)",
      the two sqr inline nodes are removed from the tree and the expression
      is computed with one packed multiply followed by a horizontal add/sub.

      Raises internalerror(200312231) for any other node type. }
    procedure tx86addnode.second_addfloatsse;
      var
        op : topcg;
        sqr_sum : boolean;
        tmp : tnode;
      begin
        sqr_sum:=false;
        if (current_settings.fputype>=fpu_sse3) and
           use_vectorfpu(resultdef) and
           (nodetype in [addn,subn]) and
          (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
          (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
          begin
            sqr_sum:=true;
            { replace each sqr(x) node by its argument x; the argument is
              detached first so that freeing the inline node does not free it }
            tmp:=tinlinenode(left).left;
            tinlinenode(left).left:=nil;
            left.free;
            left:=tmp;

            tmp:=tinlinenode(right).left;
            tinlinenode(right).left:=nil;
            right.free;
            right:=tmp;
          end;

        pass_left_right;
        { fpu operands are always in reversed order on the stack }
        if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
          toggleflag(nf_swapped);

        if (nf_swapped in flags) then
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
             (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;

        { map the node type to the generic code generator operation }
        case nodetype of
          addn :
            op:=OP_ADD;
          muln :
            op:=OP_MUL;
          subn :
            op:=OP_SUB;
          slashn :
            op:=OP_DIV;
          else
            internalerror(200312231);
        end;

        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

        if sqr_sum then
          begin
            if nf_swapped in flags then
              swapleftright;

            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
            { the result is built in the register holding left }
            location:=left.location;
            if is_double(resultdef) then
              begin
                { combine left and right into one register, square both
                  elements, then add/subtract them horizontally }
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
                case nodetype of
                  addn:
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                  subn:
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                  else
                    internalerror(201108162);
                end;
              end
            else
              begin
                { interleave the single precision values of left and right }
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
                { ensure that bits 64..127 contain valid values }
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
                { the data is now in bits 0..32 and 64..95
                  NOTE(review): after UNPCKLPS + SHUFPD %00 the pair presumably
                  occupies bits 0..63 with a copy in bits 64..127 - confirm }
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
                case nodetype of
                  addn:
                    begin
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                    end;
                  subn:
                    begin
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                    end;
                  else
                    internalerror(201108163);
                end;
              end
          end
        { we can use only right as left operand if the operation is commutative }
        else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
          begin
            { copy right into a new result register and apply left to it }
            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
            cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
            { force floating point reg. location to be written to memory,
              we don't force it to mm register because writing to memory
              allows probably shorter code because there is no direct fpu->mm register
              copy instruction
            }
            if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
              hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
            cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
            if left.location.loc=LOC_REFERENCE then
              tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
          end
        else
          begin
            { general case: load left into a new result register, then apply
              right to it }
            if nf_swapped in flags then
              swapleftright;

            { force floating point reg. location to be written to memory,
              we don't force it to mm register because writing to memory
              allows probably shorter code because there is no direct fpu->mm register
              copy instruction
            }
            if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
              hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
            cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
            if left.location.loc=LOC_REFERENCE then
              tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
            { force floating point reg. location to be written to memory,
              we don't force it to mm register because writing to memory
              allows probably shorter code because there is no direct fpu->mm register
              copy instruction
            }
            if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
              hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
            cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
            if right.location.loc=LOC_REFERENCE then
              tg.ungetiftemp(current_asmdata.CurrAsmList,right.location.reference);
          end;
      end;


    { Counterpart of second_addfloatsse that emits its operations through the
      three-operand helpers a_opmm_reg_reg_reg / a_opmm_loc_reg_reg; the result
      location is reset to LOC_MMREGISTER.

      The detection of "sqr(a) +/- sqr(b)" is only compiled when the symbol
      "dummy" is defined; otherwise sqr_sum stays false and the sqr_sum branch
      below is never taken.  A multiplication by the real constant 2 is
      emitted as an addition of the other operand to itself.

      Raises internalerror(2003122303) for any node type other than addn,
      muln, subn and slashn. }
    procedure tx86addnode.second_addfloatavx;
      var
        op : topcg;
        sqr_sum : boolean;
        {$ifdef dummy}
        tmp : tnode;
        {$endif dummy}
      begin
        sqr_sum:=false;
{$ifdef dummy}
        if (current_settings.fputype>=fpu_sse3) and
           use_vectorfpu(resultdef) and
           (nodetype in [addn,subn]) and
          (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
          (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
          begin
            sqr_sum:=true;
            tmp:=tinlinenode(left).left;
            tinlinenode(left).left:=nil;
            left.free;
            left:=tmp;

            tmp:=tinlinenode(right).left;
            tinlinenode(right).left:=nil;
            right.free;
            right:=tmp;
          end;
{$endif dummy}

        pass_left_right;
        { fpu operands are always in reversed order on the stack }
        if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
          toggleflag(nf_swapped);

        if (nf_swapped in flags) then
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
             (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;

        { map the node type to the generic code generator operation }
        case nodetype of
          addn :
            op:=OP_ADD;
          muln :
            op:=OP_MUL;
          subn :
            op:=OP_SUB;
          slashn :
            op:=OP_DIV;
          else
            internalerror(2003122303);
        end;

        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

        if sqr_sum then
          begin
            if nf_swapped in flags then
              swapleftright;

            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
            location:=left.location;
            if is_double(resultdef) then
              begin
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
                case nodetype of
                  addn:
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                  subn:
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                  else
                    internalerror(2011081601);
                end;
              end
            else
              begin
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
                { ensure that bits 64..127 contain valid values }
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
                { the data is now in bits 0..32 and 64..95
                  NOTE(review): after UNPCKLPS + SHUFPD %00 the pair presumably
                  occupies bits 0..63 with a copy in bits 64..127 - confirm }
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
                case nodetype of
                  addn:
                    begin
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                    end;
                  subn:
                    begin
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                    end;
                  else
                    internalerror(2011081604);
                end;
              end
          end
        { left*2 ? }
        else if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
          begin
            { emitted as left+left }
            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
            cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
              left.location.register,
              left.location.register,
              location.register,
              mms_movescalar);
          end
        { right*2 ? }
        else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
          begin
            { emitted as right+right }
            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
            cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
              right.location.register,
              right.location.register,
              location.register,
              mms_movescalar);
          end
        { we can use only right as left operand if the operation is commutative }
        else if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) and (op in [OP_ADD,OP_MUL]) then
          begin
            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
            { force floating point reg. location to be written to memory,
              we don't force it to mm register because writing to memory
              allows probably shorter code because there is no direct fpu->mm register
              copy instruction
            }
            if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
              hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
            cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
              left.location,
              right.location.register,
              location.register,
              mms_movescalar);
          end
        else
          begin
            { general case: left is forced into an mm register, right may stay
              in memory }
            if (nf_swapped in flags) then
              swapleftright;

            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
            { force floating point reg. location to be written to memory,
              we don't force it to mm register because writing to memory
              allows probably shorter code because there is no direct fpu->mm register
              copy instruction
            }
            if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
              hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
            cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
              right.location,
              left.location.register,
              location.register,
              mms_movescalar);
          end;
      end;


    { First pass for x86 add nodes.

      If at least one operand is a float and neither the operands nor the
      result use the vector fpu, both operands are marked as not regable
      before the inherited pass_1 runs.  Afterwards expectloc is set to
      LOC_MMREGISTER when the result uses the vector fpu.

      Returns the result of the inherited pass_1. }
    function tx86addnode.pass_1: tnode;
      begin
        { on x86, we do not support fpu registers, so in case of operations using the x87, it
          is normally useful, not to put the operands into registers which would be mm register }
        if ((left.resultdef.typ=floatdef) or (right.resultdef.typ=floatdef)) and
          (not(use_vectorfpu(left.resultdef)) and not(use_vectorfpu(right.resultdef)) and
           not(use_vectorfpu(resultdef))) then
          begin
            make_not_regable(left,[ra_addr_regable]);
            make_not_regable(right,[ra_addr_regable]);
          end;
        Result:=inherited pass_1;
        { correct expectloc, it does not matter if Result is set as another pass_1 is run on it
          which will fix that one }
        if use_vectorfpu(resultdef) then
          expectloc:=LOC_MMREGISTER;
      end;


    function tx86addnode.simplify(forinline : boolean) : tnode;
      var
        t, m, ThisNode, ConstNode: TNode;
        lt,rt, ThisType: TNodeType;
        ThisDef: TDef;
        DoOptimisation: Boolean;
        reciprocal, comparison, divisor: AWord;
        shift, N: Byte;
      begin
        { Load into local variables to reduce the number of pointer deallocations }
        rt:=right.nodetype;
        lt:=left.nodetype;
        DoOptimisation:=False;
{$if defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
        if (cs_opt_level1 in current_settings.optimizerswitches) and
          { The presence of overflow checks tends to cause internal errors with the multiplication nodes }
          not (cs_check_overflow in current_settings.localswitches) and
          (nodetype in [equaln,unequaln]) then
          begin
            if (lt=modn) and (rt=ordconstn) and (TOrdConstNode(right).value.uvalue=0) then
              begin
                t:=left;
                m:=right;
              end
            else if (rt=modn) and (lt=ordconstn) and (TOrdConstNode(left).value.uvalue=0) then
              begin
                t:=right;
                m:=left;
              end
            else
              begin
                t:=nil;
                m:=nil;
              end;
            if Assigned(t) and (TModDivNode(t).right.nodetype=ordconstn) and
{$ifndef cpu64bitalu}
              { Converting Int64 and QWord division doesn't work under i386 }
{$ifndef cpu32bitalu}
              (TModDivNode(t).resultdef.size < 4) 
and{$else cpu32bitalu}              (TModDivNode(t).resultdef.size < 8) and{$endif cpu32bitalu}{$endif cpu64bitalu}              (TOrdConstNode(TModDivNode(t).right).value>=3) then              begin                divisor:=TOrdConstNode(TModDivNode(t).right).value.uvalue;                { Exclude powers of 2, as there are more efficient ways to handle those }                if PopCnt(divisor)>1 then                  begin                    if is_signed(TModDivNode(t).left.resultdef) then                      begin                        { See pages 250-251 of Hacker's Delight, Second Edition                          for an explanation and proof of the algorithm, but                          essentially, we're doing the following:                          - Convert the divisor d to the form k.2^b if it isn't                            already odd (in which case, k = d and b = 0)                          - Calculate r, the multiplicative inverse of k modulo 2^N                          - Calculate c = floor(2^(N-1) / k) & -(2^b)                          - Let q = ((n * r) + c) ror b (mod 2^N)                          - Repurpose c to equal floor(2c / 2^b) = c shr (b - 1)                            (some RISC platforms will benefit from doing this over                            precalculating the modified constant. For x86,                            it's better with the constant precalculated for                            32-bit and under, but for 64-bit, use SHR. 
)                          - If q is below or equal to c, then (n mod d) = 0                          }                        while True do                          begin                            ThisNode:=TModDivNode(t).left;                            case ThisNode.nodetype of                              typeconvn:                                begin                                  ThisDef:=TTypeConvNode(ThisNode).left.resultdef;                                  { See if we can simplify things to a smaller ordinal to                                    reduce code size and increase speed }                                  if is_signed(ThisDef) and                                    is_integer(ThisDef) and                                    { Byte-sized multiplications can cause problems }                                    (ThisDef.size>=2) and                                    { Make sure the divisor is in range }                                    (divisor>=TOrdDef(ThisDef).low) and                                    (divisor<=TOrdDef(ThisDef).high) then                                    begin                                      TOrdConstNode(TModDivNode(t).right).resultdef:=ThisDef;                                      TOrdConstNode(m).resultdef:=ThisDef;                                      TModDivNode(t).resultdef:=ThisDef;                                      { Destroy the typeconv node }                                      TModDivNode(t).left:=TTypeConvNode(ThisNode).left;                                      TTypeConvNode(ThisNode).left:=nil;                                      ThisNode.Free;                                      Continue;                                    end;                                  end;                              ordconstn:                                begin                                  { Just simplify into a constant }                                  Result:=inherited simplify(forinline);                    
              Exit;                                end;                              else                                ;                            end;                            DoOptimisation:=True;                            Break;                          end;                        if DoOptimisation then                          begin                            ThisDef:=TModDivNode(t).left.resultdef;                            if nodetype = equaln then                              ThisType:=lten                            else                              ThisType:=gtn;                            N:=ThisDef.size*8;                            calc_mul_inverse(N, TOrdConstNode(TModDivNode(t).right).value.uvalue, reciprocal, shift);                            { Construct the following node tree for odd divisors:                                <lten> (for equaln) or <gtn> (for notequaln)                                  <addn>                                    <muln>                                      <typeconv signed-to-unsigned>                                        <numerator node (TModDivNode(t).left)>                                      <reciprocal constant>                                    <comparison constant (effectively a signed shift)>                                  <comparison constant * 2>                              For even divisors, convert them to the form k.2^b, with                              odd k, then construct the following:                                <lten> (for equaln) or <gtn> (for notequaln)                                  <ror>                                    (b)                                    <addn>                                      <muln>                                        <typeconv signed-to-unsigned>                                          <numerator node (TModDivNode(t).left)>                                        <reciprocal constant>                                      <comparison constant 
(effectively a signed shift)>                                  <comparison constant shr (b - 1)>                            }                            ThisNode:=ctypeconvnode.create_internal(TModDivNode(t).left, ThisDef);                            TTypeConvNode(ThisNode).convtype:=tc_int_2_int;                            ThisDef:=get_unsigned_inttype(ThisDef);                            ThisNode.resultdef:=ThisDef;                            TModDivNode(t).left:=nil;                            ConstNode:=cordconstnode.create(reciprocal, ThisDef, False);                            ConstNode.resultdef:=ThisDef;                            ThisNode:=caddnode.create_internal(muln, ThisNode, ConstNode);                            ThisNode.resultdef:=ThisDef;{$push}{$warnings off}                            if shift>0 then                              comparison:=((aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div (divisor shr shift)) and -(1 shl shift)                            else                              comparison:=(aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div divisor;{$pop}                            ConstNode:=cordconstnode.create(comparison, ThisDef, False);                            ConstNode.resultdef:=ThisDef;                            ThisNode:=caddnode.create_internal(addn, ThisNode, ConstNode);                            ThisNode.resultdef:=ThisDef;                            if shift>0 then                              begin                                ConstNode:=cordconstnode.create(shift, u8inttype, False);                                ConstNode.resultdef:=u8inttype;                                ThisNode:=cinlinenode.createintern(in_ror_x_y,false,                                  ccallparanode.create(ConstNode,                                  ccallparanode.create(ThisNode, nil)));                                ThisNode.resultdef:=ThisDef;                                ConstNode:=cordconstnode.create(comparison shr (shift - 1), 
ThisDef, False);                              end                            else                              ConstNode:=cordconstnode.create(comparison*2, ThisDef, False);                            ConstNode.resultdef:=ThisDef;                            Result:=CAddNode.create_internal(ThisType, ThisNode, ConstNode);                            Result.resultdef:=resultdef;                            Exit;                          end;                      end                    else                      begin                        { For bit length N, convert "(x mod d) = 0" or "(x mod d) <> 0", where                          d is an odd-numbered integer constant, to "(x * r) <= m", where                          dr = 1 (mod 2^N) and m = floor(2^N / d).                          If d is even, convert to the form k.2^b, where k is odd, then                          convert to "(x * r) ror b <= m", where kr = 1 (mod 2^N) and                          m = floor(2^N / d) = floor(2^(N-b) / k) }                        while True do                          begin                            ThisNode:=TModDivNode(t).left;                            case ThisNode.nodetype of                              typeconvn:                                begin                                  ThisDef:=TTypeConvNode(ThisNode).left.resultdef;                                  { See if we can simplify things to a smaller ordinal to                                    reduce code size and increase speed }                                  if not is_signed(ThisDef) and                                    is_integer(ThisDef) and                                    { Byte-sized multiplications can cause problems }                                    (ThisDef.size>=2) and                                    { Make sure the divisor is in range }                                    (divisor>=TOrdDef(ThisDef).low) and                                    (divisor<=TOrdDef(ThisDef).high) then              
                      begin                                      TOrdConstNode(TModDivNode(t).right).resultdef:=ThisDef;                                      TOrdConstNode(m).resultdef:=ThisDef;                                      TModDivNode(t).resultdef:=ThisDef;                                      { Destroy the typeconv node }                                      TModDivNode(t).left:=TTypeConvNode(ThisNode).left;                                      TTypeConvNode(ThisNode).left:=nil;                                      ThisNode.Free;                                      Continue;                                    end;                                  end;                              ordconstn:                                begin                                  { Just simplify into a constant }                                  Result:=inherited simplify(forinline);                                  Exit;                                end;                              else                                ;                            end;                            DoOptimisation:=True;                            Break;                          end;                        if DoOptimisation then                          begin                            ThisDef:=TModDivNode(t).left.resultdef;                            { Construct the following node tree for odd divisors:                                <lten> (for equaln) or <gtn> (for notequaln)                                  <muln>                                    <numerator node (TModDivNode(t).left)>                                    <reciprocal constant>                                  (2^N / divisor)                              For even divisors, convert them to the form k.2^b, with                              odd k, then construct the following:                                <lten> (for equaln) or <gtn> (for notequaln)                                  <ror>                                    (b) 
                                   <muln>                                      <numerator node (TModDivNode(t).left)>                                      <reciprocal constant>                                  (2^N / divisor)                            }                            if nodetype=equaln then                              ThisType:=lten                            else                              ThisType:=gtn;                            N:=ThisDef.size*8;                            calc_mul_inverse(N, TOrdConstNode(TModDivNode(t).right).value.uvalue, reciprocal, shift);                            ConstNode:=cordconstnode.create(reciprocal, ThisDef, False);                            ConstNode.resultdef:=ThisDef;                            ThisNode:=caddnode.create_internal(muln, TModDivNode(t).left, ConstNode);                            ThisNode.resultdef:=ThisDef;                            TModDivNode(t).left:=nil;                            if shift>0 then                              begin                                ConstNode:=cordconstnode.create(shift, u8inttype, False);                                ConstNode.resultdef:=u8inttype;                                ThisNode:=cinlinenode.createintern(in_ror_x_y,false,                                  ccallparanode.create(ConstNode,                                  ccallparanode.create(ThisNode, nil)));                                ThisNode.resultdef:=ThisDef;                                comparison:=(aWord(1) shl ((N-shift) and (SizeOf(aWord)*8-1))) div (divisor shr shift);                              end                            else                              begin{$push}{$warnings off}                                { Because 2^N and divisor are relatively prime,                                  floor(2^N / divisor) = floor((2^N - 1) / divisor) }                                comparison:=(aWord(not 0) shr (((SizeOf(aWord)*8)-N) and (SizeOf(aWord)*8-1))) div divisor;{$pop}            
                  end;
                            ConstNode:=cordconstnode.create(comparison, ThisDef, False);
                            ConstNode.resultdef:=ThisDef;
                            Result:=CAddNode.create_internal(ThisType, ThisNode, ConstNode);
                            Result.resultdef:=resultdef;
                            Exit;
                          end;
                      end;
                  end;
              end;
          end;
{$ifend defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
        Result:=inherited simplify(forinline);
      end;


    { Reports whether a fused multiply-add may be used for this node.

      On every target except i8086 the answer is True only when
      use_vectorfpu(resultdef) holds and the selected FPU type lists
      FPUX86_HAS_FMA or FPUX86_HAS_FMA4 in fpu_capabilities.  On i8086 the
      decision is left to the inherited implementation. }
    function tx86addnode.use_fma : boolean;
      begin
{$ifndef i8086}
        { test if the result stays in an xmm register, fiddling with fpu registers and fma makes no sense }
        Result:=use_vectorfpu(resultdef) and
          ((fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]);
{$else i8086}
        Result:=inherited use_fma;
{$endif i8086}
      end;


    { Emits a scalar floating point compare of left and right and leaves the
      outcome in the flags (location becomes LOC_FLAGS, resflags is taken
      from getfpuresflags).

      The opcode is COMISS or COMISD depending on whether left is single or
      double; the V-prefixed variant is chosen when UseAVX is set.  Any other
      left type raises internalerror(200402222).  An operand combination that
      none of the case branches handles raises internalerror(200402221) or
      internalerror(200402223). }
    procedure tx86addnode.second_cmpfloatvector;
      var
        op : tasmop;
      const
        { indexed by UseAVX }
        ops_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
        ops_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
      begin
        if is_single(left.resultdef) then
          op:=ops_single[UseAVX]
        else if is_double(left.resultdef) then
          op:=ops_double[UseAVX]
        else
          internalerror(200402222);

        pass_left_right;

        { fpu operands are always in reversed order on the stack }
        if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
          toggleflag(nf_swapped);

        location_reset(location,LOC_FLAGS,OS_NO);

        { Direct move fpu->mm register is not possible, so force any fpu operands to
          memory (not to mm registers because one of the memory locations can be used
          directly in compare instruction, yielding shorter code) }
        if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
        if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

        if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
          begin
            { right already lives in an MM register: left is passed as the first
              operand and right's register as the second, and nf_swapped is
              toggled afterwards to record the exchanged operand roles }
            case left.location.loc of
              LOC_REFERENCE,LOC_CREFERENCE:
                begin
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
                end;
              LOC_MMREGISTER,LOC_CMMREGISTER:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register));
              else
                internalerror(200402221);
            end;
            toggleflag(nf_swapped);
          end
        else
          begin
            { otherwise left is forced into a scalar MM register and right is
              passed as the first operand }
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
            case right.location.loc of
              LOC_REFERENCE,LOC_CREFERENCE:
                begin
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
                end;
              LOC_MMREGISTER,LOC_CMMREGISTER:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register));
              else
                internalerror(200402223);
            end;
          end;
        location.resflags:=getfpuresflags;
        location_freetemp(current_asmdata.CurrAsmList,left.location);
        location_freetemp(current_asmdata.CurrAsmList,right.location);
      end;


    procedure
tx86addnode.second_opvector;
      { Emits code for addn, muln, subn or slashn on operands for which
        fits_in_mm_register(left.resultdef) holds; the result is placed in an
        MM register.  Any other node type raises internalerror(200610071) and
        operands that do not fit raise internalerror(200610072). }
      var
        op : topcg;
      begin
        pass_left_right;
        if (nf_swapped in flags) then
          swapleftright;

        case nodetype of
          addn :
            op:=OP_ADD;
          muln :
            op:=OP_MUL;
          subn :
            op:=OP_SUB;
          slashn :
            op:=OP_DIV;
          else
            internalerror(200610071);
        end;

        if fits_in_mm_register(left.resultdef) then
          begin
            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
            { we can use only right as left operand if the operation is commutative }
            if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
              begin
                { NOTE(review): this branch casts left.resultdef directly to
                  tfloatdef, whereas the branch further down goes through
                  tarraydef(left.resultdef).elementdef - confirm that the
                  difference is intended }
                if UseAVX then
                  begin
                    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
                    cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,right.location.register,location.register,nil);
                  end
                else
                  begin
                    { without AVX the register of right doubles as the result register }
                    location.register:=right.location.register;
                    cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,location.register,nil);
                  end;
              end
            else
              begin
                location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
                if UseAVX then
                  begin
                    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
                    cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,
                      tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,left.location.register,location.register,nil);
                  end
                else
                  begin
                    { without AVX the register of left doubles as the result register }
                    location.register:=left.location.register;
                    cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
                      tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,location.register,nil);
                  end;
              end;
          end
        else
          begin
            { not yet supported }
            internalerror(200610072);
          end
      end;


    { Emits a floating point addn, muln, subn or slashn.

      When use_vectorfpu(resultdef) holds the work is delegated to
      second_addfloatavx or second_addfloatsse.  Otherwise x87 instructions
      are emitted and the result is reported as LOC_FPUREGISTER in NR_ST.
      Any other node type raises internalerror(2003042203). }
    procedure tx86addnode.second_addfloat;
      const
        { Each table is indexed by hasref: the False entry is used with the
          register operands NR_ST,NR_ST1 (followed by dec_fpu_stack), the
          True entry with the memory operand of refnode.  The rsub and rdiv
          tables are selected when nf_swapped is set. }
        ops_add:  array[boolean] of TAsmOp = (A_FADDP,A_FADD);
        ops_mul:  array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
        ops_sub:  array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
        ops_rsub: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
        ops_div:  array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
        ops_rdiv: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
      var
        op : TAsmOp;
        refnode, hp: tnode;
        hasref : boolean;
      begin
        if use_vectorfpu(resultdef) then
          begin
            if UseAVX then
              second_addfloatavx
            else
              second_addfloatsse;
            exit;
          end;

        { can the operation do the conversion? }
        { a type conversion whose source is single or double is unlinked from
          the operand and freed; hp only holds the detached conversion node }
        if (left.nodetype=typeconvn) and (is_double(ttypeconvnode(left).left.resultdef) or is_single(ttypeconvnode(left).left.resultdef)) then
          begin
            hp:=left;
            left:=ttypeconvnode(left).left;
            ttypeconvnode(hp).left:=nil;
            hp.Free;
          end;
        if (right.nodetype=typeconvn) and (is_double(ttypeconvnode(right).left.resultdef) or is_single(ttypeconvnode(right).left.resultdef)) then
          begin
            hp:=right;
            right:=ttypeconvnode(right).left;
            ttypeconvnode(hp).left:=nil;
            hp.Free;
          end;

        pass_left_right;
        prepare_x87_locations(refnode);
        { refnode is assigned when one operand can be used as a memory operand }
        hasref:=assigned(refnode);

        case nodetype of
          addn :
            op:=ops_add[hasref];
          muln :
            op:=ops_mul[hasref];
          subn :
            if (nf_swapped in flags) then
              op:=ops_rsub[hasref]
            else
              op:=ops_sub[hasref];
          slashn :
            if (nf_swapped in flags) then
              op:=ops_rdiv[hasref]
            else
              op:=ops_div[hasref];
          else
            internalerror(2003042203);
        end;

        if hasref then
          emit_ref(op,tcgsize2opsize[refnode.location.size],refnode.location.reference)
        else
          begin
            emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
            tcgx86(cg).dec_fpu_stack;
          end;

        location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
        location.register:=NR_ST;
      end;


    { Emits a floating point compare and leaves the outcome in the flags.

      If either operand uses the vector FPU the work is delegated to
      second_cmpfloatvector.  Otherwise both operands are forced into x87
      registers.  On targets other than x86_64, CPU types below cpu_Pentium2
      get FCOMPP followed by a transfer of the FPU status word into the CPU
      flags; every other case uses FCOMIP followed by FSTP ST0. }
    procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
      var
        tmpref: treference;
{$endif i8086}
      begin
        if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
          begin
            second_cmpfloatvector;
            exit;
          end;

        pass_left_right;
        force_left_and_right_fpureg;

{$ifndef x86_64}
        if current_settings.cputype<cpu_Pentium2 then
          begin
            emit_none(A_FCOMPP,S_NO);
            tcgx86(cg).dec_fpu_stack;
            tcgx86(cg).dec_fpu_stack;

            { load fpu flags }
{$ifdef i8086}
            if current_settings.cputype < cpu_286 then
              begin
                { below cpu_286 the status word is stored to a 2 byte temp and
                  the byte at offset 1 is then loaded into AH before SAHF }
                tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
                emit_ref(A_FSTSW,S_NO,tmpref);
                cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
                inc(tmpref.offset);
                emit_ref_reg(A_MOV,S_B,tmpref,NR_AH);
                dec(tmpref.offset);
                emit_none(A_SAHF,S_NO);
                cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
                tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
              end
            else
{$endif i8086}
              begin
                cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
                emit_reg(A_FNSTSW,S_NO,NR_AX);
                emit_none(A_SAHF,S_NO);
                cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              end;
            if cs_fpu_fwait in current_settings.localswitches then
              current_asmdata.CurrAsmList.concat(Taicpu.Op_none(A_FWAIT,S_NO));
          end
        else
{$endif x86_64}
          begin
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
            { fcomip pops only one fpu register }
            current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
            tcgx86(cg).dec_fpu_stack;
            tcgx86(cg).dec_fpu_stack;
          end;

        location_reset(location,LOC_FLAGS,OS_NO);
        location.resflags:=getfpuresflags;
      end;


{*****************************************************************************
                                  Add64bit
*****************************************************************************}

    { 64 bit arithmetic: with cpu64bitalu defined this is handled by
      second_addordinal; otherwise reaching this method raises
      internalerror(200402042). }
    procedure tx86addnode.second_add64bit;
      begin
{$ifdef cpu64bitalu}
        second_addordinal;
{$else cpu64bitalu}
        { must be implemented separately }
        internalerror(200402042);
{$endif cpu64bitalu}
      end;


    { 64 bit comparison: with cpu64bitalu defined this is handled by
      second_cmpordinal; otherwise reaching this method raises
      internalerror(200402043). }
    procedure tx86addnode.second_cmp64bit;
      begin
{$ifdef cpu64bitalu}
        second_cmpordinal;
{$else cpu64bitalu}
        { must be implemented separately }
        internalerror(200402043);
{$endif cpu64bitalu}
      end;


{*****************************************************************************
                                  AddOrdinal
*****************************************************************************}

    { Emits code for an ordinal addn, xorn, orn, andn, muln or subn node; any
      other node type raises internalerror(2015022501).

      Overflow checking is considered for addn, muln and subn only, and only
      when needoverflowcheck holds.  On targets other than i8086 and with
      cs_opt_level2 enabled, two patterns are emitted as single instructions
      and leave the procedure early:
        - "x and (not y)" as ANDN when the CPU type has CPUX86_HAS_BMI1
        - "x and ((1 shl y) - 1)" as BZHI when the CPU type has CPUX86_HAS_BMI2
      Everything else goes through either the three operand path or the two
      operand path that re-uses a register of one operand. }
    procedure tx86addnode.second_addordinal;
      var
         opsize : tcgsize;
         unsigned : boolean;
         cgop : topcg;
         checkoverflow : Boolean;
         ovloc : tlocation;
         tmpreg : TRegister;
         indexnode : TNode;
      begin
        { determine if the operation will be unsigned }
        unsigned:=not(is_signed(left.resultdef)) or
                    not(is_signed(right.resultdef));

        { assume no overflow checking is required }
        checkoverflow := false;

        ovloc.loc:=LOC_VOID;

        case nodetype of
          addn:
            begin
              cgop:=OP_ADD;
              checkoverflow:=true;
            end;
          xorn :
            begin
              cgop:=OP_XOR;
            end;
          orn :
            begin
              cgop:=OP_OR;
            end;
          andn:
            begin
              cgop:=OP_AND;
            end;
          muln:
            begin
              checkoverflow:=true;
              if unsigned then
                cgop:=OP_MUL
              else
                cgop:=OP_IMUL;
            end;
          subn :
            begin
              checkoverflow:=true;
              cgop:=OP_SUB;
            end;
          else
            internalerror(2015022501);
        end;

       checkoverflow:=
         checkoverflow and
         needoverflowcheck;

       opsize:=def_cgsize(left.resultdef);

{$ifndef i8086}
       if (cs_opt_level2 in current_settings.optimizerswitches) then
         begin
           { BMI1 optimisations }
           if (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) then
             begin
               { Can we turn "x and (not y)" into an ANDN instruction instead? }
               { NOTE(review): the location checks in the condition below run
                 before secondpass has been called on either operand in this
                 procedure - confirm the locations are meaningful here }
               if (nodetype = andn) and
                 (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
                 ((left.nodetype = notn) or (right.nodetype = notn)) and
                 (
                   { With "const and (not variable)", ANDN will produce larger
                     code once everything is moved into registers (as a side-note,
                     "const and (not const)" and "variable and (not const)" will
                     have been simplified earlier to remove the NOT operation). }
                   not (cs_opt_size in current_settings.optimizerswitches) or
                   (
                     (left.location.loc <> LOC_CONSTANT) and
                     (right.location.loc <> LOC_CONSTANT)
                   )
                 ) then
                 begin
                   { ANDN only supports the second operand being inverted; however,
                     since we're dealing with ordinals, there won't be any Boolean
                     shortcutting, so we can safely swap the parameters }
                   if (right.nodetype <> notn) then
                     swapleftright;

                   secondpass(left);

                   { Skip the not node completely }
                   Include(right.flags, nf_do_not_execute);
                   secondpass(tnotnode(right).left);

                   { allocate registers }
                   hlcg.location_force_reg(
                     current_asmdata.CurrAsmList,
                     tnotnode(right).left.location,
                     tnotnode(right).left.resultdef,
                     tnotnode(right).left.resultdef,
                     false
                   );

                   if left.location.loc = LOC_CONSTANT then
                     { With "const and (not variable)", we can probably still make a
                       saving when it comes to pipeline stalls (left.location.loc
                       will become LOC_CREGISTER). }
                     hlcg.location_force_reg(
                       current_asmdata.CurrAsmList,
                       left.location,
                       left.resultdef,
                       left.resultdef,
                       true
                     );

                   set_result_location_reg;

                   case left.location.loc of
                     LOC_REFERENCE,
                     LOC_CREFERENCE:
                       emit_ref_reg_reg(A_ANDN, TCGSize2OpSize[opsize], left.location.reference, tnotnode(right).left.location.register, location.register);
                     LOC_REGISTER,
                     LOC_CREGISTER:
                       emit_reg_reg_reg(A_ANDN, TCGSize2OpSize[opsize], left.location.register, tnotnode(right).left.location.register, location.register);
                     else
                       InternalError(2022102110);
                   end;

                   { Overflow can't happen with and/andn }
                   Exit;
                 end;
             end;

           { BMI2 optimisations }
           if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) then
             begin
               { Can we turn "x and ((1 shl y) - 1)" into a BZHI instruction instead? }
               { the pattern is accepted with the subtract node on either side }
               if (nodetype = andn) and
                 (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
                 (
                   (
                     (right.nodetype = subn) and
                     (taddnode(right).right.nodetype = ordconstn) and
                     (tordconstnode(taddnode(right).right).value = 1) and
                     (taddnode(right).left.nodetype = shln) and
                     (tshlshrnode(taddnode(right).left).left.nodetype = ordconstn) and
                     (tordconstnode(tshlshrnode(taddnode(right).left).left).value = 1)
                   ) or
                   (
                     (left.nodetype = subn) and
                     (taddnode(left).right.nodetype = ordconstn) and
                     (tordconstnode(taddnode(left).right).value = 1) and
                     (taddnode(left).left.nodetype = shln) and
                     (tshlshrnode(taddnode(left).left).left.nodetype = ordconstn) and
                     (tordconstnode(tshlshrnode(taddnode(left).left).left).value = 1)
                   )
                 ) then
                 begin
                   { Put the subtract node on the right }
                   if (right.nodetype <> subn) then
                     swapleftright;

                   secondpass(left);

                   { Skip the subtract and shift nodes completely }
                   Include(right.flags, nf_do_not_execute);
                   Include(taddnode(right).left.flags, nf_do_not_execute);

                   { Helps avoid all the awkward typecasts }
                   indexnode := tshlshrnode(taddnode(right).left).right;
{$ifdef x86_64}
                   { The code generator sometimes extends the shift result to 64-bit unnecessarily }
                   if (indexnode.nodetype = typeconvn) and (opsize in [OS_32, OS_S32]) and
                     (def_cgsize(TTypeConvNode(indexnode).resultdef) in [OS_64, OS_S64]) then
                     begin
                       { Convert to the 32-bit type }
                       indexnode.resultdef := resultdef;
                       node_reset_flags(indexnode,[nf_pass1_done]);

                       { We should not be getting any new errors }
                       if do_firstpass(indexnode) then
                         InternalError(2022110201);

                       { Keep things internally consistent in case indexnode changed }
                       tshlshrnode(taddnode(right).left).right := indexnode;
                     end;
{$endif x86_64}
                   secondpass(indexnode);

                   { allocate registers }
                   hlcg.location_force_reg(
                     current_asmdata.CurrAsmList,
                     indexnode.location,
                     indexnode.resultdef,
                     resultdef,
                     false
                   );

                   set_result_location_reg;

                   case left.location.loc of
                     LOC_REFERENCE,
                     LOC_CREFERENCE:
                       emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, left.location.reference, location.register);
                     LOC_REGISTER,
                     LOC_CREGISTER:
                       emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, left.location.register, location.register);
                     else
                       InternalError(2022102111);
                   end;

                   Exit;
                 end;
             end;
         end;
{$endif not i8086}

       pass_left_right;

       { do we have to allocate a register? If yes, then three opcode instructions are better, however for sub three op code instructions
         make no sense if right is a reference }
       if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER) and
           ((nodetype<>subn) or not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE])) and
           { 3 op mul makes only sense if a constant is involved }
           ((nodetype<>muln) or (left.location.loc=LOC_CONSTANT) or (right.location.loc=LOC_CONSTANT)
{$ifndef i8086}
            or ((CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and (not(needoverflowcheck))
               )
{$endif i8086}
           ) and
           (not(nodetype in [orn,andn,xorn]))) or
         ((nodetype=addn) and (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
         begin
           { three operand path: the result gets a register of its own }
           { allocate registers }
           force_reg_left_right(false,true);
           set_result_location_reg;
           if nodetype<>subn then
            begin
              if checkoverflow then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              if (right.location.loc<>LOC_CONSTANT) then
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                   left.location.register,right.location.register,
                   location.register,checkoverflow,ovloc)
              else
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                   right.location.value,left.location.register,
                   location.register,checkoverflow,ovloc);
            end
          else  { subtract is a special case since it is not commutative }
            begin
              if (nf_swapped in flags) then
                swapleftright;
              if left.location.loc<>LOC_CONSTANT then
                begin
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  if right.location.loc<>LOC_CONSTANT then
                    hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                        right.location.register,left.location.register,
                        location.register,checkoverflow,ovloc)
                  else
                    hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                      right.location.value,left.location.register,
                      location.register,checkoverflow,ovloc);
                end
              else
                begin
                  { a constant left operand is first loaded into a fresh register }
                  tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                  hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                    left.location.value,tmpreg);
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,tmpreg,location.register,checkoverflow,ovloc);
                end;
            end
         end
       else
         begin
           { at least one location should be a register, if yes, try to re-use it, so we can try two operand opcodes }
           if left.location.loc<>LOC_REGISTER then
              begin
                if right.location.loc<>LOC_REGISTER then
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false)
                else
                  begin
                    location_swap(left.location,right.location);
                    toggleflag(nf_swapped);
                  end;
              end;

           { at this point, left.location.loc should be LOC_REGISTER }
           if right.location.loc=LOC_REGISTER then
             begin
               if checkoverflow then
                 cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
               { when swapped another result register }
               if (nodetype=subn) and (nf_swapped in flags) then
                 begin
                   cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                     left.location.register,right.location.register);
                   location_swap(left.location,right.location);
                   toggleflag(nf_swapped);
                 end
               else
                 cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                    right.location.register,left.location.register);
             end
           else
             begin
               { right.location<>LOC_REGISTER }
               if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                 hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);
               if (nodetype=subn) and (nf_swapped in flags) then
                 begin
                   { swapped subtract: right is loaded into a new register, which
                     becomes the left location, and the old left register
                     (tmpreg) is passed as the source operand }
                   tmpreg:=left.location.register;
                   left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                   cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
                   if checkoverflow then
                     cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                   cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
                 end
               else
                 begin
                   if checkoverflow then
                     cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                   cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
                 end;
               location_freetemp(current_asmdata.CurrAsmList,right.location);
             end;
           { the two operand path reports the left location as the result }
           location_copy(location,left.location);
         end;
     { emit overflow check if required }
       if checkoverflow then
         cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
     end;


    { Emits code for a boolean operation.

      orn and andn nodes are passed to the inherited implementation when
      full boolean evaluation is switched off or nf_short_bool is set.
      Otherwise operands for which is_64bit(left.resultdef) holds also go to
      the inherited implementation, and everything else is handled by
      second_addordinal. }
    procedure tx86addnode.second_addboolean;
      begin
        if (nodetype in [orn,andn]) and
           (not(cs_full_boolean_eval in current_settings.localswitches) or
          (nf_short_bool in flags)) then
          inherited second_addboolean
        else if is_64bit(left.resultdef) then
          inherited
        else
          second_addordinal;
      end;


    { Emits a CMP for two ordinal operands and leaves the outcome in the
      flags (location becomes LOC_FLAGS, resflags comes from
      getresflags(unsigned)).

      The comparison counts as unsigned as soon as one operand is not
      signed.  The operand size is taken from left.resultdef. }
    procedure tx86addnode.second_cmpordinal;
      var
         opdef  : tdef;
         opsize : tcgsize;
         unsigned : boolean;
      begin
         unsigned:=not(is_signed(left.resultdef)) or
                   not(is_signed(right.resultdef));
         opdef:=left.resultdef;
         opsize:=def_cgsize(opdef);

         pass_left_right;

         { a constant on the right can be compared directly against a memory
           operand on the left; on x86_64 a 64 bit compare additionally
           requires the constant to lie within the longint range }
         if (right.location.loc=LOC_CONSTANT) and
            (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
{$ifdef x86_64}
              and ((not (opsize in [OS_64,OS_S64])) or (
              (right.location.value>=low(longint)) and (right.location.value<=high(longint))
            ))
{$endif x86_64}
         then
           begin
             cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
             emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
             location_freetemp(current_asmdata.CurrAsmList,left.location);
           end
         else
           begin
             { general case: left is brought into a register first }
             left_must_be_reg(opdef,opsize,false);
             emit_generic_code(A_CMP,opsize,unsigned,false,false);
             location_freetemp(current_asmdata.CurrAsmList,right.location);
             location_freetemp(current_asmdata.CurrAsmList,left.location);
           end;
         location_reset(location,LOC_FLAGS,OS_NO);
         location.resflags:=getresflags(unsigned);
      end;

{ unit initialisation: caddnode is set to the tx86addnode class }
begin
   caddnode:=tx86addnode;
end.
 |