12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063 |
- {
- Copyright (c) 2000-2002 by Florian Klaempfl
- Common code generation for add nodes on the i386 and x86
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ****************************************************************************
- }
- unit nx86add;
- {$i fpcdefs.inc}
- interface
- uses
- symtype,
- cgbase,
- cpubase,
- node,nadd,ncgadd;
- type
- tx86addnode = class(tcgaddnode) { add-node code generator shared by the i386 and x86 targets (see the unit header) }
- protected
- function getresflags(unsigned : boolean) : tresflags; { condition flag for an ordinal/set comparison; takes nf_swapped into account }
- function getfpuresflags : tresflags; { same mapping, but onto the F_F* flag values }
- procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean); { brings left.location into a register, swapping it with right when noswap is false and right is already in one }
- procedure force_left_and_right_fpureg; { loads whichever operand is not yet a LOC_FPUREGISTER }
- procedure prepare_x87_locations(out refnode: tnode); { loads the operands for an x87 instruction; refnode returns an OS_F32/OS_F64 memory operand or nil }
- procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize); { appends "op right,left" with left.location.register as destination }
- procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean); { emits the binary operation and, if requested, the FPC_OVERFLOW check }
- procedure second_cmpfloatvector;
- procedure second_addfloatsse;
- procedure second_addfloatavx;
- public
- function pass_1 : tnode;override;
- function simplify(forinline : boolean) : tnode; override;
- function use_fma : boolean;override;
- procedure second_addfloat;override;
- {$ifndef i8086}
- procedure second_addsmallset;override; { not compiled for the i8086 target }
- {$endif not i8086}
- procedure second_add64bit;override;
- procedure second_cmpfloat;override;
- procedure second_cmpsmallset;override;
- procedure second_cmp64bit;override;
- procedure second_cmpordinal;override;
- procedure second_addordinal;override;
- procedure second_addboolean;override;
- {$ifdef SUPPORT_MMX}
- procedure second_opmmx;override; { only available when SUPPORT_MMX is defined }
- {$endif SUPPORT_MMX}
- procedure second_opvector;override;
- end;
- implementation
- uses
- globtype,globals,
- verbose,cutils,compinnr,
- cpuinfo,
- aasmbase,aasmdata,aasmcpu,
- symconst,symdef,
- cgobj,hlcgobj,cgx86,cga,cgutils,
- tgobj,ncgutil,
- ncon,nset,ninl,ncnv,ncal,nmat,
- defutil,defcmp,constexp,
- htypechk;
- { Range check must be disabled explicitly as the code serves
- on three different architecture sizes }
- {$R-}
- {*****************************************************************************
- Helpers
- *****************************************************************************}
- procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
- { Emits "left := left <op> right" for ordinal/set operands and, when asked
-   for, the overflow check that follows it.
-
-     op         - x86 instruction to emit
-     opsize     - operand size (translated through TCGSize2Opsize)
-     unsigned   - overflow is detected with F_AE (carry) instead of F_NO
-     extra_not  - one operand is complemented with NOT before the operation
-                  (set difference is emitted as AND with the complement)
-     mboverflow - the operation may overflow; the FPC_OVERFLOW call is only
-                  generated when needoverflowcheck agrees
-
-   left.location must already be a LOC_REGISTER when this is called. }
-   var
-     power : longint;
-     hl4 : tasmlabel;
-     r : Tregister;
-     href : treference;
-     overflowcheck: boolean;
-   begin
-     overflowcheck:=needoverflowcheck;
-     { at this point, left.location.loc should be LOC_REGISTER }
-     if right.location.loc=LOC_REGISTER then
-       begin
-         { right.location is a LOC_REGISTER }
-         { when swapped another result register }
-         if (nodetype=subn) and (nf_swapped in flags) then
-           begin
-             if extra_not then
-               emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
-             emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
-             { newly swapped also set swapped flag }
-             location_swap(left.location,right.location);
-             toggleflag(nf_swapped);
-           end
-         else
-           begin
-             if extra_not then
-               emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
-             { for these operations the operand order does not matter, so the
-               locations are exchanged before emitting the instruction }
-             if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
-               location_swap(left.location,right.location);
-             emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
-           end;
-       end
-     else
-       begin
-         { right.location is not a LOC_REGISTER }
-         if (nodetype=subn) and (nf_swapped in flags) then
-           begin
-             if extra_not then
-               cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
-             { compute into a scratch register holding right, then copy the
-               result back into left's register }
-             r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
-             hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
-             emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
-             cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
-           end
-         else
-           begin
-             { Optimizations when right.location is a constant value }
-             if (op=A_CMP) and
-                (nodetype in [equaln,unequaln]) and
-                (right.location.loc=LOC_CONSTANT) and
-                (right.location.value=0) then
-               begin
-                 { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
-                   spilling, while 'test %reg,%reg' still requires loading into register.
-                   If spilling is not necessary, it is changed back into 'test %reg,%reg' by
-                   peephole optimizer (this optimization is currently available only for i386). }
-                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- {$ifdef i386}
-                 emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
- {$else i386}
-                 emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
- {$endif i386}
-               end
-             else
-               if (op=A_ADD) and
-                  (right.location.loc=LOC_CONSTANT) and
-                  (right.location.value=1) and
-                  not overflowcheck and
-                  UseIncDec then
-                 begin
-                   emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
-                 end
-             else
-               if (op=A_SUB) and
-                  (right.location.loc=LOC_CONSTANT) and
-                  (right.location.value=1) and
-                  not overflowcheck and
-                  UseIncDec then
-                 begin
-                   emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
-                 end
-             else
-               { A shift does not report multiplication overflow the way IMUL
-                 does, and this path does not allocate the flags register, so
-                 the shortcut is only valid when no overflow check follows
-                 (same guard as the INC/DEC/LEA shortcuts). }
-               if (op=A_IMUL) and
-                  (right.location.loc=LOC_CONSTANT) and
-                  (ispowerof2(int64(right.location.value),power)) and
-                  not overflowcheck then
-                 begin
-                   emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
-                 end
-             else if (op=A_IMUL) and
-                     (right.location.loc=LOC_CONSTANT) and
-                     (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
-                     (power in [1..3]) and
-                     not overflowcheck then
-               begin
-                 { constant is 3, 5 or 9: one LEA computes reg+reg*(constant-1)
-                   into a freshly allocated result register }
-                 reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
-                 href.index:=left.location.register;
-                 href.scalefactor:=int64(right.location.value)-1;
-                 left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
-                 current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
-               end
-             else
-               begin
-                 if extra_not then
-                   begin
-                     { complement a copy of right, then AND it into left }
-                     r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
-                     hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
-                     emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
-                     if mboverflow and overflowcheck then
-                       cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
-                     emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
-                   end
-                 else
-                   begin
-                     if mboverflow and overflowcheck then
-                       cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
-                     emit_op_right_left(op,opsize);
-                   end;
-               end;
-           end;
-       end;
-     { only in case of overflow operations }
-     { produce overflow code }
-     { we must put it here directly, because sign of operation }
-     { is in unsigned VAR!! }
-     if mboverflow then
-       begin
-         if overflowcheck then
-           begin
-             { skip the FPC_OVERFLOW call when the flags report no overflow }
-             current_asmdata.getjumplabel(hl4);
-             if unsigned then
-               cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
-             else
-               cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
-             cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
-             cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
-             cg.a_label(current_asmdata.CurrAsmList,hl4);
-           end;
-       end;
-   end;
- procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
- { Makes sure left.location is a register that can serve as the destination.
-
-     opdef  - definition the operands are converted to when they are loaded
-     opsize - size of the operation; operands whose size differs (ignoring
-              signedness) are reloaded
-     noswap - when true the operands may not be exchanged
-
-   If the operands are exchanged, nf_swapped is toggled. }
-   var
-     reuse_constreg,
-     right_was_creg : boolean;
-   begin
-     { a comparison does not modify its operands, so a constant register may
-       be reused instead of being copied }
-     reuse_constreg:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
-     if left.location.loc<>LOC_REGISTER then
-       begin
-         if noswap or not(right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
-           { no exchange possible: load left itself }
-           hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,reuse_constreg)
-         else
-           begin
-             { right already lives in a register: exchange the operands }
-             right_was_creg:=right.location.loc=LOC_CREGISTER;
-             location_swap(left.location,right.location);
-             toggleflag(nf_swapped);
-             if right_was_creg then
-               begin
-                 { the new left is a constant register and still has to be forced }
-                 hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,reuse_constreg);
-                 location:=left.location;
-               end;
-           end;
-       end;
-     { bring both operands to the operation size, right first }
-     if (right.location.loc<>LOC_CONSTANT) and
-        (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
-       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
-     if (left.location.loc<>LOC_CONSTANT) and
-        (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
-       hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
-   end;
- procedure tx86addnode.force_left_and_right_fpureg;
- { Loads every operand that is not yet a LOC_FPUREGISTER, right before left.
-   When left was already on the FPU stack, nf_swapped is toggled instead of
-   loading it. }
-   begin
-     { right goes first, so that a left loaded afterwards is pushed last }
-     if right.location.loc<>LOC_FPUREGISTER then
-       hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
-     if left.location.loc=LOC_FPUREGISTER then
-       { left was on the stack before this call => the operands are in the
-         wrong order, record the swap }
-       toggleflag(nf_swapped)
-     else
-       hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
-   end;
- { Makes sides suitable for executing an x87 instruction:
- if either side is OS_F32/OS_F64-sized LOC_REFERENCE, it is returned in 'refnode'
- everything else is loaded to FPU stack. }
- procedure tx86addnode.prepare_x87_locations(out refnode: tnode); { Prepares left and right for an x87 instruction.
-   refnode: starts as nil; on return it is either nil (both operands were
-   loaded into an FPU register) or the operand node that stays in memory as an
-   OS_F32/OS_F64 reference.  nf_swapped is toggled in several branches, see
-   the notes on the individual cases. }
- begin
- refnode:=nil;
- { later on, no mm registers are allowed, so transfer everything to memory here
- below it is loaded into an fpu register if needed }
- if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
- if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
- case ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER) of { number of operands already in an FPU register }
- 0:
- begin
- hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false); { right is always loaded in this case }
- if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
- InternalError(2013090803);
- if (left.location.size in [OS_F32,OS_F64]) then
- begin
- refnode:=left; { left stays in memory and is handed back to the caller }
- toggleflag(nf_swapped);
- end
- else
- hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false); { any other size has to be loaded as well }
- end;
- 1:
- begin { if left is on the stack then swap. }
- if (left.location.loc=LOC_FPUREGISTER) then
- refnode:=right
- else
- refnode:=left; { refnode is the operand that is not in an FPU register }
- if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
- InternalError(2013090801);
- if not (refnode.location.size in [OS_F32,OS_F64]) then
- begin
- hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false); { not usable as memory operand: load it }
- if (refnode=right) then
- toggleflag(nf_swapped);
- refnode:=nil; { both operands are in FPU registers now }
- end
- else
- begin
- if (refnode=left) then
- toggleflag(nf_swapped);
- end;
- end;
- 2: { fpu operands are always in the wrong order on the stack }
- toggleflag(nf_swapped);
- else
- InternalError(2013090802);
- end;
- end;
- procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
- { Appends the instruction "op right,left" to the current assembler list.
-   left.location must be a register; right may be a register, a memory
-   reference or a constant.  Subset locations are loaded into a register
-   first; any other location raises internalerror(200203232). }
-   var
-     asmlist : TAsmList;
- {$ifdef x86_64}
-     wideconst : tregister;
- {$endif x86_64}
-   begin
-     asmlist:=current_asmdata.CurrAsmList;
-     if right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
-       hlcg.location_force_reg(asmlist,right.location,right.resultdef,right.resultdef,true);
-     case right.location.loc of
-       LOC_CONSTANT :
-         begin
- {$ifdef x86_64}
-           { x86_64 only supports signed 32 bits constants directly, anything
-             wider travels through a scratch register }
-           if (opsize in [OS_S64,OS_64]) and
-              ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
-             begin
-               wideconst:=cg.getintregister(asmlist,opsize);
-               cg.a_load_const_reg(asmlist,opsize,right.location.value,wideconst);
-               asmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],wideconst,left.location.register));
-             end
-           else
- {$endif x86_64}
-             asmlist.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
-         end;
-       LOC_REFERENCE,
-       LOC_CREFERENCE :
-         begin
-           { the reference has to be simplified before it is used as operand }
-           tcgx86(cg).make_simple_ref(asmlist,right.location.reference);
-           asmlist.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
-         end;
-       LOC_REGISTER,
-       LOC_CREGISTER :
-         asmlist.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
-       else
-         internalerror(200203232);
-     end;
-   end;
- function tx86addnode.getresflags(unsigned : boolean) : tresflags;
- { Returns the condition flag that corresponds to the comparison stored in
-   nodetype.
-
-     unsigned - true selects the above/below conditions, false the
-                greater/less ones
-
-   When nf_swapped is set the relational conditions are mirrored.  A node
-   type that is not a comparison raises an internal error. }
-   begin
-     if nodetype=equaln then
-       result:=F_E
-     else if nodetype=unequaln then
-       result:=F_NE
-     else if unsigned then
-       begin
-         if nf_swapped in flags then
-           case nodetype of
-             ltn : result:=F_A;
-             lten : result:=F_AE;
-             gtn : result:=F_B;
-             gten : result:=F_BE;
-             else
-               internalerror(2013120107);
-           end
-         else
-           case nodetype of
-             ltn : result:=F_B;
-             lten : result:=F_BE;
-             gtn : result:=F_A;
-             gten : result:=F_AE;
-             else
-               internalerror(2013120108);
-           end;
-       end
-     else
-       begin
-         if nf_swapped in flags then
-           case nodetype of
-             ltn : result:=F_G;
-             lten : result:=F_GE;
-             gtn : result:=F_L;
-             gten : result:=F_LE;
-             else
-               internalerror(2013120105);
-           end
-         else
-           case nodetype of
-             ltn : result:=F_L;
-             lten : result:=F_LE;
-             gtn : result:=F_G;
-             gten : result:=F_GE;
-             else
-               internalerror(2013120106);
-           end;
-       end;
-   end;
- function tx86addnode.getfpuresflags : tresflags;
- { Returns the F_F* condition flag for the comparison stored in nodetype.
-   The relational conditions are mirrored when nf_swapped is set; a node
-   type that is not a comparison raises an internal error. }
-   begin
-     case nodetype of
-       equaln :
-         result:=F_FE;
-       unequaln :
-         result:=F_FNE;
-       else
-         if not(nf_swapped in flags) then
-           case nodetype of
-             ltn : result:=F_FB;
-             lten : result:=F_FBE;
-             gtn : result:=F_FA;
-             gten : result:=F_FAE;
-             else
-               internalerror(2014031403);
-           end
-         else
-           case nodetype of
-             ltn : result:=F_FA;
-             lten : result:=F_FAE;
-             gtn : result:=F_FB;
-             gten : result:=F_FBE;
-             else
-               internalerror(2014031402);
-           end;
-     end;
-   end;
- {*****************************************************************************
- AddSmallSet
- *****************************************************************************}
- {$ifndef i8086}
- procedure tx86addnode.second_addsmallset; { Generates code for the set operators on small sets.
-   addn becomes OR (or BTS when a single set element is added), symdifn/xorn
-   become XOR, muln/andn become AND, orn becomes OR and subn becomes AND with
-   the complemented subtrahend.  Any other node type raises
-   internalerror(2003042215).  The result ends up in a register that is
-   described by location. }
- var
- setbase : aint;
- opdef : tdef;
- opsize : TCGSize;
- op : TAsmOp;
- extra_not,
- noswap : boolean;
- all_member_optimization:boolean;
- begin
- pass_left_right;
- noswap:=false;
- extra_not:=false;
- all_member_optimization:=false;
- opdef:=resultdef;
- opsize:=int_cgsize(opdef.size);
- if (left.resultdef.typ=setdef) then
- setbase:=tsetdef(left.resultdef).setbase
- else
- setbase:=tsetdef(right.resultdef).setbase; { left is not a set here, so the set base is taken from right }
- case nodetype of
- addn :
- begin
- { adding elements is not commutative }
- if (nf_swapped in flags) and (left.nodetype=setelementn) then
- swapleftright;
- { are we adding set elements ? }
- if right.nodetype=setelementn then
- begin
- { no range support for smallsets! }
- if assigned(tsetelementnode(right).right) then
- internalerror(43244);
- { btsb isn't supported }
- if opsize=OS_8 then
- begin
- opsize:=OS_32;
- opdef:=u32inttype;
- end;
- { bts requires both elements to be registers }
- hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
- hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
- register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
- op:=A_BTS;
- noswap:=true; { the set must stay the destination operand of BTS }
- end
- else
- op:=A_OR;
- end;
- symdifn :
- op:=A_XOR;
- muln :
- op:=A_AND;
- subn :
- begin
- op:=A_AND;
- if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
- ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
- all_member_optimization:=true; { the minuend is the constant -1, i.e. all bits set }
- if (not(nf_swapped in flags)) and
- (right.location.loc=LOC_CONSTANT) then
- right.location.value := not(right.location.value) { constant subtrahend: complement it at compile time }
- else if (nf_swapped in flags) and
- (left.location.loc=LOC_CONSTANT) then
- left.location.value := not(left.location.value)
- else
- extra_not:=true; { otherwise the complement has to be computed by the generated code }
- end;
- xorn :
- op:=A_XOR;
- orn :
- op:=A_OR;
- andn :
- op:=A_AND;
- else
- internalerror(2003042215);
- end;
- if all_member_optimization then
- begin
- {A set expression [0..31]-x can be implemented with a simple NOT.}
- if nf_swapped in flags then
- begin
- { newly swapped also set swapped flag }
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end;
- hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
- emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
- location:=right.location; { the complemented register is the result }
- end
- else
- begin
- { can we use the BMI1 instruction andn? }
- if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
- (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
- begin
- location_reset(location,LOC_REGISTER,left.location.size);
- location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size); { ANDN writes to a separate destination register }
- if nf_swapped in flags then
- begin
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end;
- hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);
- if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
- hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);
- case left.location.loc of
- LOC_CREGISTER,LOC_REGISTER:
- emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);
- LOC_CREFERENCE,LOC_REFERENCE:
- emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);
- else
- Internalerror(2018040201);
- end;
- end
- else
- begin
- { left must be a register }
- left_must_be_reg(opdef,opsize,noswap);
- emit_generic_code(op,opsize,true,extra_not,false);
- location_freetemp(current_asmdata.CurrAsmList,right.location);
- { left is always a register and contains the result }
- location:=left.location;
- end;
- end;
- { fix the changed opsize we did above because of the missing btsb }
- if opsize<>int_cgsize(resultdef.size) then
- hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
- end;
- {$endif not i8086}
- procedure tx86addnode.second_cmpsmallset;
- { Generates code for comparing two small sets; the result is returned in
-   the CPU flags (LOC_FLAGS).
-   equaln/unequaln compare the operands directly.  For lten/gten the
-   operands are arranged first, left is replaced by "left AND right" and
-   that value is then compared with right for equality.  Any other node
-   type raises internalerror(2003042204). }
-   var
-     opdef : tdef;
-     opsize : TCGSize;
-   begin
-     pass_left_right;
-     { the operation size is taken from left before any swap below }
-     opdef:=left.resultdef;
-     opsize:=int_cgsize(opdef.size);
-     if nodetype in [lten,gten] then
-       begin
-         { swap for an unswapped lten and for a swapped gten }
-         if (nf_swapped in flags)=(nodetype=gten) then
-           swapleftright;
-         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
-         emit_op_right_left(A_AND,opsize);
-         { warning: ugly hack, we need a JE so change the node to equaln }
-         nodetype:=equaln;
-       end
-     else if not(nodetype in [equaln,unequaln]) then
-       internalerror(2003042204);
-     { left must be a register }
-     left_must_be_reg(opdef,opsize,false);
-     emit_generic_code(A_CMP,opsize,true,false,false);
-     { only the flags carry the result, both operands can be released }
-     location_freetemp(current_asmdata.CurrAsmList,right.location);
-     location_freetemp(current_asmdata.CurrAsmList,left.location);
-     location_reset(location,LOC_FLAGS,OS_NO);
-     location.resflags:=getresflags(true);
-   end;
- {*****************************************************************************
- AddMMX
- *****************************************************************************}
- {$ifdef SUPPORT_MMX}
- procedure tx86addnode.second_opmmx;
- { Generates code for the MMX vector operators addn, muln, subn, xorn, orn
-   and andn.  The instruction is chosen from the MMX element type of left
-   and, for addn/subn, from the cs_mmx_saturation switch.  The result is
-   returned in an MMX register (location.register).
-   An unsupported node type raises internalerror(2003042214); an element
-   type without a matching instruction raises internalerror(201408201). }
-   var
-     op : TAsmOp;
-     mmxbase : tmmxtype;
-     hreg,
-     hregister : tregister;
-   begin
-     pass_left_right;
-     { A_NOP marks "no instruction found yet", see the check after the case }
-     op:=A_NOP;
-     mmxbase:=mmx_type(left.resultdef);
-     location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
-     case nodetype of
-       addn :
-         begin
-           if (cs_mmx_saturation in current_settings.localswitches) then
-             begin
-               case mmxbase of
-                 mmxs8bit:
-                   op:=A_PADDSB;
-                 mmxu8bit:
-                   op:=A_PADDUSB;
-                 mmxs16bit,mmxfixed16:
-                   op:=A_PADDSW;
-                 mmxu16bit:
-                   op:=A_PADDUSW;
-                 else
-                   ;
-               end;
-             end
-           else
-             begin
-               case mmxbase of
-                 mmxs8bit,mmxu8bit:
-                   op:=A_PADDB;
-                 mmxs16bit,mmxu16bit,mmxfixed16:
-                   op:=A_PADDW;
-                 mmxs32bit,mmxu32bit:
-                   op:=A_PADDD;
-                 else
-                   ;
-               end;
-             end;
-         end;
-       muln :
-         begin
-           case mmxbase of
-             mmxs16bit,mmxu16bit:
-               op:=A_PMULLW;
-             mmxfixed16:
-               op:=A_PMULHW;
-             else
-               ;
-           end;
-         end;
-       subn :
-         begin
-           if (cs_mmx_saturation in current_settings.localswitches) then
-             begin
-               case mmxbase of
-                 mmxs8bit:
-                   op:=A_PSUBSB;
-                 mmxu8bit:
-                   op:=A_PSUBUSB;
-                 { 16 bit elements need the word form of the saturating
-                   subtraction, mirroring A_PADDSW in the addn case }
-                 mmxs16bit,mmxfixed16:
-                   op:=A_PSUBSW;
-                 mmxu16bit:
-                   op:=A_PSUBUSW;
-                 else
-                   ;
-               end;
-             end
-           else
-             begin
-               case mmxbase of
-                 mmxs8bit,mmxu8bit:
-                   op:=A_PSUBB;
-                 mmxs16bit,mmxu16bit,mmxfixed16:
-                   op:=A_PSUBW;
-                 mmxs32bit,mmxu32bit:
-                   op:=A_PSUBD;
-                 else
-                   ;
-               end;
-             end;
-         end;
-       xorn:
-         op:=A_PXOR;
-       orn:
-         op:=A_POR;
-       andn:
-         op:=A_PAND;
-       else
-         internalerror(2003042214);
-     end;
-     { no instruction exists for this operator/element type combination }
-     if op = A_NOP then
-       internalerror(201408201);
-     { left and right no register? }
-     { then one must be demanded }
-     if (left.location.loc<>LOC_MMXREGISTER) then
-       begin
-         if (right.location.loc=LOC_MMXREGISTER) then
-           begin
-             location_swap(left.location,right.location);
-             toggleflag(nf_swapped);
-           end
-         else
-           begin
-             { register variable ? }
-             if (left.location.loc=LOC_CMMXREGISTER) then
-               begin
-                 hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
-                 emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
-               end
-             else
-               begin
-                 if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-                   internalerror(200203245);
-                 hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
-                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
-                 emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
-               end;
-             location_reset(left.location,LOC_MMXREGISTER,OS_NO);
-             left.location.register:=hregister;
-           end;
-       end;
-     { at this point, left.location.loc should be LOC_MMXREGISTER }
-     if right.location.loc<>LOC_MMXREGISTER then
-       begin
-         if (nodetype=subn) and (nf_swapped in flags) then
-           begin
-             { swapped subtraction: copy right into a fresh register and
-               subtract left from that copy }
-             hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
-             if right.location.loc=LOC_CMMXREGISTER then
-               begin
-                 emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
-                 emit_reg_reg(op,S_NO,left.location.register,hreg);
-               end
-             else
-               begin
-                 { it is right that gets loaded from memory here; left is
-                   already an MMX register, so checking left would always
-                   raise the internal error }
-                 if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-                   internalerror(2002032412);
-                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
-                 emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
-                 emit_reg_reg(op,S_NO,left.location.register,hreg);
-               end;
-             location.register:=hreg;
-           end
-         else
-           begin
-             if (right.location.loc=LOC_CMMXREGISTER) then
-               emit_reg_reg(op,S_NO,right.location.register,left.location.register)
-             else
-               begin
-                 if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-                   internalerror(200203246);
-                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
-                 emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
-               end;
-             location.register:=left.location.register;
-           end;
-       end
-     else
-       begin
-         { right.location=LOC_MMXREGISTER }
-         if (nodetype=subn) and (nf_swapped in flags) then
-           begin
-             { the result is computed in right's register, which becomes
-               left after the swap }
-             emit_reg_reg(op,S_NO,left.location.register,right.location.register);
-             location_swap(left.location,right.location);
-             toggleflag(nf_swapped);
-           end
-         else
-           begin
-             emit_reg_reg(op,S_NO,right.location.register,left.location.register);
-           end;
-         location.register:=left.location.register;
-       end;
-     location_freetemp(current_asmdata.CurrAsmList,right.location);
-   end;
- {$endif SUPPORT_MMX}
- {*****************************************************************************
- AddFloat
- *****************************************************************************}
- procedure tx86addnode.second_addfloatsse; { Generates the non-AVX vector-unit code for a
-   floating point addn/subn/muln/slashn node. second_addfloat dispatches here when
-   use_vectorfpu(resultdef) holds and UseAVX is false. The result location is always
-   reset to LOC_MMREGISTER with the cg size of resultdef.
-   Three code shapes are produced:
-     1. sqr(a) +/- sqr(b) with fputype >= fpu_sse3: the sqr nodes are stripped, both
-        arguments are packed into one register, squared with a single packed multiply
-        and combined with a horizontal add or subtract.
-     2. right already in LOC_MMREGISTER and the operation is OP_ADD or OP_MUL: right is
-        copied into a fresh register and left is applied to it.
-     3. otherwise: left is loaded into a fresh register and right is applied to it.
-   Any other node type raises internalerror 200312231. }
- var
- op : topcg;
- sqr_sum : boolean;
- tmp : tnode;
- begin
- sqr_sum:=false;
- if (current_settings.fputype>=fpu_sse3) and
- use_vectorfpu(resultdef) and
- (nodetype in [addn,subn]) and
- (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
- (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
- begin
- sqr_sum:=true; { below: each sqr inline node is replaced by its own argument; the argument is detached first so that freeing the inline node does not free it too }
- tmp:=tinlinenode(left).left;
- tinlinenode(left).left:=nil;
- left.free;
- left:=tmp;
- tmp:=tinlinenode(right).left;
- tinlinenode(right).left:=nil;
- right.free;
- right:=tmp;
- end;
- pass_left_right;
- { fpu operands are always in reversed order on the stack }
- if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
- toggleflag(nf_swapped);
- if (nf_swapped in flags) then
- { can't use swapleftright if both are on the fpu stack, since then }
- { both are "R_ST" -> nothing would change -> manually switch }
- if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
- (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
- emit_none(A_FXCH,S_NO)
- else
- swapleftright;
- case nodetype of { translate the node type into the generic code generator operation }
- addn :
- op:=OP_ADD;
- muln :
- op:=OP_MUL;
- subn :
- op:=OP_SUB;
- slashn :
- op:=OP_DIV;
- else
- internalerror(200312231);
- end;
- location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
- if sqr_sum then
- begin
- if nf_swapped in flags then
- swapleftright;
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
- location:=left.location; { the result is built in place in the register that holds left }
- if is_double(resultdef) then
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
- case nodetype of
- addn:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
- subn:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
- else
- internalerror(201108162);
- end;
- end
- else
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
- { ensure that bits 64..127 contain valid values }
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
- { the data is now in bits 0..32 and 64..95
-   NOTE(review): a single occupies 32 bits, so the first range presumably means 0..31;
-   confirm the exact lane layout against the UNPCKLPS and SHUFPD instruction definitions }
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
- case nodetype of
- addn:
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
- end;
- subn:
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
- end;
- else
- internalerror(201108163);
- end;
- end
- end
- { we can use only right as left operand if the operation is commutative }
- else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
- begin
- location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
- cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
- cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
- if left.location.loc=LOC_REFERENCE then
- tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
- end
- else
- begin
- if nf_swapped in flags then
- swapleftright;
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
- location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
- cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
- if left.location.loc=LOC_REFERENCE then
- tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
- cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
- if right.location.loc=LOC_REFERENCE then
- tg.ungetiftemp(current_asmdata.CurrAsmList,right.location.reference);
- end;
- end;
- procedure tx86addnode.second_addfloatavx; { Generates the AVX code for a floating point
-   addn/subn/muln/slashn node. second_addfloat dispatches here when
-   use_vectorfpu(resultdef) holds and UseAVX is true. The result location is always
-   reset to LOC_MMREGISTER with the cg size of resultdef.
-   The generic paths use the three operand code generator calls a_opmm_reg_reg_reg and
-   a_opmm_loc_reg_reg, so the result goes to a freshly allocated register.
-   Handled shapes, in the order they are tested:
-     - sqr(a) +/- sqr(b): only reachable when the conditional symbol dummy is defined,
-       because the detection code that sets sqr_sum is compiled under that symbol
-     - multiplication by the real constant 2 on either side: emitted as x + x
-     - right in an MM register and OP_ADD or OP_MUL: left is applied to right
-     - everything else: left is forced into an MM register and right is applied to it
-   Any other node type raises internalerror 2003122303. }
- var
- op : topcg;
- sqr_sum : boolean;
- {$ifdef dummy}
- tmp : tnode;
- {$endif dummy}
- begin
- sqr_sum:=false; { stays false unless the dummy block below is compiled in }
- {$ifdef dummy}
- if (current_settings.fputype>=fpu_sse3) and
- use_vectorfpu(resultdef) and
- (nodetype in [addn,subn]) and
- (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
- (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
- begin
- sqr_sum:=true;
- tmp:=tinlinenode(left).left;
- tinlinenode(left).left:=nil;
- left.free;
- left:=tmp;
- tmp:=tinlinenode(right).left;
- tinlinenode(right).left:=nil;
- right.free;
- right:=tmp;
- end;
- {$endif dummy}
- pass_left_right;
- { fpu operands are always in reversed order on the stack }
- if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
- toggleflag(nf_swapped);
- if (nf_swapped in flags) then
- { can't use swapleftright if both are on the fpu stack, since then }
- { both are "R_ST" -> nothing would change -> manually switch }
- if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
- (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
- emit_none(A_FXCH,S_NO)
- else
- swapleftright;
- case nodetype of { translate the node type into the generic code generator operation }
- addn :
- op:=OP_ADD;
- muln :
- op:=OP_MUL;
- subn :
- op:=OP_SUB;
- slashn :
- op:=OP_DIV;
- else
- internalerror(2003122303);
- end;
- location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
- if sqr_sum then
- begin
- if nf_swapped in flags then
- swapleftright;
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
- location:=left.location; { the result is built in place in the register that holds left }
- if is_double(resultdef) then
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
- case nodetype of
- addn:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
- subn:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
- else
- internalerror(2011081601);
- end;
- end
- else
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
- { ensure that bits 64..127 contain valid values }
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
- { the data is now in bits 0..32 and 64..95
-   NOTE(review): a single occupies 32 bits, so the first range presumably means 0..31;
-   confirm the exact lane layout against the UNPCKLPS and SHUFPD instruction definitions }
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
- case nodetype of
- addn:
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
- end;
- subn:
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
- end;
- else
- internalerror(2011081604);
- end;
- end
- end
- { left*2 ? -> emitted as left+left }
- else if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
- begin
- location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
- cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
- left.location.register,
- left.location.register,
- location.register,
- mms_movescalar);
- end
- { right*2 ? -> emitted as right+right }
- else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
- begin
- location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
- cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
- right.location.register,
- right.location.register,
- location.register,
- mms_movescalar);
- end
- { we can use only right as left operand if the operation is commutative }
- else if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) and (op in [OP_ADD,OP_MUL]) then
- begin
- location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
- cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
- left.location,
- right.location.register,
- location.register,
- mms_movescalar);
- end
- else
- begin
- if (nf_swapped in flags) then
- swapleftright;
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
- location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
- cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
- right.location,
- left.location.register,
- location.register,
- mms_movescalar);
- end;
- end;
- function tx86addnode.pass_1: tnode;
-   { First pass for an x86 add node.
-     When at least one operand is a float and neither the operands nor the result
-     are handled by the vector fpu (so the operation runs on the x87), both operands
-     are marked as not regable before the inherited pass runs. Afterwards the
-     expected location is set to LOC_MMREGISTER for results computed on the
-     vector fpu.
-     Returns whatever the inherited pass_1 returns. }
-   begin
-     { there are no x87 register variables on x86, so keeping the operands of an
-       x87 operation in registers (which would be mm registers) does not help }
-     if (left.resultdef.typ=floatdef) or (right.resultdef.typ=floatdef) then
-       if not(use_vectorfpu(left.resultdef) or
-              use_vectorfpu(right.resultdef) or
-              use_vectorfpu(resultdef)) then
-         begin
-           make_not_regable(left,[ra_addr_regable]);
-           make_not_regable(right,[ra_addr_regable]);
-         end;
-
-     Result:=inherited pass_1;
-
-     { fix up expectloc; this is harmless even when Result is assigned, because
-       another pass_1 is then run on the returned node and sets its own value }
-     if use_vectorfpu(resultdef) then
-       expectloc:=LOC_MMREGISTER;
-   end;
- function tx86addnode.simplify(forinline : boolean) : tnode; { Node-level simplification.
-   On top of the inherited simplify, this rewrites the comparisons
-     (x mod d) = 0   and   (x mod d) <> 0
-   (with the zero constant on either side) where d is an ordinal constant >= 3 that is
-   not a power of two. The replacement tree multiplies x by a precomputed reciprocal
-   (obtained from calc_mul_inverse), optionally adds a constant (signed case) and rotates
-   right (even divisors), and ends in a single lten or gtn comparison against a
-   precomputed constant.
-   The rewrite is attempted only when cs_opt_level1 is active, overflow checking is off,
-   and the target has a 16, 32 or 64 bit ALU; on non 64 bit targets the size of the mod
-   result is additionally limited (below 8 bytes on 32 bit, below 4 bytes otherwise).
-   forinline is only passed through to the inherited simplify.
-   Returns the replacement node tree, or the result of the inherited simplify whenever
-   the rewrite does not apply. }
- var
- t, m, ThisNode, ConstNode: TNode; { t: the mod node, m: the zero constant it is compared with }
- lt,rt, ThisType: TNodeType;
- ThisDef: TDef;
- DoOptimisation: Boolean;
- reciprocal, comparison, divisor: AWord;
- shift, N: Byte; { shift: exponent b of the power of two factor of the divisor, N: operand width in bits }
- begin
- { Load into local variables to reduce the number of pointer dereferences }
- rt:=right.nodetype;
- lt:=left.nodetype;
- DoOptimisation:=False;
- {$if defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
- if (cs_opt_level1 in current_settings.optimizerswitches) and
- { The presence of overflow checks tends to cause internal errors with the multiplication nodes }
- not (cs_check_overflow in current_settings.localswitches) and
- (nodetype in [equaln,unequaln]) then
- begin
- if (lt=modn) and (rt=ordconstn) and (TOrdConstNode(right).value.uvalue=0) then
- begin
- t:=left;
- m:=right;
- end
- else if (rt=modn) and (lt=ordconstn) and (TOrdConstNode(left).value.uvalue=0) then
- begin
- t:=right;
- m:=left;
- end
- else
- begin
- t:=nil;
- m:=nil;
- end;
- if Assigned(t) and (TModDivNode(t).right.nodetype=ordconstn) and
- {$ifndef cpu64bitalu}
- { Converting Int64 and QWord division doesn't work under i386 }
- {$ifndef cpu32bitalu}
- (TModDivNode(t).resultdef.size < 4) and
- {$else cpu32bitalu}
- (TModDivNode(t).resultdef.size < 8) and
- {$endif cpu32bitalu}
- {$endif cpu64bitalu}
- (TOrdConstNode(TModDivNode(t).right).value>=3) then
- begin
- divisor:=TOrdConstNode(TModDivNode(t).right).value.uvalue;
- { Exclude powers of 2, as there are more efficient ways to handle those }
- if PopCnt(divisor)>1 then
- begin
- if is_signed(TModDivNode(t).left.resultdef) then
- begin
- { See pages 250-251 of Hacker's Delight, Second Edition
- for an explanation and proof of the algorithm, but
- essentially, we're doing the following:
- - Convert the divisor d to the form k.2^b if it isn't
- already odd (in which case, k = d and b = 0)
- - Calculate r, the multiplicative inverse of k modulo 2^N
- - Calculate c = floor(2^(N-1) / k) & -(2^b)
- - Let q = ((n * r) + c) ror b (mod 2^N)
- - Repurpose c to equal floor(2c / 2^b) = c shr (b - 1)
- (some RISC platforms will benefit from doing this over
- precalculating the modified constant. For x86,
- it's better with the constant precalculated for
- 32-bit and under, but for 64-bit, use SHR. )
- - If q is below or equal to c, then (n mod d) = 0
- }
- while True do { peel off signed widening type conversions from the numerator; left via Continue, Break or Exit }
- begin
- ThisNode:=TModDivNode(t).left;
- case ThisNode.nodetype of
- typeconvn:
- begin
- ThisDef:=TTypeConvNode(ThisNode).left.resultdef;
- { See if we can simplify things to a smaller ordinal to
- reduce code size and increase speed }
- if is_signed(ThisDef) and
- is_integer(ThisDef) and
- { Byte-sized multiplications can cause problems }
- (ThisDef.size>=2) and
- { Make sure the divisor is in range }
- (divisor>=TOrdDef(ThisDef).low) and
- (divisor<=TOrdDef(ThisDef).high) then
- begin
- TOrdConstNode(TModDivNode(t).right).resultdef:=ThisDef;
- TOrdConstNode(m).resultdef:=ThisDef;
- TModDivNode(t).resultdef:=ThisDef;
- { Destroy the typeconv node }
- TModDivNode(t).left:=TTypeConvNode(ThisNode).left;
- TTypeConvNode(ThisNode).left:=nil;
- ThisNode.Free;
- Continue;
- end;
- end;
- ordconstn:
- begin
- { Just simplify into a constant }
- Result:=inherited simplify(forinline);
- Exit;
- end;
- else
- ;
- end;
- DoOptimisation:=True;
- Break;
- end;
- if DoOptimisation then
- begin
- ThisDef:=TModDivNode(t).left.resultdef;
- if nodetype = equaln then
- ThisType:=lten
- else
- ThisType:=gtn;
- N:=ThisDef.size*8;
- calc_mul_inverse(N, TOrdConstNode(TModDivNode(t).right).value.uvalue, reciprocal, shift);
- { Construct the following node tree for odd divisors:
- <lten> (for equaln) or <gtn> (for unequaln)
- <addn>
- <muln>
- <typeconv signed-to-unsigned>
- <numerator node (TModDivNode(t).left)>
- <reciprocal constant>
- <comparison constant (effectively a signed shift)>
- <comparison constant * 2>
- For even divisors, convert them to the form k.2^b, with
- odd k, then construct the following:
- <lten> (for equaln) or <gtn> (for unequaln)
- <ror>
- (b)
- <addn>
- <muln>
- <typeconv signed-to-unsigned>
- <numerator node (TModDivNode(t).left)>
- <reciprocal constant>
- <comparison constant (effectively a signed shift)>
- <comparison constant shr (b - 1)>
- }
- ThisNode:=ctypeconvnode.create_internal(TModDivNode(t).left, ThisDef);
- TTypeConvNode(ThisNode).convtype:=tc_int_2_int;
- ThisDef:=get_unsigned_inttype(ThisDef);
- ThisNode.resultdef:=ThisDef;
- TModDivNode(t).left:=nil; { the numerator now belongs to the new tree; detach it from the mod node }
- ConstNode:=cordconstnode.create(reciprocal, ThisDef, False);
- ConstNode.resultdef:=ThisDef;
- ThisNode:=caddnode.create_internal(muln, ThisNode, ConstNode);
- ThisNode.resultdef:=ThisDef;
- {$push}
- {$warnings off}
- if shift>0 then
- comparison:=((aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div (divisor shr shift)) and -(1 shl shift)
- else
- comparison:=(aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div divisor;
- {$pop}
- ConstNode:=cordconstnode.create(comparison, ThisDef, False);
- ConstNode.resultdef:=ThisDef;
- ThisNode:=caddnode.create_internal(addn, ThisNode, ConstNode);
- ThisNode.resultdef:=ThisDef;
- if shift>0 then
- begin
- ConstNode:=cordconstnode.create(shift, u8inttype, False);
- ConstNode.resultdef:=u8inttype;
- ThisNode:=cinlinenode.createintern(in_ror_x_y,false,
- ccallparanode.create(ConstNode,
- ccallparanode.create(ThisNode, nil)));
- ThisNode.resultdef:=ThisDef;
- ConstNode:=cordconstnode.create(comparison shr (shift - 1), ThisDef, False);
- end
- else
- ConstNode:=cordconstnode.create(comparison*2, ThisDef, False);
- ConstNode.resultdef:=ThisDef;
- Result:=CAddNode.create_internal(ThisType, ThisNode, ConstNode);
- Result.resultdef:=resultdef;
- Exit;
- end;
- end
- else
- begin
- { For bit length N, convert "(x mod d) = 0" or "(x mod d) <> 0", where
- d is an odd-numbered integer constant, to "(x * r) <= m", where
- dr = 1 (mod 2^N) and m = floor(2^N / d).
- If d is even, convert to the form k.2^b, where k is odd, then
- convert to "(x * r) ror b <= m", where kr = 1 (mod 2^N) and
- m = floor(2^N / d) = floor(2^(N-b) / k) }
- while True do { peel off unsigned widening type conversions from the numerator; left via Continue, Break or Exit }
- begin
- ThisNode:=TModDivNode(t).left;
- case ThisNode.nodetype of
- typeconvn:
- begin
- ThisDef:=TTypeConvNode(ThisNode).left.resultdef;
- { See if we can simplify things to a smaller ordinal to
- reduce code size and increase speed }
- if not is_signed(ThisDef) and
- is_integer(ThisDef) and
- { Byte-sized multiplications can cause problems }
- (ThisDef.size>=2) and
- { Make sure the divisor is in range }
- (divisor>=TOrdDef(ThisDef).low) and
- (divisor<=TOrdDef(ThisDef).high) then
- begin
- TOrdConstNode(TModDivNode(t).right).resultdef:=ThisDef;
- TOrdConstNode(m).resultdef:=ThisDef;
- TModDivNode(t).resultdef:=ThisDef;
- { Destroy the typeconv node }
- TModDivNode(t).left:=TTypeConvNode(ThisNode).left;
- TTypeConvNode(ThisNode).left:=nil;
- ThisNode.Free;
- Continue;
- end;
- end;
- ordconstn:
- begin
- { Just simplify into a constant }
- Result:=inherited simplify(forinline);
- Exit;
- end;
- else
- ;
- end;
- DoOptimisation:=True;
- Break;
- end;
- if DoOptimisation then
- begin
- ThisDef:=TModDivNode(t).left.resultdef;
- { Construct the following node tree for odd divisors:
- <lten> (for equaln) or <gtn> (for unequaln)
- <muln>
- <numerator node (TModDivNode(t).left)>
- <reciprocal constant>
- (2^N / divisor)
- For even divisors, convert them to the form k.2^b, with
- odd k, then construct the following:
- <lten> (for equaln) or <gtn> (for unequaln)
- <ror>
- (b)
- <muln>
- <numerator node (TModDivNode(t).left)>
- <reciprocal constant>
- (2^N / divisor)
- }
- if nodetype=equaln then
- ThisType:=lten
- else
- ThisType:=gtn;
- N:=ThisDef.size*8;
- calc_mul_inverse(N, TOrdConstNode(TModDivNode(t).right).value.uvalue, reciprocal, shift);
- ConstNode:=cordconstnode.create(reciprocal, ThisDef, False);
- ConstNode.resultdef:=ThisDef;
- ThisNode:=caddnode.create_internal(muln, TModDivNode(t).left, ConstNode);
- ThisNode.resultdef:=ThisDef;
- TModDivNode(t).left:=nil; { the numerator now belongs to the new tree; detach it from the mod node }
- if shift>0 then
- begin
- ConstNode:=cordconstnode.create(shift, u8inttype, False);
- ConstNode.resultdef:=u8inttype;
- ThisNode:=cinlinenode.createintern(in_ror_x_y,false,
- ccallparanode.create(ConstNode,
- ccallparanode.create(ThisNode, nil)));
- ThisNode.resultdef:=ThisDef;
- comparison:=(aWord(1) shl ((N-shift) and (SizeOf(aWord)*8-1))) div (divisor shr shift);
- end
- else
- begin
- {$push}
- {$warnings off}
- { Because 2^N and divisor are relatively prime,
- floor(2^N / divisor) = floor((2^N - 1) / divisor) }
- comparison:=(aWord(not 0) shr (((SizeOf(aWord)*8)-N) and (SizeOf(aWord)*8-1))) div divisor;
- {$pop}
- end;
- ConstNode:=cordconstnode.create(comparison, ThisDef, False);
- ConstNode.resultdef:=ThisDef;
- Result:=CAddNode.create_internal(ThisType, ThisNode, ConstNode);
- Result.resultdef:=resultdef;
- Exit;
- end;
- end;
- end;
- end;
- end;
- {$ifend defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
- Result:=inherited simplify(forinline);
- end;
- function tx86addnode.use_fma : boolean;
-   { Tells whether fused multiply-add code may be used for this node.
-     On i8086 the decision is left to the inherited implementation. On all other
-     x86 targets the answer is true only when the result is computed on the vector
-     fpu and the selected fpu type offers FMA or FMA4. }
-   begin
- {$ifdef i8086}
-     Result:=inherited use_fma;
- {$else i8086}
-     { the result has to stay in an xmm register; mixing x87 registers
-       with fma makes no sense }
-     Result:=False;
-     if use_vectorfpu(resultdef) then
-       Result:=(FPUX86_HAS_FMA in fpu_capabilities[current_settings.fputype]) or
-               (FPUX86_HAS_FMA4 in fpu_capabilities[current_settings.fputype]);
- {$endif i8086}
-   end;
- procedure tx86addnode.second_cmpfloatvector; { Generates a scalar floating point comparison on
-   the vector unit. The instruction is COMISS for single and COMISD for double operands, or
-   the VCOMISS/VCOMISD forms when UseAVX is true; any other operand type raises
-   internalerror 200402222. The result is a LOC_FLAGS location whose resflags come from
-   getfpuresflags. Operands that sit on the x87 stack are first written to memory. Temps
-   of both operands are released at the end. }
- var
- op : tasmop;
- const
- ops_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS); { indexed by UseAVX }
- ops_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD); { indexed by UseAVX }
- begin
- if is_single(left.resultdef) then
- op:=ops_single[UseAVX]
- else if is_double(left.resultdef) then
- op:=ops_double[UseAVX]
- else
- internalerror(200402222);
- pass_left_right;
- { fpu operands are always in reversed order on the stack }
- if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
- toggleflag(nf_swapped);
- location_reset(location,LOC_FLAGS,OS_NO);
- { Direct move fpu->mm register is not possible, so force any fpu operands to
- memory (not to mm registers because one of the memory locations can be used
- directly in compare instruction, yielding shorter code) }
- if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
- if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
- if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
- begin
- case left.location.loc of { right is already in a register: use it as the register operand and left as the other operand }
- LOC_REFERENCE,LOC_CREFERENCE:
- begin
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
- current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
- end;
- LOC_MMREGISTER,LOC_CMMREGISTER:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register));
- else
- internalerror(200402221);
- end;
- toggleflag(nf_swapped); { the operands were given to the compare in the opposite order to the branch below, so record the swap }
- end
- else
- begin
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
- case right.location.loc of
- LOC_REFERENCE,LOC_CREFERENCE:
- begin
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
- current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
- end;
- LOC_MMREGISTER,LOC_CMMREGISTER:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register));
- else
- internalerror(200402223);
- end;
- end;
- location.resflags:=getfpuresflags;
- location_freetemp(current_asmdata.CurrAsmList,left.location);
- location_freetemp(current_asmdata.CurrAsmList,right.location);
- end;
- procedure tx86addnode.second_opvector;
-   { Generates code for an element-wise addn/subn/muln/slashn on vector operands
-     that fit into an MM register. The result is a LOC_MMREGISTER location.
-     With UseAVX the three operand form is used and the result goes to a freshly
-     allocated register; otherwise one operand register is reused as the
-     destination. Operands that do not fit into an MM register are not supported
-     yet (internalerror 200610072); an unexpected node type raises
-     internalerror 200610071.
-
-     The element size handed to the code generator is always taken from the
-     element type of the vector. The commutative branch used to cast
-     left.resultdef itself to tfloatdef, although the other branch of this very
-     procedure treats the same def as an array def; reading floattype through the
-     wrong class yields an arbitrary size. }
-   var
-     op : topcg;
-     elemsize : tcgsize;
-   begin
-     pass_left_right;
-     if (nf_swapped in flags) then
-       swapleftright;
-
-     case nodetype of
-       addn :
-         op:=OP_ADD;
-       muln :
-         op:=OP_MUL;
-       subn :
-         op:=OP_SUB;
-       slashn :
-         op:=OP_DIV;
-       else
-         internalerror(200610071);
-     end;
-
-     if fits_in_mm_register(left.resultdef) then
-       begin
-         location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
-         { size of one vector element; only valid inside this branch, where
-           left.resultdef is a vector (array) def with float elements }
-         elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
-         { we can use only right as left operand if the operation is commutative }
-         if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
-           begin
-             if UseAVX then
-               begin
-                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
-                 cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,elemsize,
-                   left.location,right.location.register,location.register,nil);
-               end
-             else
-               begin
-                 { two operand form: right's register doubles as destination }
-                 location.register:=right.location.register;
-                 cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
-                   left.location,location.register,nil);
-               end;
-           end
-         else
-           begin
-             location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
-             if UseAVX then
-               begin
-                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
-                 cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,elemsize,
-                   right.location,left.location.register,location.register,nil);
-               end
-             else
-               begin
-                 { two operand form: left's register doubles as destination }
-                 location.register:=left.location.register;
-                 cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
-                   right.location,location.register,nil);
-               end;
-           end;
-       end
-     else
-       begin
-         { not yet supported }
-         internalerror(200610072);
-       end
-   end;
- procedure tx86addnode.second_addfloat; { Generates code for a floating point
-   addn/subn/muln/slashn node. When the result is handled by the vector fpu the work is
-   delegated to second_addfloatavx (UseAVX) or second_addfloatsse and nothing else is
-   done here. Otherwise x87 code is emitted and the result is a LOC_FPUREGISTER location
-   in NR_ST. An unexpected node type raises internalerror 2003042203. }
- const
- { each table is indexed by hasref: false selects the P form that is emitted on ST/ST1,
-   true selects the form that is emitted with a memory operand }
- ops_add: array[boolean] of TAsmOp = (A_FADDP,A_FADD);
- ops_mul: array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
- ops_sub: array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
- ops_rsub: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
- ops_div: array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
- ops_rdiv: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
- var
- op : TAsmOp;
- refnode, hp: tnode;
- hasref : boolean;
- begin
- if use_vectorfpu(resultdef) then
- begin
- if UseAVX then
- second_addfloatavx
- else
- second_addfloatsse;
- exit;
- end;
- { can the operation do the conversion? if an operand is a type conversion from single or double, drop the conversion node and use its source directly }
- if (left.nodetype=typeconvn) and (is_double(ttypeconvnode(left).left.resultdef) or is_single(ttypeconvnode(left).left.resultdef)) then
- begin
- hp:=left;
- left:=ttypeconvnode(left).left;
- ttypeconvnode(hp).left:=nil;
- hp.Free;
- end;
- if (right.nodetype=typeconvn) and (is_double(ttypeconvnode(right).left.resultdef) or is_single(ttypeconvnode(right).left.resultdef)) then
- begin
- hp:=right;
- right:=ttypeconvnode(right).left;
- ttypeconvnode(hp).left:=nil;
- hp.Free;
- end;
- pass_left_right;
- prepare_x87_locations(refnode);
- hasref:=assigned(refnode); { refnode, when assigned, is the operand that is used directly from memory below }
- case nodetype of
- addn :
- op:=ops_add[hasref];
- muln :
- op:=ops_mul[hasref];
- subn :
- if (nf_swapped in flags) then
- op:=ops_rsub[hasref]
- else
- op:=ops_sub[hasref];
- slashn :
- if (nf_swapped in flags) then
- op:=ops_rdiv[hasref]
- else
- op:=ops_div[hasref];
- else
- internalerror(2003042203);
- end;
- if hasref then
- emit_ref(op,tcgsize2opsize[refnode.location.size],refnode.location.reference)
- else
- begin
- emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
- tcgx86(cg).dec_fpu_stack; { keep the code generator's fpu stack bookkeeping in step with the P form emitted above }
- end;
- location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
- location.register:=NR_ST;
- end;
- procedure tx86addnode.second_cmpfloat; { Generates code for a floating point comparison.
-   When either operand is handled by the vector fpu the work is delegated to
-   second_cmpfloatvector. Otherwise both operands are forced onto the x87 stack and
-   compared there. The result is a LOC_FLAGS location whose resflags come from
-   getfpuresflags.
-   On non x86_64 targets with a cpu type below cpu_Pentium2 the compare is done with
-   FCOMPP and the fpu status word is transferred into the cpu flags via AX and SAHF;
-   on i8086 with a cpu type below cpu_286 the status word goes through a 2 byte temp.
-   In all other cases FCOMIP followed by FSTP ST0 is emitted. }
- {$ifdef i8086}
- var
- tmpref: treference;
- {$endif i8086}
- begin
- if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
- begin
- second_cmpfloatvector;
- exit;
- end;
- pass_left_right;
- force_left_and_right_fpureg;
- {$ifndef x86_64}
- if current_settings.cputype<cpu_Pentium2 then
- begin
- emit_none(A_FCOMPP,S_NO);
- tcgx86(cg).dec_fpu_stack;
- tcgx86(cg).dec_fpu_stack;
- { load fpu flags }
- {$ifdef i8086}
- if current_settings.cputype < cpu_286 then
- begin
- tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
- emit_ref(A_FSTSW,S_NO,tmpref);
- cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
- inc(tmpref.offset); { address the second byte of the stored status word for the load into AH }
- emit_ref_reg(A_MOV,S_B,tmpref,NR_AH);
- dec(tmpref.offset); { restore the reference before the temp is released }
- emit_none(A_SAHF,S_NO);
- cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
- tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
- end
- else
- {$endif i8086}
- begin
- cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
- emit_reg(A_FNSTSW,S_NO,NR_AX);
- emit_none(A_SAHF,S_NO);
- cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
- end;
- if cs_fpu_fwait in current_settings.localswitches then
- current_asmdata.CurrAsmList.concat(Taicpu.Op_none(A_FWAIT,S_NO));
- end
- else
- {$endif x86_64}
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
- { fcomip pops only one fpu register, so the second one is popped with an explicit fstp }
- current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
- tcgx86(cg).dec_fpu_stack;
- tcgx86(cg).dec_fpu_stack;
- end;
- location_reset(location,LOC_FLAGS,OS_NO);
- location.resflags:=getfpuresflags;
- end;
- {*****************************************************************************
- Add64bit
- *****************************************************************************}
- procedure tx86addnode.second_add64bit;
-   { 64 bit arithmetic. Targets with a 64 bit ALU treat it like any other ordinal
-     operation; every other target has to provide its own implementation, so
-     reaching this one there is an internal error. }
-   begin
- {$ifndef cpu64bitalu}
-     { needs a separate, target specific implementation }
-     internalerror(200402042);
- {$else cpu64bitalu}
-     second_addordinal;
- {$endif cpu64bitalu}
-   end;
- procedure tx86addnode.second_cmp64bit;
-   { 64 bit comparison. Targets with a 64 bit ALU treat it like any other ordinal
-     comparison; every other target has to provide its own implementation, so
-     reaching this one there is an internal error. }
-   begin
- {$ifndef cpu64bitalu}
-     { needs a separate, target specific implementation }
-     internalerror(200402043);
- {$else cpu64bitalu}
-     second_cmpordinal;
- {$endif cpu64bitalu}
-   end;
- {*****************************************************************************
- AddOrdinal
- *****************************************************************************}
- procedure tx86addnode.second_addordinal; { Code generation for an ordinal
-   addn/subn/muln/andn/orn/xorn node; any other nodetype ends in
-   internalerror 2015022501.
-   Steps, as visible in the body:
-   1. map nodetype to a code generator opcode and note whether the
-      operation is a candidate for overflow checking (add, mul, sub);
-   2. evaluate both operands through pass_left_right;
-   3. emit either a three-operand operation whose destination is the
-      register set up by set_result_location_reg, or a two-operand
-      operation working on the left operand's register, in which case the
-      node location becomes a copy of left.location;
-   4. when checkoverflow is set, NR_DEFAULTFLAGS is allocated with
-      cg.a_reg_alloc right before each operation is emitted and
-      cg.g_overflowcheck_loc is called at the very end. }
- var
- opsize : tcgsize;
- unsigned : boolean;
- cgop : topcg;
- checkoverflow : Boolean;
- ovloc : tlocation; { handed to the checkoverflow helpers and to g_overflowcheck_loc; starts out as LOC_VOID }
- tmpreg : TRegister; { scratch register used by the sub special cases below }
- begin
- { determine whether the operation is unsigned: true as soon as one operand
-   type is not signed; the flag is only used below to pick OP_MUL over OP_IMUL }
- unsigned:=not(is_signed(left.resultdef)) or
- not(is_signed(right.resultdef));
- { assume no overflow checking is required }
- checkoverflow := false;
- ovloc.loc:=LOC_VOID;
- case nodetype of
- addn:
- begin
- cgop:=OP_ADD;
- checkoverflow:=true;
- end;
- xorn :
- begin
- cgop:=OP_XOR;
- end;
- orn :
- begin
- cgop:=OP_OR;
- end;
- andn:
- begin
- cgop:=OP_AND;
- end;
- muln:
- begin
- checkoverflow:=true;
- if unsigned then
- cgop:=OP_MUL
- else
- cgop:=OP_IMUL;
- end;
- subn :
- begin
- checkoverflow:=true;
- cgop:=OP_SUB;
- end;
- else
- internalerror(2015022501);
- end;
- checkoverflow:=
- checkoverflow and
- needoverflowcheck; { add/mul/sub are only candidates; needoverflowcheck makes the final decision }
- opsize:=def_cgsize(left.resultdef); { operation size is derived from the left operand's type }
- pass_left_right; { NOTE(review): presumably generates code for both operands and fills in left.location and right.location - confirm in the ancestor class }
- { do we have to allocate a register? If yes, then three-operand instructions are better; however, for sub,
-   three-operand instructions make no sense if right is a reference }
- if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER) and
- ((nodetype<>subn) or not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE])) and
- { a 3 op mul only makes sense if a constant is involved }
- ((nodetype<>muln) or (left.location.loc=LOC_CONSTANT) or (right.location.loc=LOC_CONSTANT)
- {$ifndef i8086}
- or ((CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and (not(needoverflowcheck))
- )
- {$endif i8086}
- ) and
- (not(nodetype in [orn,andn,xorn]))) or
- ((nodetype=addn) and (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
- begin { three-operand path; it is also taken for every add whose operands are each a register or a constant }
- { allocate registers }
- force_reg_left_right(false,true); { NOTE(review): the code below still tests for LOC_CONSTANT, so constants apparently are not forced into registers here - confirm against force_reg_left_right }
- set_result_location_reg;
- if nodetype<>subn then
- begin { operand order does not matter for these operations, see the sub branch below }
- if checkoverflow then
- cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
- if (right.location.loc<>LOC_CONSTANT) then
- hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
- left.location.register,right.location.register,
- location.register,checkoverflow,ovloc)
- else
- hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
- right.location.value,left.location.register,
- location.register,checkoverflow,ovloc);
- end
- else { subtract is a special case since it is not commutative }
- begin
- if (nf_swapped in flags) then
- swapleftright; { NOTE(review): presumably restores the source operand order after an earlier swap - confirm }
- if left.location.loc<>LOC_CONSTANT then
- begin
- if checkoverflow then
- cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
- if right.location.loc<>LOC_CONSTANT then { right is passed first and left second; presumably this yields left minus right - confirm the operand convention in hlcg }
- hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
- right.location.register,left.location.register,
- location.register,checkoverflow,ovloc)
- else
- hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
- right.location.value,left.location.register,
- location.register,checkoverflow,ovloc);
- end
- else
- begin { left is a constant: load it into a temporary register so it can be passed as a register operand }
- tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
- hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
- left.location.value,tmpreg);
- if checkoverflow then
- cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
- hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
- right.location.register,tmpreg,location.register,checkoverflow,ovloc);
- end;
- end
- end
- else
- begin
- { at least one location should be a register; if so, try to re-use it so that two-operand opcodes can be used }
- if left.location.loc<>LOC_REGISTER then
- begin
- if right.location.loc<>LOC_REGISTER then { neither operand is in a register: load left into one }
- hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false)
- else
- begin { right already is in a register: exchange the two locations and record that by toggling nf_swapped }
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end;
- end;
- { at this point, left.location.loc should be LOC_REGISTER }
- if right.location.loc=LOC_REGISTER then
- begin
- if checkoverflow then
- cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
- { swapped sub: emit the operation with the two registers in the opposite order, then swap the
-   locations back and toggle nf_swapped off again, so that left.location is what gets copied
-   to the node location below }
- if (nodetype=subn) and (nf_swapped in flags) then
- begin
- cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
- left.location.register,right.location.register);
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end
- else
- cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
- right.location.register,left.location.register);
- end
- else
- begin
- { right.location<>LOC_REGISTER; subset references/registers are first loaded into a register }
- if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
- hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);
- if (nodetype=subn) and (nf_swapped in flags) then
- begin { swapped sub: keep the current left register in tmpreg, load right into a newly allocated register and apply the operation with tmpreg as first and the new register as second register argument }
- tmpreg:=left.location.register;
- left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
- cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
- if checkoverflow then
- cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
- cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
- end
- else
- begin
- if checkoverflow then
- cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
- cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
- end;
- location_freetemp(current_asmdata.CurrAsmList,right.location);
- end;
- location_copy(location,left.location); { the node's result is whatever left.location describes at this point }
- end;
- { emit overflow check if required }
- if checkoverflow then
- cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
- end;
- procedure tx86addnode.second_addboolean;
-   var
-     use_inherited : boolean;
-   begin
-     { The ancestor implementation handles two situations:
-         * an "or"/"and" node that either carries nf_short_bool or is compiled
-           without cs_full_boolean_eval in the local switches, and
-         * operands whose type is reported as 64 bit by is_64bit.
-       Every other boolean operation is generated like an ordinal one. }
-     use_inherited:=(nodetype in [orn,andn]) and
-       ((nf_short_bool in flags) or
-        not(cs_full_boolean_eval in current_settings.localswitches));
-     { the size test is only consulted when the first rule did not apply }
-     if not use_inherited then
-       use_inherited:=is_64bit(left.resultdef);
-     if use_inherited then
-       inherited second_addboolean
-     else
-       second_addordinal;
-   end;
- procedure tx86addnode.second_cmpordinal;
-   var
-     cmpdef : tdef;
-     cmpsize : tcgsize;
-     isunsigned,
-     const_vs_mem : boolean;
-   begin
-     { Emits a CMP for two ordinal operands and leaves the outcome in the
-       flags: the node location becomes LOC_FLAGS with the condition
-       returned by getresflags. }
-     { type and size of the comparison come from the left operand; they are
-       captured before the operands are evaluated }
-     cmpdef:=left.resultdef;
-     cmpsize:=def_cgsize(cmpdef);
-     { the comparison is signed only if both operand types are signed }
-     isunsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
-     pass_left_right;
-     { a constant on the right and a memory reference on the left can be
-       compared directly ... }
-     const_vs_mem:=(right.location.loc=LOC_CONSTANT) and
-       (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]);
- {$ifdef x86_64}
-     { ... but for 64 bit operands only when the constant fits in a longint }
-     if const_vs_mem and (cmpsize in [OS_64,OS_S64]) then
-       const_vs_mem:=(right.location.value>=low(longint)) and
-         (right.location.value<=high(longint));
- {$endif x86_64}
-     if not const_vs_mem then
-       begin
-         { generic case: get left into a register and compare against right }
-         left_must_be_reg(cmpdef,cmpsize,false);
-         emit_generic_code(A_CMP,cmpsize,isunsigned,false,false);
-         location_freetemp(current_asmdata.CurrAsmList,right.location);
-         location_freetemp(current_asmdata.CurrAsmList,left.location);
-       end
-     else
-       begin
-         emit_const_ref(A_CMP,TCGSize2Opsize[cmpsize],right.location.value,left.location.reference);
-         location_freetemp(current_asmdata.CurrAsmList,left.location);
-       end;
-     location_reset(location,LOC_FLAGS,OS_NO);
-     location.resflags:=getresflags(isunsigned);
-   end;
- begin { statement part at the end of the module, closed by the final end. }
- caddnode:=tx86addnode; { NOTE(review): presumably makes tx86addnode the class used whenever an add node is created through caddnode - confirm at the declaration of caddnode }
- end.
|