12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080 |
- {
- Copyright (c) 2000-2002 by Florian Klaempfl
- Common code generation for add nodes on the i386 and x86
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ****************************************************************************
- }
- unit nx86add;
- {$i fpcdefs.inc}
- interface
- uses
- cgbase,
- cpubase,
- node,nadd,ncgadd;
- { Add-node code generator shared by the i386 and x86 targets. It derives
-   from tcgaddnode, overrides its second_* hooks and adds the helpers the
-   implementations in this unit share. }
- type
- tx86addnode = class(tcgaddnode)
- protected
- { translates the node's comparison type (honouring nf_swapped) into a
-   processor flags condition; `unsigned` selects the above/below family }
- function getresflags(unsigned : boolean) : tresflags;
- { makes sure left.location is a LOC_REGISTER of size opsize, swapping the
-   operands instead when that is allowed by noswap }
- procedure left_must_be_reg(opsize:TCGSize;noswap:boolean);
- { optionally forces both operands into FPU registers and keeps nf_swapped
-   up to date }
- procedure check_left_and_right_fpureg(force_fpureg: boolean);
- { emits "op right,left" where left.location is a register }
- procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
- { emits the operation itself plus the optional overflow check }
- procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
- { SSE variants, called from second_cmpfloat/second_addfloat }
- procedure second_cmpfloatsse;
- procedure second_addfloatsse;
- public
- procedure second_addfloat;override;
- procedure second_addsmallset;override;
- procedure second_add64bit;override;
- procedure second_cmpfloat;override;
- procedure second_cmpsmallset;override;
- procedure second_cmp64bit;override;
- procedure second_cmpordinal;override;
- {$ifdef SUPPORT_MMX}
- procedure second_opmmx;override;
- {$endif SUPPORT_MMX}
- procedure second_opvector;override;
- end;
- implementation
- uses
- globtype,globals,
- verbose,cutils,
- cpuinfo,
- aasmbase,aasmtai,aasmdata,aasmcpu,
- symconst,symdef,
- cgobj,cgx86,cga,cgutils,
- paramgr,tgobj,ncgutil,
- ncon,nset,ninl,
- defutil;
- {*****************************************************************************
- Helpers
- *****************************************************************************}
- { Emits the instruction(s) for `op` on the two operands; the caller has
-   already made left.location a LOC_REGISTER.
-
-   op         : the x86 instruction to emit
-   opsize     : operand size used for every emitted instruction
-   unsigned   : only used by the overflow check; selects F_AE instead of F_NO
-   extra_not  : one operand is complemented (NOT) before the operation
-   mboverflow : when true and cs_check_overflow is active, a conditional
-                jump around a call to FPC_OVERFLOW is appended }
- procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
- var
- power : longint;
- hl4 : tasmlabel;
- r : Tregister;
- begin
- { at this point, left.location.loc should be LOC_REGISTER }
- if right.location.loc=LOC_REGISTER then
- begin
- { right.location is a LOC_REGISTER }
- { when swapped another result register }
- if (nodetype=subn) and (nf_swapped in flags) then
- begin
- if extra_not then
- emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
- { the result ends up in right's register, so the locations are exchanged
-   afterwards to keep "left holds the result" true for the caller }
- emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
- { newly swapped also set swapped flag }
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end
- else
- begin
- if extra_not then
- emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
- { for these operations the locations are exchanged before emitting;
-   nf_swapped is not toggled here }
- if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
- location_swap(left.location,right.location);
- emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
- end;
- end
- else
- begin
- { right.location is not a LOC_REGISTER }
- if (nodetype=subn) and (nf_swapped in flags) then
- begin
- if extra_not then
- cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
- { load right into a scratch register, apply op with left as source and
-   copy the result back into left's register }
- r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
- cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,r);
- emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
- cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
- end
- else
- begin
- { Optimizations when right.location is a constant value }
- { (in)equality test against 0 -> TEST reg,reg }
- if (op=A_CMP) and
- (nodetype in [equaln,unequaln]) and
- (right.location.loc=LOC_CONSTANT) and
- (right.location.value=0) then
- begin
- emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
- end
- else
- { add 1 -> INC, only without overflow checking }
- if (op=A_ADD) and
- (right.location.loc=LOC_CONSTANT) and
- (right.location.value=1) and
- not(cs_check_overflow in current_settings.localswitches) then
- begin
- emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
- end
- else
- { subtract 1 -> DEC, only without overflow checking }
- if (op=A_SUB) and
- (right.location.loc=LOC_CONSTANT) and
- (right.location.value=1) and
- not(cs_check_overflow in current_settings.localswitches) then
- begin
- emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
- end
- else
- { multiply by a power of two -> SHL, only without overflow checking }
- if (op=A_IMUL) and
- (right.location.loc=LOC_CONSTANT) and
- (ispowerof2(int64(right.location.value),power)) and
- not(cs_check_overflow in current_settings.localswitches) then
- begin
- emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
- end
- else
- begin
- if extra_not then
- begin
- { complement a copy of right in a scratch register and AND it into left;
-   note that A_AND is emitted here regardless of the value of op }
- r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
- cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,r);
- emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
- emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
- end
- else
- begin
- emit_op_right_left(op,opsize);
- end;
- end;
- end;
- end;
- { only in case of overflow operations }
- { produce overflow code }
- { we must put it here directly, because sign of operation }
- { is in unsigned VAR!! }
- if mboverflow then
- begin
- if cs_check_overflow in current_settings.localswitches then
- begin
- { jump over the FPC_OVERFLOW call when the flags signal "no overflow" }
- current_asmdata.getjumplabel(hl4);
- if unsigned then
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
- else
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
- cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
- cg.a_label(current_asmdata.CurrAsmList,hl4);
- end;
- end;
- end;
- { Guarantees that left.location is a LOC_REGISTER.
-   When right already is a register and noswap is false, the two locations
-   are exchanged and nf_swapped is toggled; otherwise left is loaded into a
-   register. Afterwards every non-constant operand whose size differs from
-   opsize (signedness ignored) is forced into a register of size opsize. }
- procedure tx86addnode.left_must_be_reg(opsize:TCGSize;noswap:boolean);
-   var
-     is_compare : boolean;
-   begin
-     if left.location.loc<>LOC_REGISTER then
-       begin
-         if (right.location.loc=LOC_REGISTER) and not(noswap) then
-           begin
-             { right is usable as destination: exchange the operands and remember it }
-             location_swap(left.location,right.location);
-             toggleflag(nf_swapped);
-           end
-         else
-           begin
-             { a comparison leaves its operand untouched, so a constant
-               register may be reused in that case }
-             is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
-             location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,is_compare);
-           end;
-       end;
-     { size fix-up, right operand first }
-     if (right.location.loc<>LOC_CONSTANT) then
-       if (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
-         location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
-     if (left.location.loc<>LOC_CONSTANT) then
-       if (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
-         location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
-   end;
- { Prepares the operands of an x87 operation.
-   With force_fpureg set, operands that are not yet LOC_FPUREGISTER are
-   loaded onto the FPU stack. nf_swapped is toggled whenever left was
-   already on the stack and right is (or is now put) on the stack too,
-   since the operands then sit in reversed order. }
- procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
-   begin
-     if (right.location.loc=LOC_FPUREGISTER) then
-       begin
-         if (left.location.loc=LOC_FPUREGISTER) then
-           begin
-             { fpu operands are always in the wrong order on the stack }
-             toggleflag(nf_swapped);
-           end
-         else
-           begin
-             { the nominator in st0 }
-             if force_fpureg then
-               location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
-           end;
-       end
-     else
-       begin
-         if force_fpureg then
-           begin
-             location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
-             if (left.location.loc=LOC_FPUREGISTER) then
-               begin
-                 { left was on the stack => swap }
-                 toggleflag(nf_swapped);
-               end
-             else
-               location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
-           end;
-       end;
-   end;
- { Emits "op right,left" with left.location.register as destination.
-   right may be a register, a memory reference or a constant; subset
-   register/reference locations are first loaded into a register.
-   Any other right location raises internalerror(200203232). }
- procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
- {$ifdef x86_64}
- var
- tmpreg : tregister;
- {$endif x86_64}
- begin
- if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
- location_force_reg(current_asmdata.CurrAsmList,right.location,def_cgsize(right.resultdef),true);
- { left must be a register }
- case right.location.loc of
- LOC_REGISTER,
- LOC_CREGISTER :
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
- LOC_REFERENCE,
- LOC_CREFERENCE :
- begin
- { the reference is simplified first so it can be used as an instruction operand }
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
- current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
- end;
- LOC_CONSTANT :
- begin
- {$ifdef x86_64}
- { x86_64 only supports signed 32 bits constants directly }
- { a 64 bit constant outside the longint range goes through a scratch register }
- if (opsize in [OS_S64,OS_64]) and
- ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
- begin
- tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
- cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
- end
- else
- {$endif x86_64}
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
- end;
- else
- internalerror(200203232);
- end;
- end;
- { Returns the flags condition that represents the node's comparison.
-   equaln/unequaln always map to F_E/F_NE. For the ordering comparisons
-   the signed (L/LE/G/GE) or unsigned (B/BE/A/AE) condition is chosen via
-   `unsigned`, and the mirrored condition is used when nf_swapped is set.
-   For any other node type no result is assigned. }
- function tx86addnode.getresflags(unsigned : boolean) : tresflags;
-   var
-     mirrored : boolean;
-   begin
-     mirrored:=nf_swapped in flags;
-     case nodetype of
-       equaln :
-         getresflags:=F_E;
-       unequaln :
-         getresflags:=F_NE;
-       ltn :
-         begin
-           if unsigned then
-             begin
-               if mirrored then getresflags:=F_A else getresflags:=F_B;
-             end
-           else
-             begin
-               if mirrored then getresflags:=F_G else getresflags:=F_L;
-             end;
-         end;
-       lten :
-         begin
-           if unsigned then
-             begin
-               if mirrored then getresflags:=F_AE else getresflags:=F_BE;
-             end
-           else
-             begin
-               if mirrored then getresflags:=F_GE else getresflags:=F_LE;
-             end;
-         end;
-       gtn :
-         begin
-           if unsigned then
-             begin
-               if mirrored then getresflags:=F_B else getresflags:=F_A;
-             end
-           else
-             begin
-               if mirrored then getresflags:=F_L else getresflags:=F_G;
-             end;
-         end;
-       gten :
-         begin
-           if unsigned then
-             begin
-               if mirrored then getresflags:=F_BE else getresflags:=F_AE;
-             end
-           else
-             begin
-               if mirrored then getresflags:=F_LE else getresflags:=F_GE;
-             end;
-         end;
-     end;
-   end;
- {*****************************************************************************
- AddSmallSet
- *****************************************************************************}
- { Generates code for an arithmetic operation on small sets (sets that are
-   handled as an integer of resultdef.size bytes).
-   The node type selects the instruction: adding a single element uses BTS,
-   union OR, symmetric difference XOR, intersection AND, and difference
-   AND with a complemented operand. The result location is left's register
-   (or right's register for the "all members minus x" shortcut). }
- procedure tx86addnode.second_addsmallset;
- var
- setbase : aint;
- opsize : TCGSize;
- op : TAsmOp;
- extra_not,
- noswap : boolean;
- all_member_optimization:boolean;
- begin
- pass_left_right;
- noswap:=false;
- extra_not:=false;
- all_member_optimization:=false;
- opsize:=int_cgsize(resultdef.size);
- { take the set base from whichever operand is the set }
- if (left.resultdef.typ=setdef) then
- setbase:=tsetdef(left.resultdef).setbase
- else
- setbase:=tsetdef(right.resultdef).setbase;
- case nodetype of
- addn :
- begin
- { adding elements is not commutative }
- if (nf_swapped in flags) and (left.nodetype=setelementn) then
- swapleftright;
- { are we adding set elements ? }
- if right.nodetype=setelementn then
- begin
- { no range support for smallsets! }
- if assigned(tsetelementnode(right).right) then
- internalerror(43244);
- { btsb isn't supported }
- if opsize=OS_8 then
- opsize:=OS_32;
- { bts requires both elements to be registers }
- location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
- location_force_reg(current_asmdata.CurrAsmList,right.location,opsize,true);
- register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
- op:=A_BTS;
- noswap:=true;
- end
- else
- op:=A_OR;
- end;
- symdifn :
- op:=A_XOR;
- muln :
- op:=A_AND;
- subn :
- begin
- op:=A_AND;
- { minuend is the constant -1 (all bits set): the difference is a plain NOT }
- if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
- ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
- all_member_optimization:=true;
- { a constant subtrahend is complemented at compile time, otherwise
-   emit_generic_code has to emit the NOT }
- if (not(nf_swapped in flags)) and
- (right.location.loc=LOC_CONSTANT) then
- right.location.value := not(right.location.value)
- else if (nf_swapped in flags) and
- (left.location.loc=LOC_CONSTANT) then
- left.location.value := not(left.location.value)
- else
- extra_not:=true;
- end;
- xorn :
- op:=A_XOR;
- orn :
- op:=A_OR;
- andn :
- op:=A_AND;
- else
- internalerror(2003042215);
- end;
- if all_member_optimization then
- begin
- {A set expression [0..31]-x can be implemented with a simple NOT.}
- if nf_swapped in flags then
- begin
- { newly swapped also set swapped flag }
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end;
- location_force_reg(current_asmdata.currAsmList,right.location,opsize,false);
- emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
- location:=right.location;
- end
- else
- begin
- { left must be a register }
- left_must_be_reg(opsize,noswap);
- emit_generic_code(op,opsize,true,extra_not,false);
- location_freetemp(current_asmdata.CurrAsmList,right.location);
- { left is always a register and contains the result }
- location:=left.location;
- end;
- { fix the changed opsize we did above because of the missing btsb }
- if opsize<>int_cgsize(resultdef.size) then
- location_force_reg(current_asmdata.CurrAsmList,location,int_cgsize(resultdef.size),false);
- end;
- { Generates code for comparing two small sets; the result is returned in
-   the processor flags (LOC_FLAGS).
-   (in)equality is a plain CMP. The subset tests (lten/gten) first AND one
-   operand into the other and then compare the outcome for equality, which
-   is why the node type is rewritten to equaln.
-   Any other node type raises internalerror(2003042215). }
- procedure tx86addnode.second_cmpsmallset;
- var
- opsize : TCGSize;
- op : TAsmOp;
- begin
- pass_left_right;
- opsize:=int_cgsize(left.resultdef.size);
- case nodetype of
- equaln,
- unequaln :
- op:=A_CMP;
- lten,gten:
- begin
- { exchange the operands depending on the comparison direction and the
-   current nf_swapped state before emitting the AND }
- if (not(nf_swapped in flags) and (nodetype = lten)) or
- ((nf_swapped in flags) and (nodetype = gten)) then
- swapleftright;
- location_force_reg(current_asmdata.CurrAsmList,left.location,opsize,false);
- emit_op_right_left(A_AND,opsize);
- op:=A_CMP;
- { warning: ugly hack, we need a JE so change the node to equaln }
- nodetype:=equaln;
- end;
- else
- internalerror(2003042215);
- end;
- { left must be a register }
- left_must_be_reg(opsize,false);
- emit_generic_code(op,opsize,true,false,false);
- location_freetemp(current_asmdata.CurrAsmList,right.location);
- location_freetemp(current_asmdata.CurrAsmList,left.location);
- { the result lives in the flags; sets compare as unsigned values }
- location_reset(location,LOC_FLAGS,OS_NO);
- location.resflags:=getresflags(true);
- end;
- {*****************************************************************************
- AddMMX
- *****************************************************************************}
- {$ifdef SUPPORT_MMX}
- { Generates code for an arithmetic or logical operation on MMX operands.
-   The instruction is chosen from the node type, the MMX element type of
-   left.resultdef and, for add/sub, the cs_mmx_saturation switch. The
-   result is returned in an MMX register (LOC_MMXREGISTER).
-   Node types other than addn, muln, subn, xorn, orn and andn raise
-   internalerror(2003042214).
-   NOTE(review): the inner case statements have no else branch, so `op`
-   stays unassigned for element types they do not list (e.g. 32 bit
-   elements with saturation) - confirm whether earlier passes rule those
-   combinations out. }
- procedure tx86addnode.second_opmmx;
-   var
-     op : TAsmOp;
-     cmpop : boolean;
-     mmxbase : tmmxtype;
-     hreg,
-     hregister : tregister;
-   begin
-     pass_left_right;
-     cmpop:=false;
-     mmxbase:=mmx_type(left.resultdef);
-     location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
-     case nodetype of
-       addn :
-         begin
-           if (cs_mmx_saturation in current_settings.localswitches) then
-             begin
-               case mmxbase of
-                 mmxs8bit:
-                   op:=A_PADDSB;
-                 mmxu8bit:
-                   op:=A_PADDUSB;
-                 mmxs16bit,mmxfixed16:
-                   op:=A_PADDSW;
-                 mmxu16bit:
-                   op:=A_PADDUSW;
-               end;
-             end
-           else
-             begin
-               case mmxbase of
-                 mmxs8bit,mmxu8bit:
-                   op:=A_PADDB;
-                 mmxs16bit,mmxu16bit,mmxfixed16:
-                   op:=A_PADDW;
-                 mmxs32bit,mmxu32bit:
-                   op:=A_PADDD;
-               end;
-             end;
-         end;
-       muln :
-         begin
-           case mmxbase of
-             mmxs16bit,mmxu16bit:
-               op:=A_PMULLW;
-             mmxfixed16:
-               op:=A_PMULHW;
-           end;
-         end;
-       subn :
-         begin
-           if (cs_mmx_saturation in current_settings.localswitches) then
-             begin
-               case mmxbase of
-                 mmxs8bit:
-                   op:=A_PSUBSB;
-                 mmxu8bit:
-                   op:=A_PSUBUSB;
-                 mmxs16bit,mmxfixed16:
-                   { word-sized saturating subtract, mirroring A_PADDSW in
-                     the addn case (was the byte variant A_PSUBSB) }
-                   op:=A_PSUBSW;
-                 mmxu16bit:
-                   op:=A_PSUBUSW;
-               end;
-             end
-           else
-             begin
-               case mmxbase of
-                 mmxs8bit,mmxu8bit:
-                   op:=A_PSUBB;
-                 mmxs16bit,mmxu16bit,mmxfixed16:
-                   op:=A_PSUBW;
-                 mmxs32bit,mmxu32bit:
-                   op:=A_PSUBD;
-               end;
-             end;
-         end;
-       xorn:
-         op:=A_PXOR;
-       orn:
-         op:=A_POR;
-       andn:
-         op:=A_PAND;
-       else
-         internalerror(2003042214);
-     end;
-     { left and right no register? }
-     { then one must be demanded }
-     if (left.location.loc<>LOC_MMXREGISTER) then
-       begin
-         if (right.location.loc=LOC_MMXREGISTER) then
-           begin
-             location_swap(left.location,right.location);
-             toggleflag(nf_swapped);
-           end
-         else
-           begin
-             { register variable ? }
-             if (left.location.loc=LOC_CMMXREGISTER) then
-               begin
-                 hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
-                 emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
-               end
-             else
-               begin
-                 if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-                   internalerror(200203245);
-                 hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
-                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
-                 emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
-               end;
-             location_reset(left.location,LOC_MMXREGISTER,OS_NO);
-             left.location.register:=hregister;
-           end;
-       end;
-     { at this point, left.location.loc should be LOC_MMXREGISTER }
-     if right.location.loc<>LOC_MMXREGISTER then
-       begin
-         if (nodetype=subn) and (nf_swapped in flags) then
-           begin
-             { swapped subtraction: right is copied into a fresh register
-               which becomes destination and result }
-             hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
-             if right.location.loc=LOC_CMMXREGISTER then
-               begin
-                 emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
-                 emit_reg_reg(op,S_NO,left.location.register,hreg);
-               end
-             else
-               begin
-                 { it is right that gets loaded from memory below, so right
-                   is the location that has to be a reference (left is
-                   already LOC_MMXREGISTER here, checking it would always
-                   fail) }
-                 if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-                   internalerror(200203247);
-                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
-                 emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
-                 emit_reg_reg(op,S_NO,left.location.register,hreg);
-               end;
-             location.register:=hreg;
-           end
-         else
-           begin
-             if (right.location.loc=LOC_CMMXREGISTER) then
-               emit_reg_reg(op,S_NO,right.location.register,left.location.register)
-             else
-               begin
-                 if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
-                   internalerror(200203246);
-                 tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
-                 emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
-               end;
-             location.register:=left.location.register;
-           end;
-       end
-     else
-       begin
-         { right.location=LOC_MMXREGISTER }
-         if (nodetype=subn) and (nf_swapped in flags) then
-           begin
-             { result is produced in right's register; exchange the
-               locations so that left holds it }
-             emit_reg_reg(op,S_NO,left.location.register,right.location.register);
-             location_swap(left.location,right.location);
-             toggleflag(nf_swapped);
-           end
-         else
-           begin
-             emit_reg_reg(op,S_NO,right.location.register,left.location.register);
-           end;
-         location.register:=left.location.register;
-       end;
-     location_freetemp(current_asmdata.CurrAsmList,right.location);
-     { cmpop is never set to true in this procedure, so left is kept }
-     if cmpop then
-       location_freetemp(current_asmdata.CurrAsmList,left.location);
-   end;
- {$endif SUPPORT_MMX}
- {*****************************************************************************
- AddFloat
- *****************************************************************************}
- { Generates code for a floating point add/sub/mul/div using SSE registers;
-   the result is returned in an mm register (LOC_MMREGISTER).
-   Special case: with an fpu type of at least fpu_sse3, sqr(a)+sqr(b) and
-   sqr(a)-sqr(b) are turned into one packed multiply followed by a
-   horizontal add/subtract. For that the two sqr inline nodes are removed
-   from the tree and replaced by their arguments before the operands are
-   evaluated.
-   Node types other than addn, muln, subn and slashn raise
-   internalerror(200312231). }
- procedure tx86addnode.second_addfloatsse;
- var
- op : topcg;
- sqr_sum : boolean;
- tmp : tnode;
- begin
- sqr_sum:=false;
- if (current_settings.fputype>=fpu_sse3) and
- use_vectorfpu(resultdef) and
- (nodetype in [addn,subn]) and
- (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
- (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
- begin
- sqr_sum:=true;
- { detach the argument of each sqr node, free the sqr node itself and
-   continue with the bare argument }
- tmp:=tinlinenode(left).left;
- tinlinenode(left).left:=nil;
- left.free;
- left:=tmp;
- tmp:=tinlinenode(right).left;
- tinlinenode(right).left:=nil;
- right.free;
- right:=tmp;
- end;
- pass_left_right;
- check_left_and_right_fpureg(false);
- if (nf_swapped in flags) then
- { can't use swapleftright if both are on the fpu stack, since then }
- { both are "R_ST" -> nothing would change -> manually switch }
- if (left.location.loc = LOC_FPUREGISTER) and
- (right.location.loc = LOC_FPUREGISTER) then
- emit_none(A_FXCH,S_NO)
- else
- swapleftright;
- case nodetype of
- addn :
- op:=OP_ADD;
- muln :
- op:=OP_MUL;
- subn :
- op:=OP_SUB;
- slashn :
- op:=OP_DIV;
- else
- internalerror(200312231);
- end;
- location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
- if sqr_sum then
- begin
- if nf_swapped in flags then
- swapleftright;
- location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
- location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
- { left's register receives both values and finally holds the result }
- location:=left.location;
- if is_double(resultdef) then
- begin
- { pack both doubles into one register, square them with a single packed
-   multiply and combine the two squares horizontally }
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
- case nodetype of
- addn:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
- subn:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
- else
- internalerror(201108162);
- end;
- end
- else
- begin
- { single precision: same idea using the packed single instructions }
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
- { ensure that bits 64..127 contain valid values }
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
- { the data is now in bits 0..32 and 64..95 }
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
- case nodetype of
- addn:
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
- end;
- subn:
- begin
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
- end;
- else
- internalerror(201108163);
- end;
- end
- end
- { we can use only right as left operand if the operation is commutative }
- else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
- begin
- location.register:=right.location.register;
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- location_force_mem(current_asmdata.CurrAsmList,left.location);
- cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
- end
- else
- begin
- { general case: left is loaded into an mm register which then also
-   receives the result }
- if (nf_swapped in flags) then
- swapleftright;
- location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
- location.register:=left.location.register;
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- location_force_mem(current_asmdata.CurrAsmList,right.location);
- cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
- end;
- end;
- { Generates code for a floating point comparison using COMISS (single)
-   or COMISD (double); the result is returned in the processor flags.
-   Operand types other than single and double raise
-   internalerror(200402222). The flags condition is taken from
-   getresflags(true), i.e. the above/below family. }
- procedure tx86addnode.second_cmpfloatsse;
- var
- op : tasmop;
- begin
- if is_single(left.resultdef) then
- op:=A_COMISS
- else if is_double(left.resultdef) then
- op:=A_COMISD
- else
- internalerror(200402222);
- pass_left_right;
- location_reset(location,LOC_FLAGS,def_cgsize(resultdef));
- { right already is in an mm register: compare with the operands
-   exchanged and toggle nf_swapped below, so that getresflags returns the
-   mirrored condition }
- if (right.location.loc=LOC_MMREGISTER) then
- begin
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- location_force_mem(current_asmdata.CurrAsmList,left.location);
- case left.location.loc of
- LOC_REFERENCE,LOC_CREFERENCE:
- begin
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
- current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
- end;
- LOC_MMREGISTER,LOC_CMMREGISTER:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register));
- else
- internalerror(200402221);
- end;
- { record that the operands were compared in exchanged order }
- if nf_swapped in flags then
- exclude(flags,nf_swapped)
- else
- include(flags,nf_swapped)
- end
- else
- begin
- { otherwise left is loaded into an mm register and right is used as the
-   source operand }
- location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
- { force floating point reg. location to be written to memory,
- we don't force it to mm register because writing to memory
- allows probably shorter code because there is no direct fpu->mm register
- copy instruction
- }
- if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
- location_force_mem(current_asmdata.CurrAsmList,right.location);
- case right.location.loc of
- LOC_REFERENCE,LOC_CREFERENCE:
- begin
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
- current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
- end;
- LOC_MMREGISTER,LOC_CMMREGISTER:
- current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register));
- else
- internalerror(200402223);
- end;
- end;
- location.resflags:=getresflags(true);
- end;
- { Generates code for an element-wise add/sub/mul/div on vector operands
-   that fit into an mm register; the result is returned in an mm register.
-   Node types other than addn, muln, subn and slashn raise
-   internalerror(200610071); operands that do not fit into an mm register
-   raise internalerror(200610072). }
- procedure tx86addnode.second_opvector;
-   var
-     op : topcg;
-     elemsize : tcgsize;
-   begin
-     pass_left_right;
-     if (nf_swapped in flags) then
-       swapleftright;
-     case nodetype of
-       addn :
-         op:=OP_ADD;
-       muln :
-         op:=OP_MUL;
-       subn :
-         op:=OP_SUB;
-       slashn :
-         op:=OP_DIV;
-       else
-         internalerror(200610071);
-     end;
-     if fits_in_mm_register(left.resultdef) then
-       begin
-         location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
-         { The operation size is that of the vector's element type. Both
-           branches below now derive it from the array's element def; the
-           commutative branch used to cast left.resultdef itself to
-           tfloatdef, which does not agree with the tarraydef cast used by
-           the other branch for the very same def. }
-         elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
-         { we can use only right as left operand if the operation is commutative }
-         if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
-           begin
-             location.register:=right.location.register;
-             cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
-           end
-         else
-           begin
-             location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
-             location.register:=left.location.register;
-             cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
-           end;
-       end
-     else
-       begin
-         { not yet supported }
-         internalerror(200610072);
-       end
-   end;
- { Generates code for a floating point add/sub/mul/div.
-   Results handled by the vector fpu are delegated to second_addfloatsse;
-   otherwise both operands are put on the x87 stack and combined with the
-   popping form of the instruction, using the reversed variant for
-   subtraction and division when the operands are swapped. The result is
-   returned in ST. Other node types raise internalerror(2003042214). }
- procedure tx86addnode.second_addfloat;
-   var
-     fpuop : TAsmOp;
-   begin
-     if not use_vectorfpu(resultdef) then
-       begin
-         pass_left_right;
-         case nodetype of
-           addn :
-             fpuop:=A_FADDP;
-           muln :
-             fpuop:=A_FMULP;
-           subn :
-             fpuop:=A_FSUBP;
-           slashn :
-             fpuop:=A_FDIVP;
-           else
-             internalerror(2003042214);
-         end;
-         check_left_and_right_fpureg(true);
-         { operands in exchanged order need the reverse operator }
-         if nf_swapped in flags then
-           case nodetype of
-             slashn :
-               fpuop:=A_FDIVRP;
-             subn :
-               fpuop:=A_FSUBRP;
-           end;
-         emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
-         tcgx86(cg).dec_fpu_stack;
-         location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
-         location.register:=NR_ST;
-       end
-     else
-       second_addfloatsse;
-   end;
- { Generates code for a floating point comparison; the result is returned
-   in the processor flags.
-   Operands handled by the vector fpu are delegated to second_cmpfloatsse.
-   Otherwise both operands are put on the x87 stack. On non-x86_64 targets
-   with a cpu type below cpu_Pentium2 the comparison is FCOMPP with the
-   status word moved into the flags through AX and SAHF; in all other
-   cases FCOMIP is used. The resulting condition is from the above/below
-   family and is mirrored when nf_swapped is set. }
- procedure tx86addnode.second_cmpfloat;
-   var
-     cmpflags : tresflags;
-     mirrored : boolean;
-   begin
-     if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
-       begin
-         second_cmpfloatsse;
-         exit;
-       end;
-     pass_left_right;
-     check_left_and_right_fpureg(true);
- {$ifndef x86_64}
-     if current_settings.cputype<cpu_Pentium2 then
-       begin
-         emit_none(A_FCOMPP,S_NO);
-         tcgx86(cg).dec_fpu_stack;
-         tcgx86(cg).dec_fpu_stack;
-         { load fpu flags }
-         cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
-         emit_reg(A_FSTSW,S_NO,NR_AX);
-         emit_none(A_SAHF,S_NO);
-         cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
-       end
-     else
- {$endif x86_64}
-       begin
-         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
-         { fcomip pops only one fpu register }
-         current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
-         tcgx86(cg).dec_fpu_stack;
-         tcgx86(cg).dec_fpu_stack;
-       end;
-     { both code paths use the same mapping from node type to condition }
-     mirrored:=nf_swapped in flags;
-     case nodetype of
-       equaln :
-         cmpflags:=F_E;
-       unequaln :
-         cmpflags:=F_NE;
-       ltn :
-         begin
-           if mirrored then cmpflags:=F_A else cmpflags:=F_B;
-         end;
-       lten :
-         begin
-           if mirrored then cmpflags:=F_AE else cmpflags:=F_BE;
-         end;
-       gtn :
-         begin
-           if mirrored then cmpflags:=F_B else cmpflags:=F_A;
-         end;
-       gten :
-         begin
-           if mirrored then cmpflags:=F_BE else cmpflags:=F_AE;
-         end;
-     end;
-     location_reset(location,LOC_FLAGS,OS_NO);
-     location.resflags:=cmpflags;
-   end;
- {*****************************************************************************
- Add64bit
- *****************************************************************************}
- { 64 bit arithmetic: when cpu64bitalu is defined this is the ordinary
-   ordinal code path; otherwise this shared implementation must not be
-   reached and raises internalerror(200402042). }
- procedure tx86addnode.second_add64bit;
-   begin
- {$ifndef cpu64bitalu}
-     { must be implemented separate }
-     internalerror(200402042);
- {$else cpu64bitalu}
-     second_addordinal;
- {$endif cpu64bitalu}
-   end;
- { 64 bit comparison: when cpu64bitalu is defined this is the ordinary
-   ordinal comparison; otherwise this shared implementation must not be
-   reached and raises internalerror(200402043). }
- procedure tx86addnode.second_cmp64bit;
-   begin
- {$ifndef cpu64bitalu}
-     { must be implemented separate }
-     internalerror(200402043);
- {$else cpu64bitalu}
-     second_cmpordinal;
- {$endif cpu64bitalu}
-   end;
- {*****************************************************************************
- AddOrdinal
- *****************************************************************************}
- { Generates code for comparing two ordinal values with CMP; the result is
-   returned in the processor flags. The comparison is treated as unsigned
-   as soon as one of the two operand types is not signed; the operand size
-   is taken from left's type. }
- procedure tx86addnode.second_cmpordinal;
-   var
-     cmpsize : tcgsize;
-     is_unsigned_cmp : boolean;
-   begin
-     { signed only if both operand types are signed }
-     is_unsigned_cmp:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
-     cmpsize:=def_cgsize(left.resultdef);
-     pass_left_right;
-     left_must_be_reg(cmpsize,false);
-     emit_generic_code(A_CMP,cmpsize,is_unsigned_cmp,false,false);
-     { release right first, then left, as neither operand is needed anymore }
-     location_freetemp(current_asmdata.CurrAsmList,right.location);
-     location_freetemp(current_asmdata.CurrAsmList,left.location);
-     location_reset(location,LOC_FLAGS,OS_NO);
-     location.resflags:=getresflags(is_unsigned_cmp);
-   end;
- { unit initialization: stores tx86addnode in caddnode
-   (presumably the class reference used to create add nodes - confirm in
-   the unit that declares caddnode) }
- begin
- caddnode:=tx86addnode;
- end.
|