123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558 |
- {
- Copyright (c) 1998-2002 by Florian Klaempfl
- Generate x86 code for math nodes
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ****************************************************************************
- }
- unit nx86mat;
- {$i fpcdefs.inc}
- interface
- uses
- node,nmat,ncgmat;
- type
- tx86unaryminusnode = class(tcgunaryminusnode)
- {$ifdef SUPPORT_MMX}
- procedure second_mmx;override;
- {$endif SUPPORT_MMX}
- procedure second_float;override;
- function pass_1:tnode;override;
- end;
- tx86notnode = class(tcgnotnode)
- procedure second_boolean;override;
- {$ifdef SUPPORT_MMX}
- procedure second_mmx;override;
- {$endif SUPPORT_MMX}
- end;
- tx86moddivnode = class(tcgmoddivnode)
- procedure pass_generate_code;override;
- end;
- implementation
- uses
- globtype,
- systems,constexp,
- cutils,verbose,globals,
- symconst,symdef,
- aasmbase,aasmtai,aasmdata,defutil,
- cgbase,pass_1,pass_2,
- ncon,
- cpubase,procinfo,
- cga,ncgutil,cgobj,hlcgobj,cgx86,cgutils;
- {*****************************************************************************
- TI386UNARYMINUSNODE
- *****************************************************************************}
- function tx86unaryminusnode.pass_1 : tnode;
- begin
- result:=nil;
- firstpass(left);
- if codegenerror then
- exit;
- if (left.resultdef.typ=floatdef) then
- begin
- if use_vectorfpu(left.resultdef) then
- expectloc:=LOC_MMREGISTER
- else
- expectloc:=LOC_FPUREGISTER;
- end
- {$ifdef SUPPORT_MMX}
- else
- if (cs_mmx in current_settings.localswitches) and
- is_mmx_able_array(left.resultdef) then
- begin
- expectloc:=LOC_MMXREGISTER;
- end
- {$endif SUPPORT_MMX}
- else
- inherited pass_1;
- end;
- {$ifdef SUPPORT_MMX}
- procedure tx86unaryminusnode.second_mmx;
- var
- op : tasmop;
- hreg : tregister;
- begin
- op:=A_NONE;
- secondpass(left);
- location_reset(location,LOC_MMXREGISTER,OS_NO);
- hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
- emit_reg_reg(A_PXOR,S_NO,hreg,hreg);
- case left.location.loc of
- LOC_MMXREGISTER:
- begin
- location.register:=left.location.register;
- end;
- LOC_CMMXREGISTER:
- begin
- location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
- emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
- end;
- LOC_REFERENCE,
- LOC_CREFERENCE:
- begin
- location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
- emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
- end;
- else
- internalerror(200203225);
- end;
- if cs_mmx_saturation in current_settings.localswitches then
- case mmx_type(resultdef) of
- mmxs8bit:
- op:=A_PSUBSB;
- mmxu8bit:
- op:=A_PSUBUSB;
- mmxs16bit,mmxfixed16:
- op:=A_PSUBSW;
- mmxu16bit:
- op:=A_PSUBUSW;
- end
- else
- case mmx_type(resultdef) of
- mmxs8bit,mmxu8bit:
- op:=A_PSUBB;
- mmxs16bit,mmxu16bit,mmxfixed16:
- op:=A_PSUBW;
- mmxs32bit,mmxu32bit:
- op:=A_PSUBD;
- end;
- if op = A_NONE then
- internalerror(201408202);
- emit_reg_reg(op,S_NO,location.register,hreg);
- emit_reg_reg(A_MOVQ,S_NO,hreg,location.register);
- end;
- {$endif SUPPORT_MMX}
- procedure tx86unaryminusnode.second_float;
- var
- reg : tregister;
- href : treference;
- l1 : tasmlabel;
- begin
- secondpass(left);
- if expectloc=LOC_MMREGISTER then
- begin
- hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
- location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
- { make life of register allocator easier }
- location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
- current_asmdata.getdatalabel(l1);
- new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
- current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
- case def_cgsize(resultdef) of
- OS_F32:
- current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(longint(1 shl 31)));
- OS_F64:
- begin
- current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(0));
- current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(-(1 shl 31)));
- end
- else
- internalerror(2004110215);
- end;
- reference_reset_symbol(href,l1,0,resultdef.alignment);
- if UseAVX then
- cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,left.location.register,location.register,nil)
- else
- begin
- reg:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
- cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),href,reg,mms_movescalar);
- cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),left.location.register,location.register,mms_movescalar);
- cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,location.register,nil);
- end;
- end
- else
- begin
- location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
- case left.location.loc of
- LOC_REFERENCE,
- LOC_CREFERENCE:
- begin
- location.register:=NR_ST;
- cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
- left.location.size,location.size,
- left.location.reference,location.register);
- emit_none(A_FCHS,S_NO);
- end;
- LOC_FPUREGISTER,
- LOC_CFPUREGISTER:
- begin
- { "load st,st" is ignored by the code generator }
- cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,NR_ST);
- location.register:=NR_ST;
- emit_none(A_FCHS,S_NO);
- end;
- else
- internalerror(200312241);
- end;
- end;
- end;
- {*****************************************************************************
- TX86NOTNODE
- *****************************************************************************}
- procedure tx86notnode.second_boolean;
- var
- opsize : tcgsize;
- hreg: tregister;
- begin
- opsize:=def_cgsize(resultdef);
- if not handle_locjump then
- begin
- { the second pass could change the location of left }
- { if it is a register variable, so we've to do }
- { this before the case statement }
- secondpass(left);
- case left.expectloc of
- LOC_FLAGS :
- begin
- location_reset(location,LOC_FLAGS,OS_NO);
- location.resflags:=left.location.resflags;
- inverse_flags(location.resflags);
- end;
- LOC_CREFERENCE,
- LOC_REFERENCE:
- begin
- {$if defined(cpu32bitalu)}
- if is_64bit(resultdef) then
- begin
- hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
- cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
- inc(left.location.reference.offset,4);
- cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
- end
- else
- {$elseif defined(cpu16bitalu)}
- if is_64bit(resultdef) then
- begin
- hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
- cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
- inc(left.location.reference.offset,2);
- cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
- inc(left.location.reference.offset,2);
- cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
- inc(left.location.reference.offset,2);
- cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
- end
- else if is_32bit(resultdef) then
- begin
- hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
- cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
- inc(left.location.reference.offset,2);
- cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
- end
- else
- {$endif}
- emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
- location_reset(location,LOC_FLAGS,OS_NO);
- location.resflags:=F_E;
- end;
- LOC_CONSTANT,
- LOC_REGISTER,
- LOC_CREGISTER,
- LOC_SUBSETREG,
- LOC_CSUBSETREG,
- LOC_SUBSETREF,
- LOC_CSUBSETREF :
- begin
- {$if defined(cpu32bitalu)}
- if is_64bit(resultdef) then
- begin
- hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
- emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
- end
- else
- {$elseif defined(cpu16bitalu)}
- if is_64bit(resultdef) then
- begin
- hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
- emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
- emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
- emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
- end
- else if is_32bit(resultdef) then
- begin
- hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
- emit_reg_reg(A_OR,S_L,GetNextReg(left.location.register),left.location.register);
- end
- else
- {$endif}
- begin
- hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
- emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
- end;
- location_reset(location,LOC_FLAGS,OS_NO);
- location.resflags:=F_E;
- end;
- else
- internalerror(200203224);
- end;
- end;
- end;
- {$ifdef SUPPORT_MMX}
- procedure tx86notnode.second_mmx;
- var hreg,r:Tregister;
- begin
- secondpass(left);
- location_reset(location,LOC_MMXREGISTER,OS_NO);
- r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- emit_const_reg(A_MOV,S_L,longint($ffffffff),r);
- { load operand }
- case left.location.loc of
- LOC_MMXREGISTER:
- location_copy(location,left.location);
- LOC_CMMXREGISTER:
- begin
- location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
- emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
- end;
- LOC_REFERENCE,
- LOC_CREFERENCE:
- begin
- location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
- emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
- end;
- end;
- { load mask }
- hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
- emit_reg_reg(A_MOVD,S_NO,r,hreg);
- { lower 32 bit }
- emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
- { shift mask }
- emit_const_reg(A_PSLLQ,S_B,32,hreg);
- { higher 32 bit }
- emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
- end;
- {$endif SUPPORT_MMX}
- {*****************************************************************************
- TX86MODDIVNODE
- *****************************************************************************}
- procedure tx86moddivnode.pass_generate_code;
- var
- hreg1,hreg2,rega,regd:Tregister;
- power:longint;
- op:Tasmop;
- cgsize:TCgSize;
- opsize:topsize;
- e, sm: aint;
- d,m: aword;
- m_add: boolean;
- s: byte;
- begin
- secondpass(left);
- if codegenerror then
- exit;
- secondpass(right);
- if codegenerror then
- exit;
- { put numerator in register }
- cgsize:=def_cgsize(resultdef);
- opsize:=TCGSize2OpSize[cgsize];
- if not (cgsize in [OS_32,OS_S32,OS_64,OS_S64]) then
- InternalError(2013102702);
- rega:=newreg(R_INTREGISTER,RS_EAX,cgsize2subreg(R_INTREGISTER,cgsize));
- regd:=newreg(R_INTREGISTER,RS_EDX,cgsize2subreg(R_INTREGISTER,cgsize));
- location_reset(location,LOC_REGISTER,cgsize);
- hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
- hreg1:=left.location.register;
- if (nodetype=divn) and (right.nodetype=ordconstn) then
- begin
- if ispowerof2(int64(tordconstnode(right).value),power) then
- begin
- { for signed numbers, the numerator must be adjusted before the
- shift instruction, but not wih unsigned numbers! Otherwise,
- "Cardinal($ffffffff) div 16" overflows! (JM) }
- if is_signed(left.resultdef) Then
- begin
- { use a sequence without jumps, saw this in
- comp.compilers (JM) }
- { no jumps, but more operations }
- hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
- emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
- {If the left value is signed, hreg2=$ffffffff, otherwise 0.}
- emit_const_reg(A_SAR,opsize,resultdef.size*8-1,hreg2);
- {If signed, hreg2=right value-1, otherwise 0.}
- { (don't use emit_const_reg, because if value>high(longint)
- then it must first be loaded into a register) }
- cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,tordconstnode(right).value-1,hreg2);
- { add to the left value }
- emit_reg_reg(A_ADD,opsize,hreg2,hreg1);
- { do the shift }
- emit_const_reg(A_SAR,opsize,power,hreg1);
- end
- else
- emit_const_reg(A_SHR,opsize,power,hreg1);
- location.register:=hreg1;
- end
- else
- begin
- if is_signed(left.resultdef) then
- begin
- e:=tordconstnode(right).value.svalue;
- calc_divconst_magic_signed(resultdef.size*8,e,sm,s);
- cg.getcpuregister(current_asmdata.CurrAsmList,rega);
- emit_const_reg(A_MOV,opsize,sm,rega);
- cg.getcpuregister(current_asmdata.CurrAsmList,regd);
- emit_reg(A_IMUL,opsize,hreg1);
- { only the high half of result is used }
- cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
- { add or subtract dividend }
- if (e>0) and (sm<0) then
- emit_reg_reg(A_ADD,opsize,hreg1,regd)
- else if (e<0) and (sm>0) then
- emit_reg_reg(A_SUB,opsize,hreg1,regd);
- { shift if necessary }
- if (s<>0) then
- emit_const_reg(A_SAR,opsize,s,regd);
- { extract and add the sign bit }
- if (e<0) then
- emit_reg_reg(A_MOV,opsize,regd,hreg1);
- { if e>=0, hreg1 still contains dividend }
- emit_const_reg(A_SHR,opsize,left.resultdef.size*8-1,hreg1);
- emit_reg_reg(A_ADD,opsize,hreg1,regd);
- cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
- location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
- cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
- end
- else
- begin
- d:=tordconstnode(right).value.svalue;
- if d>=aword(1) shl (left.resultdef.size*8-1) then
- begin
- if (cgsize in [OS_64,OS_S64]) then
- begin
- hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
- emit_const_reg(A_MOV,opsize,aint(d),hreg2);
- emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
- end
- else
- emit_const_reg(A_CMP,opsize,aint(d),hreg1);
- location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
- emit_const_reg(A_MOV,opsize,0,location.register);
- emit_const_reg(A_SBB,opsize,-1,location.register);
- end
- else
- begin
- calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
- cg.getcpuregister(current_asmdata.CurrAsmList,rega);
- emit_const_reg(A_MOV,opsize,aint(m),rega);
- cg.getcpuregister(current_asmdata.CurrAsmList,regd);
- emit_reg(A_MUL,opsize,hreg1);
- cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
- if m_add then
- begin
- { addition can overflow, shift first bit considering carry,
- then shift remaining bits in regular way. }
- emit_reg_reg(A_ADD,opsize,hreg1,regd);
- emit_const_reg(A_RCR,opsize,1,regd);
- dec(s);
- end;
- if s<>0 then
- emit_const_reg(A_SHR,opsize,aint(s),regd);
- cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
- location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
- cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
- end;
- end;
- end;
- end
- else
- begin
- {Bring denominator to a register.}
- cg.getcpuregister(current_asmdata.CurrAsmList,rega);
- emit_reg_reg(A_MOV,opsize,hreg1,rega);
- cg.getcpuregister(current_asmdata.CurrAsmList,regd);
- {Sign extension depends on the left type.}
- if is_signed(left.resultdef) then
- case left.resultdef.size of
- {$ifdef x86_64}
- 8:
- emit_none(A_CQO,S_NO);
- {$endif x86_64}
- 4:
- emit_none(A_CDQ,S_NO);
- else
- internalerror(2013102701);
- end
- else
- emit_reg_reg(A_XOR,opsize,regd,regd);
- {Division depends on the right type.}
- if is_signed(right.resultdef) then
- op:=A_IDIV
- else
- op:=A_DIV;
- if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
- emit_ref(op,opsize,right.location.reference)
- else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
- emit_reg(op,opsize,right.location.register)
- else
- begin
- hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
- hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
- emit_reg(op,opsize,hreg1);
- end;
- { Copy the result into a new register. Release R/EAX & R/EDX.}
- cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
- cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
- location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
- if nodetype=divn then
- cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
- else
- cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
- end;
- end;
- end.
|