123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611 |
- {
- Copyright (c) 2000-2002 by Florian Klaempfl
- Code generation for add nodes on the i386
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ****************************************************************************
- }
- unit n386add;
- {$i fpcdefs.inc}
- interface
- uses
- node,nadd,cpubase,nx86add;
- type
- ti386addnode = class(tx86addnode)
- function use_generic_mul32to64: boolean; override;
- function use_generic_mul64bit: boolean; override;
- procedure second_addordinal; override;
- procedure second_add64bit;override;
- procedure second_cmp64bit;override;
- procedure second_mul(unsigned: boolean);
- procedure second_mul64bit;
- protected
- procedure set_mul_result_location;
- end;
- implementation
- uses
- globtype,systems,
- cutils,verbose,globals,
- symconst,symdef,paramgr,defutil,
- aasmbase,aasmtai,aasmdata,aasmcpu,
- cgbase,procinfo,
- ncon,nset,cgutils,tgobj,
- cga,ncgutil,cgobj,cg64f32,cgx86,
- hlcgobj;
- {*****************************************************************************
- use_generic_mul32to64
- *****************************************************************************}
- function ti386addnode.use_generic_mul32to64: boolean;
- begin
- result := False;
- end;
- function ti386addnode.use_generic_mul64bit: boolean;
- begin
- result:=(cs_check_overflow in current_settings.localswitches) or
- (cs_opt_size in current_settings.optimizerswitches);
- end;
- { handles all unsigned multiplications, and 32->64 bit signed ones.
- 32bit-only signed mul is handled by generic codegen }
- procedure ti386addnode.second_addordinal;
- var
- unsigned: boolean;
- begin
- unsigned:=not(is_signed(left.resultdef)) or
- not(is_signed(right.resultdef));
- { use IMUL instead of MUL in case overflow checking is off and we're
- doing a 32->32-bit multiplication }
- if not (cs_check_overflow in current_settings.localswitches) and
- not is_64bit(resultdef) then
- unsigned:=false;
- if (nodetype=muln) and (unsigned or is_64bit(resultdef)) then
- second_mul(unsigned)
- else
- inherited second_addordinal;
- end;
- {*****************************************************************************
- Add64bit
- *****************************************************************************}
- procedure ti386addnode.second_add64bit;
- var
- op : TOpCG;
- op1,op2 : TAsmOp;
- opsize : TOpSize;
- hregister,
- hregister2 : tregister;
- hl4 : tasmlabel;
- mboverflow,
- unsigned:boolean;
- r:Tregister;
- begin
- pass_left_right;
- op1:=A_NONE;
- op2:=A_NONE;
- mboverflow:=false;
- opsize:=S_L;
- unsigned:=((left.resultdef.typ=orddef) and
- (torddef(left.resultdef).ordtype=u64bit)) or
- ((right.resultdef.typ=orddef) and
- (torddef(right.resultdef).ordtype=u64bit));
- case nodetype of
- addn :
- begin
- op:=OP_ADD;
- mboverflow:=true;
- end;
- subn :
- begin
- op:=OP_SUB;
- op1:=A_SUB;
- op2:=A_SBB;
- mboverflow:=true;
- end;
- xorn:
- op:=OP_XOR;
- orn:
- op:=OP_OR;
- andn:
- op:=OP_AND;
- muln:
- begin
- second_mul64bit;
- exit;
- end
- else
- begin
- { everything should be handled in pass_1 (JM) }
- internalerror(200109051);
- end;
- end;
- { left and right no register? }
- { then one must be demanded }
- if (left.location.loc<>LOC_REGISTER) then
- begin
- if (right.location.loc<>LOC_REGISTER) then
- begin
- hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
- location_reset(left.location,LOC_REGISTER,left.location.size);
- left.location.register64.reglo:=hregister;
- left.location.register64.reghi:=hregister2;
- end
- else
- begin
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end;
- end;
- { at this point, left.location.loc should be LOC_REGISTER }
- if right.location.loc=LOC_REGISTER then
- begin
- { when swapped another result register }
- if (nodetype=subn) and (nf_swapped in flags) then
- begin
- cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
- left.location.register64,
- right.location.register64);
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end
- else
- begin
- cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
- right.location.register64,
- left.location.register64);
- end;
- end
- else
- begin
- { right.location<>LOC_REGISTER }
- if (nodetype=subn) and (nf_swapped in flags) then
- begin
- r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,r);
- cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- emit_reg_reg(op1,opsize,left.location.register64.reglo,r);
- emit_reg_reg(A_MOV,opsize,r,left.location.register64.reglo);
- cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,r);
- { the carry flag is still ok }
- emit_reg_reg(op2,opsize,left.location.register64.reghi,r);
- cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- emit_reg_reg(A_MOV,opsize,r,left.location.register64.reghi);
- end
- else
- begin
- cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,
- left.location.register64);
- end;
- location_freetemp(current_asmdata.CurrAsmList,right.location);
- end;
- { only in case of overflow operations }
- { produce overflow code }
- { we must put it here directly, because sign of operation }
- { is in unsigned VAR!! }
- if mboverflow then
- begin
- if cs_check_overflow in current_settings.localswitches then
- begin
- current_asmdata.getjumplabel(hl4);
- if unsigned then
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
- else
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
- cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
- cg.a_label(current_asmdata.CurrAsmList,hl4);
- end;
- end;
- location_copy(location,left.location);
- end;
- procedure ti386addnode.second_cmp64bit;
- var
- truelabel,
- falselabel,
- hlab : tasmlabel;
- href : treference;
- unsigned : boolean;
- procedure firstjmp64bitcmp;
- var
- oldnodetype : tnodetype;
- begin
- {$ifdef OLDREGVARS}
- load_all_regvars(current_asmdata.CurrAsmList);
- {$endif OLDREGVARS}
- { the jump the sequence is a little bit hairy }
- case nodetype of
- ltn,gtn:
- begin
- if (hlab<>location.truelabel) then
- cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.truelabel);
- { cheat a little bit for the negative test }
- toggleflag(nf_swapped);
- if (hlab<>location.falselabel) then
- cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.falselabel);
- toggleflag(nf_swapped);
- end;
- lten,gten:
- begin
- oldnodetype:=nodetype;
- if nodetype=lten then
- nodetype:=ltn
- else
- nodetype:=gtn;
- if (hlab<>location.truelabel) then
- cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.truelabel);
- { cheat for the negative test }
- if nodetype=ltn then
- nodetype:=gtn
- else
- nodetype:=ltn;
- if (hlab<>location.falselabel) then
- cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.falselabel);
- nodetype:=oldnodetype;
- end;
- equaln:
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.falselabel);
- unequaln:
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.truelabel);
- end;
- end;
- procedure secondjmp64bitcmp;
- begin
- { the jump the sequence is a little bit hairy }
- case nodetype of
- ltn,gtn,lten,gten:
- begin
- { the comparisaion of the low dword have to be }
- { always unsigned! }
- cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),location.truelabel);
- cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
- end;
- equaln:
- begin
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.falselabel);
- cg.a_jmp_always(current_asmdata.CurrAsmList,location.truelabel);
- end;
- unequaln:
- begin
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.truelabel);
- cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
- end;
- end;
- end;
- begin
- truelabel:=nil;
- falselabel:=nil;
- pass_left_right;
- unsigned:=((left.resultdef.typ=orddef) and
- (torddef(left.resultdef).ordtype=u64bit)) or
- ((right.resultdef.typ=orddef) and
- (torddef(right.resultdef).ordtype=u64bit));
- { we have LOC_JUMP as result }
- current_asmdata.getjumplabel(truelabel);
- current_asmdata.getjumplabel(falselabel);
- location_reset_jump(location,truelabel,falselabel);
- { Relational compares against constants having low dword=0 can omit the
- second compare based on the fact that any unsigned value is >=0 }
- hlab:=nil;
- if (right.location.loc=LOC_CONSTANT) and
- (lo(right.location.value64)=0) then
- begin
- case getresflags(true) of
- F_AE: hlab:=location.truelabel ;
- F_B: hlab:=location.falselabel;
- end;
- end;
- if (right.location.loc=LOC_CONSTANT) and
- (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
- begin
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
- href:=left.location.reference;
- inc(href.offset,4);
- cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- emit_const_ref(A_CMP,S_L,aint(hi(right.location.value64)),href);
- firstjmp64bitcmp;
- cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- if assigned(hlab) then
- cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
- else
- begin
- cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- emit_const_ref(A_CMP,S_L,aint(lo(right.location.value64)),left.location.reference);
- secondjmp64bitcmp;
- cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- end;
- location_freetemp(current_asmdata.CurrAsmList,left.location);
- exit;
- end;
- { left and right no register? }
- { then one must be demanded }
- if not (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
- begin
- if not (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
- hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true)
- else
- begin
- location_swap(left.location,right.location);
- toggleflag(nf_swapped);
- end;
- end;
- { at this point, left.location.loc should be LOC_[C]REGISTER }
- case right.location.loc of
- LOC_REGISTER,
- LOC_CREGISTER :
- begin
- cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
- firstjmp64bitcmp;
- emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
- secondjmp64bitcmp;
- cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- end;
- LOC_CREFERENCE,
- LOC_REFERENCE :
- begin
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
- href:=right.location.reference;
- inc(href.offset,4);
- cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
- firstjmp64bitcmp;
- emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
- secondjmp64bitcmp;
- cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- location_freetemp(current_asmdata.CurrAsmList,right.location);
- end;
- LOC_CONSTANT :
- begin
- cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
- firstjmp64bitcmp;
- cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- if assigned(hlab) then
- cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
- else
- begin
- cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
- secondjmp64bitcmp;
- cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
- end;
- end;
- else
- internalerror(200203282);
- end;
- end;
- {*****************************************************************************
- x86 MUL
- *****************************************************************************}
- procedure ti386addnode.set_mul_result_location;
- begin
- location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
- {Free EAX,EDX}
- cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
- if is_64bit(resultdef) then
- begin
- {Allocate a couple of registers and store EDX:EAX into it}
- location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
- cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
- location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
- end
- else
- begin
- {Allocate a new register and store the result in EAX in it.}
- location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
- cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
- end;
- location_freetemp(current_asmdata.CurrAsmList,left.location);
- location_freetemp(current_asmdata.CurrAsmList,right.location);
- end;
- procedure ti386addnode.second_mul(unsigned: boolean);
- var reg:Tregister;
- ref:Treference;
- use_ref:boolean;
- hl4 : tasmlabel;
- const
- asmops: array[boolean] of tasmop = (A_IMUL, A_MUL);
- begin
- pass_left_right;
- reg:=NR_NO;
- reference_reset(ref,sizeof(pint),[]);
- { Mul supports registers and references, so if not register/reference,
- load the location into a register.
- The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
- use_ref:=false;
- if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
- reg:=left.location.register
- else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
- begin
- tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
- ref:=left.location.reference;
- use_ref:=true;
- end
- else
- begin
- {LOC_CONSTANT for example.}
- reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
- hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
- end;
- {Allocate EAX.}
- cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
- {Load the right value.}
- hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
- {Also allocate EDX, since it is also modified by a mul (JM).}
- cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
- if use_ref then
- emit_ref(asmops[unsigned],S_L,ref)
- else
- emit_reg(asmops[unsigned],S_L,reg);
- if (cs_check_overflow in current_settings.localswitches) and
- { 32->64 bit cannot overflow }
- (not is_64bit(resultdef)) then
- begin
- current_asmdata.getjumplabel(hl4);
- cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
- cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
- cg.a_label(current_asmdata.CurrAsmList,hl4);
- end;
- set_mul_result_location;
- end;
- procedure ti386addnode.second_mul64bit;
- var
- list: TAsmList;
- hreg1,hreg2: tregister;
- begin
- { 64x64 multiplication yields 128-bit result, but we're only
- interested in its lower 64 bits. This lower part is independent
- of operand signs, and so is the generated code. }
- { pass_left_right already called from second_add64bit }
- list:=current_asmdata.CurrAsmList;
- if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
- tcgx86(cg).make_simple_ref(list,left.location.reference);
- if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
- tcgx86(cg).make_simple_ref(list,right.location.reference);
- { calculate 32-bit terms lo(right)*hi(left) and hi(left)*lo(right) }
- if (right.location.loc=LOC_CONSTANT) then
- begin
- { Omit zero terms, if any }
- hreg1:=NR_NO;
- hreg2:=NR_NO;
- if lo(right.location.value64)<>0 then
- hreg1:=cg.getintregister(list,OS_INT);
- if hi(right.location.value64)<>0 then
- hreg2:=cg.getintregister(list,OS_INT);
- { Take advantage of 3-operand form of IMUL }
- case left.location.loc of
- LOC_REGISTER,LOC_CREGISTER:
- begin
- if hreg1<>NR_NO then
- emit_const_reg_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.register64.reghi,hreg1);
- if hreg2<>NR_NO then
- emit_const_reg_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.register64.reglo,hreg2);
- end;
- LOC_REFERENCE,LOC_CREFERENCE:
- begin
- if hreg2<>NR_NO then
- list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.reference,hreg2));
- inc(left.location.reference.offset,4);
- if hreg1<>NR_NO then
- list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.reference,hreg1));
- dec(left.location.reference.offset,4);
- end;
- else
- InternalError(2014011602);
- end;
- end
- else
- begin
- hreg1:=cg.getintregister(list,OS_INT);
- hreg2:=cg.getintregister(list,OS_INT);
- cg64.a_load64low_loc_reg(list,left.location,hreg1);
- cg64.a_load64high_loc_reg(list,left.location,hreg2);
- case right.location.loc of
- LOC_REGISTER,LOC_CREGISTER:
- begin
- emit_reg_reg(A_IMUL,S_L,right.location.register64.reghi,hreg1);
- emit_reg_reg(A_IMUL,S_L,right.location.register64.reglo,hreg2);
- end;
- LOC_REFERENCE,LOC_CREFERENCE:
- begin
- emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg2);
- inc(right.location.reference.offset,4);
- emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg1);
- dec(right.location.reference.offset,4);
- end;
- else
- InternalError(2014011603);
- end;
- end;
- { add hi*lo and lo*hi terms together }
- if (hreg1<>NR_NO) and (hreg2<>NR_NO) then
- emit_reg_reg(A_ADD,S_L,hreg2,hreg1);
- { load lo(right) into EAX }
- cg.getcpuregister(list,NR_EAX);
- cg64.a_load64low_loc_reg(list,right.location,NR_EAX);
- { multiply EAX by lo(left), producing 64-bit value in EDX:EAX }
- cg.getcpuregister(list,NR_EDX);
- if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
- emit_reg(A_MUL,S_L,left.location.register64.reglo)
- else if (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
- emit_ref(A_MUL,S_L,left.location.reference)
- else
- InternalError(2014011604);
- { add previously calculated terms to the high half }
- if (hreg1<>NR_NO) then
- emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX)
- else if (hreg2<>NR_NO) then
- emit_reg_reg(A_ADD,S_L,hreg2,NR_EDX)
- else
- InternalError(2014011604);
- { Result is now in EDX:EAX. Copy it to virtual registers. }
- set_mul_result_location;
- end;
- begin
- caddnode:=ti386addnode;
- end.
|