123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303 |
- {
- Copyright (c) 2015 by Jonas Maebe
- Generate AArch64 assembler for in set/case nodes
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- ****************************************************************************
- }
- unit ncpuset;
- {$i fpcdefs.inc}
- interface
- uses
- node,nset,ncgset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata,globtype;
type
  { AArch64 descendant of tcgcasenode. It overrides the hooks that pick the
    code-generation strategy for a case statement (has_jumptable,
    optimizevalues) and the two emitters (genlinearlist, genjumptable). }
  taarch64casenode = class(tcgcasenode)
  protected
    { strategy selection }
    function has_jumptable: boolean;override;
    procedure optimizevalues(var max_linear_list: int64; var max_dist: qword);override;
    { code emitters }
    procedure genlinearlist(hp: pcaselabel);override;
    procedure genjumptable(hp: pcaselabel; min_, max_: int64);override;
  end;
- implementation
- uses
- systems,
- verbose,globals,constexp,cutils,
- symconst,symdef,defutil,
- paramgr,
- cpuinfo,
- pass_2,cgcpu,
- ncon,
- tgobj,ncgutil,rgobj,aasmcpu,
- procinfo,
- cgutils;
- {*****************************************************************************
- TAARCH64CASENODE
- *****************************************************************************}
{ Overrides the tuning values used when choosing how to compile a case
  statement. Only max_linear_list is changed (set to 10); max_dist is left
  exactly as the caller passed it in. }
procedure taarch64casenode.optimizevalues(var max_linear_list: int64; var max_dist: qword);
  const
    aarch64_max_linear_list = 10;
  begin
    max_linear_list:=aarch64_max_linear_list;
  end;
-
{ Reports that this target provides a jump table implementation
  (see genjumptable); always returns true. }
function taarch64casenode.has_jumptable: boolean;
  begin
    Result:=true;
  end;
{ Emits a linear sequence of tests for the case-label tree rooted at hp.

  When the selector is signed and the smallest label is negative, the work is
  delegated to genlinearcmplist. Otherwise the tree is walked in order
  (less, entry, greater) and hregister is repeatedly decremented so that each
  label or range can be tested against zero / the range width using the flags
  of the subtraction. After the last label an unconditional jump to elselabel
  is emitted. }
procedure taarch64casenode.genlinearlist(hp : pcaselabel);
  var
    at_first_label : boolean;
    prev_was_range : boolean;
    { value that has already been subtracted from hregister so far }
    prev_bound : TConstExprInt;
    flags_below,flags_not_above : tresflags;
    cmp_size,sub_size : tcgsize;

  { Emits "hregister := hregister - amount" using sub_size.
    sub_size is an unsigned size in order to avoid unnecessary sign
    extensions: on this path hregister is never negative, because otherwise
    genlinearlist would not be used (see the guard in the main body).
    The boolean argument is passed as true -- presumably "set flags", which
    the a_jmp_flags calls following each subtraction depend on. }
  procedure emit_sub(amount : aint);
    var
      ovloc : tlocation;
    begin
      cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,sub_size,amount,hregister,hregister,
        true,ovloc);
    end;

  { In-order walk over the label tree, emitting the test for each entry. }
  procedure emit_entry(entry : pcaselabel);
    begin
      if assigned(entry^.less) then
        emit_entry(entry^.less);

      { the very first label may need an explicit "below the lowest label" test }
      if at_first_label and (entry^._low>get_min_value(left.resultdef)) then
        cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,cmp_size,jmp_lt,aint(entry^._low.svalue),hregister,elselabel);

      if entry^._low=entry^._high then
        begin
          { single value }
          if entry^._low-prev_bound=0 then
            cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,cmp_size,OC_EQ,0,hregister,blocklabel(entry^.blockid))
          else
            begin
              emit_sub(aint(entry^._low.svalue-prev_bound.svalue));
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_EQ,blocklabel(entry^.blockid));
            end;
          prev_bound:=entry^._low;
          prev_was_range:=false;
        end
      else
        begin
          { range of values }
          if at_first_label then
            begin
              { The walk starts with the smallest label; values below it were
                already sent to the else-label above. Only the bias of the
                first lower bound may still have to be removed. }
              if (entry^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then
                emit_sub(aint(entry^._low.svalue));
            end
          else
            begin
              emit_sub(aint(entry^._low.svalue-prev_bound.svalue));
              { No jump is needed when this range starts at the value
                directly following the previous range; otherwise values in
                the gap go to the else-label. }
              if (aint(entry^._low.svalue-prev_bound.svalue)<>1) or
                 (not prev_was_range) then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,flags_below,elselabel);
            end;
          { test against the width of the range }
          emit_sub(aint(entry^._high.svalue-entry^._low.svalue));
          cg.a_jmp_flags(current_asmdata.CurrAsmList,flags_not_above,blocklabel(entry^.blockid));
          prev_bound:=entry^._high;
          prev_was_range:=true;
        end;

      at_first_label:=false;

      if assigned(entry^.greater) then
        emit_entry(entry^.greater);
    end;

  begin
    cmp_size:=def_cgsize(opsize);
    { subtractions are done on the full 32 or 64 bit register }
    case cmp_size of
      OS_8,OS_16,OS_32,OS_S8,OS_S16,OS_S32:
        sub_size:=OS_32;
      OS_64,OS_S64:
        sub_size:=OS_64;
      else
        Internalerror(2019090902);
    end;

    { signed selector with negative labels: use plain compares instead }
    if with_sign and (min_label<0) then
      begin
        genlinearcmplist(hp);
        exit;
      end;

    { condition flags for "below" and "below or equal", signed or unsigned }
    if with_sign then
      begin
        flags_below:=F_LT;
        flags_not_above:=F_LE;
      end
    else
      begin
        flags_below:=F_CC;
        flags_not_above:=F_LS;
      end;

    prev_bound:=0;
    prev_was_range:=false;
    at_first_label:=true;
    emit_entry(hp);
    { nothing matched }
    cg.a_jmp_always(current_asmdata.CurrAsmList,elselabel);
  end;
{ Emits a table-based dispatch for the case-label tree rooted at hp.

  Code emitted into the current assembler list:
    - subtract min_ from hregister,
    - unless jumptable_no_range is set, jump to elselabel when the biased
      value is (unsigned) above max_-min_,
    - load the 32 bit table slot selected by the biased value, add the table
      address to it and branch to the result.
  The table itself is appended to current_procinfo.aktlocaldata; every slot
  is the difference between a target label and the table label. }
procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64);
  var
    { highest selector value for which a slot has been written so far }
    prev_high: TConstExprInt;
    table_lab: TAsmLabel;
    table_base,slot_index,dest_reg: TRegister;
    table_ref: TReference;
    selector_size: tcgsize;
    table_section: TAsmSectiontype;
    slot_type: taiconst_type;
    on_darwin: boolean;

  { In-order walk over the label tree, appending the table slots to list. }
  procedure emit_slots(list: TAsmList; entry: pcaselabel);
    var
      value: int64;
    begin
      if assigned(entry^.less) then
        emit_slots(list,entry^.less);

      { values between the previous label and this one go to the else-label }
      value:=prev_high.svalue+1;
      while value<=entry^._low.svalue-1 do
        begin
          list.concat(Tai_const.Create_rel_sym(slot_type,table_lab,elselabel));
          inc(value);
        end;

      { one slot per value covered by this label }
      value:=entry^._low.svalue;
      while value<=entry^._high.svalue do
        begin
          list.concat(Tai_const.Create_rel_sym(slot_type,table_lab,blocklabel(entry^.blockid)));
          inc(value);
        end;

      prev_high:=entry^._high;

      if assigned(entry^.greater) then
        emit_slots(list,entry^.greater);
    end;

  begin
    on_darwin:=target_info.system in systems_darwin;

    if on_darwin then
      { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
      slot_type:=aitconst_darwin_dwarf_delta32
    else
      slot_type:=aitconst_32bit;

    prev_high:=min_;
    selector_size:=def_cgsize(opsize);

    { a <= x <= b  <->  unsigned(x-a) <= (b-a) }
    cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,selector_size,aint(min_),hregister);
    if not jumptable_no_range then
      begin
        { case expression greater than max_ => go to the else-label }
        cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,selector_size,OC_A,aint(max_)-aint(min_),hregister,elselabel);
        min_:=0;
      end;

    { local label, in order to avoid going through the GOT }
    current_asmdata.getlabel(table_lab,alt_data);

    { widen the biased selector to address size so it can serve as index }
    slot_index:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
    cg.a_load_reg_reg(current_asmdata.CurrAsmList,selector_size,OS_ADDR,hregister,slot_index);

    { table address }
    reference_reset_symbol(table_ref,table_lab,0,4,[]);
    table_base:=cg.getaddressregister(current_asmdata.CurrAsmList);
    cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,table_ref,table_base);

    { table slot: 32 bit, sign extended, at table_base + (slot_index shl 2) }
    reference_reset_base(table_ref,table_base,0,table_ref.temppos,4,[]);
    table_ref.index:=slot_index;
    table_ref.shiftmode:=SM_LSL;
    table_ref.shiftimm:=2;
    dest_reg:=cg.getaddressregister(current_asmdata.CurrAsmList);
    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,table_ref,dest_reg);

    { slots are relative to the table: add the table address ... }
    cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,table_base,dest_reg);
    { ... and jump }
    current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,dest_reg));

    { now the table itself: pick the section it goes into }
    if target_info.system=system_aarch64_win64 then
      begin
        { On Windows the jump table has to live in the same section as the
          corresponding code: clang generates an ABSOLUTE32 relocation that
          cannot be handled correctly, and armasm64 rejects the difference
          entries when the symbols are located in different sections. }
        table_section:=sec_code;
        new_section(current_procinfo.aktlocaldata,table_section,lower(current_procinfo.procdef.mangledname),getprocalign);
      end
    else
      begin
        if on_darwin then
          { On Mac OS X, dead code stripping ("smart linking") works on
            global symbols: every global/static symbol (one that does not
            start with "L") starts a new "subsection" that the linker drops
            when nothing references that symbol. A jump table placed in the
            rodata section would become part of the data block of the
            previous non-L label there and would be kept or dropped together
            with that block. Therefore the table goes into the code section;
            since aktlocaldata is inserted right after the routine, the table
            ends up in the same subsection as the routine's code. }
          table_section:=sec_code
        else
          table_section:=sec_rodata;
        new_section(current_procinfo.aktlocaldata,table_section,current_procinfo.procdef.mangledname,4);
      end;

    { on Darwin the table is additionally bracketed by ".data_region jt32"
      and ".end_data_region" }
    if on_darwin then
      current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_data_region,'jt32'));

    current_procinfo.aktlocaldata.concat(Tai_label.Create(table_lab));
    emit_slots(current_procinfo.aktlocaldata,hp);

    if on_darwin then
      current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_end_data_region,''));
  end;
{ unit initialization: make taarch64casenode the class stored in ccasenode }
begin
  ccasenode:=taarch64casenode;
end.
|