|
@@ -0,0 +1,1816 @@
|
|
|
|
+{
|
|
|
|
+ Copyright (c) 2014 by Jonas Maebe
|
|
|
|
+
|
|
|
|
+ This unit implements the code generator for AArch64
|
|
|
|
+
|
|
|
|
+ This program is free software; you can redistribute it and/or modify
|
|
|
|
+ it under the terms of the GNU General Public License as published by
|
|
|
|
+ the Free Software Foundation; either version 2 of the License, or
|
|
|
|
+ (at your option) any later version.
|
|
|
|
+
|
|
|
|
+ This program is distributed in the hope that it will be useful,
|
|
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
+ GNU General Public License for more details.
|
|
|
|
+
|
|
|
|
+ You should have received a copy of the GNU General Public License
|
|
|
|
+ along with this program; if not, write to the Free Software
|
|
|
|
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
+
|
|
|
|
+ ****************************************************************************
|
|
|
|
+}
|
|
|
|
+unit cgcpu;
|
|
|
|
+
|
|
|
|
+{$i fpcdefs.inc}
|
|
|
|
+
|
|
|
|
+interface
|
|
|
|
+
|
|
|
|
+ uses
|
|
|
|
+ globtype,parabase,
|
|
|
|
+ cgbase,cgutils,cgobj,
|
|
|
|
+ aasmbase,aasmtai,aasmdata,aasmcpu,
|
|
|
|
+ cpubase,cpuinfo,
|
|
|
|
+ node,symconst,SymType,symdef,
|
|
|
|
+ rgcpu;
|
|
|
|
+
|
|
|
|
+ type
|
|
|
|
+ tcgaarch64=class(tcg)
|
|
|
|
+ protected
|
|
|
|
+ { simplifies "ref" so it can be used with "op". If "ref" can be used
|
|
|
|
+ with a different load/Store operation that has the same meaning as the
|
|
|
|
+ original one, "op" will be replaced with the alternative }
|
|
|
|
+ procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
|
|
|
|
+ { changes register size without adding register allocation info }
|
|
|
|
+ function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
|
|
|
|
+ public
|
|
|
|
+ function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
|
|
|
|
+ procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
|
|
|
|
+ procedure init_register_allocators;override;
|
|
|
|
+ procedure done_register_allocators;override;
|
|
|
|
+ function getmmregister(list:TAsmList;size:tcgsize):tregister;override;
|
|
|
|
+ function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
|
|
|
|
+ procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
|
|
|
|
+ procedure a_call_reg(list:TAsmList;Reg:tregister);override;
|
|
|
|
+ { General purpose instructions }
|
|
|
|
+ procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
|
|
|
|
+ procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
|
|
|
|
+ procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
|
|
|
|
+ procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
|
|
|
|
+ procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
|
|
|
|
+ procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
|
|
|
|
+ procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
|
|
|
|
+ { move instructions }
|
|
|
|
+ procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
|
|
|
|
+ procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
|
|
|
|
+ procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
|
|
|
|
+ procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
|
|
|
|
+ procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
|
|
|
|
+ procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
|
|
|
|
+ procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
|
|
|
|
+ { fpu move instructions (not used, all floating point is vector unit-based) }
|
|
|
|
+ procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
|
|
|
|
+ procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
|
|
|
|
+ procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
|
|
|
|
+ procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
|
|
|
|
+ procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
|
|
|
|
+ procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
|
|
|
|
+ { comparison operations }
|
|
|
|
+ procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
|
|
|
|
+ procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
|
|
|
|
+ procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
|
|
|
|
+ procedure a_jmp_name(list: TAsmList; const s: string);override;
|
|
|
|
+ procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
|
|
|
|
+ procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
|
|
|
|
+ procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
|
|
|
|
+ procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
|
|
|
|
+ procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
|
|
|
|
+ procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
|
|
|
|
+ procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
|
|
|
|
+ procedure g_maybe_got_init(list: TAsmList); override;
|
|
|
|
+ procedure g_restore_registers(list: TAsmList);override;
|
|
|
|
+ procedure g_save_registers(list: TAsmList);override;
|
|
|
|
+ procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
|
|
|
|
+ procedure g_concatcopy_unaligned(list: TAsmList; const source, dest: treference; len: tcgint);override;
|
|
|
|
+ procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len : tcgint);
|
|
|
|
+ procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
|
|
|
|
+ procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
|
|
|
|
+ private
|
|
|
|
+ procedure save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
|
|
|
|
+ procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ procedure create_codegen;
|
|
|
|
+
|
|
|
|
+ const
|
|
|
|
+ TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
|
|
|
|
+ A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
|
|
|
|
+ );
|
|
|
|
+ TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
|
|
|
|
+ A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
|
|
|
|
+ );
|
|
|
|
+ TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
|
|
|
|
+ C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
|
|
|
|
+ );
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+implementation
|
|
|
|
+
|
|
|
|
+ uses
|
|
|
|
+ globals,verbose,systems,cutils,
|
|
|
|
+ paramgr,fmodule,
|
|
|
|
+ symtable,symsym,
|
|
|
|
+ tgobj,
|
|
|
|
+ procinfo,cpupi;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
|
|
|
|
+ var
|
|
|
|
+ href: treference;
|
|
|
|
+ so: tshifterop;
|
|
|
|
+ accesssize: longint;
|
|
|
|
+ begin
|
|
|
|
+ if (ref.base=NR_NO) then
|
|
|
|
+ begin
|
|
|
|
+ if ref.shiftmode<>SM_None then
|
|
|
|
+ internalerror(2014110701);
|
|
|
|
+ ref.base:=ref.index;
|
|
|
|
+ ref.index:=NR_NO;
|
|
|
|
+ end;
|
|
|
|
+ { no abitrary scale factor support (the generic code doesn't set it,
|
|
|
|
+ AArch-specific code shouldn't either) }
|
|
|
|
+ if not(ref.scalefactor in [0,1]) then
|
|
|
|
+ internalerror(2014111002);
|
|
|
|
+
|
|
|
|
+ case simple_ref_type(op,size,oppostfix,ref) of
|
|
|
|
+ sr_simple:
|
|
|
|
+ exit;
|
|
|
|
+ sr_internal_illegal:
|
|
|
|
+ internalerror(2014121702);
|
|
|
|
+ sr_complex:
|
|
|
|
+ { continue } ;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ if assigned(ref.symbol) then
|
|
|
|
+ begin
|
|
|
|
+ { internal "load symbol" instructions should already be valid }
|
|
|
|
+ if assigned(ref.symboldata) or
|
|
|
|
+ (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset]) then
|
|
|
|
+ internalerror(2014110802);
|
|
|
|
+ { no relative symbol support (needed) yet }
|
|
|
|
+ if assigned(ref.relsymbol) then
|
|
|
|
+ internalerror(2014111001);
|
|
|
|
+ { on Darwin: load the address from the GOT. There does not appear to
|
|
|
|
+ be a non-GOT variant. This consists of first loading the address
|
|
|
|
+ of the page containing the GOT entry for this variable, and then
|
|
|
|
+ the address of the entry itself from that page (can be relaxed by
|
|
|
|
+ the linker in case the variable itself can be stored directly in
|
|
|
|
+ the GOT) }
|
|
|
|
+ if target_info.system in systems_darwin then
|
|
|
|
+ begin
|
|
|
|
+ if (preferred_newbasereg=NR_NO) or
|
|
|
|
+ (ref.base=preferred_newbasereg) or
|
|
|
|
+ (ref.index=preferred_newbasereg) then
|
|
|
|
+ preferred_newbasereg:=getaddressregister(list);
|
|
|
|
+ { load the GOT page }
|
|
|
|
+ reference_reset_symbol(href,ref.symbol,0,8);
|
|
|
|
+ href.refaddr:=addr_gotpage;
|
|
|
|
+ list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
|
|
|
|
+ { load the GOT entry (= address of the variable) }
|
|
|
|
+ reference_reset_base(href,preferred_newbasereg,0,sizeof(pint));
|
|
|
|
+ href.symbol:=ref.symbol;
|
|
|
|
+ href.refaddr:=addr_gotpageoffset;
|
|
|
|
+ { use a_load_ref_reg() rather than directly encoding the LDR,
|
|
|
|
+ so that we'll check the validity of the reference }
|
|
|
|
+ a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
|
|
|
|
+ { set as new base register }
|
|
|
|
+ if ref.base=NR_NO then
|
|
|
|
+ ref.base:=preferred_newbasereg
|
|
|
|
+ else if ref.index=NR_NO then
|
|
|
|
+ ref.index:=preferred_newbasereg
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ { make sure it's valid in case ref.base is SP -> make it
|
|
|
|
+ the second operand}
|
|
|
|
+ a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
|
|
|
|
+ ref.base:=preferred_newbasereg
|
|
|
|
+ end;
|
|
|
|
+ ref.symbol:=nil;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ { todo }
|
|
|
|
+ internalerror(2014111003);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ { base & index }
|
|
|
|
+ if (ref.base<>NR_NO) and
|
|
|
|
+ (ref.index<>NR_NO) then
|
|
|
|
+ begin
|
|
|
|
+ case op of
|
|
|
|
+ A_LDR, A_STR:
|
|
|
|
+ begin
|
|
|
|
+ if (ref.shiftmode=SM_None) and
|
|
|
|
+ (ref.shiftimm<>0) then
|
|
|
|
+ internalerror(2014110805);
|
|
|
|
+ { wrong shift? (possible in case of something like
|
|
|
|
+ array_of_2byte_rec[x].bytefield -> shift will be set 1, but
|
|
|
|
+ the final load is a 1 byte -> can't use shift after all }
|
|
|
|
+ if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
|
|
|
|
+ ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
|
|
|
|
+ (ref.offset<>0)) then
|
|
|
|
+ begin
|
|
|
|
+ if preferred_newbasereg=NR_NO then
|
|
|
|
+ preferred_newbasereg:=getaddressregister(list);
|
|
|
|
+ { "add" supports a superset of the shift modes supported by
|
|
|
|
+ load/store instructions }
|
|
|
|
+ shifterop_reset(so);
|
|
|
|
+ so.shiftmode:=ref.shiftmode;
|
|
|
|
+ so.shiftimm:=ref.shiftimm;
|
|
|
|
+ list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
|
|
|
|
+ reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
|
|
|
|
+ { possibly still an invalid offset -> fall through }
|
|
|
|
+ end
|
|
|
|
+ else if ref.offset<>0 then
|
|
|
|
+ begin
|
|
|
|
+ if (preferred_newbasereg=NR_NO) or
|
|
|
|
+ { we keep ref.index, so it must not be overwritten }
|
|
|
|
+ (ref.index=preferred_newbasereg) then
|
|
|
|
+ preferred_newbasereg:=getaddressregister(list);
|
|
|
|
+ { add to the base and not to the index, because the index
|
|
|
|
+ may be scaled; this works even if the base is SP }
|
|
|
|
+ a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
|
|
|
|
+ ref.offset:=0;
|
|
|
|
+ ref.base:=preferred_newbasereg;
|
|
|
|
+ { finished }
|
|
|
|
+ exit;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ { valid -> exit }
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+ { todo }
|
|
|
|
+ A_LD1,A_LD2,A_LD3,A_LD4,
|
|
|
|
+ A_ST1,A_ST2,A_ST3,A_ST4:
|
|
|
|
+ internalerror(2014110704);
|
|
|
|
+ { these don't support base+index }
|
|
|
|
+ A_LDUR,A_STUR,
|
|
|
|
+ A_LDP,A_STP:
|
|
|
|
+ begin
|
|
|
|
+ { these either don't support pre-/post-indexing, or don't
|
|
|
|
+ support it with base+index }
|
|
|
|
+ if ref.addressmode<>AM_OFFSET then
|
|
|
|
+ internalerror(2014110911);
|
|
|
|
+ if preferred_newbasereg=NR_NO then
|
|
|
|
+ preferred_newbasereg:=getaddressregister(list);
|
|
|
|
+ if ref.shiftmode<>SM_None then
|
|
|
|
+ begin
|
|
|
|
+ { "add" supports a superset of the shift modes supported by
|
|
|
|
+ load/store instructions }
|
|
|
|
+ shifterop_reset(so);
|
|
|
|
+ so.shiftmode:=ref.shiftmode;
|
|
|
|
+ so.shiftimm:=ref.shiftimm;
|
|
|
|
+ list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
|
|
|
|
+ reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
|
|
|
|
+ { fall through to the handling of base + offset, since the
|
|
|
|
+ offset may still be too big }
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ internalerror(2014110901);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ { base + offset }
|
|
|
|
+ if ref.base<>NR_NO then
|
|
|
|
+ begin
|
|
|
|
+ { valid offset for LDUR/STUR -> use that }
|
|
|
|
+ if (ref.addressmode=AM_OFFSET) and
|
|
|
|
+ (op in [A_LDR,A_STR]) and
|
|
|
|
+ (ref.offset>=-256) and
|
|
|
|
+ (ref.offset<=255) then
|
|
|
|
+ begin
|
|
|
|
+ if op=A_LDR then
|
|
|
|
+ op:=A_LDUR
|
|
|
|
+ else
|
|
|
|
+ op:=A_STUR
|
|
|
|
+ end
|
|
|
|
+ { if it's not a valid LDUR/STUR, use LDR/STR }
|
|
|
|
+ else if (op in [A_LDUR,A_STUR]) and
|
|
|
|
+ ((ref.offset<-256) or
|
|
|
|
+ (ref.offset>255) or
|
|
|
|
+ (ref.addressmode<>AM_OFFSET)) then
|
|
|
|
+ begin
|
|
|
|
+ if op=A_LDUR then
|
|
|
|
+ op:=A_LDR
|
|
|
|
+ else
|
|
|
|
+ op:=A_STR
|
|
|
|
+ end;
|
|
|
|
+ case op of
|
|
|
|
+ A_LDR,A_STR:
|
|
|
|
+ begin
|
|
|
|
+ case ref.addressmode of
|
|
|
|
+ AM_PREINDEXED:
|
|
|
|
+ begin
|
|
|
|
+ { since the loaded/stored register cannot be the same
|
|
|
|
+ as the base register, we can safely add the
|
|
|
|
+ offset to the base if it doesn't fit}
|
|
|
|
+ if (ref.offset<-256) or
|
|
|
|
+ (ref.offset>255) then
|
|
|
|
+ begin
|
|
|
|
+ a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
|
|
|
|
+ ref.offset:=0;
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ AM_POSTINDEXED:
|
|
|
|
+ begin
|
|
|
|
+ { cannot emulate post-indexing if we have to fold the
|
|
|
|
+ offset into the base register }
|
|
|
|
+ if (ref.offset<-256) or
|
|
|
|
+ (ref.offset>255) then
|
|
|
|
+ internalerror(2014110909);
|
|
|
|
+ { ok }
|
|
|
|
+ end;
|
|
|
|
+ AM_OFFSET:
|
|
|
|
+ begin
|
|
|
|
+ { unsupported offset -> fold into base register }
|
|
|
|
+ accesssize:=1 shl tcgsizep2size[size];
|
|
|
|
+ if (ref.offset<0) or
|
|
|
|
+ (ref.offset>(((1 shl 12)-1)*accesssize)) or
|
|
|
|
+ ((ref.offset mod accesssize)<>0) then
|
|
|
|
+ begin
|
|
|
|
+ if preferred_newbasereg=NR_NO then
|
|
|
|
+ preferred_newbasereg:=getaddressregister(list);
|
|
|
|
+ { can we split the offset beween an
|
|
|
|
+ "add/sub (imm12 shl 12)" and the load (also an
|
|
|
|
+ imm12)?
|
|
|
|
+ -- the offset from the load will always be added,
|
|
|
|
+ that's why the lower bound has a smaller range
|
|
|
|
+ than the upper bound; it must also be a multiple
|
|
|
|
+ of the access size }
|
|
|
|
+ if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
|
|
|
|
+ (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
|
|
|
|
+ ((ref.offset mod accesssize)=0) then
|
|
|
|
+ begin
|
|
|
|
+ a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
|
|
|
|
+ ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
|
|
|
|
+ ref.offset:=0;
|
|
|
|
+ end;
|
|
|
|
+ reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
|
|
|
|
+ end;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ internalerror(2014110904);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ A_LDP,A_STP:
|
|
|
|
+ begin
|
|
|
|
+ { unsupported offset -> fold into base register (these
|
|
|
|
+ instructions support all addressmodes) }
|
|
|
|
+ if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
|
|
|
|
+ (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
|
|
|
|
+ begin
|
|
|
|
+ case ref.addressmode of
|
|
|
|
+ AM_POSTINDEXED:
|
|
|
|
+ { don't emulate post-indexing if we have to fold the
|
|
|
|
+ offset into the base register }
|
|
|
|
+ internalerror(2014110910);
|
|
|
|
+ AM_PREINDEXED:
|
|
|
|
+ { this means the offset must be added to the current
|
|
|
|
+ base register }
|
|
|
|
+ preferred_newbasereg:=ref.base;
|
|
|
|
+ AM_OFFSET:
|
|
|
|
+ if preferred_newbasereg=NR_NO then
|
|
|
|
+ preferred_newbasereg:=getaddressregister(list);
|
|
|
|
+ end;
|
|
|
|
+ a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
|
|
|
|
+ reference_reset_base(ref,preferred_newbasereg,0,ref.alignment);
|
|
|
|
+ end
|
|
|
|
+ end;
|
|
|
|
+ A_LDUR,A_STUR:
|
|
|
|
+ begin
|
|
|
|
+ { valid, checked above }
|
|
|
|
+ end;
|
|
|
|
+ { todo }
|
|
|
|
+ A_LD1,A_LD2,A_LD3,A_LD4,
|
|
|
|
+ A_ST1,A_ST2,A_ST3,A_ST4:
|
|
|
|
+ internalerror(2014110908);
|
|
|
|
+ else
|
|
|
|
+ internalerror(2014110708);
|
|
|
|
+ end;
|
|
|
|
+ { done }
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ { only an offset -> change to base (+ offset 0) }
|
|
|
|
+ if preferred_newbasereg=NR_NO then
|
|
|
|
+ preferred_newbasereg:=getaddressregister(list);
|
|
|
|
+ a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
|
|
|
|
+ reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset));
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
|
|
|
|
+ var
|
|
|
|
+ subreg:Tsubregister;
|
|
|
|
+ begin
|
|
|
|
+ subreg:=cgsize2subreg(getregtype(reg),size);
|
|
|
|
+ result:=reg;
|
|
|
|
+ setsubreg(result,subreg);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
|
|
|
|
+ begin
|
|
|
|
+ internalerror(2014122110);
|
|
|
|
+ { squash warning }
|
|
|
|
+ result:=NR_NO;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
|
|
|
|
+ begin
|
|
|
|
+ make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
|
|
|
|
+ list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
|
|
|
|
+ result:=ref;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
|
|
|
|
+ var
|
|
|
|
+ instr: taicpu;
|
|
|
|
+ so: tshifterop;
|
|
|
|
+ hadtmpreg: boolean;
|
|
|
|
+ begin
|
|
|
|
+ { imm12 }
|
|
|
|
+ if (a>=0) and
|
|
|
|
+ (a<=((1 shl 12)-1)) then
|
|
|
|
+ if usedest then
|
|
|
|
+ instr:=taicpu.op_reg_reg_const(op,dst,src,a)
|
|
|
|
+ else
|
|
|
|
+ instr:=taicpu.op_reg_const(op,src,a)
|
|
|
|
+ { imm12 lsl 12 }
|
|
|
|
+ else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
|
|
|
|
+ begin
|
|
|
|
+ so.shiftmode:=SM_LSL;
|
|
|
|
+ so.shiftimm:=12;
|
|
|
|
+ if usedest then
|
|
|
|
+ instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
|
|
|
|
+ else
|
|
|
|
+ instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ { todo: other possible optimizations (e.g. load 16 bit constant in
|
|
|
|
+ register and then add/sub/cmp/cmn shifted the rest) }
|
|
|
|
+ if tmpreg=NR_NO then
|
|
|
|
+ begin
|
|
|
|
+ hadtmpreg:=false;
|
|
|
|
+ tmpreg:=getintregister(list,size);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ hadtmpreg:=true;
|
|
|
|
+ getcpuregister(list,tmpreg);
|
|
|
|
+ end;
|
|
|
|
+ a_load_const_reg(list,size,a,tmpreg);
|
|
|
|
+ if usedest then
|
|
|
|
+ instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
|
|
|
|
+ else
|
|
|
|
+ instr:=taicpu.op_reg_reg(op,src,tmpreg);
|
|
|
|
+ if hadtmpreg then
|
|
|
|
+ ungetcpuregister(list,tmpreg);
|
|
|
|
+ end;
|
|
|
|
+ if setflags then
|
|
|
|
+ setoppostfix(instr,PF_S);
|
|
|
|
+ list.concat(instr);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+{****************************************************************************
|
|
|
|
+ Assembler code
|
|
|
|
+****************************************************************************}
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.init_register_allocators;
|
|
|
|
+ begin
|
|
|
|
+ inherited init_register_allocators;
|
|
|
|
+
|
|
|
|
+ rg[R_INTREGISTER]:=Trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
|
|
|
|
+ [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
|
|
|
|
+ RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
|
|
|
|
+ RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
|
|
|
|
+ { maybe we can enable this in the future for leaf functions (it's
|
|
|
|
+ the frame pointer)
|
|
|
|
+ ,RS_X29 }],
|
|
|
|
+ first_int_imreg,[]);
|
|
|
|
+
|
|
|
|
+ { needs at least one element for rgobj not to crash }
|
|
|
|
+ rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
|
|
|
|
+ [RS_B0],first_fpu_imreg,[]);
|
|
|
|
+
|
|
|
|
+ rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
|
|
|
|
+ [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
|
|
|
|
+ RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
|
|
|
|
+ RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
|
|
|
|
+ RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
|
|
|
|
+ first_mm_imreg,[]);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.done_register_allocators;
|
|
|
|
+ begin
|
|
|
|
+ rg[R_INTREGISTER].free;
|
|
|
|
+ rg[R_FPUREGISTER].free;
|
|
|
|
+ rg[R_MMREGISTER].free;
|
|
|
|
+ inherited done_register_allocators;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
|
|
|
|
+ begin
|
|
|
|
+ case size of
|
|
|
|
+ OS_F32:
|
|
|
|
+ result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
|
|
|
|
+ OS_F64:
|
|
|
|
+ result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
|
|
|
|
+ else
|
|
|
|
+ internalerror(2014102701);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
|
|
|
|
+ begin
|
|
|
|
+ if not weak then
|
|
|
|
+ list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s)))
|
|
|
|
+ else
|
|
|
|
+ list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s)));
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_reg(A_BLR,reg));
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ {********************** load instructions ********************}
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
|
|
|
|
+ var
|
|
|
|
+ preva: tcgint;
|
|
|
|
+ opc: tasmop;
|
|
|
|
+ shift,maxshift: byte;
|
|
|
|
+ so: tshifterop;
|
|
|
|
+ reginited: boolean;
|
|
|
|
+ mask: tcgint;
|
|
|
|
+ begin
|
|
|
|
+ { if we load a value into a 32 bit register, it is automatically
|
|
|
|
+ zero-extended to 64 bit }
|
|
|
|
+ if (high(a)=0) and
|
|
|
|
+ (size in [OS_64,OS_S64]) then
|
|
|
|
+ begin
|
|
|
|
+ size:=OS_32;
|
|
|
|
+ reg:=makeregsize(reg,size);
|
|
|
|
+ end;
|
|
|
|
+ { values <= 32 bit are stored in a 32 bit register }
|
|
|
|
+ if not(size in [OS_64,OS_S64]) then
|
|
|
|
+ a:=cardinal(a);
|
|
|
|
+
|
|
|
|
+ if size in [OS_64,OS_S64] then
|
|
|
|
+ begin
|
|
|
|
+ mask:=-1;
|
|
|
|
+ maxshift:=64;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ mask:=$ffffffff;
|
|
|
|
+ maxshift:=32;
|
|
|
|
+ end;
|
|
|
|
+ { single movn enough? (to be extended) }
|
|
|
|
+ shift:=16;
|
|
|
|
+ preva:=a;
|
|
|
|
+ repeat
|
|
|
|
+ if (a shr shift)=(mask shr shift) then
|
|
|
|
+ begin
|
|
|
|
+ if shift=16 then
|
|
|
|
+ list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ shifterop_reset(so);
|
|
|
|
+ so.shiftmode:=SM_LSL;
|
|
|
|
+ so.shiftimm:=shift-16;
|
|
|
|
+ list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
|
|
|
|
+ end;
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+ { only try the next 16 bits if the current one is all 1 bits, since
|
|
|
|
+ the movn will set all lower bits to 1 }
|
|
|
|
+ if word(a shr (shift-16))<>$ffff then
|
|
|
|
+ break;
|
|
|
|
+ inc(shift,16);
|
|
|
|
+ until shift=maxshift;
|
|
|
|
+ reginited:=false;
|
|
|
|
+ shift:=0;
|
|
|
|
+ { can be optimized later to use more movn }
|
|
|
|
+ repeat
|
|
|
|
+ { leftover is shifterconst? (don't check if we can represent it just
|
|
|
|
+ as effectively with movz/movk, as this check is expensive) }
|
|
|
|
+ if ((shift<tcgsize2size[size]*(8 div 2)) and
|
|
|
|
+ (word(a)<>0) and
|
|
|
|
+ ((a shr 16)<>0)) and
|
|
|
|
+ is_shifter_const(a shl shift,size) then
|
|
|
|
+ begin
|
|
|
|
+ if reginited then
|
|
|
|
+ list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
|
|
|
|
+ else
|
|
|
|
+ list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+ { set all 16 bit parts <> 0 }
|
|
|
|
+ if (word(a)<>0) or
|
|
|
|
+ ((shift=0) and
|
|
|
|
+ (a=0)) then
|
|
|
|
+ if shift=0 then
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
|
|
|
|
+ reginited:=true;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ shifterop_reset(so);
|
|
|
|
+ so.shiftmode:=SM_LSL;
|
|
|
|
+ so.shiftimm:=shift;
|
|
|
|
+ if not reginited then
|
|
|
|
+ begin
|
|
|
|
+ opc:=A_MOVZ;
|
|
|
|
+ reginited:=true;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ opc:=A_MOVK;
|
|
|
|
+ list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
|
|
|
|
+ end;
|
|
|
|
+ preva:=a;
|
|
|
|
+ a:=a shr 16;
|
|
|
|
+ inc(shift,16);
|
|
|
|
+ until word(preva)=preva;
|
|
|
|
+ if not reginited then
|
|
|
|
+ internalerror(2014102702);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
|
|
|
|
+ var
|
|
|
|
+ oppostfix:toppostfix;
|
|
|
|
+ hreg: tregister;
|
|
|
|
+ begin
|
|
|
|
+ if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
|
|
|
|
+ fromsize:=tosize
|
|
|
|
+ { have a 32 bit register but need a 64 bit one? }
|
|
|
|
+ else if tosize in [OS_64,OS_S64] then
|
|
|
|
+ begin
|
|
|
|
+ { sign extend if necessary }
|
|
|
|
+ if fromsize in [OS_S8,OS_S16,OS_S32] then
|
|
|
|
+ begin
|
|
|
|
+ { can't overwrite reg, may be a constant reg }
|
|
|
|
+ hreg:=getintregister(list,tosize);
|
|
|
|
+ a_load_reg_reg(list,fromsize,tosize,reg,hreg);
|
|
|
|
+ reg:=hreg;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ { top 32 bit are zero by default }
|
|
|
|
+ reg:=makeregsize(reg,OS_64);
|
|
|
|
+ fromsize:=tosize;
|
|
|
|
+ end;
|
|
|
|
+ if (ref.alignment<>0) and
|
|
|
|
+ (ref.alignment<tcgsize2size[tosize]) then
|
|
|
|
+ begin
|
|
|
|
+ a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ case tosize of
|
|
|
|
+ { signed integer registers }
|
|
|
|
+ OS_8,
|
|
|
|
+ OS_S8:
|
|
|
|
+ oppostfix:=PF_B;
|
|
|
|
+ OS_16,
|
|
|
|
+ OS_S16:
|
|
|
|
+ oppostfix:=PF_H;
|
|
|
|
+ OS_32,
|
|
|
|
+ OS_S32,
|
|
|
|
+ OS_64,
|
|
|
|
+ OS_S64:
|
|
|
|
+ oppostfix:=PF_None;
|
|
|
|
+ else
|
|
|
|
+ InternalError(200308299);
|
|
|
|
+ end;
|
|
|
|
+ handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
|
|
|
|
+ var
|
|
|
|
+ oppostfix:toppostfix;
|
|
|
|
+ begin
|
|
|
|
+ if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
|
|
|
|
+ fromsize:=tosize;
|
|
|
|
+ { ensure that all bits of the 32/64 register are always correctly set:
|
|
|
|
+ * default behaviour is always to zero-extend to the entire (64 bit)
|
|
|
|
+ register -> unsigned 8/16/32 bit loads only exist with a 32 bit
|
|
|
|
+ target register, as the upper 32 bit will be zeroed implicitly
|
|
|
|
+ -> always make target register 32 bit
|
|
|
|
+ * signed loads exist both with 32 and 64 bit target registers,
|
|
|
|
+ depending on whether the value should be sign extended to 32 or
|
|
|
|
+ to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
|
|
|
|
+ corresponding 64 bit register are again zeroed) -> no need to
|
|
|
|
+ change anything (we only have 32 and 64 bit registers), except that
|
|
|
|
+ when loading an OS_S32 to a 32 bit register, we don't need/can't
|
|
|
|
+ use sign extension
|
|
|
|
+ }
|
|
|
|
+ if fromsize in [OS_8,OS_16,OS_32] then
|
|
|
|
+ reg:=makeregsize(reg,OS_32);
|
|
|
|
+ if (ref.alignment<>0) and
|
|
|
|
+ (ref.alignment<tcgsize2size[fromsize]) then
|
|
|
|
+ begin
|
|
|
|
+ a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+ case fromsize of
|
|
|
|
+ { signed integer registers }
|
|
|
|
+ OS_8:
|
|
|
|
+ oppostfix:=PF_B;
|
|
|
|
+ OS_S8:
|
|
|
|
+ oppostfix:=PF_SB;
|
|
|
|
+ OS_16:
|
|
|
|
+ oppostfix:=PF_H;
|
|
|
|
+ OS_S16:
|
|
|
|
+ oppostfix:=PF_SH;
|
|
|
|
+ OS_S32:
|
|
|
|
+ if getsubreg(reg)=R_SUBD then
|
|
|
|
+ oppostfix:=PF_NONE
|
|
|
|
+ else
|
|
|
|
+ oppostfix:=PF_SW;
|
|
|
|
+ OS_32,
|
|
|
|
+ OS_64,
|
|
|
|
+ OS_S64:
|
|
|
|
+ oppostfix:=PF_None;
|
|
|
|
+ else
|
|
|
|
+ InternalError(200308297);
|
|
|
|
+ end;
|
|
|
|
+ handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
|
|
|
|
+
|
|
|
|
+ { clear upper 16 bits if the value was negative }
|
|
|
|
+ if (fromsize=OS_S8) and (tosize=OS_16) then
|
|
|
|
+ a_load_reg_reg(list,fromsize,tosize,reg,reg);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
|
|
|
|
+ var
|
|
|
|
+ href: treference;
|
|
|
|
+ hreg1, hreg2, tmpreg: tregister;
|
|
|
|
+ begin
|
|
|
|
+ if fromsize in [OS_64,OS_S64] then
|
|
|
|
+ begin
|
|
|
|
+ { split into two 32 bit loads }
|
|
|
|
+ hreg1:=makeregsize(register,OS_32);
|
|
|
|
+ hreg2:=getintregister(list,OS_32);
|
|
|
|
+ if target_info.endian=endian_big then
|
|
|
|
+ begin
|
|
|
|
+ tmpreg:=hreg1;
|
|
|
|
+ hreg1:=hreg2;
|
|
|
|
+ hreg2:=tmpreg;
|
|
|
|
+ end;
|
|
|
|
+ { can we use LDP? }
|
|
|
|
+ if (ref.alignment=4) and
|
|
|
|
+ (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
|
|
|
|
+ list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
|
|
|
|
+ href:=ref;
|
|
|
|
+ inc(href.offset,4);
|
|
|
|
+ a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
|
|
|
|
+ end;
|
|
|
|
+ list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ inherited;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
|
|
|
|
+ var
|
|
|
|
+ instr: taicpu;
|
|
|
|
+ begin
|
|
|
|
+ { we use both 32 and 64 bit registers -> insert conversion when when
|
|
|
|
+ we have to truncate/sign extend inside the (32 or 64 bit) register
|
|
|
|
+ holding the value, and when we sign extend from a 32 to a 64 bit
|
|
|
|
+ register }
|
|
|
|
+ if ((tcgsize2size[fromsize]>tcgsize2size[tosize]) and
|
|
|
|
+ not(tosize in [OS_32,OS_S32])) or
|
|
|
|
+ ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
|
|
|
|
+ (fromsize<>tosize) and
|
|
|
|
+ not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
|
|
|
|
+ ((fromsize in [OS_S8,OS_S16,OS_S32]) and
|
|
|
|
+ (tosize in [OS_64,OS_S64])) or
|
|
|
|
+ { needs to mask out the sign in the top 16 bits }
|
|
|
|
+ ((fromsize=OS_S8) and
|
|
|
|
+ (tosize=OS_16)) then
|
|
|
|
+ begin
|
|
|
|
+ case tosize of
|
|
|
|
+ OS_8:
|
|
|
|
+ list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
|
|
|
|
+ OS_16:
|
|
|
|
+ list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
|
|
|
|
+ OS_S8:
|
|
|
|
+ list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
|
|
|
|
+ OS_S16:
|
|
|
|
+ list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
|
|
|
|
+ OS_64,
|
|
|
|
+ OS_S64:
|
|
|
|
+ list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
|
|
|
|
+ else
|
|
|
|
+ internalerror(2002090901);
|
|
|
|
+ end;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ { 32 -> 32 bit move implies zero extension (sign extensions have
|
|
|
|
+ been handled above) -> also use for 32 <-> 64 bit moves }
|
|
|
|
+ if not(fromsize in [OS_64,OS_S64]) or
|
|
|
|
+ not(tosize in [OS_64,OS_S64]) then
|
|
|
|
+ instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
|
|
|
|
+ else
|
|
|
|
+ instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
|
|
|
|
+ list.Concat(instr);
|
|
|
|
+ { Notify the register allocator that we have written a move instruction so
|
|
|
|
+ it can try to eliminate it. }
|
|
|
|
+ add_move_instruction(instr);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
|
|
|
|
+ var
|
|
|
|
+ href: treference;
|
|
|
|
+ so: tshifterop;
|
|
|
|
+ op: tasmop;
|
|
|
|
+ begin
|
|
|
|
+ op:=A_LDR;
|
|
|
|
+ href:=ref;
|
|
|
|
+ { simplify as if we're going to perform a regular 64 bit load, using
|
|
|
|
+ "r" as the new base register if possible/necessary }
|
|
|
|
+ make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
|
|
|
|
+ { load literal? }
|
|
|
|
+ if assigned(href.symbol) then
|
|
|
|
+ begin
|
|
|
|
+ if (href.base<>NR_NO) or
|
|
|
|
+ (href.index<>NR_NO) or
|
|
|
|
+ not assigned(href.symboldata) then
|
|
|
|
+ internalerror(2014110912);
|
|
|
|
+ list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ if href.index<>NR_NO then
|
|
|
|
+ begin
|
|
|
|
+ if href.shiftmode<>SM_None then
|
|
|
|
+ begin
|
|
|
|
+ { "add" supports a supperset of the shift modes supported by
|
|
|
|
+ load/store instructions }
|
|
|
|
+ shifterop_reset(so);
|
|
|
|
+ so.shiftmode:=href.shiftmode;
|
|
|
|
+ so.shiftimm:=href.shiftimm;
|
|
|
|
+ list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
|
|
|
|
+ end
|
|
|
|
+ else if href.offset<>0 then
|
|
|
|
+ a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
|
|
|
|
+ else
|
|
|
|
+ a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
|
|
|
|
+ begin
|
|
|
|
+ internalerror(2014122107)
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
|
|
|
|
+ begin
|
|
|
|
+ internalerror(2014122108)
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
|
|
|
|
+ begin
|
|
|
|
+ internalerror(2014122109)
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
|
|
|
|
+ var
|
|
|
|
+ instr: taicpu;
|
|
|
|
+ begin
|
|
|
|
+ if assigned(shuffle) and
|
|
|
|
+ not shufflescalar(shuffle) then
|
|
|
|
+ internalerror(2014122104);
|
|
|
|
+ if fromsize=tosize then
|
|
|
|
+ begin
|
|
|
|
+ instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
|
|
|
|
+ { Notify the register allocator that we have written a move
|
|
|
|
+ instruction so it can try to eliminate it. }
|
|
|
|
+ add_move_instruction(instr);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ if (reg_cgsize(reg1)<>fromsize) or
|
|
|
|
+ (reg_cgsize(reg2)<>tosize) then
|
|
|
|
+ internalerror(2014110913);
|
|
|
|
+ instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
|
|
|
|
+ end;
|
|
|
|
+ list.Concat(instr);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
|
|
|
|
+ var
|
|
|
|
+ tmpreg: tregister;
|
|
|
|
+ begin
|
|
|
|
+ if assigned(shuffle) and
|
|
|
|
+ not shufflescalar(shuffle) then
|
|
|
|
+ internalerror(2014122105);
|
|
|
|
+ tmpreg:=NR_NO;
|
|
|
|
+ if (fromsize<>tosize) then
|
|
|
|
+ begin
|
|
|
|
+ tmpreg:=reg;
|
|
|
|
+ reg:=getmmregister(list,fromsize);
|
|
|
|
+ end;
|
|
|
|
+ handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
|
|
|
|
+ if (fromsize<>tosize) then
|
|
|
|
+ a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
|
|
|
|
+ var
|
|
|
|
+ tmpreg: tregister;
|
|
|
|
+ begin
|
|
|
|
+ if assigned(shuffle) and
|
|
|
|
+ not shufflescalar(shuffle) then
|
|
|
|
+ internalerror(2014122106);
|
|
|
|
+ if (fromsize<>tosize) then
|
|
|
|
+ begin
|
|
|
|
+ tmpreg:=getmmregister(list,tosize);
|
|
|
|
+ a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
|
|
|
|
+ reg:=tmpreg;
|
|
|
|
+ end;
|
|
|
|
+ handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
|
|
|
|
+ var
|
|
|
|
+ href: treference;
|
|
|
|
+ hreg1, hreg2, tmpreg: tregister;
|
|
|
|
+ begin
|
|
|
|
+ if fromsize in [OS_64,OS_S64] then
|
|
|
|
+ begin
|
|
|
|
+ { split into two 32 bit stores }
|
|
|
|
+ hreg1:=makeregsize(register,OS_32);
|
|
|
|
+ hreg2:=getintregister(list,OS_32);
|
|
|
|
+ a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
|
|
|
|
+ if target_info.endian=endian_big then
|
|
|
|
+ begin
|
|
|
|
+ tmpreg:=hreg1;
|
|
|
|
+ hreg1:=hreg2;
|
|
|
|
+ hreg2:=tmpreg;
|
|
|
|
+ end;
|
|
|
|
+ { can we use STP? }
|
|
|
|
+ if (ref.alignment=4) and
|
|
|
|
+ (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
|
|
|
|
+ list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
|
|
|
|
+ href:=ref;
|
|
|
|
+ inc(href.offset,4);
|
|
|
|
+ a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
|
|
|
|
+ end;
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ inherited;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
|
|
|
|
+ const
|
|
|
|
+ overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
|
|
|
|
+ begin
|
|
|
|
+ if (op in overflowops) and
|
|
|
|
+ (size in [OS_8,OS_S8,OS_16,OS_S16]) then
|
|
|
|
+ a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
|
|
|
|
+ begin
|
|
|
|
+ optimize_op_const(size,op,a);
|
|
|
|
+ case op of
|
|
|
|
+ OP_NONE:
|
|
|
|
+ exit;
|
|
|
|
+ OP_MOVE:
|
|
|
|
+ a_load_const_reg(list,size,a,reg);
|
|
|
|
+ OP_NEG,OP_NOT:
|
|
|
|
+ internalerror(200306011);
|
|
|
|
+ else
|
|
|
|
+ a_op_const_reg_reg(list,op,size,a,reg,reg);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
|
|
|
|
+ begin
|
|
|
|
+ Case op of
|
|
|
|
+ OP_NEG,
|
|
|
|
+ OP_NOT:
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
|
|
|
|
+ maybeadjustresult(list,op,size,dst);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ a_op_reg_reg_reg(list,op,size,src,dst,dst);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
|
|
|
|
+ var
|
|
|
|
+ l: tlocation;
|
|
|
|
+ begin
|
|
|
|
+ a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
|
|
|
|
+ var
|
|
|
|
+ hreg: tregister;
|
|
|
|
+ begin
|
|
|
|
+ { no ROLV opcode... }
|
|
|
|
+ if op=OP_ROL then
|
|
|
|
+ begin
|
|
|
|
+ case size of
|
|
|
|
+ OS_32,OS_S32,
|
|
|
|
+ OS_64,OS_S64:
|
|
|
|
+ begin
|
|
|
|
+ hreg:=getintregister(list,size);
|
|
|
|
+ a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
|
|
|
|
+ a_op_reg_reg(list,OP_SUB,size,src1,hreg);
|
|
|
|
+ a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ internalerror(2014111005);
|
|
|
|
+ end;
|
|
|
|
+ end
|
|
|
|
+ else if (op=OP_ROR) and
|
|
|
|
+ not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
|
|
|
|
+ internalerror(2014111006);
|
|
|
|
+ if TOpCG2AsmOpReg[op]=A_NONE then
|
|
|
|
+ internalerror(2014111007);
|
|
|
|
+ list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
|
|
|
|
+ maybeadjustresult(list,op,size,dst);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
|
|
|
|
+ var
|
|
|
|
+ shiftcountmask: longint;
|
|
|
|
+ constreg: tregister;
|
|
|
|
+ begin
|
|
|
|
+ { add/sub instructions have only positive immediate operands }
|
|
|
|
+ if (op in [OP_ADD,OP_SUB]) and
|
|
|
|
+ (a<0) then
|
|
|
|
+ begin
|
|
|
|
+ if op=OP_ADD then
|
|
|
|
+ op:=op_SUB
|
|
|
|
+ else
|
|
|
|
+ op:=OP_ADD;
|
|
|
|
+ { avoid range/overflow error in case a = low(tcgint) }
|
|
|
|
+{$push}{$r-}{$q-}
|
|
|
|
+ a:=-a;
|
|
|
|
+{$pop}
|
|
|
|
+ end;
|
|
|
|
+ ovloc.loc:=LOC_VOID;
|
|
|
|
+ optimize_op_const(size,op,a);
|
|
|
|
+ case op of
|
|
|
|
+ OP_NONE:
|
|
|
|
+ begin
|
|
|
|
+ a_load_reg_reg(list,size,size,src,dst);
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+ OP_MOVE:
|
|
|
|
+ begin
|
|
|
|
+ a_load_const_reg(list,size,a,dst);
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ case op of
|
|
|
|
+ OP_ADD,
|
|
|
|
+ OP_SUB:
|
|
|
|
+ begin
|
|
|
|
+ handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
|
|
|
|
+ { on a 64 bit target, overflows with smaller data types
|
|
|
|
+ are handled via range errors }
|
|
|
|
+ if setflags and
|
|
|
|
+ (size in [OS_64,OS_S64]) then
|
|
|
|
+ begin
|
|
|
|
+ location_reset(ovloc,LOC_FLAGS,OS_8);
|
|
|
|
+ if size=OS_64 then
|
|
|
|
+ if op=OP_ADD then
|
|
|
|
+ ovloc.resflags:=F_CS
|
|
|
|
+ else
|
|
|
|
+ ovloc.resflags:=F_CC
|
|
|
|
+ else
|
|
|
|
+ ovloc.resflags:=F_VS;
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ OP_OR,
|
|
|
|
+ OP_AND,
|
|
|
|
+ OP_XOR:
|
|
|
|
+ begin
|
|
|
|
+ if not(size in [OS_64,OS_S64]) then
|
|
|
|
+ a:=cardinal(a);
|
|
|
|
+ if is_shifter_const(a,size) then
|
|
|
|
+ list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ constreg:=getintregister(list,size);
|
|
|
|
+ a_load_const_reg(list,size,a,constreg);
|
|
|
|
+ a_op_reg_reg_reg(list,op,size,constreg,src,dst);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ OP_SHL,
|
|
|
|
+ OP_SHR,
|
|
|
|
+ OP_SAR:
|
|
|
|
+ begin
|
|
|
|
+ if size in [OS_64,OS_S64] then
|
|
|
|
+ shiftcountmask:=63
|
|
|
|
+ else
|
|
|
|
+ shiftcountmask:=31;
|
|
|
|
+ if (a and shiftcountmask)<>0 Then
|
|
|
|
+ list.concat(taicpu.op_reg_reg_const(
|
|
|
|
+ TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
|
|
|
|
+ else
|
|
|
|
+ a_load_reg_reg(list,size,size,src,dst);
|
|
|
|
+ if (a and not(tcgint(shiftcountmask)))<>0 then
|
|
|
|
+ internalError(2014112101);
|
|
|
|
+ end;
|
|
|
|
+ OP_ROL,
|
|
|
|
+ OP_ROR:
|
|
|
|
+ begin
|
|
|
|
+ case size of
|
|
|
|
+ OS_32,OS_S32:
|
|
|
|
+ if (a and not(tcgint(31)))<>0 then
|
|
|
|
+ internalError(2014112102);
|
|
|
|
+ OS_64,OS_S64:
|
|
|
|
+ if (a and not(tcgint(63)))<>0 then
|
|
|
|
+ internalError(2014112103);
|
|
|
|
+ else
|
|
|
|
+ internalError(2014112104);
|
|
|
|
+ end;
|
|
|
|
+ { there's only a ror opcode }
|
|
|
|
+ if op=OP_ROL then
|
|
|
|
+ a:=(tcgsize2size[size]*8)-a;
|
|
|
|
+ list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
|
|
|
|
+ end;
|
|
|
|
+ OP_MUL,
|
|
|
|
+ OP_IMUL,
|
|
|
|
+ OP_DIV,
|
|
|
|
+ OP_IDIV:
|
|
|
|
+ begin
|
|
|
|
+ constreg:=getintregister(list,size);
|
|
|
|
+ a_load_const_reg(list,size,a,constreg);
|
|
|
|
+ a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ internalerror(2014111403);
|
|
|
|
+ end;
|
|
|
|
+ maybeadjustresult(list,op,size,dst);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
|
|
|
|
+ var
|
|
|
|
+ tmpreg1: tregister;
|
|
|
|
+ begin
|
|
|
|
+ ovloc.loc:=LOC_VOID;
|
|
|
|
+ { overflow can only occur with 64 bit calculations on 64 bit cpus }
|
|
|
|
+ if setflags and
|
|
|
|
+ (size in [OS_64,OS_S64]) then
|
|
|
|
+ begin
|
|
|
|
+ case op of
|
|
|
|
+ OP_ADD,
|
|
|
|
+ OP_SUB:
|
|
|
|
+ begin
|
|
|
|
+ list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
|
|
|
|
+ ovloc.loc:=LOC_FLAGS;
|
|
|
|
+ if size=OS_64 then
|
|
|
|
+ if op=OP_ADD then
|
|
|
|
+ ovloc.resflags:=F_CS
|
|
|
|
+ else
|
|
|
|
+ ovloc.resflags:=F_CC
|
|
|
|
+ else
|
|
|
|
+ ovloc.resflags:=F_VS;
|
|
|
|
+ { finished; since we won't call through to a_op_reg_reg_reg,
|
|
|
|
+ adjust the result here if necessary }
|
|
|
|
+ maybeadjustresult(list,op,size,dst);
|
|
|
|
+ exit;
|
|
|
|
+ end;
|
|
|
|
+ OP_MUL:
|
|
|
|
+ begin
|
|
|
|
+ { check whether the upper 64 bit of the 128 bit product is 0 }
|
|
|
|
+ tmpreg1:=getintregister(list,OS_64);
|
|
|
|
+ list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
|
|
|
|
+ list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
|
|
|
|
+ ovloc.loc:=LOC_FLAGS;
|
|
|
|
+ ovloc.resflags:=F_NE;
|
|
|
|
+ { still have to perform the actual multiplication }
|
|
|
|
+ end;
|
|
|
|
+ OP_IMUL:
|
|
|
|
+ begin
|
|
|
|
+ { check whether the sign bit of the (128 bit) result is the
|
|
|
|
+ same as "sign bit of src1" xor "signbit of src2" (if so, no
|
|
|
|
+ overflow and the xor-product of all sign bits is 0) }
|
|
|
|
+ tmpreg1:=getintregister(list,OS_64);
|
|
|
|
+ list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
|
|
|
|
+ list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src1));
|
|
|
|
+ list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src2));
|
|
|
|
+ list.concat(taicpu.op_reg_const(A_TST,tmpreg1,$80000000));
|
|
|
|
+ ovloc.loc:=LOC_FLAGS;
|
|
|
|
+ ovloc.resflags:=F_NE;
|
|
|
|
+ { still have to perform the actual multiplication }
|
|
|
|
+ end;
|
|
|
|
+ OP_IDIV,
|
|
|
|
+ OP_DIV:
|
|
|
|
+ begin
|
|
|
|
+ { not handled here, needs div-by-zero check (dividing by zero
|
|
|
|
+ just gives a 0 result on aarch64), and low(int64) div -1
|
|
|
|
+ check for overflow) }
|
|
|
|
+ internalerror(2014122101);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ a_op_reg_reg_reg(list,op,size,src1,src2,dst);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ {*************** compare instructructions ****************}
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
|
|
|
|
+ var
|
|
|
|
+ op: tasmop;
|
|
|
|
+ begin
|
|
|
|
+ if a>=0 then
|
|
|
|
+ op:=A_CMP
|
|
|
|
+ else
|
|
|
|
+ op:=A_CMN;
|
|
|
|
+ { avoid range/overflow error in case a=low(tcgint) }
|
|
|
|
+{$push}{$r-}{$q-}
|
|
|
|
+ handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
|
|
|
|
+{$pop}
|
|
|
|
+ a_jmp_cond(list,cmp_op,l);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
|
|
|
|
+ a_jmp_cond(list,cmp_op,l);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
|
|
|
|
+ var
|
|
|
|
+ ai: taicpu;
|
|
|
|
+ begin
|
|
|
|
+ ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name));
|
|
|
|
+ ai.is_jmp:=true;
|
|
|
|
+ list.Concat(ai);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
|
|
|
|
+ var
|
|
|
|
+ ai: taicpu;
|
|
|
|
+ begin
|
|
|
|
+ ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s));
|
|
|
|
+ ai.is_jmp:=true;
|
|
|
|
+ list.Concat(ai);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
|
|
|
|
+ var
|
|
|
|
+ ai: taicpu;
|
|
|
|
+ begin
|
|
|
|
+ ai:=TAiCpu.op_sym(A_B,l);
|
|
|
|
+ ai.is_jmp:=true;
|
|
|
|
+ ai.SetCondition(TOpCmp2AsmCond[cond]);
|
|
|
|
+ list.Concat(ai);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
|
|
|
|
+ var
|
|
|
|
+ ai : taicpu;
|
|
|
|
+ begin
|
|
|
|
+ ai:=Taicpu.op_sym(A_B,l);
|
|
|
|
+ ai.is_jmp:=true;
|
|
|
|
+ ai.SetCondition(flags_to_cond(f));
|
|
|
|
+ list.Concat(ai);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
|
|
|
|
+ var
|
|
|
|
+ ai: taicpu;
|
|
|
|
+ begin
|
|
|
|
+ ai:=taicpu.op_reg(A_CSET,reg)
|
|
|
|
+ ai.SetCondition(flags_to_cond(f));
|
|
|
|
+ list.concat(ai);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
|
|
|
|
+ begin
|
|
|
|
+ { we need an explicit overflow location, because there are many
|
|
|
|
+ possibilities (not just the overflow flag, which is only used for
|
|
|
|
+ signed add/sub) }
|
|
|
|
+ internalerror(2014112303);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
|
|
|
|
+ var
|
|
|
|
+ hl : tasmlabel;
|
|
|
|
+ hflags : tresflags;
|
|
|
|
+ begin
|
|
|
|
+ if not(cs_check_overflow in current_settings.localswitches) then
|
|
|
|
+ exit;
|
|
|
|
+ current_asmdata.getjumplabel(hl);
|
|
|
|
+ case ovloc.loc of
|
|
|
|
+ LOC_FLAGS:
|
|
|
|
+ begin
|
|
|
|
+ hflags:=ovloc.resflags;
|
|
|
|
+ inverse_flags(hflags);
|
|
|
|
+ cg.a_jmp_flags(list,hflags,hl);
|
|
|
|
+ end;
|
|
|
|
+ else
|
|
|
|
+ internalerror(2014112304);
|
|
|
|
+ end;
|
|
|
|
+ a_call_name(list,'FPC_OVERFLOW',false);
|
|
|
|
+ a_label(list,hl);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ { *********** entry/exit code and address loading ************ }
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
|
|
|
|
+ var
|
|
|
|
+ ref: treference;
|
|
|
|
+ sr: tsuperregister;
|
|
|
|
+ pairreg: tregister;
|
|
|
|
+ begin
|
|
|
|
+ reference_reset_base(ref,NR_SP,-16,16);
|
|
|
|
+ ref.addressmode:=AM_PREINDEXED;
|
|
|
|
+ pairreg:=NR_NO;
|
|
|
|
+ { store all used registers pairwise }
|
|
|
|
+ for sr:=lowsr to highsr do
|
|
|
|
+ if sr in rg[rt].used_in_proc then
|
|
|
|
+ if pairreg=NR_NO then
|
|
|
|
+ pairreg:=newreg(rt,sr,sub)
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
|
|
|
|
+ pairreg:=NR_NO
|
|
|
|
+ end;
|
|
|
|
+ { one left -> store twice (stack must be 16 bytes aligned) }
|
|
|
|
+ if pairreg<>NR_NO then
|
|
|
|
+ list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
|
|
|
|
+ var
|
|
|
|
+ ref: treference;
|
|
|
|
+ begin
|
|
|
|
+ if nostackframe then
|
|
|
|
+ exit;
|
|
|
|
+ { stack pointer has to be aligned to 16 bytes at all times }
|
|
|
|
+ localsize:=align(localsize,16);
|
|
|
|
+
|
|
|
|
+ { save stack pointer and return address }
|
|
|
|
+ reference_reset_base(ref,NR_SP,-16,16);
|
|
|
|
+ ref.addressmode:=AM_PREINDEXED;
|
|
|
|
+ list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
|
|
|
|
+ { initialise frame pointer }
|
|
|
|
+ a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
|
|
|
|
+
|
|
|
|
+ { save modified integer registers }
|
|
|
|
+ save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
|
|
|
|
+ { only the lower 64 bits of the modified vector registers need to be
|
|
|
|
+ saved; if the caller needs the upper 64 bits, it has to save them
|
|
|
|
+ itself }
|
|
|
|
+ save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
|
|
|
|
+
|
|
|
|
+ { allocate stack space }
|
|
|
|
+ if localsize<>0 then
|
|
|
|
+ begin
|
|
|
|
+ localsize:=align(localsize,16);
|
|
|
|
+ current_procinfo.final_localsize:=localsize;
|
|
|
|
+ handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
|
|
|
|
+ begin
|
|
|
|
+ { nothing to do on Darwin; check on ELF targets }
|
|
|
|
+ if not(target_info.system in systems_darwin) then
|
|
|
|
+ internalerror(2014112601);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_restore_registers(list:TAsmList);
|
|
|
|
+ begin
|
|
|
|
+ { done in g_proc_exit }
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
|
|
|
|
+ var
|
|
|
|
+ ref: treference;
|
|
|
|
+ sr, highestsetsr: tsuperregister;
|
|
|
|
+ pairreg: tregister;
|
|
|
|
+ regcount: longint;
|
|
|
|
+ begin
|
|
|
|
+ reference_reset_base(ref,NR_SP,16,16);
|
|
|
|
+ ref.addressmode:=AM_POSTINDEXED;
|
|
|
|
+ { highest reg stored twice? }
|
|
|
|
+ regcount:=0;
|
|
|
|
+ highestsetsr:=RS_NO;
|
|
|
|
+ for sr:=lowsr to highsr do
|
|
|
|
+ if sr in rg[rt].used_in_proc then
|
|
|
|
+ begin
|
|
|
|
+ inc(regcount);
|
|
|
|
+ highestsetsr:=sr;
|
|
|
|
+ end;
|
|
|
|
+ if odd(regcount) then
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
|
|
|
|
+ highestsetsr:=pred(highestsetsr);
|
|
|
|
+ end;
|
|
|
|
+ { load all (other) used registers pairwise }
|
|
|
|
+ pairreg:=NR_NO;
|
|
|
|
+ for sr:=highestsetsr downto lowsr do
|
|
|
|
+ if sr in rg[rt].used_in_proc then
|
|
|
|
+ if pairreg=NR_NO then
|
|
|
|
+ pairreg:=newreg(rt,sr,sub)
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
|
|
|
|
+ pairreg:=NR_NO
|
|
|
|
+ end;
|
|
|
|
+ { There can't be any register left }
|
|
|
|
+ if pairreg<>NR_NO then
|
|
|
|
+ internalerror(2014112602);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
|
|
|
|
+ var
|
|
|
|
+ ref: treference;
|
|
|
|
+ regsstored: boolean;
|
|
|
|
+ sr: tsuperregister;
|
|
|
|
+ begin
|
|
|
|
+ if not nostackframe then
|
|
|
|
+ begin
|
|
|
|
+ { if no registers have been stored, we don't have to subtract the
|
|
|
|
+ allocated temp space from the stack pointer }
|
|
|
|
+ regsstored:=false;
|
|
|
|
+ for sr:=RS_X19 to RS_X28 do
|
|
|
|
+ if sr in rg[R_INTREGISTER].used_in_proc then
|
|
|
|
+ begin
|
|
|
|
+ regsstored:=true;
|
|
|
|
+ break;
|
|
|
|
+ end;
|
|
|
|
+ if not regsstored then
|
|
|
|
+ for sr:=RS_D8 to RS_D15 do
|
|
|
|
+ if sr in rg[R_MMREGISTER].used_in_proc then
|
|
|
|
+ begin
|
|
|
|
+ regsstored:=true;
|
|
|
|
+ break;
|
|
|
|
+ end;
|
|
|
|
+ { restore registers (and stack pointer) }
|
|
|
|
+ if regsstored then
|
|
|
|
+ begin
|
|
|
|
+ if current_procinfo.final_localsize<>0 then
|
|
|
|
+ handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
|
|
|
|
+ load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
|
|
|
|
+ load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
|
|
|
|
+ end
|
|
|
|
+ else if current_procinfo.final_localsize<>0 then
|
|
|
|
+ { restore stack pointer }
|
|
|
|
+ a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
|
|
|
|
+
|
|
|
|
+ { restore framepointer and return address }
|
|
|
|
+ reference_reset_base(ref,NR_SP,16,16);
|
|
|
|
+ ref.addressmode:=AM_POSTINDEXED;
|
|
|
|
+ list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ { return }
|
|
|
|
+ list.concat(taicpu.op_none(A_RET));
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_save_registers(list : TAsmList);
|
|
|
|
+ begin
|
|
|
|
+ { done in g_proc_entry }
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ { ************* concatcopy ************ }
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
|
|
|
|
+ var
|
|
|
|
+ paraloc1,paraloc2,paraloc3 : TCGPara;
|
|
|
|
+ pd : tprocdef;
|
|
|
|
+ begin
|
|
|
|
+ pd:=search_system_proc('MOVE');
|
|
|
|
+ paraloc1.init;
|
|
|
|
+ paraloc2.init;
|
|
|
|
+ paraloc3.init;
|
|
|
|
+ paramanager.getintparaloc(pd,1,paraloc1);
|
|
|
|
+ paramanager.getintparaloc(pd,2,paraloc2);
|
|
|
|
+ paramanager.getintparaloc(pd,3,paraloc3);
|
|
|
|
+ a_load_const_cgpara(list,OS_SINT,len,paraloc3);
|
|
|
|
+ a_loadaddr_ref_cgpara(list,dest,paraloc2);
|
|
|
|
+ a_loadaddr_ref_cgpara(list,source,paraloc1);
|
|
|
|
+ paramanager.freecgpara(list,paraloc3);
|
|
|
|
+ paramanager.freecgpara(list,paraloc2);
|
|
|
|
+ paramanager.freecgpara(list,paraloc1);
|
|
|
|
+ alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
|
|
|
|
+ alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
|
|
|
|
+ a_call_name(list,'FPC_MOVE',false);
|
|
|
|
+ dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
|
|
|
|
+ dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
|
|
|
|
+ paraloc3.done;
|
|
|
|
+ paraloc2.done;
|
|
|
|
+ paraloc1.done;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_concatcopy(list:TAsmList;const source,dest:treference;len:tcgint);
|
|
|
|
+(*
|
|
|
|
+ var
|
|
|
|
+ tmpreg1,
|
|
|
|
+ hreg,
|
|
|
|
+ countreg: tregister;
|
|
|
|
+ src, dst: treference;
|
|
|
|
+ lab: tasmlabel;
|
|
|
|
+ count, count2: aint;
|
|
|
|
+*)
|
|
|
|
+ begin
|
|
|
|
+(*
|
|
|
|
+ { anybody wants to determine a good value here :)? }
|
|
|
|
+ if len>100 then
|
|
|
|
+*)
|
|
|
|
+ g_concatcopy_move(list,source,dest,len)
|
|
|
|
+(*
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ count:=len div 4;
|
|
|
|
+ if (count<=4) and reference_is_reusable(source) then
|
|
|
|
+ src:=source
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ reference_reset_base(src,getintregister(list,OS_ADDR),0,sizeof(aint));
|
|
|
|
+ a_loadaddr_ref_reg(list,source,src.base);
|
|
|
|
+ end;
|
|
|
|
+ if (count<=4) and reference_is_reusable(dest) then
|
|
|
|
+ dst:=dest
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ reference_reset_base(dst,getintregister(list,OS_ADDR),0,sizeof(aint));
|
|
|
|
+ a_loadaddr_ref_reg(list,dest,dst.base);
|
|
|
|
+ end;
|
|
|
|
+ { generate a loop }
|
|
|
|
+ if count>4 then
|
|
|
|
+ begin
|
|
|
|
+ countreg:=GetIntRegister(list,OS_INT);
|
|
|
|
+ tmpreg1:=GetIntRegister(list,OS_INT);
|
|
|
|
+ a_load_const_reg(list,OS_INT,count,countreg);
|
|
|
|
+ current_asmdata.getjumplabel(lab);
|
|
|
|
+ a_label(list, lab);
|
|
|
|
+ list.concat(taicpu.op_ref_reg(A_LD,src,tmpreg1));
|
|
|
|
+ list.concat(taicpu.op_reg_ref(A_ST,tmpreg1,dst));
|
|
|
|
+ list.concat(taicpu.op_reg_const_reg(A_ADD,src.base,4,src.base));
|
|
|
|
+ list.concat(taicpu.op_reg_const_reg(A_ADD,dst.base,4,dst.base));
|
|
|
|
+ list.concat(taicpu.op_reg_const_reg(A_SUBcc,countreg,1,countreg));
|
|
|
|
+ a_jmp_cond(list,OC_NE,lab);
|
|
|
|
+ len := len mod 4;
|
|
|
|
+ end;
|
|
|
|
+ { unrolled loop }
|
|
|
|
+ count:=len div 4;
|
|
|
|
+ if count>0 then
|
|
|
|
+ begin
|
|
|
|
+ tmpreg1:=GetIntRegister(list,OS_INT);
|
|
|
|
+ for count2 := 1 to count do
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_ref_reg(A_LD,src,tmpreg1));
|
|
|
|
+ list.concat(taicpu.op_reg_ref(A_ST,tmpreg1,dst));
|
|
|
|
+ inc(src.offset,4);
|
|
|
|
+ inc(dst.offset,4);
|
|
|
|
+ end;
|
|
|
|
+ len := len mod 4;
|
|
|
|
+ end;
|
|
|
|
+ if (len and 4) <> 0 then
|
|
|
|
+ begin
|
|
|
|
+ hreg:=GetIntRegister(list,OS_INT);
|
|
|
|
+ a_load_ref_reg(list,OS_32,OS_32,src,hreg);
|
|
|
|
+ a_load_reg_ref(list,OS_32,OS_32,hreg,dst);
|
|
|
|
+ inc(src.offset,4);
|
|
|
|
+ inc(dst.offset,4);
|
|
|
|
+ end;
|
|
|
|
+ { copy the leftovers }
|
|
|
|
+ if (len and 2) <> 0 then
|
|
|
|
+ begin
|
|
|
|
+ hreg:=GetIntRegister(list,OS_INT);
|
|
|
|
+ a_load_ref_reg(list,OS_16,OS_16,src,hreg);
|
|
|
|
+ a_load_reg_ref(list,OS_16,OS_16,hreg,dst);
|
|
|
|
+ inc(src.offset,2);
|
|
|
|
+ inc(dst.offset,2);
|
|
|
|
+ end;
|
|
|
|
+ if (len and 1) <> 0 then
|
|
|
|
+ begin
|
|
|
|
+ hreg:=GetIntRegister(list,OS_INT);
|
|
|
|
+ a_load_ref_reg(list,OS_8,OS_8,src,hreg);
|
|
|
|
+ a_load_reg_ref(list,OS_8,OS_8,hreg,dst);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+*)
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
|
|
|
|
+(*
|
|
|
|
+ var
|
|
|
|
+ src, dst: treference;
|
|
|
|
+ tmpreg1,
|
|
|
|
+ countreg: tregister;
|
|
|
|
+ i : aint;
|
|
|
|
+ lab: tasmlabel;
|
|
|
|
+*)
|
|
|
|
+ begin
|
|
|
|
+(*
|
|
|
|
+ if len>31 then
|
|
|
|
+*)
|
|
|
|
+ g_concatcopy_move(list,source,dest,len)
|
|
|
|
+(*
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ reference_reset(src,source.alignment);
|
|
|
|
+ reference_reset(dst,dest.alignment);
|
|
|
|
+ { load the address of source into src.base }
|
|
|
|
+ src.base:=GetAddressRegister(list);
|
|
|
|
+ a_loadaddr_ref_reg(list,source,src.base);
|
|
|
|
+ { load the address of dest into dst.base }
|
|
|
|
+ dst.base:=GetAddressRegister(list);
|
|
|
|
+ a_loadaddr_ref_reg(list,dest,dst.base);
|
|
|
|
+ { generate a loop }
|
|
|
|
+ if len>4 then
|
|
|
|
+ begin
|
|
|
|
+ countreg:=GetIntRegister(list,OS_INT);
|
|
|
|
+ tmpreg1:=GetIntRegister(list,OS_INT);
|
|
|
|
+ a_load_const_reg(list,OS_INT,len,countreg);
|
|
|
|
+ current_asmdata.getjumplabel(lab);
|
|
|
|
+ a_label(list, lab);
|
|
|
|
+ list.concat(taicpu.op_ref_reg(A_LDUB,src,tmpreg1));
|
|
|
|
+ list.concat(taicpu.op_reg_ref(A_STB,tmpreg1,dst));
|
|
|
|
+ list.concat(taicpu.op_reg_const_reg(A_ADD,src.base,1,src.base));
|
|
|
|
+ list.concat(taicpu.op_reg_const_reg(A_ADD,dst.base,1,dst.base));
|
|
|
|
+ list.concat(taicpu.op_reg_const_reg(A_SUBcc,countreg,1,countreg));
|
|
|
|
+ a_jmp_cond(list,OC_NE,lab);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ { unrolled loop }
|
|
|
|
+ tmpreg1:=GetIntRegister(list,OS_INT);
|
|
|
|
+ for i:=1 to len do
|
|
|
|
+ begin
|
|
|
|
+ list.concat(taicpu.op_ref_reg(A_LDUB,src,tmpreg1));
|
|
|
|
+ list.concat(taicpu.op_reg_ref(A_STB,tmpreg1,dst));
|
|
|
|
+ inc(src.offset);
|
|
|
|
+ inc(dst.offset);
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+ end;
|
|
|
|
+*)
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
|
|
|
|
+ begin
|
|
|
|
+ { This method is integrated into g_intf_wrapper and shouldn't be called separately }
|
|
|
|
+ InternalError(2013020102);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure tcgaarch64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
|
|
|
|
+ var
|
|
|
|
+ make_global: boolean;
|
|
|
|
+ href: treference;
|
|
|
|
+ hsym: tsym;
|
|
|
|
+ paraloc: pcgparalocation;
|
|
|
|
+ op: tasmop;
|
|
|
|
+ begin
|
|
|
|
+ if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
|
|
|
|
+ Internalerror(200006137);
|
|
|
|
+ if not assigned(procdef.struct) or
|
|
|
|
+ (procdef.procoptions*[po_classmethod, po_staticmethod,
|
|
|
|
+ po_methodpointer, po_interrupt, po_iocheck]<>[]) then
|
|
|
|
+ Internalerror(200006138);
|
|
|
|
+ if procdef.owner.symtabletype<>ObjectSymtable then
|
|
|
|
+ Internalerror(200109191);
|
|
|
|
+
|
|
|
|
+ make_global:=false;
|
|
|
|
+ if (not current_module.is_unit) or create_smartlink_library or
|
|
|
|
+ (procdef.owner.defowner.owner.symtabletype=globalsymtable) then
|
|
|
|
+ make_global:=true;
|
|
|
|
+
|
|
|
|
+ if make_global then
|
|
|
|
+ list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
|
|
|
|
+ else
|
|
|
|
+ list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));
|
|
|
|
+
|
|
|
|
+ { set param1 interface to self }
|
|
|
|
+ procdef.init_paraloc_info(callerside);
|
|
|
|
+ hsym:=tsym(procdef.parast.Find('self'));
|
|
|
|
+ if not(assigned(hsym) and
|
|
|
|
+ (hsym.typ=paravarsym)) then
|
|
|
|
+ internalerror(2010103101);
|
|
|
|
+ paraloc:=tparavarsym(hsym).paraloc[callerside].location;
|
|
|
|
+ if assigned(paraloc^.next) then
|
|
|
|
+ InternalError(2013020101);
|
|
|
|
+
|
|
|
|
+ case paraloc^.loc of
|
|
|
|
+ LOC_REGISTER:
|
|
|
|
+ handle_reg_imm12_reg(list,A_SUB,paraloc^.size,paraloc^.register,ioffset,paraloc^.register,NR_IP0,false,true);
|
|
|
|
+ else
|
|
|
|
+ internalerror(2010103102);
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+ if (po_virtualmethod in procdef.procoptions) and
|
|
|
|
+ not is_objectpascal_helper(procdef.struct) then
|
|
|
|
+ begin
|
|
|
|
+ if (procdef.extnumber=$ffff) then
|
|
|
|
+ Internalerror(200006139);
|
|
|
|
+ { mov 0(%rdi),%rax ; load vmt}
|
|
|
|
+ reference_reset_base(href,paraloc^.register,0,sizeof(pint));
|
|
|
|
+ getcpuregister(list,NR_IP0);
|
|
|
|
+ a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_IP0);
|
|
|
|
+ { jmp *vmtoffs(%eax) ; method offs }
|
|
|
|
+ reference_reset_base(href,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
|
|
|
|
+ op:=A_LDR;
|
|
|
|
+ make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
|
|
|
|
+ list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
|
|
|
|
+ ungetcpuregister(list,NR_IP0);
|
|
|
|
+ list.concat(taicpu.op_reg(A_BR,NR_IP0));
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ a_jmp_name(list,procdef.mangledname);
|
|
|
|
+ list.concat(Tai_symbol_end.Createname(labelname));
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ procedure create_codegen;
|
|
|
|
+ begin
|
|
|
|
+ cg:=tcgaarch64.Create;
|
|
|
|
+ cg128:=tcg128.Create;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+end.
|