{ Copyright (c) 1998-2002 by Florian Klaempfl This unit implements the code generator for the PowerPC This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. **************************************************************************** } unit cgcpu; {$I fpcdefs.inc} interface uses globtype, symtype, symdef, cgbase, cgobj, aasmbase, aasmcpu, aasmtai, cpubase, cpuinfo, cgutils, rgcpu, parabase; type tcgppc = class(tcg) procedure init_register_allocators; override; procedure done_register_allocators; override; { passing parameters, per default the parameter is pushed } { nr gives the number of the parameter (enumerated from } { left to right), this allows to move the parameter to } { register, if the cpu supports register calling } { conventions } procedure a_param_const(list: taasmoutput; size: tcgsize; a: aint; const paraloc: tcgpara); override; procedure a_param_ref(list: taasmoutput; size: tcgsize; const r: treference; const paraloc: tcgpara); override; procedure a_paramaddr_ref(list: taasmoutput; const r: treference; const paraloc: tcgpara); override; procedure a_call_name(list: taasmoutput; const s: string); override; procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean); procedure a_call_reg(list: taasmoutput; reg: tregister); override; procedure a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a: aint; reg: TRegister); override; procedure a_op_reg_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; src, dst: TRegister); override; procedure a_op_const_reg_reg(list: taasmoutput; op: TOpCg; size: tcgsize; a: aint; src, dst: tregister); override; procedure a_op_reg_reg_reg(list: taasmoutput; op: TOpCg; size: tcgsize; src1, src2, dst: tregister); override; { move instructions } procedure a_load_const_reg(list: taasmoutput; size: tcgsize; a: aint; reg: tregister); override; procedure a_load_reg_ref(list: taasmoutput; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override; procedure a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize; const Ref: treference; reg: tregister); override; procedure a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize; reg1, reg2: tregister); override; { fpu move instructions } procedure a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2: tregister); override; procedure a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref: treference; reg: tregister); override; procedure a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg: tregister; const ref: treference); override; { comparison operations } procedure a_cmp_const_reg_label(list: taasmoutput; size: tcgsize; cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel); override; procedure a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override; procedure a_jmp_name(list: taasmoutput; const s: string); override; procedure a_jmp_always(list: taasmoutput; l: tasmlabel); override; procedure a_jmp_flags(list: taasmoutput; const f: TResFlags; l: tasmlabel); override; procedure g_flags2reg(list: taasmoutput; size: TCgSize; const f: TResFlags; reg: TRegister); override; procedure g_proc_entry(list: taasmoutput; localsize: longint; nostackframe: boolean); override; procedure g_proc_exit(list: taasmoutput; parasize: longint; nostackframe: boolean); override; procedure g_save_standard_registers(list: Taasmoutput); override; procedure g_restore_standard_registers(list: Taasmoutput); override; procedure a_loadaddr_ref_reg(list: taasmoutput; const ref: treference; r: tregister); override; procedure g_concatcopy(list: taasmoutput; const source, dest: treference; len: aint); override; procedure g_overflowcheck(list: taasmoutput; const l: tlocation; def: tdef); override; procedure a_jmp_cond(list: taasmoutput; cond: TOpCmp; l: tasmlabel); procedure g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const labelname: string; ioffset: longint); override; private { Make sure ref is a valid reference for the PowerPC and sets the } { base to the value of the index if (base = R_NO). } { Returns true if the reference contained a base, index and an } { offset or symbol, in which case the base will have been changed } { to a tempreg (which has to be freed by the caller) containing } { the sum of part of the original reference } function fixref(list: taasmoutput; var ref: treference): boolean; { returns whether a reference can be used immediately in a powerpc } { instruction } function issimpleref(const ref: treference): boolean; { contains the common code of a_load_reg_ref and a_load_ref_reg } procedure a_load_store(list: taasmoutput; op: tasmop; reg: tregister; ref: treference); { creates the correct branch instruction for a given combination } { of asmcondflags and destination addressing mode } procedure a_jmp(list: taasmoutput; op: tasmop; c: tasmcondflag; crval: longint; l: tasmlabel); end; const TOpCG2AsmOpConstLo: array[topcg] of TAsmOp = (A_NONE, A_ADDI, A_ANDI_, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NONE, A_NONE, A_ORI, A_SRAWI, A_SLWI, A_SRWI, A_SUBI, A_XORI); TOpCG2AsmOpConstHi: array[topcg] of TAsmOp = (A_NONE, A_ADDIS, A_ANDIS_, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NONE, A_NONE, A_ORIS, A_NONE, A_NONE, A_NONE, A_SUBIS, A_XORIS); TShiftOpCG2AsmOpConst32 : array[OP_SAR..OP_SHR] of TAsmOp = (A_SRAWI, A_SLWI, A_SRWI); TShiftOpCG2AsmOpConst64 : array[OP_SAR..OP_SHR] of TAsmOp = (A_SRADI, A_SLDI, A_SRDI); TOpCmp2AsmCond: array[topcmp] of TAsmCondFlag = (C_NONE, C_EQ, C_GT, C_LT, C_GE, C_LE, C_NE, C_LE, C_LT, C_GE, C_GT); implementation uses sysutils, globals, verbose, systems, cutils, symconst, symsym, fmodule, rgobj, tgobj, cpupi, procinfo, paramgr; procedure tcgppc.init_register_allocators; begin inherited init_register_allocators; rg[R_INTREGISTER] := trgcpu.create(R_INTREGISTER, R_SUBWHOLE, [RS_R3, RS_R4, RS_R5, RS_R6, RS_R7, RS_R8, RS_R9, RS_R10, RS_R11, RS_R12, RS_R31, RS_R30, RS_R29, RS_R28, RS_R27, RS_R26, RS_R25, RS_R24, RS_R23, RS_R22, RS_R21, RS_R20, RS_R19, RS_R18, RS_R17, RS_R16, RS_R15, RS_R14, RS_R13], first_int_imreg, []); rg[R_FPUREGISTER] := trgcpu.create(R_FPUREGISTER, R_SUBNONE, [RS_F0, RS_F1, RS_F2, RS_F3, RS_F4, RS_F5, RS_F6, RS_F7, RS_F8, RS_F9, RS_F10, RS_F11, RS_F12, RS_F13, RS_F31, RS_F30, RS_F29, RS_F28, RS_F27, RS_F26, RS_F25, RS_F24, RS_F23, RS_F22, RS_F21, RS_F20, RS_F19, RS_F18, RS_F17, RS_F16, RS_F15, RS_F14], first_fpu_imreg, []); {$WARNING FIX ME} rg[R_MMREGISTER] := trgcpu.create(R_MMREGISTER, R_SUBNONE, [RS_M0, RS_M1, RS_M2], first_mm_imreg, []); end; procedure tcgppc.done_register_allocators; begin rg[R_INTREGISTER].free; rg[R_FPUREGISTER].free; rg[R_MMREGISTER].free; inherited done_register_allocators; end; procedure tcgppc.a_param_const(list: taasmoutput; size: tcgsize; a: aint; const paraloc: tcgpara); var ref: treference; begin paraloc.check_simple_location; case paraloc.location^.loc of LOC_REGISTER, LOC_CREGISTER: a_load_const_reg(list, size, a, paraloc.location^.register); LOC_REFERENCE: begin reference_reset(ref); ref.base := paraloc.location^.reference.index; ref.offset := paraloc.location^.reference.offset; a_load_const_ref(list, size, a, ref); end; else internalerror(2002081101); end; end; procedure tcgppc.a_param_ref(list: taasmoutput; size: tcgsize; const r: treference; const paraloc: tcgpara); var tmpref, ref: treference; location: pcgparalocation; sizeleft: aint; begin location := paraloc.location; tmpref := r; sizeleft := paraloc.intsize; while assigned(location) do begin case location^.loc of LOC_REGISTER, LOC_CREGISTER: begin a_load_ref_reg(list, location^.size, location^.size, tmpref, location^.register); end; LOC_REFERENCE: begin reference_reset_base(ref, location^.reference.index, location^.reference.offset); g_concatcopy(list, tmpref, ref, sizeleft); if assigned(location^.next) then internalerror(2005010710); end; LOC_FPUREGISTER, LOC_CFPUREGISTER: case location^.size of OS_F32, OS_F64: a_loadfpu_ref_reg(list, location^.size, tmpref, location^.register); else internalerror(2002072801); end; LOC_VOID: begin // nothing to do end; else internalerror(2002081103); end; inc(tmpref.offset, tcgsize2size[location^.size]); dec(sizeleft, tcgsize2size[location^.size]); location := location^.next; end; end; procedure tcgppc.a_paramaddr_ref(list: taasmoutput; const r: treference; const paraloc: tcgpara); var ref: treference; tmpreg: tregister; begin paraloc.check_simple_location; case paraloc.location^.loc of LOC_REGISTER, LOC_CREGISTER: a_loadaddr_ref_reg(list, r, paraloc.location^.register); LOC_REFERENCE: begin reference_reset(ref); ref.base := paraloc.location^.reference.index; ref.offset := paraloc.location^.reference.offset; tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); a_loadaddr_ref_reg(list, r, tmpreg); a_load_reg_ref(list, OS_ADDR, OS_ADDR, tmpreg, ref); end; else internalerror(2002080701); end; end; { calling a procedure by name } procedure tcgppc.a_call_name(list: taasmoutput; const s: string); begin a_call_name_direct(list, s, true); end; procedure tcgppc.a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean); begin if (prependDot) then begin s := '.' + s; end; list.concat(taicpu.op_sym(A_BL, objectlibrary.newasmsymbol(s, AB_EXTERNAL, AT_FUNCTION))); list.concat(taicpu.op_none(A_NOP)); { the compiler does not properly set this flag anymore in pass 1, and for now we only need it after pass 2 (I hope) (JM) if not(pi_do_call in current_procinfo.flags) then internalerror(2003060703); } include(current_procinfo.flags, pi_do_call); end; { calling a procedure by address } procedure tcgppc.a_call_reg(list: taasmoutput; reg: tregister); var tmpreg: tregister; tmpref: treference; gotref : treference; begin tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); reference_reset(tmpref); tmpref.offset := 0; tmpref.base := reg; list.concat(taicpu.op_reg_ref(A_LD, tmpreg, tmpref)); // TODO: GOT change // reference_reset(gotref); // tmpref.offset := 40; // tmpref.base := rg[R_INTREGISTER].getregister(list, NR_STACK_POINTER_REG); // taicpu.op_load_reg_ref(list, OS_INT, OS_INT, list.concat(taicpu.op_reg(A_MTCTR, tmpreg)); list.concat(taicpu.op_none(A_BCTRL)); //if target_info.system=system_powerpc_macos then // //NOP is not needed here. // list.concat(taicpu.op_none(A_NOP)); include(current_procinfo.flags, pi_do_call); end; {********************** load instructions ********************} procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint; reg: TRegister); var scratchreg : TRegister; procedure load32bitconstant(list : taasmoutput; size : TCGSize; a : longint; reg : TRegister); var is_half_signed : boolean; begin (* // ts: test optimized code using LI/ADDIS if (smallint(a) = 0) and ((a shr 16) <> 0) then begin list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16))); end else begin is_half_signed := smallint(a) < 0; list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a))); if smallint((a shr 16) + ord(is_half_signed)) <> 0 then begin list.concat(taicpu.op_reg_reg_const(A_ADDIS, reg, reg, smallint((a shr 16) + ord(is_half_signed)))); end; end; *) // only 16 bit constant? (-2^15 <= a <= +2^15-1) if (a >= low(smallint)) and (a <= high(smallint)) then begin list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a))); end else begin { check if we have to start with LI or LIS, load as 32 bit constant } if ((a and $FFFF) <> 0) then begin list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16))); list.concat(taicpu.op_reg_reg_const(A_ORI, reg, reg, word(a))); end else begin list.concat(taicpu.op_reg_const(A_LIS, reg, smallint(a shr 16))); end; end; end; var astring : string; begin astring := 'a_load_const reg ' + inttostr(a) + ' ' + inttostr(tcgsize2size[size]); list.concat(tai_comment.create(strpnew(astring))); if not (size in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then internalerror(2002090902); // load low 32 bit (as signed number) load32bitconstant(list, size, lo(a), reg); // load high 32 bit if needed :( (the second expression is optimization, to be enabled and tested later!) if (size in [OS_64, OS_S64]) {and (hi(a) <> 0)} then begin // allocate scratch reg (=R0 because it might be called at places where register // allocation has already happened - either procedure entry/exit, and stack check // code generation) // Note: I hope this restriction can be lifted at some time //scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); // load high 32 bit load32bitconstant(list, size, hi(a), NR_R0); // combine both registers list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0)); end; (* // for 16/32 bit unsigned constants we need to make sure that the difference from this size to // 32 bits is cleared (since we optimize loading them as signed 16 bit parts, but 32 bit ops are // used for them. // e.g. for 16 bit there's a problem if the (unsigned) constant is of the form // xx..xx xx..xx 00..00 1x..xx // same problem as above for 32 bit: unsigned constants of the form // xx..xx xx..xx 00..00 1x..xx // cause troubles. Signed are ok. // for now, just clear the upper 48/32 bits (also because full 32 bit op usage isn't done yet) if (size in [OS_16, OS_32]) {and (lo(a) < 0)} then begin a_load_reg_reg(list, size, size, reg, reg); end; *) // need to clear MSB for unsigned 64 bit int because we did not load the upper // 32 bit at all (second expression is optimization: enable and test later!) // e.g. constants of the form 00..00 00..00 1x..xx xx..xx if (size in [OS_64]) and (hi(a) = 0) then begin list.concat(taicpu.op_reg_reg_const_const(A_RLDICL, reg, reg, 0, 32)); end; end; procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize; reg: tregister; const ref: treference); const StoreInstr: array[OS_8..OS_64, boolean, boolean] of TAsmOp = { indexed? updating?} (((A_STB, A_STBU), (A_STBX, A_STBUX)), ((A_STH, A_STHU), (A_STHX, A_STHUX)), ((A_STW, A_STWU), (A_STWX, A_STWUX)), ((A_STD, A_STDU), (A_STDX, A_STDUX)) ); var op: TAsmOp; ref2: TReference; begin ref2 := ref; fixref(list, ref2); if tosize in [OS_S8..OS_S64] then { storing is the same for signed and unsigned values } tosize := tcgsize(ord(tosize) - (ord(OS_S8) - ord(OS_8))); op := storeinstr[tcgsize2unsigned[tosize], ref2.index <> NR_NO, false]; a_load_store(list, op, reg, ref2); end; procedure tcgppc.a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); const LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp = { indexed? updating?} (((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)), ((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)), ((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)), ((A_LD, A_LDU), (A_LDX, A_LDUX)), { 128bit stuff too } ((A_NONE, A_NONE), (A_NONE, A_NONE)), { there's no load-byte-with-sign-extend :( } ((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)), ((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)), { there's no load-word-arithmetic-indexed with update, simulate it in code :( } ((A_LWA, A_LWAU), (A_LWAX, A_LWAUX)), ((A_LD, A_LDU), (A_LDX, A_LDUX)) ); var op: tasmop; ref2: treference; begin { TODO: optimize/take into consideration fromsize/tosize. Will } { probably only matter for OS_S8 loads though } if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then internalerror(2002090902); ref2 := ref; fixref(list, ref2); { the caller is expected to have adjusted the reference already } { in this case } if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then fromsize := tosize; op := loadinstr[fromsize, ref2.index <> NR_NO, false]; // there is no LWAU instruction, simulate using ADDI and LWA if (op = A_LWAU) then begin list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset)); ref2.offset := 0; op := A_LWA; end; a_load_store(list, op, reg, ref2); // sign extend shortint if necessary, since there is no // load instruction that does that automatically (JM) if fromsize = OS_S8 then list.concat(taicpu.op_reg_reg(A_EXTSB, reg, reg)); end; procedure tcgppc.a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize; reg1, reg2: tregister); const movemap : array[OS_8..OS_S128, OS_8..OS_S128] of tasmop = ( { to -> OS_8 OS_16 OS_32 OS_64 OS_128 OS_S8 OS_S16 OS_S32 OS_S64 OS_S128 } { from } { OS_8 } (A_MR, A_RLDICL, A_RLDICL, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ), { OS_16 } (A_RLDICL, A_MR, A_RLDICL, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ), { OS_32 } (A_RLDICL, A_RLDICL, A_MR, A_RLDICL, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ), { OS_64 } (A_RLDICL, A_RLDICL, A_RLDICL, A_MR, A_NONE, A_RLDICL, A_RLDICL, A_RLDICL, A_RLDICL, A_NOP ), { OS_128 } (A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NOP ), { OS_S8 } (A_EXTSB, A_EXTSB, A_EXTSB, A_EXTSB, A_NONE, A_MR, A_EXTSB, A_EXTSB, A_EXTSB, A_NOP ), { OS_S16 } (A_RLDICL, A_EXTSH, A_EXTSH, A_EXTSH, A_NONE, A_EXTSB, A_MR, A_EXTSH, A_EXTSH, A_NOP ), { OS_S32 } (A_RLDICL, A_RLDICL, A_EXTSW, A_EXTSW, A_NONE, A_EXTSB, A_EXTSH, A_MR, A_EXTSW, A_NOP ), { OS_S64 } (A_RLDICL, A_RLDICL, A_RLDICL, A_MR, A_NONE, A_EXTSB, A_EXTSH, A_EXTSW, A_MR, A_NOP ), { OS_S128 } (A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NONE, A_NOP ) ); var instr: taicpu; op : tasmop; begin op := movemap[fromsize, tosize]; case op of A_MR, A_EXTSB, A_EXTSH, A_EXTSW : instr := taicpu.op_reg_reg(op, reg2, reg1); A_RLDICL : instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[fromsize])*8); else internalerror(2002090901); end; list.concat(instr); rg[R_INTREGISTER].add_move_instruction(instr); end; procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2: tregister); var instr: taicpu; begin instr := taicpu.op_reg_reg(A_FMR, reg2, reg1); list.concat(instr); rg[R_FPUREGISTER].add_move_instruction(instr); end; procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref: treference; reg: tregister); const FpuLoadInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp = { indexed? updating?} (((A_LFS, A_LFSU), (A_LFSX, A_LFSUX)), ((A_LFD, A_LFDU), (A_LFDX, A_LFDUX))); var op: tasmop; ref2: treference; begin { several functions call this procedure with OS_32 or OS_64 } { so this makes life easier (FK) } case size of OS_32, OS_F32: size := OS_F32; OS_64, OS_F64, OS_C64: size := OS_F64; else internalerror(200201121); end; ref2 := ref; fixref(list, ref2); op := fpuloadinstr[size, ref2.index <> NR_NO, false]; a_load_store(list, op, reg, ref2); end; procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg: tregister; const ref: treference); const FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp = { indexed? updating?} (((A_STFS, A_STFSU), (A_STFSX, A_STFSUX)), ((A_STFD, A_STFDU), (A_STFDX, A_STFDUX))); var op: tasmop; ref2: treference; begin if not (size in [OS_F32, OS_F64]) then internalerror(200201122); ref2 := ref; fixref(list, ref2); op := fpustoreinstr[size, ref2.index <> NR_NO, false]; a_load_store(list, op, reg, ref2); end; procedure tcgppc.a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a: aint; reg: TRegister); begin a_op_const_reg_reg(list, op, size, a, reg, reg); end; procedure tcgppc.a_op_reg_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; src, dst: TRegister); begin a_op_reg_reg_reg(list, op, size, src, dst, dst); end; procedure tcgppc.a_op_const_reg_reg(list: taasmoutput; op: TOpCg; size: tcgsize; a: aint; src, dst: tregister); var l1, l2: longint; oplo, ophi: tasmop; scratchreg: tregister; useReg : boolean; shiftmask : longint; procedure do_lo_hi; begin usereg := false; if (size in [OS_64, OS_S64]) then begin // ts: use register method for 64 bit consts. Sloooooow usereg := true; end else if (size in [OS_32, OS_S32]) then begin list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a))); list.concat(taicpu.op_reg_reg_const(ophi, dst, dst, word(a shr 16))); end else begin list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a))); end; end; begin if op = OP_SUB then begin a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst); exit; end; ophi := TOpCG2AsmOpConstHi[op]; oplo := TOpCG2AsmOpConstLo[op]; // peephole optimizations for AND, OR, XOR - can't this be done at // some higher level, independent of architecture? if (op in [OP_AND, OP_OR, OP_XOR]) then begin if (a = 0) then begin if op = OP_AND then list.concat(taicpu.op_reg_const(A_LI, dst, 0)) else a_load_reg_reg(list, size, size, src, dst); exit; end else if (a = -1) then begin case op of OP_OR: list.concat(taicpu.op_reg_const(A_LI, dst, -1)); OP_XOR: list.concat(taicpu.op_reg_reg(A_NOT, dst, src)); OP_AND: a_load_reg_reg(list, size, size, src, dst); end; exit; end; { optimization for add } end else if (op = OP_ADD) then if a = 0 then begin a_load_reg_reg(list, size, size, src, dst); exit; end else if (a >= low(smallint)) and (a <= high(smallint)) then begin list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a))); exit; end; { otherwise, the instructions we can generate depend on the } { operation } useReg := false; case op of OP_DIV, OP_IDIV: if (a = 0) then internalerror(200208103) else if (a = 1) then begin a_load_reg_reg(list, OS_INT, OS_INT, src, dst); exit end else if false {and ispowerof2(a, l1)} then begin internalerror(200208103); case op of OP_DIV: begin list.concat(taicpu.op_reg_reg_const(A_SRDI, dst, src, l1)); end; OP_IDIV: begin list.concat(taicpu.op_reg_reg_const(A_SRADI, dst, src, l1)); list.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst)); end; end; exit; end else usereg := true; OP_IMUL, OP_MUL: if (a = 0) then begin list.concat(taicpu.op_reg_const(A_LI, dst, 0)); exit end else if (a = -1) then begin list.concat(taicpu.op_reg_reg(A_NEG, dst, dst)); end else if (a = 1) then begin a_load_reg_reg(list, OS_INT, OS_INT, src, dst); exit end else if ispowerof2(a, l1) then list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, l1)) else if (a >= low(smallint)) and (a <= high(smallint)) then list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src, smallint(a))) else usereg := true; OP_ADD: do_lo_hi; OP_OR: do_lo_hi; OP_AND: useReg := true; OP_XOR: do_lo_hi; OP_SHL, OP_SHR, OP_SAR: begin {$note ts: cleanup todo, fix remaining bugs} if (size in [OS_64, OS_S64]) then begin if (a and 63) <> 0 then list.concat(taicpu.op_reg_reg_const( TShiftOpCG2AsmOpConst64[Op], dst, src, a and 63)) else a_load_reg_reg(list, size, size, src, dst); if (a shr 6) <> 0 then internalError(68991); end else begin if (a and 31) <> 0 then list.concat(taicpu.op_reg_reg_const( TShiftOpCG2AsmOpConst32[Op], dst, src, a and 31)) else a_load_reg_reg(list, size, size, src, dst); if (a shr 5) <> 0 then internalError(68991); end; end else internalerror(200109091); end; { if all else failed, load the constant in a register and then } { perform the operation } if useReg then begin scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); a_load_const_reg(list, size, a, scratchreg); a_op_reg_reg_reg(list, op, size, scratchreg, src, dst); end; end; procedure tcgppc.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg; size: tcgsize; src1, src2, dst: tregister); const op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop = (A_NONE, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR, A_SRAW, A_SLW, A_SRW, A_SUB, A_XOR); op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop = (A_NONE, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR, A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR); begin case op of OP_NEG, OP_NOT: begin list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1)); if (op = OP_NOT) and not (size in [OS_64, OS_S64]) then { zero/sign extend result again, fromsize is not important here } a_load_reg_reg(list, OS_S64, size, dst, dst) end; else {$NOTE ts:testme} if (size in [OS_64, OS_S64]) then begin list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2, src1)); end else begin list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2, src1)); end; end; end; {*************** compare instructructions ****************} procedure tcgppc.a_cmp_const_reg_label(list: taasmoutput; size: tcgsize; cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel); var scratch_register: TRegister; signed: boolean; begin signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE]; { in the following case, we generate more efficient code when } { signed is true } if (cmp_op in [OC_EQ, OC_NE]) and (aword(a) > $FFFF) then signed := true; if signed then if (a >= low(smallint)) and (a <= high(smallint)) then list.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR0, reg, a)) else begin scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); a_load_const_reg(list, OS_64, a, scratch_register); list.concat(taicpu.op_reg_reg_reg(A_CMPD, NR_CR0, reg, scratch_register)); end else if (aword(a) <= $FFFF) then list.concat(taicpu.op_reg_reg_const(A_CMPLDI, NR_CR0, reg, aword(a))) else begin scratch_register := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); a_load_const_reg(list, OS_64, a, scratch_register); list.concat(taicpu.op_reg_reg_reg(A_CMPLD, NR_CR0, reg, scratch_register)); end; a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l); end; procedure tcgppc.a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); var op: tasmop; begin if cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE] then if (size in [OS_64, OS_S64]) then op := A_CMPD else op := A_CMPW else if (size in [OS_64, OS_S64]) then op := A_CMPLD else op := A_CMPLW; list.concat(taicpu.op_reg_reg_reg(op, NR_CR0, reg2, reg1)); a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l); end; procedure tcgppc.a_jmp_cond(list: taasmoutput; cond: TOpCmp; l: tasmlabel); begin a_jmp(list, A_BC, TOpCmp2AsmCond[cond], 0, l); end; procedure tcgppc.a_jmp_name(list: taasmoutput; const s: string); var p: taicpu; begin p := taicpu.op_sym(A_B, objectlibrary.newasmsymbol(s, AB_EXTERNAL, AT_FUNCTION)); p.is_jmp := true; list.concat(p) end; procedure tcgppc.a_jmp_always(list: taasmoutput; l: tasmlabel); begin a_jmp(list, A_B, C_None, 0, l); end; procedure tcgppc.a_jmp_flags(list: taasmoutput; const f: TResFlags; l: tasmlabel); var c: tasmcond; begin c := flags_to_cond(f); a_jmp(list, A_BC, c.cond, c.cr - RS_CR0, l); end; procedure tcgppc.g_flags2reg(list: taasmoutput; size: TCgSize; const f: TResFlags; reg: TRegister); var testbit: byte; bitvalue: boolean; begin { get the bit to extract from the conditional register + its } { requested value (0 or 1) } testbit := ((f.cr - RS_CR0) * 4); case f.flag of F_EQ, F_NE: begin inc(testbit, 2); bitvalue := f.flag = F_EQ; end; F_LT, F_GE: begin bitvalue := f.flag = F_LT; end; F_GT, F_LE: begin inc(testbit); bitvalue := f.flag = F_GT; end; else internalerror(200112261); end; { load the conditional register in the destination reg } list.concat(taicpu.op_reg(A_MFCR, reg)); { we will move the bit that has to be tested to bit 0 by rotating } { left } testbit := (testbit + 1) and 31; { extract bit } list.concat(taicpu.op_reg_reg_const_const_const( A_RLWINM,reg,reg,testbit,31,31)); { if we need the inverse, xor with 1 } if not bitvalue then list.concat(taicpu.op_reg_reg_const(A_XORI, reg, reg, 1)); end; { *********** entry/exit code and address loading ************ } procedure tcgppc.g_save_standard_registers(list: Taasmoutput); begin { this work is done in g_proc_entry } end; procedure tcgppc.g_restore_standard_registers(list: Taasmoutput); begin { this work is done in g_proc_exit } end; procedure tcgppc.g_proc_entry(list: taasmoutput; localsize: longint; nostackframe: boolean); { generated the entry code of a procedure/function. Note: localsize is the } { sum of the size necessary for local variables and the maximum possible } { combined size of ALL the parameters of a procedure called by the current } { one. } { This procedure may be called before, as well as after g_return_from_proc } { is called. NOTE registers are not to be allocated through the register } { allocator here, because the register colouring has already occured !! } procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint); var reg : TSuperRegister; begin fprcount := 0; firstfpr := RS_F31; if not (po_assembler in current_procinfo.procdef.procoptions) then begin for reg := RS_F14 to RS_F31 do begin if reg in rg[R_FPUREGISTER].used_in_proc then begin fprcount := ord(RS_F31)-ord(reg)+1; firstfpr := reg; break; end; end; end; end; procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint); var reg : TSuperRegister; begin gprcount := 0; firstgpr := RS_R31; if not (po_assembler in current_procinfo.procdef.procoptions) then begin for reg := RS_R14 to RS_R31 do begin if reg in rg[R_INTREGISTER].used_in_proc then begin gprcount := ord(RS_R31)-ord(reg)+1; firstgpr := reg; break; end; end; end; end; var firstregfpu, firstreggpr: TSuperRegister; href: treference; needslinkreg: boolean; regcount : TSuperRegister; fprcount, gprcount : aint; begin { CR and LR only have to be saved in case they are modified by the current } { procedure, but currently this isn't checked, so save them always } { following is the entry code as described in "Altivec Programming } { Interface Manual", bar the saving of AltiVec registers } a_reg_alloc(list, NR_STACK_POINTER_REG); a_reg_alloc(list, NR_R0); calcFirstUsedFPR(firstregfpu, fprcount); calcFirstUsedGPR(firstreggpr, gprcount); // calculate real stack frame size localsize := tppcprocinfo(current_procinfo).calc_stackframe_size( gprcount, fprcount); // determine whether we need to save the link register needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and (pi_do_call in current_procinfo.flags)); // move link register to r0 if (needslinkreg) then begin list.concat(taicpu.op_reg(A_MFLR, NR_R0)); end; // save old stack frame pointer if (localsize > 0) then begin a_reg_alloc(list, NR_R12); list.concat(taicpu.op_reg_reg(A_MR, NR_R12, NR_STACK_POINTER_REG)); end; // save registers, FPU first, then GPR reference_reset_base(href, NR_STACK_POINTER_REG, -8); if (fprcount > 0) then begin for regcount := RS_F31 downto firstregfpu do begin a_loadfpu_reg_ref(list, OS_FLOAT, newreg(R_FPUREGISTER, regcount, R_SUBNONE), href); dec(href.offset, tcgsize2size[OS_FLOAT]); end; end; if (gprcount > 0) then begin for regcount := RS_R31 downto firstreggpr do begin a_load_reg_ref(list, OS_INT, OS_INT, newreg(R_INTREGISTER, regcount, R_SUBNONE), href); dec(href.offset, tcgsize2size[OS_INT]); end; end; // VMX registers not supported by FPC atm // we may need to store R0 (=LR) ourselves if (needslinkreg) then begin reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF); list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href)); end; // create stack frame if (not nostackframe) and (localsize > 0) then begin if (localsize <= high(smallint)) then begin reference_reset_base(href, NR_STACK_POINTER_REG, -localsize); a_load_store(list, A_STDU, NR_STACK_POINTER_REG, href); end else begin writeln(localsize); reference_reset_base(href, NR_NO, -localsize); // use R0 for loading the constant (which is definitely > 32k when entering // this branch) // inlined because it must not use temp registers because register allocations // have already been done :( { Code template: lis r0,ofs@highest ori r0,r0,ofs@higher sldi r0,r0,32 oris r0,r0,ofs@h ori r0,r0,ofs@l } list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48))); list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32))); list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32)); list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16))); list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset))); list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0)); end; end; // CR register not used by FPC atm // keep R1 allocated??? a_reg_dealloc(list, NR_R0); end; procedure tcgppc.g_proc_exit(list: taasmoutput; parasize: longint; nostackframe: boolean); procedure calcFirstUsedFPR(out firstfpr : TSuperRegister; out fprcount : aint); var reg : TSuperRegister; begin fprcount := 0; firstfpr := RS_F31; if not (po_assembler in current_procinfo.procdef.procoptions) then begin for reg := RS_F14 to RS_F31 do begin if reg in rg[R_FPUREGISTER].used_in_proc then begin fprcount := ord(RS_F31)-ord(reg)+1; firstfpr := reg; break; end; end; end; end; procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint); var reg : TSuperRegister; begin gprcount := 0; firstgpr := RS_R31; if not (po_assembler in current_procinfo.procdef.procoptions) then begin for reg := RS_R14 to RS_R31 do begin if reg in rg[R_INTREGISTER].used_in_proc then begin gprcount := ord(RS_R31)-ord(reg)+1; firstgpr := reg; break; end; end; end; end; { This procedure may be called before, as well as after g_stackframe_entry } { is called. NOTE registers are not to be allocated through the register } { allocator here, because the register colouring has already occured !! } var regcount, firstregfpu, firstreggpr: TSuperRegister; href: treference; needslinkreg : boolean; localsize, fprcount, gprcount: aint; begin calcFirstUsedFPR(firstregfpu, fprcount); calcFirstUsedGPR(firstreggpr, gprcount); // determine whether we need to restore the link register needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and (pi_do_call in current_procinfo.flags)); // calculate stack frame localsize := tppcprocinfo(current_procinfo).calc_stackframe_size( gprcount, fprcount); // CR register not supported // restore stack pointer if (not nostackframe) and (localsize > 0) then begin if (localsize <= high(smallint)) then begin list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize)); end else begin reference_reset_base(href, NR_NO, localsize); // use R0 for loading the constant (which is definitely > 32k when entering // this branch) // inlined because it must not use temp registers because register allocations // have already been done :( { Code template: lis r0,ofs@highest ori r0,ofs@higher sldi r0,r0,32 oris r0,r0,ofs@h ori r0,r0,ofs@l } list.concat(taicpu.op_reg_const(A_LIS, NR_R0, word(href.offset shr 48))); list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset shr 32))); list.concat(taicpu.op_reg_reg_const(A_SLDI, NR_R0, NR_R0, 32)); list.concat(taicpu.op_reg_reg_const(A_ORIS, NR_R0, NR_R0, word(href.offset shr 16))); list.concat(taicpu.op_reg_reg_const(A_ORI, NR_R0, NR_R0, word(href.offset))); list.concat(taicpu.op_reg_reg_reg(A_ADD, NR_R1, NR_R1, NR_R0)); end; end; // load registers, FPR first, then GPR {$note ts:todo change order of loading} reference_reset_base(href, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT]); if (fprcount > 0) then begin for regcount := RS_F31 downto firstregfpu do begin a_loadfpu_ref_reg(list, OS_FLOAT, href, newreg(R_FPUREGISTER, regcount, R_SUBNONE)); dec(href.offset, tcgsize2size[OS_FLOAT]); end; end; if (gprcount > 0) then begin for regcount := RS_R31 downto firstreggpr do begin a_load_ref_reg(list, OS_INT, OS_INT, href, newreg(R_INTREGISTER, regcount, R_SUBNONE)); dec(href.offset, tcgsize2size[OS_INT]); end; end; // VMX not supported... // restore LR (if needed) if (needslinkreg) then begin reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF); list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href)); list.concat(taicpu.op_reg(A_MTLR, NR_R0)); end; // generate return instruction list.concat(taicpu.op_none(A_BLR)); end; procedure tcgppc.a_loadaddr_ref_reg(list: taasmoutput; const ref: treference; r: tregister); var ref2, tmpref: treference; // register used to construct address tempreg : TRegister; begin ref2 := ref; fixref(list, ref2); { load a symbol } if assigned(ref2.symbol) or (ref2.offset < low(smallint)) or (ref2.offset > high(smallint)) then begin { add the symbol's value to the base of the reference, and if the } { reference doesn't have a base, create one } reference_reset(tmpref); tmpref.offset := ref2.offset; tmpref.symbol := ref2.symbol; tmpref.relsymbol := ref2.relsymbol; // load 64 bit reference into r. If the reference already has a base register, // first load the 64 bit value into a temp register, then add it to the result // register rD if (ref2.base <> NR_NO) then begin // already have a base register, so allocate a new one tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); end else begin tempreg := r; end; // code for loading a reference from a symbol into a register rD. (* lis rX,SYM@highest ori rX,SYM@higher sldi rX,rX,32 oris rX,rX,SYM@h ori rX,rX,SYM@l *) tmpref.refaddr := addr_highest; list.concat(taicpu.op_reg_ref(A_LIS, tempreg, tmpref)); tmpref.refaddr := addr_higher; list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref)); list.concat(taicpu.op_reg_reg_const(A_SLDI, tempreg, tempreg, 32)); tmpref.refaddr := addr_high; list.concat(taicpu.op_reg_reg_ref(A_ORIS, tempreg, tempreg, tmpref)); tmpref.refaddr := addr_low; list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref)); // if there's already a base register, add the temp register contents to // the base register if (ref2.base <> NR_NO) then begin list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base)); end; end else if ref2.offset <> 0 then begin { no symbol, but offset <> 0 } if ref2.base <> NR_NO then begin a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r) { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never} { occurs, so now only ref.offset has to be loaded } end else begin a_load_const_reg(list, OS_64, ref2.offset, r) end; end else if ref.index <> NR_NO then list.concat(taicpu.op_reg_reg_reg(A_ADD, r, ref2.base, ref2.index)) else if (ref2.base <> NR_NO) and (r <> ref2.base) then a_load_reg_reg(list, OS_ADDR, OS_ADDR, ref2.base, r) else begin list.concat(taicpu.op_reg_const(A_LI, r, 0)); end; end; { ************* concatcopy ************ } const maxmoveunit = 8; procedure tcgppc.g_concatcopy(list: taasmoutput; const source, dest: treference; len: aint); var countreg, tempreg: TRegister; src, dst: TReference; lab: tasmlabel; count, count2: longint; size: tcgsize; begin {$IFDEF extdebug} if len > high(aint) then internalerror(2002072704); {$ENDIF extdebug} { make sure short loads are handled as optimally as possible } if (len <= maxmoveunit) and (byte(len) in [1, 2, 4, 8]) then begin if len < 8 then begin size := int_cgsize(len); a_load_ref_ref(list, size, size, source, dest); end else begin a_reg_alloc(list, NR_F0); a_loadfpu_ref_reg(list, OS_F64, source, NR_F0); a_loadfpu_reg_ref(list, OS_F64, NR_F0, dest); a_reg_dealloc(list, NR_F0); end; exit; end; count := len div maxmoveunit; reference_reset(src); reference_reset(dst); { load the address of source into src.base } if (count > 4) or not issimpleref(source) or ((source.index <> NR_NO) and ((source.offset + len) > high(smallint))) then begin src.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); a_loadaddr_ref_reg(list, source, src.base); end else begin src := source; end; { load the address of dest into dst.base } if (count > 4) or not issimpleref(dest) or ((dest.index <> NR_NO) and ((dest.offset + len) > high(smallint))) then begin dst.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); a_loadaddr_ref_reg(list, dest, dst.base); end else begin dst := dest; end; { generate a loop } if count > 4 then begin { the offsets are zero after the a_loadaddress_ref_reg and just } { have to be set to 8. I put an Inc there so debugging may be } { easier (should offset be different from zero here, it will be } { easy to notice in the generated assembler } inc(dst.offset, 8); inc(src.offset, 8); list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8)); list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8)); countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); a_load_const_reg(list, OS_32, count, countreg); { explicitely allocate R_0 since it can be used safely here } { (for holding date that's being copied) } a_reg_alloc(list, NR_F0); objectlibrary.getjumplabel(lab); a_label(list, lab); list.concat(taicpu.op_reg_reg_const(A_SUBIC_, countreg, countreg, 1)); list.concat(taicpu.op_reg_ref(A_LFDU, NR_F0, src)); list.concat(taicpu.op_reg_ref(A_STFDU, NR_F0, dst)); a_jmp(list, A_BC, C_NE, 0, lab); a_reg_dealloc(list, NR_F0); len := len mod 8; end; count := len div 8; { unrolled loop } if count > 0 then begin a_reg_alloc(list, NR_F0); for count2 := 1 to count do begin a_loadfpu_ref_reg(list, OS_F64, src, NR_F0); a_loadfpu_reg_ref(list, OS_F64, NR_F0, dst); inc(src.offset, 8); inc(dst.offset, 8); end; a_reg_dealloc(list, NR_F0); len := len mod 8; end; if (len and 4) <> 0 then begin a_reg_alloc(list, NR_R0); a_load_ref_reg(list, OS_32, OS_32, src, NR_R0); a_load_reg_ref(list, OS_32, OS_32, NR_R0, dst); inc(src.offset, 4); inc(dst.offset, 4); a_reg_dealloc(list, NR_R0); end; { copy the leftovers } if (len and 2) <> 0 then begin a_reg_alloc(list, NR_R0); a_load_ref_reg(list, OS_16, OS_16, src, NR_R0); a_load_reg_ref(list, OS_16, OS_16, NR_R0, dst); inc(src.offset, 2); inc(dst.offset, 2); a_reg_dealloc(list, NR_R0); end; if (len and 1) <> 0 then begin a_reg_alloc(list, NR_R0); a_load_ref_reg(list, OS_8, OS_8, src, NR_R0); a_load_reg_ref(list, OS_8, OS_8, NR_R0, dst); a_reg_dealloc(list, NR_R0); end; end; procedure tcgppc.g_overflowcheck(list: taasmoutput; const l: tlocation; def: tdef); var hl: tasmlabel; flags : TResFlags; begin if not (cs_check_overflow in aktlocalswitches) then exit; objectlibrary.getjumplabel(hl); if not ((def.deftype = pointerdef) or ((def.deftype = orddef) and (torddef(def).typ in [u64bit, u16bit, u32bit, u8bit, uchar, bool8bit, bool16bit, bool32bit]))) then begin // ... instruction setting overflow flag ... // mfxerf R0 // mtcrf 128, R0 // ble cr0, label list.concat(taicpu.op_reg(A_MFXER, NR_R0)); list.concat(taicpu.op_const_reg(A_MTCRF, 128, NR_R0)); flags.cr := RS_CR0; flags.flag := F_LE; a_jmp_flags(list, flags, hl); end else a_jmp_cond(list, OC_AE, hl); a_call_name(list, 'FPC_OVERFLOW'); a_label(list, hl); end; procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const labelname: string; ioffset: longint); procedure loadvmttor11; var href: treference; begin reference_reset_base(href, NR_R3, 0); cg.a_load_ref_reg(list, OS_ADDR, OS_ADDR, href, NR_R11); end; procedure op_onr11methodaddr; var href: treference; begin if (procdef.extnumber = $FFFF) then Internalerror(200006139); { call/jmp vmtoffs(%eax) ; method offs } reference_reset_base(href, NR_R11, procdef._class.vmtmethodoffset(procdef.extnumber)); if not ((aint(href.offset) >= low(smallint)) and (aint(href.offset) <= high(smallint))) then begin {$warning ts:adapt me} list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11, smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) < 0)))); href.offset := smallint(href.offset and $FFFF); end; list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href)); // the loaded reference is a function descriptor reference, so deref again // (at ofs 0 there's the real pointer) {$warning ts:TODO: update GOT reference} reference_reset_base(href, NR_R11, 0); list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href)); list.concat(taicpu.op_reg(A_MTCTR, NR_R11)); list.concat(taicpu.op_none(A_BCTR)); // NOP needed for the linker...? list.concat(taicpu.op_none(A_NOP)); end; var make_global: boolean; begin if procdef.proctypeoption <> potype_none then Internalerror(200006137); if not assigned(procdef._class) or (procdef.procoptions * [po_classmethod, po_staticmethod, po_methodpointer, po_interrupt, po_iocheck] <> []) then Internalerror(200006138); if procdef.owner.symtabletype <> objectsymtable then Internalerror(200109191); make_global := false; if (not current_module.is_unit) or (cs_create_smart in aktmoduleswitches) or (procdef.owner.defowner.owner.symtabletype = globalsymtable) then make_global := true; if make_global then List.concat(Tai_symbol.Createname_global(labelname, AT_FUNCTION, 0)) else List.concat(Tai_symbol.Createname(labelname, AT_FUNCTION, 0)); { set param1 interface to self } g_adjust_self_value(list, procdef, ioffset); { case 4 } if po_virtualmethod in procdef.procoptions then begin loadvmttor11; op_onr11methodaddr; end { case 0 } else {$note ts:todo add GOT change?? - think not needed :) } list.concat(taicpu.op_sym(A_B, objectlibrary.newasmsymbol('.' + procdef.mangledname, AB_EXTERNAL, AT_FUNCTION))); List.concat(Tai_symbol_end.Createname(labelname)); end; {***************** This is private property, keep out! :) *****************} function tcgppc.issimpleref(const ref: treference): boolean; begin if (ref.base = NR_NO) and (ref.index <> NR_NO) then internalerror(200208101); result := not (assigned(ref.symbol)) and (((ref.index = NR_NO) and (ref.offset >= low(smallint)) and (ref.offset <= high(smallint))) or ((ref.index <> NR_NO) and (ref.offset = 0))); end; function tcgppc.fixref(list: taasmoutput; var ref: treference): boolean; var tmpreg: tregister; begin result := false; if (ref.base = NR_NO) then begin ref.base := ref.index; ref.base := NR_NO; end; if (ref.base <> NR_NO) then begin if (ref.index <> NR_NO) and ((ref.offset <> 0) or assigned(ref.symbol)) then begin result := true; tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); list.concat(taicpu.op_reg_reg_reg( A_ADD, tmpreg, ref.base, ref.index)); ref.index := NR_NO; ref.base := tmpreg; end end else if ref.index <> NR_NO then internalerror(200208102); end; procedure tcgppc.a_load_store(list: taasmoutput; op: tasmop; reg: tregister; ref: treference); var tmpreg: tregister; tmpref: treference; largeOffset: Boolean; begin tmpreg := NR_NO; // if we have to load/store from a symbol or large addresses, use a temporary register // containing the address if assigned(ref.symbol) or (ref.offset < low(smallint)) or (ref.offset > high(smallint)) then begin tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); reference_reset(tmpref); tmpref.symbol := ref.symbol; tmpref.relsymbol := ref.relsymbol; tmpref.offset := ref.offset; (* code template when there's no base register lis rT,SYM+offs@highesta addi rT,SYM+offs@highera sldi rT,rT,32 addis rT,rT,SYM+offs@ha ld rD,SYM+offs@l(rT) code template when there's a base register lis rT,SYM+offs@highesta addis rT,SYM+offs@highera sldi rT,rT,32 addis rT,rT,SYM+offs@ha add rT,rBase,rT ld rD,SYM+offs@l(rT) *) //list.concat(tai_comment.create(strpnew('symbol: ' + tmpref.symbol.name + ' offset: ' + inttostr(tmpref.offset)))); tmpref.refaddr := addr_highesta; list.concat(taicpu.op_reg_ref(A_LIS, tmpreg, tmpref)); tmpref.refaddr := addr_highera; list.concat(taicpu.op_reg_reg_ref(A_ORI, tmpreg, tmpreg, tmpref)); list.concat(taicpu.op_reg_reg_const(A_SLDI, tmpreg, tmpreg, 32)); tmpref.refaddr := addr_higha; list.concat(taicpu.op_reg_reg_ref(A_ORIS, tmpreg, tmpreg, tmpref)); if (ref.base <> NR_NO) then begin list.concat(taicpu.op_reg_reg_reg(A_ADD, tmpreg, tmpreg, ref.base)); end; tmpref.base := tmpreg; tmpref.refaddr := addr_low; list.concat(taicpu.op_reg_ref(op, reg, tmpref)); end else begin list.concat(taicpu.op_reg_ref(op, reg, ref)); end; end; procedure tcgppc.a_jmp(list: taasmoutput; op: tasmop; c: tasmcondflag; crval: longint; l: tasmlabel); var p: taicpu; begin p := taicpu.op_sym(op, objectlibrary.newasmsymbol(l.name, AB_EXTERNAL, AT_FUNCTION)); if op <> A_B then create_cond_norm(c, crval, p.condition); p.is_jmp := true; list.concat(p) end; begin cg := tcgppc.create; end.