{
    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
    Development Team

    This unit implements the ARM64 optimizer object

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}

Unit aoptcpu;

{$i fpcdefs.inc}

{$ifdef EXTDEBUG}
{$define DEBUG_AOPTCPU}
{$endif EXTDEBUG}

Interface

    uses
      globtype, globals,
      cutils,
      cgbase, cpubase, aasmtai, aasmcpu,
      aopt, aoptcpub, aoptarm, aoptobj;

    Type
      { ARM64 peephole optimizer object.  It derives from TARMAsmOptimizer and
        overrides the per-pass entry points as well as the two register
        queries RegLoadedWithNewValue and InstructionLoadsFromReg. }
      TCpuAsmOptimizer = class(TARMAsmOptimizer)
        { uses the same constructor as TAoptObj }

        { Entry points of the individual peephole passes }
        function PrePeepHoleOptsCpu(var p: tai): boolean; override;
        function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
        function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
        function PostPeepHoleOptsCpu(var p: tai): boolean; override;

        { Register queries }
        function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
        function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

        function LookForPostindexedPattern(var p : tai) : boolean;
      public
        { With these routines, there's optimisation code that's general for all ARM platforms }
        function OptPass1LDR(var p: tai): Boolean; override;
        function OptPass1STR(var p: tai): Boolean; override;
      private
        function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;

        { Pass 1 handlers }
        function OptPass1Shift(var p: tai): boolean;
        function OptPass1Data(var p: tai): boolean;
        function OptPass1FData(var p: tai): Boolean;
        function OptPass1STP(var p: tai): boolean;
        function OptPass1Mov(var p: tai): boolean;
        function OptPass1MOVZ(var p: tai): boolean;
        function OptPass1FMov(var p: tai): Boolean;
        function OptPass1B(var p: tai): boolean;
        function OptPass1SXTW(var p: tai): Boolean;

        { Pass 2 handlers }
        function OptPass2CSEL(var p: tai): Boolean;
        function OptPass2B(var p: tai): Boolean;
        function OptPass2LDRSTR(var p: tai): boolean;
        function OptPass2MOV(var p: tai): Boolean;

        { Post-peephole handlers }
        function PostPeepholeOptAND(var p: tai): Boolean;
        function PostPeepholeOptCMP(var p: tai): boolean;
        function PostPeepholeOptTST(var p: tai): Boolean;
      protected
        { Like UpdateUsedRegs, but ignores deallocations }
        class procedure UpdateIntRegsNoDealloc(var AUsedRegs: TAllUsedRegs; p: Tai); static;

        { Attempts to allocate a volatile integer register for use between p
          and hp, using AUsedRegs for the current register usage information.
          Returns NR_NO if no free register could be found }
        function GetIntRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai; DontAlloc: Boolean = False): TRegister;
      End;

Implementation

  uses
    aasmbase,
    aoptbase,
    aoptutils,
    cgutils,
    procinfo,
    paramgr,
    verbose;

{$ifdef DEBUG_AOPTCPU}
  const
    SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
{$else DEBUG_AOPTCPU}
  { Empty strings help the optimizer to remove string concatenations that
    won't ever appear to the user on release builds. [Kit] }
  const
    SPeepholeOptimization = '';
{$endif DEBUG_AOPTCPU}

    { Capacity limits; they size the fixed arrays inside TCSELTracking below
      (RegWrites holds MAX_CSEL_INSTRUCTIONS*2 entries, the Const* arrays hold
      MAX_CSEL_REGISTERS entries each). }
    MAX_CSEL_INSTRUCTIONS = 8;
    MAX_CSEL_REGISTERS = 30;

  type
    TCSELTrackingState = (
      tsInvalid,
      tsSimple,
      tsDetour,
      tsBranching,
      tsDouble,
      tsDoubleBranchSame,
      tsDoubleBranchDifferent,
      tsDoubleSecondBranching,
      tsProcessed
    );

    { Book-keeping object for the CSEL optimisation.
      NOTE(review): the original comment here read "For OptPass2Jcc", but the
      optimizer class in this unit declares OptPass2B and OptPass2CSEL rather
      than an OptPass2Jcc - confirm which pass actually uses this object. }
    TCSELTracking = object
    private
      CSELScore, ConstCount: LongInt;

      RegWrites: array[0..MAX_CSEL_INSTRUCTIONS*2 - 1] of TRegister;

      ConstRegs: array[0..MAX_CSEL_REGISTERS - 1] of TRegister;
      ConstVals: array[0..MAX_CSEL_REGISTERS - 1] of TCGInt;
      ConstSizes: array[0..MAX_CSEL_REGISTERS - 1] of TSubRegister; { May not match ConstRegs if one is shared over multiple CSELs. }
      ConstMovs: array[0..MAX_CSEL_REGISTERS - 1] of tai; { Location of initialisation instruction }

      ConstWriteSizes: array[0..first_int_imreg - 1] of TSubRegister; { Largest size of register written. }

      fOptimizer: TCpuAsmOptimizer;

      fLabel: TAsmSymbol;

      fInsertionPoint,
      fCondition,
      fInitialJump,
      fFirstMovBlock,
      fFirstMovBlockStop,
      fSecondJump,
      fThirdJump,
      fSecondMovBlock,
      fSecondMovBlockStop,
      fMidLabel,
      fEndLabel,
      fAllocationRange: tai;

      fState: TCSELTrackingState;

      function TryCSELConst(p, start, stop: tai; var Count: LongInt): Boolean;
      function InitialiseBlock(BlockStart, OneBeforeBlock: tai; out BlockStop: tai; out EndJump: tai): Boolean;
      function AnalyseMOVBlock(BlockStart, BlockStop, SearchStart: tai): LongInt;
    public
      RegisterTracking: TAllUsedRegs;
      constructor Init(Optimizer: TCpuAsmOptimizer; var p_initialjump, p_initialmov: tai; var AFirstLabel: TAsmLabel);
      destructor Done;
      procedure Process(out new_p: tai);
      { Read-only view of fState }
      property State: TCSELTrackingState read fState;
    end;

    PCSELTracking = ^TCSELTracking;


  { Returns True when p is an instruction whose condition field is C_None;
    returns False for every non-instruction entry. }
  function CanBeCond(p : tai) : boolean;
    begin
      Result := False;
      if p.typ = ait_instruction then
        Result := (taicpu(p).condition = C_None);
    end;


  { Reports whether the instruction hp loads reg with a new value.

    The result is False when hp is nil, is not an instruction, has one of the
    opcodes listed below, or has no operands.  Otherwise only operand 0 is
    inspected:
      - register operand:  True if it shares its super register with reg;
      - reference operand: True if it is pre- or post-indexed and its base
                           register is reg;
      - anything else:     False. }
  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      instr: taicpu;
    begin
      Result := False;

      if (not assigned(hp)) or (hp.typ <> ait_instruction) then
        exit;

      instr := taicpu(hp);

      case instr.opcode of
        { These operations do not write into a register at all

          LDR/STR with post/pre-indexed operations do not need special
          treatment because post-/preindexed does not mean that a register
          is loaded with a new value, it is only modified

          NOTE(review): A_STR is the only store opcode named here; an
          instruction such as the store-pair handled by OptPass1STP would fall
          through to the operand 0 check below - confirm that is intended. }
        A_STR, A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
          exit;
        else
          ;
      end;

      if instr.ops = 0 then
        exit;

      case instr.oper[0]^.typ of
        top_reg:
          Result := SuperRegistersEqual(instr.oper[0]^.reg, reg);
        top_ref:
          begin
            { Result is still False here, so it only needs updating for the
              write-back addressing modes }
            if instr.oper[0]^.ref^.addressmode in [AM_PREINDEXED, AM_POSTINDEXED] then
              Result := (instr.oper[0]^.ref^.base = reg);
          end;
        else
          ;
      end;
    end;


  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
    var
      p: taicpu;
      i: longint;
    begin
      instructionLoadsFromReg := false;
      if not (assigned(hp) and (hp.typ = ait_instruction)) then
        exit;
      p:=taicpu(hp);

      i:=1;

      { Start on oper[0]? }
      if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
        i:=0;

      while(i