@@ -62,6 +62,21 @@ Type
protected
function LookForPreindexedPattern(p: taicpu): boolean;
function LookForPostindexedPattern(p: taicpu): boolean;
+
+
+ { Individual optimisation routines }
+ function OptPass1DataCheckMov(var p: tai): Boolean;
+ function OptPass1ADDSUB(var p: tai): Boolean;
+ function OptPass1And(var p: tai): Boolean; override; { There's optimisation code that's general for all ARM platforms }
+ function OptPass1CMP(var p: tai): Boolean;
+ function OptPass1LDR(var p: tai): Boolean;
+ function OptPass1STM(var p: tai): Boolean;
+ function OptPass1STR(var p: tai): Boolean;
+ function OptPass1MOV(var p: tai): Boolean;
+ function OptPass1MUL(var p: tai): Boolean;
+ function OptPass1MVN(var p: tai): Boolean;
+ function OptPass1VMov(var p: tai): Boolean;
+ function OptPass1VOp(var p: tai): Boolean;
End;
TCpuPreRegallocScheduler = class(TAsmScheduler)
@@ -117,7 +132,7 @@ Implementation
(taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
(taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
begin
- asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
+ asml.insertafter(tai_comment.Create(strpnew('Peephole Optimization: CmpMovMov - Removed redundant moveq')), movp);
asml.remove(movp);
movp.free;
Result:=true;
@@ -355,7 +370,7 @@ Implementation
dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
if assigned(dealloc) then
begin
- DebugMsg('Peephole '+optimizer+' removed superfluous vmov', movp);
+ DebugMsg('Peephole Optimization: '+optimizer+' removed superfluous vmov', movp);
result:=true;
{ taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
@@ -498,7 +513,7 @@ Implementation
not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and
GenerateARMCode then
begin
- DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
+ DebugMsg('Peephole Optimization: Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
if taicpu(hp1).oper[2]^.typ=top_const then
begin
@@ -522,1297 +537,1408 @@ Implementation
end;
- function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
+ function TCpuAsmOptimizer.OptPass1ADDSUB(var p: tai): Boolean;
var
- hp1,hp2,hp3,hp4: tai;
- i, i2: longint;
- tempop: tasmop;
+ hp1,hp2: tai;
oldreg: tregister;
- dealloc: tai_regalloc;
+ begin
+ Result := OptPass1DataCheckMov(p);
+
+ {
+ change
+ add/sub reg2,reg1,const1
+ str/ldr reg3,[reg2,const2]
+ dealloc reg2
+ to
+ str/ldr reg3,[reg1,const2+/-const1]
+ }
+ if (not GenerateThumbCode) and
+ (taicpu(p).ops>2) and
+ (taicpu(p).oper[1]^.typ = top_reg) and
+ (taicpu(p).oper[2]^.typ = top_const) then
+ begin
+ hp1:=p;
+ while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
+ { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
+ MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
+ (taicpu(hp1).oper[1]^.typ = top_ref) and
+ (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
+ { don't optimize if the register is stored/overwritten }
+ (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
+ (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
+ (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
+              { new offset must be valid: either in the range of 8 or 12 bits, depending on the
+ ldr postfix }
+ (((taicpu(p).opcode=A_ADD) and
+ isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
+ ) or
+ ((taicpu(p).opcode=A_SUB) and
+ isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
+ )
+ ) do
+ begin
+                { neither reg1 nor reg2 may be changed in between }
+ if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
+ RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
+ break;
+                { reg2 must either be overwritten by the ldr or be deallocated afterwards }
+ if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
+ assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
+ begin
+ { remember last instruction }
+ hp2:=hp1;
+ DebugMsg('Peephole Optimization: Add/SubLdr2Ldr done', p);
+ hp1:=p;
+ { fix all ldr/str }
+ while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
+ begin
+ taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
+ if taicpu(p).opcode=A_ADD then
+ inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
+ else
+ dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
+ if hp1=hp2 then
+ break;
+ end;
+ RemoveCurrentP(p);
+ result:=true;
+ Exit;
+ end;
+ end;
+ end;
- function IsPowerOf2(const value: DWord): boolean; inline;
- begin
- Result:=(value and (value - 1)) = 0;
- end;
+ if (taicpu(p).condition = C_None) and
+ (taicpu(p).oppostfix = PF_None) and
+ LookForPreindexedPattern(taicpu(p)) then
+ begin
+ DebugMsg('Peephole Optimization: Add/Sub to Preindexed done', p);
+ RemoveCurrentP(p);
+ Result:=true;
+ Exit;
+ end;
+ end;
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1MUL(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1,hp2: tai;
|
|
|
+ oldreg: tregister;
|
|
|
begin
|
|
|
- result := false;
|
|
|
- case p.typ of
|
|
|
- ait_instruction:
|
|
|
- begin
|
|
|
- {
|
|
|
- change
|
|
|
- <op> reg,x,y
|
|
|
- cmp reg,#0
|
|
|
- into
|
|
|
- <op>s reg,x,y
|
|
|
- }
|
|
|
- { this optimization can applied only to the currently enabled operations because
|
|
|
- the other operations do not update all flags and FPC does not track flag usage }
|
|
|
- if MatchInstruction(p, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,A_EOR,A_AND,
|
|
|
- A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
|
|
|
- GetNextInstruction(p, hp1) and
|
|
|
- { mlas is only allowed in arm mode }
|
|
|
- ((taicpu(p).opcode<>A_MLA) or
|
|
|
- (current_settings.instructionset<>is_thumb)) and
|
|
|
- MatchInstruction(hp1, A_CMP, [C_None], [PF_None]) and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_const) and
|
|
|
- (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
|
|
|
- (taicpu(hp1).oper[1]^.val = 0) and
|
|
|
- GetNextInstruction(hp1, hp2) and
|
|
|
- { be careful here, following instructions could use other flags
|
|
|
- however after a jump fpc never depends on the value of flags }
|
|
|
- { All above instructions set Z and N according to the following
|
|
|
- Z := result = 0;
|
|
|
- N := result[31];
|
|
|
- EQ = Z=1; NE = Z=0;
|
|
|
- MI = N=1; PL = N=0; }
|
|
|
- (MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
|
|
|
- { mov is also possible, but only if there is no shifter operand, it could be an rxx,
|
|
|
- we are too lazy to check if it is rxx or something else }
|
|
|
- (MatchInstruction(hp2, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp2).ops=2))) and
|
|
|
- assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole OpCmp2OpS done', p);
|
|
|
-
|
|
|
- taicpu(p).oppostfix:=PF_S;
|
|
|
-
|
|
|
- { move flag allocation if possible }
|
|
|
- GetLastInstruction(hp1, hp2);
|
|
|
- hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
|
|
|
- if assigned(hp2) then
|
|
|
- begin
|
|
|
- asml.Remove(hp2);
|
|
|
- asml.insertbefore(hp2, p);
|
|
|
- end;
|
|
|
-
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- Result:=true;
|
|
|
- end
|
|
|
- else
|
|
|
- case taicpu(p).opcode of
|
|
|
- A_STR:
|
|
|
- begin
|
|
|
- { change
|
|
|
- str reg1,ref
|
|
|
- ldr reg2,ref
|
|
|
- into
|
|
|
- str reg1,ref
|
|
|
- mov reg2,reg1
|
|
|
- }
|
|
|
- if (taicpu(p).oper[1]^.typ = top_ref) and
|
|
|
- (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
- (taicpu(p).oppostfix=PF_None) and
|
|
|
- (taicpu(p).condition=C_None) and
|
|
|
- GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
|
|
|
- MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
|
|
|
- (taicpu(hp1).oper[1]^.typ=top_ref) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
- not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
|
|
|
- ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
|
|
|
- ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
|
|
|
- begin
|
|
|
- if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- end
|
|
|
- else
|
|
|
- begin
|
|
|
- taicpu(hp1).opcode:=A_MOV;
|
|
|
- taicpu(hp1).oppostfix:=PF_None;
|
|
|
- taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
|
|
|
- DebugMsg('Peephole StrLdr2StrMov 2 done', hp1);
|
|
|
- end;
|
|
|
- result := true;
|
|
|
- end
|
|
|
- { change
|
|
|
- str reg1,ref
|
|
|
- str reg2,ref
|
|
|
- into
|
|
|
- strd reg1,reg2,ref
|
|
|
- }
|
|
|
- else if (GenerateARMCode or GenerateThumb2Code) and
|
|
|
- (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
|
|
|
- (taicpu(p).oppostfix=PF_None) and
|
|
|
- (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
- GetNextInstruction(p,hp1) and
|
|
|
- MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
|
|
|
- not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
|
|
|
- (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
|
|
|
- { str ensures that either base or index contain no register, else ldr wouldn't
|
|
|
- use an offset either
|
|
|
- }
|
|
|
- (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
|
|
|
- (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
|
|
|
- (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
|
|
|
- (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
|
|
|
- AlignedToQWord(taicpu(p).oper[1]^.ref^) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole StrStr2Strd done', p);
|
|
|
- taicpu(p).oppostfix:=PF_D;
|
|
|
- taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
|
|
|
- taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
|
|
|
- taicpu(p).ops:=3;
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- result:=true;
|
|
|
- end;
|
|
|
- Result:=LookForPostindexedPattern(taicpu(p)) or Result;
|
|
|
- end;
|
|
|
- A_LDR:
|
|
|
- begin
|
|
|
- { change
|
|
|
- ldr reg1,ref
|
|
|
- ldr reg2,ref
|
|
|
- into ...
|
|
|
- }
|
|
|
- if (taicpu(p).oper[1]^.typ = top_ref) and
|
|
|
- (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
- GetNextInstruction(p,hp1) and
|
|
|
- { ldrd is not allowed here }
|
|
|
- MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
|
|
|
- begin
|
|
|
- {
|
|
|
- ...
|
|
|
- ldr reg1,ref
|
|
|
- mov reg2,reg1
|
|
|
- }
|
|
|
- if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
|
|
|
- RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
|
|
|
- (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
|
|
|
- (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
|
|
|
- begin
|
|
|
- if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole LdrLdr2Ldr done', hp1);
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- end
|
|
|
- else
|
|
|
- begin
|
|
|
- DebugMsg('Peephole LdrLdr2LdrMov done', hp1);
|
|
|
- taicpu(hp1).opcode:=A_MOV;
|
|
|
- taicpu(hp1).oppostfix:=PF_None;
|
|
|
- taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
|
|
|
- end;
|
|
|
- result := true;
|
|
|
- end
|
|
|
- {
|
|
|
- ...
|
|
|
- ldrd reg1,reg1+1,ref
|
|
|
- }
|
|
|
- else if (GenerateARMCode or GenerateThumb2Code) and
|
|
|
- (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
|
|
|
- { ldrd does not allow any postfixes ... }
|
|
|
- (taicpu(p).oppostfix=PF_None) and
|
|
|
- not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
|
|
|
- (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
|
|
|
- { ldr ensures that either base or index contain no register, else ldr wouldn't
|
|
|
- use an offset either
|
|
|
- }
|
|
|
- (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
|
|
|
- (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
|
|
|
- (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
|
|
|
- (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
|
|
|
- AlignedToQWord(taicpu(p).oper[1]^.ref^) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole LdrLdr2Ldrd done', p);
|
|
|
- taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
|
|
|
- taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
|
|
|
- taicpu(p).ops:=3;
|
|
|
- taicpu(p).oppostfix:=PF_D;
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- result:=true;
|
|
|
- end;
|
|
|
- end;
|
|
|
+ Result := OptPass1DataCheckMov(p);
|
|
|
+ {
|
|
|
+ Turn
|
|
|
+ mul reg0, z,w
|
|
|
+ sub/add x, y, reg0
|
|
|
+ dealloc reg0
|
|
|
+
|
|
|
+ into
|
|
|
+
|
|
|
+ mls/mla x,z,w,y
|
|
|
+ }
|
|
|
+ if (taicpu(p).condition = C_None) and
|
|
|
+ (taicpu(p).oppostfix = PF_None) and
|
|
|
+ (taicpu(p).ops=3) and
|
|
|
+ (taicpu(p).oper[0]^.typ = top_reg) and
|
|
|
+ (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
+ (taicpu(p).oper[2]^.typ = top_reg) and
|
|
|
+ GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
|
+ MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
|
|
|
+ (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
|
|
|
+ (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
|
|
|
+
|
|
|
+ (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
|
|
|
+ ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
|
|
|
+
|
|
|
+ // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
|
|
|
+ // TODO: A workaround would be to swap Rm and Rs
|
|
|
+ (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
|
|
|
+
|
|
|
+ (((taicpu(hp1).ops=3) and
|
|
|
+ (taicpu(hp1).oper[2]^.typ=top_reg) and
|
|
|
+ ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
|
|
|
+ (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
|
|
|
+ ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
|
|
|
+ (taicpu(hp1).opcode=A_ADD) and
|
|
|
+ (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
|
|
|
+ ((taicpu(hp1).ops=2) and
|
|
|
+ (taicpu(hp1).oper[1]^.typ=top_reg) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
|
|
|
+ (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
|
|
|
+ begin
|
|
|
+ if taicpu(hp1).opcode=A_ADD then
|
|
|
+ begin
|
|
|
+ taicpu(hp1).opcode:=A_MLA;
|
|
|
|
|
|
- {
|
|
|
- Change
|
|
|
-
|
|
|
- ldrb dst1, [REF]
|
|
|
- and dst2, dst1, #255
|
|
|
-
|
|
|
- into
|
|
|
-
|
|
|
- ldrb dst2, [ref]
|
|
|
- }
|
|
|
- if not(GenerateThumbCode) and
|
|
|
- (taicpu(p).oppostfix=PF_B) and
|
|
|
- GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
|
|
|
- (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
|
|
|
- (taicpu(hp1).oper[2]^.typ = top_const) and
|
|
|
- (taicpu(hp1).oper[2]^.val = $FF) and
|
|
|
- not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
|
|
|
- RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole LdrbAnd2Ldrb done', p);
|
|
|
- taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- result:=true;
|
|
|
- end;
|
|
|
- Result:=LookForPostindexedPattern(taicpu(p)) or Result;
|
|
|
- { Remove superfluous mov after ldr
|
|
|
- changes
|
|
|
- ldr reg1, ref
|
|
|
- mov reg2, reg1
|
|
|
- to
|
|
|
- ldr reg2, ref
|
|
|
-
|
|
|
- conditions are:
|
|
|
- * no ldrd usage
|
|
|
- * reg1 must be released after mov
|
|
|
- * mov can not contain shifterops
|
|
|
- * ldr+mov have the same conditions
|
|
|
- * mov does not set flags
|
|
|
- }
|
|
|
- if (taicpu(p).oppostfix<>PF_D) and
|
|
|
- GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
|
|
|
- Result:=true;
|
|
|
- end;
|
|
|
- A_MOV:
|
|
|
+ if taicpu(hp1).ops=3 then
|
|
|
+ begin
|
|
|
+ if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
|
|
|
+ oldreg:=taicpu(hp1).oper[2]^.reg
|
|
|
+ else
|
|
|
+ oldreg:=taicpu(hp1).oper[1]^.reg;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ oldreg:=taicpu(hp1).oper[0]^.reg;
|
|
|
+
|
|
|
+ taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
|
|
|
+ taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
|
|
|
+ taicpu(hp1).loadreg(3,oldreg);
|
|
|
+
|
|
|
+ DebugMsg('Peephole Optimization: MulAdd2MLA done', p);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ taicpu(hp1).opcode:=A_MLS;
|
|
|
+
|
|
|
+ taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
|
|
|
+
|
|
|
+ if taicpu(hp1).ops=2 then
|
|
|
+ taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
|
|
|
+ else
|
|
|
+ taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
|
|
|
+
|
|
|
+ taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
|
|
|
+
|
|
|
+ DebugMsg('Peephole Optimization: MulSub2MLS done', p);
|
|
|
+ AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
|
|
|
+ AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
|
|
|
+ AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
|
|
|
+
|
|
|
+ end;
|
|
|
+
|
|
|
+ taicpu(hp1).ops:=4;
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
+ end
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1And(var p: tai): Boolean;
|
|
|
+ begin
|
|
|
+ Result := OptPass1DataCheckMov(p);
|
|
|
+ Result := inherited OptPass1And(p) or Result;
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1DataCheckMov(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1: tai;
|
|
|
+ begin
|
|
|
+ {
|
|
|
+ change
|
|
|
+ op reg1, ...
|
|
|
+ mov reg2, reg1
|
|
|
+ to
|
|
|
+ op reg2, ...
|
|
|
+ }
|
|
|
+ Result := (taicpu(p).ops >= 3) and
|
|
|
+ GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
+ RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1CMP(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1, hp2, hp_last: tai;
|
|
|
+ MovRem1, MovRem2: Boolean;
|
|
|
+ begin
|
|
|
+ Result := False;
|
|
|
+
|
|
|
+ { These optimizations can be applied only to the currently enabled operations because
|
|
|
+ the other operations do not update all flags and FPC does not track flag usage }
|
|
|
+ if (taicpu(p).condition = C_None) and
|
|
|
+ (taicpu(p).oper[1]^.typ = top_const) and
|
|
|
+ GetNextInstruction(p, hp1) then
|
|
|
+ begin
|
|
|
+ {
|
|
|
+ change
|
|
|
+ cmp reg,const1
|
|
|
+ moveq reg,const1
|
|
|
+ movne reg,const2
|
|
|
+ to
|
|
|
+ cmp reg,const1
|
|
|
+ movne reg,const2
|
|
|
+ }
|
|
|
+ if MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
|
|
|
+ (taicpu(hp1).oper[1]^.typ = top_const) and
|
|
|
+ GetNextInstruction(hp1, hp2) and
|
|
|
+ MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
|
|
|
+ (taicpu(hp2).oper[1]^.typ = top_const) then
|
|
|
+ begin
|
|
|
+ MovRem1 := RemoveRedundantMove(p, hp1, asml);
|
|
|
+ MovRem2 := RemoveRedundantMove(p, hp2, asml);
|
|
|
+
|
|
|
+ Result:= MovRem1 or MovRem2;
|
|
|
+
|
|
|
+ { Make sure that hp1 is still the next instruction after p }
|
|
|
+ if MovRem1 then
|
|
|
+ if MovRem2 then
|
|
|
begin
|
|
|
- { fold
|
|
|
- mov reg1,reg0, shift imm1
|
|
|
- mov reg1,reg1, shift imm2
|
|
|
- }
|
|
|
- if (taicpu(p).ops=3) and
|
|
|
- (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
|
|
|
- getnextinstruction(p,hp1) and
|
|
|
- MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
|
|
|
- (taicpu(hp1).ops=3) and
|
|
|
- MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
|
|
|
- MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
|
|
|
- (taicpu(hp1).oper[2]^.typ = top_shifterop) and
|
|
|
- (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
|
|
|
- begin
|
|
|
- { fold
|
|
|
- mov reg1,reg0, lsl 16
|
|
|
- mov reg1,reg1, lsr 16
|
|
|
- strh reg1, ...
|
|
|
- dealloc reg1
|
|
|
- to
|
|
|
- strh reg1, ...
|
|
|
- dealloc reg1
|
|
|
- }
|
|
|
- if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
|
|
|
- (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
|
|
|
- (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
|
|
|
- getnextinstruction(hp1,hp2) and
|
|
|
- MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
|
|
|
- MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
|
|
|
- begin
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
|
|
|
- if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole optimizer removed superfluous 16 Bit zero extension', hp1);
|
|
|
- taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
|
|
|
- asml.remove(p);
|
|
|
- asml.remove(hp1);
|
|
|
- p.free;
|
|
|
- hp1.free;
|
|
|
- p:=hp2;
|
|
|
- Result:=true;
|
|
|
- end;
|
|
|
- end
|
|
|
- { fold
|
|
|
- mov reg1,reg0, shift imm1
|
|
|
- mov reg1,reg1, shift imm2
|
|
|
- to
|
|
|
- mov reg1,reg0, shift imm1+imm2
|
|
|
- }
|
|
|
- else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
|
|
|
- { asr makes no use after a lsr, the asr can be foled into the lsr }
|
|
|
- ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
|
|
|
- begin
|
|
|
- inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
|
|
|
- { avoid overflows }
|
|
|
- if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
|
|
|
- case taicpu(p).oper[2]^.shifterop^.shiftmode of
|
|
|
- SM_ROR:
|
|
|
- taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
|
|
|
- SM_ASR:
|
|
|
- taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
|
|
|
- SM_LSR,
|
|
|
- SM_LSL:
|
|
|
- begin
|
|
|
- hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
|
|
|
- InsertLLItem(p.previous, p.next, hp2);
|
|
|
- p.free;
|
|
|
- p:=hp2;
|
|
|
- end;
|
|
|
- else
|
|
|
- internalerror(2008072803);
|
|
|
- end;
|
|
|
- DebugMsg('Peephole ShiftShift2Shift 1 done', p);
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- result := true;
|
|
|
- end
|
|
|
- { fold
|
|
|
- mov reg1,reg0, shift imm1
|
|
|
- mov reg1,reg1, shift imm2
|
|
|
- mov reg1,reg1, shift imm3 ...
|
|
|
- mov reg2,reg1, shift imm3 ...
|
|
|
- }
|
|
|
- else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
|
|
|
- MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
|
|
|
- (taicpu(hp2).ops=3) and
|
|
|
- MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
|
|
|
- RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
|
|
|
- (taicpu(hp2).oper[2]^.typ = top_shifterop) and
|
|
|
- (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
|
|
|
- begin
|
|
|
- { mov reg1,reg0, lsl imm1
|
|
|
- mov reg1,reg1, lsr/asr imm2
|
|
|
- mov reg2,reg1, lsl imm3 ...
|
|
|
- to
|
|
|
- mov reg1,reg0, lsl imm1
|
|
|
- mov reg2,reg1, lsr/asr imm2-imm3
|
|
|
- if
|
|
|
- imm1>=imm2
|
|
|
- }
|
|
|
- if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
|
|
|
- (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
|
|
|
- begin
|
|
|
- if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
|
|
|
- begin
|
|
|
- if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
|
|
|
- not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole ShiftShiftShift2ShiftShift 1a done', p);
|
|
|
- inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
|
|
|
- taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
|
|
|
- asml.remove(hp1);
|
|
|
- asml.remove(hp2);
|
|
|
- hp1.free;
|
|
|
- hp2.free;
|
|
|
-
|
|
|
- if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
|
|
|
- begin
|
|
|
- taicpu(p).freeop(1);
|
|
|
- taicpu(p).freeop(2);
|
|
|
- taicpu(p).loadconst(1,0);
|
|
|
- end;
|
|
|
- result := true;
|
|
|
- end;
|
|
|
- end
|
|
|
- else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole ShiftShiftShift2ShiftShift 1b done', p);
|
|
|
-
|
|
|
- dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
|
|
|
- taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
|
|
|
- asml.remove(hp2);
|
|
|
- hp2.free;
|
|
|
- result := true;
|
|
|
- end;
|
|
|
- end
|
|
|
- { mov reg1,reg0, lsr/asr imm1
|
|
|
- mov reg1,reg1, lsl imm2
|
|
|
- mov reg1,reg1, lsr/asr imm3 ...
|
|
|
-
|
|
|
- if imm3>=imm1 and imm2>=imm1
|
|
|
- to
|
|
|
- mov reg1,reg0, lsl imm2-imm1
|
|
|
- mov reg1,reg1, lsr/asr imm3 ...
|
|
|
- }
|
|
|
- else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
|
|
|
- (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
|
|
|
- (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
|
|
|
- (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
|
|
|
- begin
|
|
|
- dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
|
|
|
- taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
|
|
|
- DebugMsg('Peephole ShiftShiftShift2ShiftShift 2 done', p);
|
|
|
- asml.remove(p);
|
|
|
- p.free;
|
|
|
- p:=hp2;
|
|
|
- if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
|
|
|
- begin
|
|
|
- taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- p:=hp2;
|
|
|
- end;
|
|
|
- result := true;
|
|
|
- end;
|
|
|
- end;
|
|
|
- end;
|
|
|
- { Change the common
|
|
|
- mov r0, r0, lsr #xxx
|
|
|
- and r0, r0, #yyy/bic r0, r0, #xxx
|
|
|
-
|
|
|
- and remove the superfluous and/bic if possible
|
|
|
-
|
|
|
- This could be extended to handle more cases.
|
|
|
- }
|
|
|
- if (taicpu(p).ops=3) and
|
|
|
- (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
|
|
|
- GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- (hp1.typ=ait_instruction) and
|
|
|
- (taicpu(hp1).ops>=1) and
|
|
|
- (taicpu(hp1).oper[0]^.typ=top_reg) and
|
|
|
- (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
|
|
|
- RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
|
|
|
- begin
|
|
|
- if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
|
|
|
- MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
- (taicpu(hp1).ops=3) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
|
|
|
- (taicpu(hp1).oper[2]^.typ = top_const) and
|
|
|
- { Check if the AND actually would only mask out bits being already zero because of the shift
|
|
|
- }
|
|
|
- ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hp1).oper[2]^.val) =
|
|
|
- ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole LsrAnd2Lsr done', hp1);
|
|
|
- taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- result:=true;
|
|
|
- end
|
|
|
- else if MatchInstruction(hp1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
- (taicpu(hp1).ops=3) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
|
|
|
- (taicpu(hp1).oper[2]^.typ = top_const) and
|
|
|
- { Check if the BIC actually would only mask out bits beeing already zero because of the shift }
|
|
|
- (taicpu(hp1).oper[2]^.val<>0) and
|
|
|
- (BsfDWord(taicpu(hp1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole LsrBic2Lsr done', hp1);
|
|
|
- taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
|
|
|
- asml.remove(hp1);
|
|
|
- hp1.free;
|
|
|
- result:=true;
|
|
|
- end;
|
|
|
- end;
|
|
|
- { Change
|
|
|
- mov rx, ry, lsr/ror #xxx
|
|
|
- uxtb/uxth rz,rx/and rz,rx,0xFF
|
|
|
- dealloc rx
|
|
|
-
|
|
|
- to
|
|
|
-
|
|
|
- uxtb/uxth rz,ry,ror #xxx
|
|
|
- }
|
|
|
- if (taicpu(p).ops=3) and
|
|
|
- (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
|
|
|
- (GenerateThumb2Code) and
|
|
|
- GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
|
|
|
- begin
|
|
|
- if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
|
|
|
- (taicpu(hp1).ops = 2) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
|
|
|
- MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
|
|
- begin
|
|
|
- taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
|
|
|
- taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
|
|
|
- taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
|
|
|
- taicpu(hp1).ops := 3;
|
|
|
+ if not GetNextInstruction(p, hp1) then
|
|
|
+ Exit;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ hp1 := hp2;
|
|
|
+ end;
|
|
|
|
|
|
- GetNextInstruction(p,hp1);
|
|
|
+ {
|
|
|
+ change
|
|
|
+ <op> reg,x,y
|
|
|
+ cmp reg,#0
|
|
|
+ into
|
|
|
+ <op>s reg,x,y
|
|
|
+ }
|
|
|
+ if (taicpu(p).oppostfix = PF_None) and
|
|
|
+ (taicpu(p).oper[1]^.val = 0) and
|
|
|
+ { be careful here, following instructions could use other flags
|
|
|
+ however after a jump fpc never depends on the value of flags }
|
|
|
+ { All above instructions set Z and N according to the following
|
|
|
+ Z := result = 0;
|
|
|
+ N := result[31];
|
|
|
+ EQ = Z=1; NE = Z=0;
|
|
|
+ MI = N=1; PL = N=0; }
|
|
|
+ (MatchInstruction(hp1, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
|
|
|
+ { mov is also possible, but only if there is no shifter operand, it could be an rxx,
|
|
|
+ we are too lazy to check if it is rxx or something else }
|
|
|
+ (MatchInstruction(hp1, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp1).ops=2))) and
|
|
|
+ GetLastInstruction(p, hp_last) and
|
|
|
+ MatchInstruction(hp_last, [A_ADC,A_ADD,A_BIC,A_SUB,A_MUL,A_MVN,A_MOV,A_ORR,
|
|
|
+ A_EOR,A_AND,A_RSB,A_RSC,A_SBC,A_MLA], [C_None], [PF_None]) and
|
|
|
+ (
|
|
|
+ { mlas is only allowed in arm mode }
|
|
|
+ (taicpu(hp_last).opcode<>A_MLA) or
|
|
|
+ (current_settings.instructionset<>is_thumb)
|
|
|
+ ) and
|
|
|
+ (taicpu(hp_last).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
|
|
|
+ assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp1.Next))) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: OpCmp2OpS done', hp_last);
|
|
|
|
|
|
- asml.Remove(p);
|
|
|
- p.Free;
|
|
|
+ taicpu(hp_last).oppostfix:=PF_S;
|
|
|
|
|
|
- p:=hp1;
|
|
|
+ { move flag allocation if possible }
|
|
|
+ hp1:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp_last.Next));
|
|
|
+ if assigned(hp1) then
|
|
|
+ begin
|
|
|
+ asml.Remove(hp1);
|
|
|
+ asml.insertbefore(hp1, hp_last);
|
|
|
+ end;
|
|
|
|
|
|
- result:=true;
|
|
|
- exit;
|
|
|
- end
|
|
|
- else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
|
|
|
- (taicpu(hp1).ops=2) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
|
|
|
- MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
|
|
- begin
|
|
|
- taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
|
|
|
- taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
|
|
|
- taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
|
|
|
- taicpu(hp1).ops := 3;
|
|
|
-
|
|
|
- GetNextInstruction(p,hp1);
|
|
|
-
|
|
|
- asml.Remove(p);
|
|
|
- p.Free;
|
|
|
-
|
|
|
- p:=hp1;
|
|
|
-
|
|
|
- result:=true;
|
|
|
- exit;
|
|
|
- end
|
|
|
- else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
|
|
|
- (taicpu(hp1).ops = 3) and
|
|
|
- (taicpu(hp1).oper[2]^.typ = top_const) and
|
|
|
- (taicpu(hp1).oper[2]^.val = $FF) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
|
|
|
- MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
|
|
- begin
|
|
|
- taicpu(hp1).ops := 3;
|
|
|
- taicpu(hp1).opcode := A_UXTB;
|
|
|
- taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
|
|
|
- taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
|
|
|
- taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result:=true;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- GetNextInstruction(p,hp1);
|
|
|
|
|
|
- asml.Remove(p);
|
|
|
- p.Free;
|
|
|
+ function TCpuAsmOptimizer.OptPass1LDR(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1: tai;
|
|
|
+ begin
|
|
|
+ Result := False;
|
|
|
+
|
|
|
+ { change
|
|
|
+ ldr reg1,ref
|
|
|
+ ldr reg2,ref
|
|
|
+ into ...
|
|
|
+ }
|
|
|
+ if (taicpu(p).oper[1]^.typ = top_ref) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
+ GetNextInstruction(p,hp1) and
|
|
|
+ { ldrd is not allowed here }
|
|
|
+ MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
|
|
|
+ begin
|
|
|
+ {
|
|
|
+ ...
|
|
|
+ ldr reg1,ref
|
|
|
+ mov reg2,reg1
|
|
|
+ }
|
|
|
+ if (taicpu(p).oppostfix=taicpu(hp1).oppostfix) and
|
|
|
+ RefsEqual(taicpu(p).oper[1]^.ref^,taicpu(hp1).oper[1]^.ref^) and
|
|
|
+ (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.index) and
|
|
|
+ (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and
|
|
|
+ (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) then
|
|
|
+ begin
|
|
|
+ if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: LdrLdr2Ldr done', hp1);
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: LdrLdr2LdrMov done', hp1);
|
|
|
+ taicpu(hp1).opcode:=A_MOV;
|
|
|
+ taicpu(hp1).oppostfix:=PF_None;
|
|
|
+ taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
|
|
|
+ end;
|
|
|
+ result := true;
|
|
|
+ end
|
|
|
+ {
|
|
|
+ ...
|
|
|
+ ldrd reg1,reg1+1,ref
|
|
|
+ }
|
|
|
+ else if (GenerateARMCode or GenerateThumb2Code) and
|
|
|
+ (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
|
|
|
+ { ldrd does not allow any postfixes ... }
|
|
|
+ (taicpu(p).oppostfix=PF_None) and
|
|
|
+ not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
|
|
|
+ (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
|
|
|
+ { ldr ensures that either base or index contain no register, else ldr wouldn't
|
|
|
+ use an offset either
|
|
|
+ }
|
|
|
+ (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) and
|
|
|
+ (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
|
|
|
+ AlignedToQWord(taicpu(p).oper[1]^.ref^) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: LdrLdr2Ldrd done', p);
|
|
|
+ taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
|
|
|
+ taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
|
|
|
+ taicpu(p).ops:=3;
|
|
|
+ taicpu(p).oppostfix:=PF_D;
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ result:=true;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- p:=hp1;
|
|
|
+ {
|
|
|
+ Change
|
|
|
+
|
|
|
+ ldrb dst1, [REF]
|
|
|
+ and dst2, dst1, #255
|
|
|
+
|
|
|
+ into
|
|
|
+
|
|
|
+ ldrb dst2, [ref]
|
|
|
+ }
|
|
|
+ if not(GenerateThumbCode) and
|
|
|
+ (taicpu(p).oppostfix=PF_B) and
|
|
|
+ GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
+ MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
|
|
|
+ (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
|
|
|
+ (taicpu(hp1).oper[2]^.typ = top_const) and
|
|
|
+ (taicpu(hp1).oper[2]^.val = $FF) and
|
|
|
+ not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
|
|
|
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: LdrbAnd2Ldrb done', p);
|
|
|
+ taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ result:=true;
|
|
|
+ end;
|
|
|
+ Result:=LookForPostindexedPattern(taicpu(p)) or Result;
|
|
|
+ { Remove superfluous mov after ldr
|
|
|
+ changes
|
|
|
+ ldr reg1, ref
|
|
|
+ mov reg2, reg1
|
|
|
+ to
|
|
|
+ ldr reg2, ref
|
|
|
+
|
|
|
+ conditions are:
|
|
|
+ * no ldrd usage
|
|
|
+ * reg1 must be released after mov
|
|
|
+ * mov can not contain shifterops
|
|
|
+ * ldr+mov have the same conditions
|
|
|
+ * mov does not set flags
|
|
|
+ }
|
|
|
+ if (taicpu(p).oppostfix<>PF_D) and
|
|
|
+ GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
+ RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
|
|
|
+ Result:=true;
|
|
|
+ end;
|
|
|
|
|
|
- result:=true;
|
|
|
- exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
- {
|
|
|
- optimize
|
|
|
- mov rX, yyyy
|
|
|
- ....
|
|
|
- }
|
|
|
- if (taicpu(p).ops = 2) and
|
|
|
- GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
|
- (tai(hp1).typ = ait_instruction) then
|
|
|
- begin
|
|
|
- {
|
|
|
- This removes the mul from
|
|
|
- mov rX,0
|
|
|
- ...
|
|
|
- mul ...,rX,...
|
|
|
- }
|
|
|
- if false and (taicpu(p).oper[1]^.typ = top_const) and
|
|
|
- (taicpu(p).oper[1]^.val=0) and
|
|
|
- MatchInstruction(hp1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
- (((taicpu(hp1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^)) or
|
|
|
- ((taicpu(hp1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^))) then
|
|
|
- begin
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
|
|
|
- DebugMsg('Peephole MovMUL/MLA2Mov0 done', p);
|
|
|
- if taicpu(hp1).opcode=A_MUL then
|
|
|
- taicpu(hp1).loadconst(1,0)
|
|
|
- else
|
|
|
- taicpu(hp1).loadreg(1,taicpu(hp1).oper[3]^.reg);
|
|
|
- taicpu(hp1).ops:=2;
|
|
|
- taicpu(hp1).opcode:=A_MOV;
|
|
|
- if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
|
|
|
- RemoveCurrentP(p);
|
|
|
- Result:=true;
|
|
|
- exit;
|
|
|
- end
|
|
|
- else if (taicpu(p).oper[1]^.typ = top_const) and
|
|
|
- (taicpu(p).oper[1]^.val=0) and
|
|
|
- MatchInstruction(hp1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[3]^) then
|
|
|
- begin
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
|
|
|
- DebugMsg('Peephole MovMLA2MUL 1 done', p);
|
|
|
- taicpu(hp1).ops:=3;
|
|
|
- taicpu(hp1).opcode:=A_MUL;
|
|
|
- if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
|
|
|
- RemoveCurrentP(p);
|
|
|
- Result:=true;
|
|
|
- exit;
|
|
|
- end
|
|
|
- {
|
|
|
- This changes the very common
|
|
|
- mov r0, #0
|
|
|
- str r0, [...]
|
|
|
- mov r0, #0
|
|
|
- str r0, [...]
|
|
|
-
|
|
|
- and removes all superfluous mov instructions
|
|
|
- }
|
|
|
- else if (taicpu(p).oper[1]^.typ = top_const) and
|
|
|
- (taicpu(hp1).opcode=A_STR) then
|
|
|
- while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
|
|
|
- GetNextInstruction(hp1, hp2) and
|
|
|
- MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
|
|
|
- (taicpu(hp2).ops = 2) and
|
|
|
- MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
|
|
|
- MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
|
|
|
- begin
|
|
|
- DebugMsg('Peephole MovStrMov done', hp2);
|
|
|
- GetNextInstruction(hp2,hp1);
|
|
|
- asml.remove(hp2);
|
|
|
- hp2.free;
|
|
|
- result:=true;
|
|
|
- if not assigned(hp1) then break;
|
|
|
- end
|
|
|
- {
|
|
|
- This removes the first mov from
|
|
|
- mov rX,...
|
|
|
- mov rX,...
|
|
|
- }
|
|
|
- else if taicpu(hp1).opcode=A_MOV then
|
|
|
- while MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
- (taicpu(hp1).ops = 2) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
|
|
|
- { don't remove the first mov if the second is a mov rX,rX }
|
|
|
- not(MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^)) do
|
|
|
- begin
|
|
|
- DebugMsg('Peephole MovMov done', p);
|
|
|
- asml.remove(p);
|
|
|
- p.free;
|
|
|
- p:=hp1;
|
|
|
- GetNextInstruction(hp1,hp1);
|
|
|
- result:=true;
|
|
|
- if not assigned(hp1) then
|
|
|
- break;
|
|
|
- end;
|
|
|
- if RedundantMovProcess(p,hp1) then
|
|
|
- begin
|
|
|
- Result:=true;
|
|
|
- { p might not point at a mov anymore }
|
|
|
- exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
|
|
|
- { Fold the very common sequence
|
|
|
- mov regA, regB
|
|
|
- ldr* regA, [regA]
|
|
|
- to
|
|
|
- ldr* regA, [regB]
|
|
|
- CAUTION! If this one is successful p might not be a mov instruction anymore!
|
|
|
- }
|
|
|
- if (taicpu(p).opcode = A_MOV) and
|
|
|
- (taicpu(p).ops = 2) and
|
|
|
- (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
- (taicpu(p).oppostfix = PF_NONE) and
|
|
|
- GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_ref) and
|
|
|
- { We can change the base register only when the instruction uses AM_OFFSET }
|
|
|
- ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
|
|
|
- ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
|
|
|
- ) and
|
|
|
- not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
|
|
|
-
|
|
|
- // Make sure that Thumb code doesn't propagate a high register into a reference
|
|
|
- ((GenerateThumbCode and
|
|
|
- (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)) or
|
|
|
- (not GenerateThumbCode)) and
|
|
|
-
|
|
|
- RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole MovLdr2Ldr done', hp1);
|
|
|
- if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
|
|
|
- taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
|
|
|
-
|
|
|
- if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
|
|
|
- taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
|
|
|
-
|
|
|
- dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
|
|
|
- if Assigned(dealloc) then
|
|
|
- begin
|
|
|
- asml.remove(dealloc);
|
|
|
- asml.InsertAfter(dealloc,hp1);
|
|
|
- end;
|
|
|
-
|
|
|
- GetNextInstruction(p, hp1);
|
|
|
- asml.remove(p);
|
|
|
- p.free;
|
|
|
- p:=hp1;
|
|
|
- result:=true;
|
|
|
- end;
|
|
|
+ function TCpuAsmOptimizer.OptPass1STM(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1, hp2, hp3, hp4: tai;
|
|
|
+ begin
|
|
|
+ Result := False;
|
|
|
+
|
|
|
+ {
|
|
|
+ change
|
|
|
+ stmfd r13!,[r14]
|
|
|
+ sub r13,r13,#4
|
|
|
+ bl abc
|
|
|
+ add r13,r13,#4
|
|
|
+ ldmfd r13!,[r15]
|
|
|
+ into
|
|
|
+ b abc
|
|
|
+ }
|
|
|
+ if not(ts_thumb_interworking in current_settings.targetswitches) and
|
|
|
+ (taicpu(p).condition = C_None) and
|
|
|
+ (taicpu(p).oppostfix = PF_FD) and
|
|
|
+ (taicpu(p).oper[0]^.typ = top_ref) and
|
|
|
+ (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
|
|
|
+ (taicpu(p).oper[0]^.ref^.base=NR_NO) and
|
|
|
+ (taicpu(p).oper[0]^.ref^.offset=0) and
|
|
|
+ (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
|
|
|
+ (taicpu(p).oper[1]^.typ = top_regset) and
|
|
|
+ (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
|
|
|
+ GetNextInstruction(p, hp1) and
|
|
|
+ MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
|
|
|
+ (taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
|
+ (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
|
|
|
+ (taicpu(hp1).oper[2]^.typ = top_const) and
|
|
|
+
|
|
|
+ GetNextInstruction(hp1, hp2) and
|
|
|
+ SkipEntryExitMarker(hp2, hp2) and
|
|
|
+
|
|
|
+ MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
|
|
|
+ (taicpu(hp2).oper[0]^.typ = top_ref) and
|
|
|
+
|
|
|
+ GetNextInstruction(hp2, hp3) and
|
|
|
+ SkipEntryExitMarker(hp3, hp3) and
|
|
|
+ MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
|
|
|
+
|
|
|
+ GetNextInstruction(hp3, hp4) and
|
|
|
+ MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
|
|
|
+ (taicpu(hp4).oper[1]^.typ = top_regset) and
|
|
|
+ (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
|
|
|
+ begin
|
|
|
+ asml.Remove(hp1);
|
|
|
+ asml.Remove(hp3);
|
|
|
+ asml.Remove(hp4);
|
|
|
+ taicpu(hp2).opcode:=A_B;
|
|
|
+ hp1.free;
|
|
|
+ hp3.free;
|
|
|
+ hp4.free;
|
|
|
+ RemoveCurrentp(p, hp2);
|
|
|
+ DebugMsg('Peephole Optimization: Bl2B done', p);
|
|
|
+ Result := True;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- { This folds shifterops into following instructions
|
|
|
- mov r0, r1, lsl #8
|
|
|
- add r2, r3, r0
|
|
|
-
|
|
|
- to
|
|
|
-
|
|
|
- add r2, r3, r1, lsl #8
|
|
|
- CAUTION! If this one is successful p might not be a mov instruction anymore!
|
|
|
- }
|
|
|
- if (taicpu(p).opcode = A_MOV) and
|
|
|
- (taicpu(p).ops = 3) and
|
|
|
- (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
- (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
- (taicpu(p).oppostfix = PF_NONE) and
|
|
|
- GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
|
|
|
- A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
|
|
|
- A_CMP, A_CMN],
|
|
|
- [taicpu(p).condition], [PF_None]) and
|
|
|
- (not ((GenerateThumb2Code) and
|
|
|
- (taicpu(hp1).opcode in [A_SBC]) and
|
|
|
- (((taicpu(hp1).ops=3) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or
|
|
|
- ((taicpu(hp1).ops=2) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and
|
|
|
- RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
|
|
|
- (taicpu(hp1).ops >= 2) and
|
|
|
- {Currently we can't fold into another shifterop}
|
|
|
- (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
|
|
|
- {Folding rrx is problematic because of the C-Flag, as we currently can't check
|
|
|
- NR_DEFAULTFLAGS for modification}
|
|
|
- (
|
|
|
- {Everything is fine if we don't use RRX}
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
|
|
|
- (
|
|
|
- {If it is RRX, then check if we're just accessing the next instruction}
|
|
|
- GetNextInstruction(p, hp2) and
|
|
|
- (hp1 = hp2)
|
|
|
- )
|
|
|
- ) and
|
|
|
- { reg1 might not be modified inbetween }
|
|
|
- not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
|
|
|
- { The shifterop can contain a register, might not be modified}
|
|
|
- (
|
|
|
- (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
|
|
|
- not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1))
|
|
|
- ) and
|
|
|
- (
|
|
|
- {Only ONE of the two src operands is allowed to match}
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
|
|
|
- ) then
|
|
|
- begin
|
|
|
- if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
|
|
|
- I2:=0
|
|
|
- else
|
|
|
- I2:=1;
|
|
|
- for I:=I2 to taicpu(hp1).ops-1 do
|
|
|
- if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1STR(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1: tai;
|
|
|
+ begin
|
|
|
+ Result := False;
|
|
|
+
|
|
|
+ { Common conditions }
|
|
|
+ if (taicpu(p).oper[1]^.typ = top_ref) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
+ (taicpu(p).oppostfix=PF_None) then
|
|
|
+ begin
|
|
|
+ { change
|
|
|
+ str reg1,ref
|
|
|
+ ldr reg2,ref
|
|
|
+ into
|
|
|
+ str reg1,ref
|
|
|
+ mov reg2,reg1
|
|
|
+ }
|
|
|
+ if (taicpu(p).condition=C_None) and
|
|
|
+ GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
|
|
|
+ MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
|
|
|
+ (taicpu(hp1).oper[1]^.typ=top_ref) and
|
|
|
+ (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
+ not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
|
|
|
+ ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
|
|
|
+ ((taicpu(hp1).oper[1]^.ref^.base=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1))) then
|
|
|
+ begin
|
|
|
+ if taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: StrLdr2StrMov 1 done', hp1);
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ taicpu(hp1).opcode:=A_MOV;
|
|
|
+ taicpu(hp1).oppostfix:=PF_None;
|
|
|
+ taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
|
|
|
+ DebugMsg('Peephole Optimization: StrLdr2StrMov 2 done', hp1);
|
|
|
+ end;
|
|
|
+ result := True;
|
|
|
+ end
|
|
|
+ { change
|
|
|
+ str reg1,ref
|
|
|
+ str reg2,ref
|
|
|
+ into
|
|
|
+ strd reg1,reg2,ref
|
|
|
+ }
|
|
|
+ else if (GenerateARMCode or GenerateThumb2Code) and
|
|
|
+ (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
|
|
|
+ not(odd(getsupreg(taicpu(p).oper[0]^.reg))) and
|
|
|
+ (abs(taicpu(p).oper[1]^.ref^.offset)<256) and
|
|
|
+ AlignedToQWord(taicpu(p).oper[1]^.ref^) and
|
|
|
+ GetNextInstruction(p,hp1) and
|
|
|
+ MatchInstruction(hp1, A_STR, [taicpu(p).condition, C_None], [PF_None]) and
|
|
|
+ (getsupreg(taicpu(p).oper[0]^.reg)+1=getsupreg(taicpu(hp1).oper[0]^.reg)) and
|
|
|
+ { str ensures that either base or index contain no register, else ldr wouldn't
|
|
|
+ use an offset either
|
|
|
+ }
|
|
|
+ (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.index=taicpu(hp1).oper[1]^.ref^.index) and
|
|
|
+ (taicpu(p).oper[1]^.ref^.offset+4=taicpu(hp1).oper[1]^.ref^.offset) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: StrStr2Strd done', p);
|
|
|
+ taicpu(p).oppostfix:=PF_D;
|
|
|
+ taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
|
|
|
+ taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
|
|
|
+ taicpu(p).ops:=3;
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ result:=true;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+
|
|
|
+ Result:=LookForPostindexedPattern(taicpu(p)) or Result;
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1MOV(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1, hpfar1, hp2, hp3: tai;
|
|
|
+ i, i2: longint;
|
|
|
+ tempop: tasmop;
|
|
|
+ dealloc: tai_regalloc;
|
|
|
+ begin
|
|
|
+ Result := False;
|
|
|
+ hp1 := nil;
|
|
|
+
|
|
|
+ { fold
|
|
|
+ mov reg1,reg0, shift imm1
|
|
|
+ mov reg1,reg1, shift imm2
|
|
|
+ }
|
|
|
+ if (taicpu(p).ops=3) and
|
|
|
+ (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
|
|
|
+ getnextinstruction(p,hp1) and
|
|
|
+ MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
|
|
|
+ (taicpu(hp1).ops=3) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^.reg) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
|
|
|
+ (taicpu(hp1).oper[2]^.typ = top_shifterop) and
|
|
|
+ (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) then
|
|
|
+ begin
|
|
|
+ { fold
|
|
|
+ mov reg1,reg0, lsl 16
|
|
|
+ mov reg1,reg1, lsr 16
|
|
|
+ strh reg1, ...
|
|
|
+ dealloc reg1
|
|
|
+ to
|
|
|
+ strh reg1, ...
|
|
|
+ dealloc reg1
|
|
|
+ }
|
|
|
+ if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftimm=16) and
|
|
|
+ (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ASR]) and
|
|
|
+ (taicpu(hp1).oper[2]^.shifterop^.shiftimm=16) and
|
|
|
+ getnextinstruction(hp1,hp2) and
|
|
|
+ MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
|
|
|
+ MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
|
|
|
+ begin
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
|
|
|
+ if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: removed superfluous 16 Bit zero extension', hp1);
|
|
|
+ taicpu(hp2).loadreg(0,taicpu(p).oper[1]^.reg);
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+
|
|
|
+ RemoveCurrentP(p, hp2);
|
|
|
+ Result:=true;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ { fold
|
|
|
+ mov reg1,reg0, shift imm1
|
|
|
+ mov reg1,reg1, shift imm2
|
|
|
+ to
|
|
|
+ mov reg1,reg0, shift imm1+imm2
|
|
|
+ }
|
|
|
+ else if (taicpu(p).oper[2]^.shifterop^.shiftmode=taicpu(hp1).oper[2]^.shifterop^.shiftmode) or
|
|
|
+ { asr makes no use after a lsr, the asr can be foled into the lsr }
|
|
|
+ ((taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSR) and (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_ASR) ) then
|
|
|
+ begin
|
|
|
+ inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp1).oper[2]^.shifterop^.shiftimm);
|
|
|
+ { avoid overflows }
|
|
|
+ if taicpu(p).oper[2]^.shifterop^.shiftimm>31 then
|
|
|
+ case taicpu(p).oper[2]^.shifterop^.shiftmode of
|
|
|
+ SM_ROR:
|
|
|
+ taicpu(p).oper[2]^.shifterop^.shiftimm:=taicpu(p).oper[2]^.shifterop^.shiftimm and 31;
|
|
|
+ SM_ASR:
|
|
|
+ taicpu(p).oper[2]^.shifterop^.shiftimm:=31;
|
|
|
+ SM_LSR,
|
|
|
+ SM_LSL:
|
|
|
+ begin
|
|
|
+ hp2:=taicpu.op_reg_const(A_MOV,taicpu(p).oper[0]^.reg,0);
|
|
|
+ InsertLLItem(p.previous, p.next, hp2);
|
|
|
+ p.free;
|
|
|
+ p:=hp2;
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ internalerror(2008072803);
|
|
|
+ end;
|
|
|
+ DebugMsg('Peephole Optimization: ShiftShift2Shift 1 done', p);
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ hp1 := nil;
|
|
|
+ result := true;
|
|
|
+ end
|
|
|
+ { fold
|
|
|
+ mov reg1,reg0, shift imm1
|
|
|
+ mov reg1,reg1, shift imm2
|
|
|
+ mov reg1,reg1, shift imm3 ...
|
|
|
+ mov reg2,reg1, shift imm3 ...
|
|
|
+ }
|
|
|
+ else if GetNextInstructionUsingReg(hp1,hp2, taicpu(hp1).oper[0]^.reg) and
|
|
|
+ MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
|
|
|
+ (taicpu(hp2).ops=3) and
|
|
|
+ MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
|
|
|
+ RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp2)) and
|
|
|
+ (taicpu(hp2).oper[2]^.typ = top_shifterop) and
|
|
|
+ (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) then
|
|
|
+ begin
|
|
|
+ { mov reg1,reg0, lsl imm1
|
|
|
+ mov reg1,reg1, lsr/asr imm2
|
|
|
+ mov reg2,reg1, lsl imm3 ...
|
|
|
+ to
|
|
|
+ mov reg1,reg0, lsl imm1
|
|
|
+ mov reg2,reg1, lsr/asr imm2-imm3
|
|
|
+ if
|
|
|
+ imm1>=imm2
|
|
|
+ }
|
|
|
+ if (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSL) and
|
|
|
+ (taicpu(hp1).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
|
|
|
+ begin
|
|
|
+ if (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(hp1).oper[2]^.shifterop^.shiftimm) then
|
|
|
+ begin
|
|
|
+ if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,p,hp1)) and
|
|
|
+ not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: ShiftShiftShift2ShiftShift 1a done', p);
|
|
|
+ inc(taicpu(p).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm-taicpu(hp1).oper[2]^.shifterop^.shiftimm);
|
|
|
+ taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
|
|
|
+ asml.remove(hp1);
|
|
|
+ asml.remove(hp2);
|
|
|
+ hp1.free;
|
|
|
+ hp2.free;
|
|
|
+
|
|
|
+ if taicpu(p).oper[2]^.shifterop^.shiftimm>=32 then
|
|
|
begin
|
|
|
- { If the parameter matched on the second op from the RIGHT
|
|
|
- we have to switch the parameters, this will not happen for CMP
|
|
|
- were we're only evaluating the most right parameter
|
|
|
- }
|
|
|
- if I <> taicpu(hp1).ops-1 then
|
|
|
- begin
|
|
|
- {The SUB operators need to be changed when we swap parameters}
|
|
|
- case taicpu(hp1).opcode of
|
|
|
- A_SUB: tempop:=A_RSB;
|
|
|
- A_SBC: tempop:=A_RSC;
|
|
|
- A_RSB: tempop:=A_SUB;
|
|
|
- A_RSC: tempop:=A_SBC;
|
|
|
- else tempop:=taicpu(hp1).opcode;
|
|
|
- end;
|
|
|
- if taicpu(hp1).ops = 3 then
|
|
|
- hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
|
|
|
- taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
|
|
|
- taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
|
|
|
- else
|
|
|
- hp2:=taicpu.op_reg_reg_shifterop(tempop,
|
|
|
- taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
|
|
|
- taicpu(p).oper[2]^.shifterop^);
|
|
|
- end
|
|
|
- else
|
|
|
- if taicpu(hp1).ops = 3 then
|
|
|
- hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
|
|
|
- taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
|
|
|
- taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
|
|
|
- else
|
|
|
- hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
|
|
|
- taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
|
|
|
- taicpu(p).oper[2]^.shifterop^);
|
|
|
- if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
|
|
|
- AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hp1,UsedRegs);
|
|
|
- AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
|
|
|
- asml.insertbefore(hp2, hp1);
|
|
|
- GetNextInstruction(p, hp2);
|
|
|
- asml.remove(p);
|
|
|
- asml.remove(hp1);
|
|
|
- p.free;
|
|
|
- hp1.free;
|
|
|
- p:=hp2;
|
|
|
- DebugMsg('Peephole FoldShiftProcess done', p);
|
|
|
- Result:=true;
|
|
|
- break;
|
|
|
+ taicpu(p).freeop(1);
|
|
|
+ taicpu(p).freeop(2);
|
|
|
+ taicpu(p).loadconst(1,0);
|
|
|
end;
|
|
|
- end;
|
|
|
- {
|
|
|
- Fold
|
|
|
- mov r1, r1, lsl #2
|
|
|
- ldr/ldrb r0, [r0, r1]
|
|
|
- to
|
|
|
- ldr/ldrb r0, [r0, r1, lsl #2]
|
|
|
-
|
|
|
- XXX: This still needs some work, as we quite often encounter something like
|
|
|
- mov r1, r2, lsl #2
|
|
|
- add r2, r3, #imm
|
|
|
- ldr r0, [r2, r1]
|
|
|
- which can't be folded because r2 is overwritten between the shift and the ldr.
|
|
|
- We could try to shuffle the registers around and fold it into.
|
|
|
- add r1, r3, #imm
|
|
|
- ldr r0, [r1, r2, lsl #2]
|
|
|
- }
|
|
|
- if (not(GenerateThumbCode)) and
|
|
|
- (taicpu(p).opcode = A_MOV) and
|
|
|
- (taicpu(p).ops = 3) and
|
|
|
- (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
- (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
- { RRX is tough to handle, because it requires tracking the C-Flag,
|
|
|
- it is also extremly unlikely to be emitted this way}
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
|
|
|
- { thumb2 allows only lsl #0..#3 }
|
|
|
- (not(GenerateThumb2Code) or
|
|
|
- ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
|
|
|
- (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
|
|
|
- )
|
|
|
- ) and
|
|
|
- (taicpu(p).oppostfix = PF_NONE) and
|
|
|
- GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
|
|
|
- (MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
|
|
|
- (GenerateThumb2Code and
|
|
|
- MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
|
|
|
- ) and
|
|
|
- (
|
|
|
- {If this is address by offset, one of the two registers can be used}
|
|
|
- ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
- (
|
|
|
- (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
|
|
|
- (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
|
|
|
- )
|
|
|
- ) or
|
|
|
- {For post and preindexed only the index register can be used}
|
|
|
- ((taicpu(hp1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
|
|
|
- (
|
|
|
- (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
|
|
|
- ) and
|
|
|
- (not GenerateThumb2Code)
|
|
|
- )
|
|
|
- ) and
|
|
|
- { Only fold if both registers are used. Otherwise we are folding p with itself }
|
|
|
- (taicpu(hp1).oper[1]^.ref^.index<>NR_NO) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.base<>NR_NO) and
|
|
|
- { Only fold if there isn't another shifterop already, and offset is zero. }
|
|
|
- (taicpu(hp1).oper[1]^.ref^.offset = 0) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
|
|
|
- not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
|
|
|
- RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
|
|
|
- begin
|
|
|
- { If the register we want to do the shift for resides in base, we need to swap that}
|
|
|
- if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
|
|
|
- taicpu(hp1).oper[1]^.ref^.base := taicpu(hp1).oper[1]^.ref^.index;
|
|
|
- taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
|
|
|
- taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
|
|
|
- taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
|
|
|
- DebugMsg('Peephole FoldShiftLdrStr done', hp1);
|
|
|
- GetNextInstruction(p, hp1);
|
|
|
- asml.remove(p);
|
|
|
- p.free;
|
|
|
- p:=hp1;
|
|
|
- Result:=true;
|
|
|
- end;
|
|
|
- {
|
|
|
- Often we see shifts and then a superfluous mov to another register
|
|
|
- In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
|
|
|
- }
|
|
|
- if (taicpu(p).opcode = A_MOV) and
|
|
|
- GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
|
|
|
- Result:=true;
|
|
|
- end;
|
|
|
- A_ADD,
|
|
|
- A_ADC,
|
|
|
- A_RSB,
|
|
|
- A_RSC,
|
|
|
- A_SUB,
|
|
|
- A_SBC,
|
|
|
- A_BIC,
|
|
|
- A_EOR,
|
|
|
- A_ORR,
|
|
|
- A_MLA,
|
|
|
- A_MLS,
|
|
|
- A_MUL,
|
|
|
- A_QADD,A_QADD16,A_QADD8,
|
|
|
- A_QSUB,A_QSUB16,A_QSUB8,
|
|
|
- A_QDADD,A_QDSUB,A_QASX,A_QSAX,
|
|
|
- A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
|
|
|
- A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
|
|
|
- A_PKHTB,A_PKHBT,
|
|
|
- A_SMUAD,A_SMUSD:
|
|
|
- begin
|
|
|
- {
|
|
|
- change
|
|
|
- add/sub reg2,reg1,const1
|
|
|
- str/ldr reg3,[reg2,const2]
|
|
|
- dealloc reg2
|
|
|
- to
|
|
|
- str/ldr reg3,[reg1,const2+/-const1]
|
|
|
- }
|
|
|
- if (not GenerateThumbCode) and
|
|
|
- (taicpu(p).opcode in [A_ADD,A_SUB]) and
|
|
|
- (taicpu(p).ops>2) and
|
|
|
- (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
- (taicpu(p).oper[2]^.typ = top_const) then
|
|
|
- begin
|
|
|
- hp1:=p;
|
|
|
- while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
|
|
|
- MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_ref) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
|
|
|
- { don't optimize if the register is stored/overwritten }
|
|
|
- (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
|
|
|
- (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
- { new offset must be valid: either in the range of 8 or 12 bit, depend on the
|
|
|
- ldr postfix }
|
|
|
- (((taicpu(p).opcode=A_ADD) and
|
|
|
- isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset+taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
|
|
|
- ) or
|
|
|
- ((taicpu(p).opcode=A_SUB) and
|
|
|
- isValidConstLoadStoreOffset(taicpu(hp1).oper[1]^.ref^.offset-taicpu(p).oper[2]^.val, taicpu(hp1).oppostfix)
|
|
|
- )
|
|
|
- ) do
|
|
|
- begin
|
|
|
- { neither reg1 nor reg2 might be changed inbetween }
|
|
|
- if RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1) or
|
|
|
- RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1) then
|
|
|
- break;
|
|
|
- { reg2 must be either overwritten by the ldr or it is deallocated afterwards }
|
|
|
- if ((taicpu(hp1).opcode=A_LDR) and (taicpu(p).oper[0]^.reg=taicpu(hp1).oper[0]^.reg)) or
|
|
|
- assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
|
|
|
- begin
|
|
|
- { remember last instruction }
|
|
|
- hp2:=hp1;
|
|
|
- DebugMsg('Peephole Add/SubLdr2Ldr done', p);
|
|
|
- hp1:=p;
|
|
|
- { fix all ldr/str }
|
|
|
- while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) do
|
|
|
- begin
|
|
|
- taicpu(hp1).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
|
|
|
- if taicpu(p).opcode=A_ADD then
|
|
|
- inc(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val)
|
|
|
- else
|
|
|
- dec(taicpu(hp1).oper[1]^.ref^.offset,taicpu(p).oper[2]^.val);
|
|
|
- if hp1=hp2 then
|
|
|
- break;
|
|
|
- end;
|
|
|
- GetNextInstruction(p,hp1);
|
|
|
- asml.remove(p);
|
|
|
- p.free;
|
|
|
- p:=hp1;
|
|
|
- result:=true;
|
|
|
- break;
|
|
|
- end;
|
|
|
- end;
|
|
|
- end;
|
|
|
- {
|
|
|
- change
|
|
|
- add reg1, ...
|
|
|
- mov reg2, reg1
|
|
|
- to
|
|
|
- add reg2, ...
|
|
|
- }
|
|
|
- if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- (taicpu(p).ops>=3) and
|
|
|
- RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
|
|
|
- Result:=true;
|
|
|
+ result := true;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ else if not(RegUsedBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2)) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: ShiftShiftShift2ShiftShift 1b done', p);
|
|
|
+
|
|
|
+ dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(hp2).oper[2]^.shifterop^.shiftimm);
|
|
|
+ taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
|
|
|
+ asml.remove(hp2);
|
|
|
+ hp2.free;
|
|
|
+ result := true;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ { mov reg1,reg0, lsr/asr imm1
|
|
|
+ mov reg1,reg1, lsl imm2
|
|
|
+ mov reg1,reg1, lsr/asr imm3 ...
|
|
|
+
|
|
|
+ if imm3>=imm1 and imm2>=imm1
|
|
|
+ to
|
|
|
+ mov reg1,reg0, lsl imm2-imm1
|
|
|
+ mov reg1,reg1, lsr/asr imm3 ...
|
|
|
+ }
|
|
|
+ else if (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
|
|
|
+ (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
|
|
|
+ (taicpu(hp2).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) and
|
|
|
+ (taicpu(hp1).oper[2]^.shifterop^.shiftimm>=taicpu(p).oper[2]^.shifterop^.shiftimm) then
|
|
|
+ begin
|
|
|
+ dec(taicpu(hp1).oper[2]^.shifterop^.shiftimm,taicpu(p).oper[2]^.shifterop^.shiftimm);
|
|
|
+ taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
|
|
|
+ DebugMsg('Peephole Optimization: ShiftShiftShift2ShiftShift 2 done', p);
|
|
|
+ if taicpu(hp1).oper[2]^.shifterop^.shiftimm=0 then
|
|
|
+ begin
|
|
|
+ taicpu(hp2).oper[1]^.reg:=taicpu(hp1).oper[1]^.reg;
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ end;
|
|
|
|
|
|
- if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
|
|
|
- LookForPreindexedPattern(taicpu(p)) then
|
|
|
- begin
|
|
|
- GetNextInstruction(p,hp1);
|
|
|
- DebugMsg('Peephole Add/Sub to Preindexed done', p);
|
|
|
- asml.remove(p);
|
|
|
- p.free;
|
|
|
- p:=hp1;
|
|
|
- Result:=true;
|
|
|
- end;
|
|
|
- {
|
|
|
- Turn
|
|
|
- mul reg0, z,w
|
|
|
- sub/add x, y, reg0
|
|
|
- dealloc reg0
|
|
|
-
|
|
|
- into
|
|
|
-
|
|
|
- mls/mla x,z,w,y
|
|
|
- }
|
|
|
- if MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
|
|
|
- (taicpu(p).ops=3) and
|
|
|
- (taicpu(p).oper[0]^.typ = top_reg) and
|
|
|
- (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
- (taicpu(p).oper[2]^.typ = top_reg) and
|
|
|
- GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
|
- MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
|
|
|
- (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
|
|
|
- (not RegModifiedBetween(taicpu(p).oper[2]^.reg, p, hp1)) and
|
|
|
-
|
|
|
- (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
|
|
|
- ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
|
|
|
-
|
|
|
- // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
|
|
|
- // TODO: A workaround would be to swap Rm and Rs
|
|
|
- (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
|
|
|
-
|
|
|
- (((taicpu(hp1).ops=3) and
|
|
|
- (taicpu(hp1).oper[2]^.typ=top_reg) and
|
|
|
- ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
|
|
|
- (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
|
|
|
- ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
|
|
|
- (taicpu(hp1).opcode=A_ADD) and
|
|
|
- (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
|
|
|
- ((taicpu(hp1).ops=2) and
|
|
|
- (taicpu(hp1).oper[1]^.typ=top_reg) and
|
|
|
- MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
|
|
|
- (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
|
|
|
- begin
|
|
|
- if taicpu(hp1).opcode=A_ADD then
|
|
|
- begin
|
|
|
- taicpu(hp1).opcode:=A_MLA;
|
|
|
+ RemoveCurrentp(p);
|
|
|
+ result := true;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- if taicpu(hp1).ops=3 then
|
|
|
- begin
|
|
|
- if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
|
|
|
- oldreg:=taicpu(hp1).oper[2]^.reg
|
|
|
- else
|
|
|
- oldreg:=taicpu(hp1).oper[1]^.reg;
|
|
|
- end
|
|
|
- else
|
|
|
- oldreg:=taicpu(hp1).oper[0]^.reg;
|
|
|
+ { All the optimisations from this point on require GetNextInstructionUsingReg
|
|
|
+ to return True }
|
|
|
+ if not (
|
|
|
+ GetNextInstructionUsingReg(p, hpfar1, taicpu(p).oper[0]^.reg) and
|
|
|
+ (hpfar1.typ = ait_instruction)
|
|
|
+ ) then
|
|
|
+ Exit;
|
|
|
+
|
|
|
+ { Change the common
|
|
|
+ mov r0, r0, lsr #xxx
|
|
|
+ and r0, r0, #yyy/bic r0, r0, #xxx
|
|
|
+
|
|
|
+ and remove the superfluous and/bic if possible
|
|
|
+
|
|
|
+ This could be extended to handle more cases.
|
|
|
+ }
|
|
|
+
|
|
|
+ { Change
|
|
|
+ mov rx, ry, lsr/ror #xxx
|
|
|
+ uxtb/uxth rz,rx/and rz,rx,0xFF
|
|
|
+ dealloc rx
|
|
|
+
|
|
|
+ to
|
|
|
+
|
|
|
+ uxtb/uxth rz,ry,ror #xxx
|
|
|
+ }
|
|
|
+ if (GenerateThumb2Code) and
|
|
|
+ (taicpu(p).ops=3) and
|
|
|
+ (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
|
|
|
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
|
|
|
+ begin
|
|
|
+ if MatchInstruction(hpfar1, A_UXTB, [C_None], [PF_None]) and
|
|
|
+ (taicpu(hpfar1).ops = 2) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
|
|
|
+ MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
|
|
+ begin
|
|
|
+ taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
|
|
|
+ taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
|
|
|
+ taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
|
|
|
+ taicpu(hpfar1).ops := 3;
|
|
|
|
|
|
- taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
|
|
|
- taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
|
|
|
- taicpu(hp1).loadreg(3,oldreg);
|
|
|
+ if not Assigned(hp1) then
|
|
|
+ GetNextInstruction(p,hp1);
|
|
|
|
|
|
- DebugMsg('MulAdd2MLA done', p);
|
|
|
+ RemoveCurrentP(p, hp1);
|
|
|
|
|
|
- taicpu(hp1).ops:=4;
|
|
|
+ result:=true;
|
|
|
+ exit;
|
|
|
+ end
|
|
|
+ else if MatchInstruction(hpfar1, A_UXTH, [C_None], [PF_None]) and
|
|
|
+ (taicpu(hpfar1).ops=2) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
|
|
|
+ MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
|
|
+ begin
|
|
|
+ taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
|
|
|
+ taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
|
|
|
+ taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
|
|
|
+ taicpu(hpfar1).ops := 3;
|
|
|
|
|
|
- asml.remove(p);
|
|
|
- p.free;
|
|
|
- p:=hp1;
|
|
|
- end
|
|
|
- else
|
|
|
- begin
|
|
|
- taicpu(hp1).opcode:=A_MLS;
|
|
|
+ if not Assigned(hp1) then
|
|
|
+ GetNextInstruction(p,hp1);
|
|
|
|
|
|
+ RemoveCurrentP(p, hp1);
|
|
|
|
|
|
- taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
|
|
|
+ result:=true;
|
|
|
+ exit;
|
|
|
+ end
|
|
|
+ else if MatchInstruction(hpfar1, A_AND, [C_None], [PF_None]) and
|
|
|
+ (taicpu(hpfar1).ops = 3) and
|
|
|
+ (taicpu(hpfar1).oper[2]^.typ = top_const) and
|
|
|
+ (taicpu(hpfar1).oper[2]^.val = $FF) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
|
|
|
+ MatchOperand(taicpu(hpfar1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
|
|
+ begin
|
|
|
+ taicpu(hpfar1).ops := 3;
|
|
|
+ taicpu(hpfar1).opcode := A_UXTB;
|
|
|
+ taicpu(hpfar1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
|
|
|
+ taicpu(hpfar1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
|
|
|
+ taicpu(hpfar1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
|
|
|
|
|
|
- if taicpu(hp1).ops=2 then
|
|
|
- taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
|
|
|
- else
|
|
|
- taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
|
|
|
+ if not Assigned(hp1) then
|
|
|
+ GetNextInstruction(p,hp1);
|
|
|
|
|
|
- taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
|
|
|
+ RemoveCurrentP(p, hp1);
|
|
|
|
|
|
- DebugMsg('MulSub2MLS done', p);
|
|
|
- AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
|
|
|
- AllocRegBetween(taicpu(hp1).oper[2]^.reg,p,hp1,UsedRegs);
|
|
|
- AllocRegBetween(taicpu(hp1).oper[3]^.reg,p,hp1,UsedRegs);
|
|
|
+ result:=true;
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- taicpu(hp1).ops:=4;
|
|
|
- RemoveCurrentP(p, hp1); // <-- Is this actually safe? hp1 is not necessarily the next instruction. [Kit]
|
|
|
- end;
|
|
|
+ { 2-operand mov optimisations }
|
|
|
+ if (taicpu(p).ops = 2) then
|
|
|
+ begin
|
|
|
+ {
|
|
|
+ This removes the mul from
|
|
|
+ mov rX,0
|
|
|
+ ...
|
|
|
+ mul ...,rX,...
|
|
|
+ }
|
|
|
+ if (taicpu(p).oper[1]^.typ = top_const) then
|
|
|
+ begin
|
|
|
+(* if false and
|
|
|
+ (taicpu(p).oper[1]^.val=0) and
|
|
|
+ MatchInstruction(hpfar1, [A_MUL,A_MLA], [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
+ (((taicpu(hpfar1).oper[1]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^)) or
|
|
|
+ ((taicpu(hpfar1).oper[2]^.typ=top_reg) and MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[2]^))) then
|
|
|
+ begin
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
|
|
|
+ DebugMsg('Peephole Optimization: MovMUL/MLA2Mov0 done', p);
|
|
|
+ if taicpu(hpfar1).opcode=A_MUL then
|
|
|
+ taicpu(hpfar1).loadconst(1,0)
|
|
|
+ else
|
|
|
+ taicpu(hpfar1).loadreg(1,taicpu(hpfar1).oper[3]^.reg);
|
|
|
+ taicpu(hpfar1).ops:=2;
|
|
|
+ taicpu(hpfar1).opcode:=A_MOV;
|
|
|
+ if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result:=true;
|
|
|
+ exit;
|
|
|
+ end
|
|
|
+ else*) if (taicpu(p).oper[1]^.val=0) and
|
|
|
+ MatchInstruction(hpfar1, A_MLA, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[3]^) then
|
|
|
+ begin
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(hpfar1.next));
|
|
|
+ DebugMsg('Peephole Optimization: MovMLA2MUL 1 done', p);
|
|
|
+ taicpu(hpfar1).ops:=3;
|
|
|
+ taicpu(hpfar1).opcode:=A_MUL;
|
|
|
+ if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hpfar1,TmpUsedRegs)) then
|
|
|
+ begin
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result:=true;
|
|
|
+ end;
|
|
|
+ exit;
|
|
|
+ end
|
|
|
+ {
|
|
|
+ This changes the very common
|
|
|
+ mov r0, #0
|
|
|
+ str r0, [...]
|
|
|
+ mov r0, #0
|
|
|
+ str r0, [...]
|
|
|
|
|
|
- result:=true;
|
|
|
- end
|
|
|
- end;
|
|
|
-{$ifdef dummy}
|
|
|
- A_MVN:
|
|
|
- begin
|
|
|
- {
|
|
|
- change
|
|
|
- mvn reg2,reg1
|
|
|
- and reg3,reg4,reg2
|
|
|
- dealloc reg2
|
|
|
- to
|
|
|
- bic reg3,reg4,reg1
|
|
|
- }
|
|
|
- if (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
- GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
|
- MatchInstruction(hp1,A_AND,[],[]) and
|
|
|
- (((taicpu(hp1).ops=3) and
|
|
|
- (taicpu(hp1).oper[2]^.typ=top_reg) and
|
|
|
- (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
|
|
|
- MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
|
|
|
- ((taicpu(hp1).ops=2) and
|
|
|
- (taicpu(hp1).oper[1]^.typ=top_reg) and
|
|
|
- MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
|
|
|
- assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
|
|
|
- { reg1 might not be modified inbetween }
|
|
|
- not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
|
|
|
- begin
|
|
|
- DebugMsg('Peephole MvnAnd2Bic done', p);
|
|
|
- taicpu(hp1).opcode:=A_BIC;
|
|
|
-
|
|
|
- if taicpu(hp1).ops=3 then
|
|
|
- begin
|
|
|
- if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
|
|
- taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
|
|
|
-
|
|
|
- taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
|
|
|
- end
|
|
|
- else
|
|
|
- taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
|
|
|
- GetNextInstruction(p, hp1);
|
|
|
- asml.remove(p);
|
|
|
- p.free;
|
|
|
- p:=hp1;
|
|
|
- end;
|
|
|
- end;
|
|
|
-{$endif dummy}
|
|
|
- A_UXTB:
|
|
|
- Result:=OptPass1UXTB(p);
|
|
|
- A_UXTH:
|
|
|
- Result:=OptPass1UXTH(p);
|
|
|
- A_SXTB:
|
|
|
- Result:=OptPass1SXTB(p);
|
|
|
- A_SXTH:
|
|
|
- Result:=OptPass1SXTH(p);
|
|
|
- A_CMP:
|
|
|
- begin
|
|
|
- {
|
|
|
- change
|
|
|
- cmp reg,const1
|
|
|
- moveq reg,const1
|
|
|
- movne reg,const2
|
|
|
- to
|
|
|
- cmp reg,const1
|
|
|
- movne reg,const2
|
|
|
- }
|
|
|
- if (taicpu(p).oper[1]^.typ = top_const) and
|
|
|
- GetNextInstruction(p, hp1) and
|
|
|
- MatchInstruction(hp1, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_const) and
|
|
|
- GetNextInstruction(hp1, hp2) and
|
|
|
- MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_const) then
|
|
|
- begin
|
|
|
- Result:=RemoveRedundantMove(p, hp1, asml) or Result;
|
|
|
- Result:=RemoveRedundantMove(p, hp2, asml) or Result;
|
|
|
- end;
|
|
|
- end;
|
|
|
- A_STM:
|
|
|
- begin
|
|
|
- {
|
|
|
- change
|
|
|
- stmfd r13!,[r14]
|
|
|
- sub r13,r13,#4
|
|
|
- bl abc
|
|
|
- add r13,r13,#4
|
|
|
- ldmfd r13!,[r15]
|
|
|
- into
|
|
|
- b abc
|
|
|
- }
|
|
|
- if not(ts_thumb_interworking in current_settings.targetswitches) and
|
|
|
- MatchInstruction(p, A_STM, [C_None], [PF_FD]) and
|
|
|
- GetNextInstruction(p, hp1) and
|
|
|
+ and removes all superfluous mov instructions
|
|
|
+ }
|
|
|
+ else if (taicpu(hpfar1).opcode=A_STR) then
|
|
|
+ begin
|
|
|
+ hp1 := hpfar1;
|
|
|
+ while MatchInstruction(hp1, A_STR, [taicpu(p).condition], []) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^) and
|
|
|
GetNextInstruction(hp1, hp2) and
|
|
|
- SkipEntryExitMarker(hp2, hp2) and
|
|
|
- GetNextInstruction(hp2, hp3) and
|
|
|
- SkipEntryExitMarker(hp3, hp3) and
|
|
|
- GetNextInstruction(hp3, hp4) and
|
|
|
- (taicpu(p).oper[0]^.typ = top_ref) and
|
|
|
- (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
|
|
|
- (taicpu(p).oper[0]^.ref^.base=NR_NO) and
|
|
|
- (taicpu(p).oper[0]^.ref^.offset=0) and
|
|
|
- (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
|
|
|
- (taicpu(p).oper[1]^.typ = top_regset) and
|
|
|
- (taicpu(p).oper[1]^.regset^ = [RS_R14]) and
|
|
|
-
|
|
|
- MatchInstruction(hp1, A_SUB, [C_None], [PF_NONE]) and
|
|
|
- (taicpu(hp1).oper[0]^.typ = top_reg) and
|
|
|
- (taicpu(hp1).oper[0]^.reg = NR_STACK_POINTER_REG) and
|
|
|
- MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^) and
|
|
|
- (taicpu(hp1).oper[2]^.typ = top_const) and
|
|
|
-
|
|
|
- MatchInstruction(hp3, A_ADD, [C_None], [PF_NONE]) and
|
|
|
- MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[0]^) and
|
|
|
- MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp3).oper[1]^) and
|
|
|
- MatchOperand(taicpu(hp1).oper[2]^,taicpu(hp3).oper[2]^) and
|
|
|
-
|
|
|
- MatchInstruction(hp2, [A_BL,A_BLX], [C_None], [PF_NONE]) and
|
|
|
- (taicpu(hp2).oper[0]^.typ = top_ref) and
|
|
|
-
|
|
|
- MatchInstruction(hp4, A_LDM, [C_None], [PF_FD]) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^,taicpu(hp4).oper[0]^) and
|
|
|
- (taicpu(hp4).oper[1]^.typ = top_regset) and
|
|
|
- (taicpu(hp4).oper[1]^.regset^ = [RS_R15]) then
|
|
|
- begin
|
|
|
- asml.Remove(p);
|
|
|
- asml.Remove(hp1);
|
|
|
- asml.Remove(hp3);
|
|
|
- asml.Remove(hp4);
|
|
|
- taicpu(hp2).opcode:=A_B;
|
|
|
- p.free;
|
|
|
- hp1.free;
|
|
|
- hp3.free;
|
|
|
- hp4.free;
|
|
|
- p:=hp2;
|
|
|
- DebugMsg('Peephole Bl2B done', p);
|
|
|
- end;
|
|
|
- end;
|
|
|
- A_VMOV:
|
|
|
+ MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
|
|
|
+ (taicpu(hp2).ops = 2) and
|
|
|
+ MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
|
|
|
+ MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) do
|
|
|
begin
|
|
|
- {
|
|
|
- change
|
|
|
- vmov reg0,reg1,reg2
|
|
|
- vmov reg1,reg2,reg0
|
|
|
- into
|
|
|
- vmov reg0,reg1,reg2
|
|
|
-
|
|
|
- can be applied regardless if reg0 or reg2 is the vfp register
|
|
|
- }
|
|
|
- if (taicpu(p).ops = 3) and
|
|
|
- GetNextInstruction(p, hp1) and
|
|
|
- MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
- (taicpu(hp1).ops = 3) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
|
|
|
- MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
|
|
|
- MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) then
|
|
|
- begin
|
|
|
- asml.Remove(hp1);
|
|
|
- hp1.free;
|
|
|
- DebugMsg('Peephole VMovVMov2VMov done', p);
|
|
|
- end;
|
|
|
+ DebugMsg('Peephole Optimization: MovStrMov done', hp2);
|
|
|
+ GetNextInstruction(hp2,hp1);
|
|
|
+ asml.remove(hp2);
|
|
|
+ hp2.free;
|
|
|
+ result:=true;
|
|
|
+ if not assigned(hp1) then break;
|
|
|
end;
|
|
|
- A_AND:
|
|
|
- Result:=OptPass1And(p);
|
|
|
- A_VLDR,
|
|
|
- A_VADD,
|
|
|
- A_VMUL,
|
|
|
- A_VDIV,
|
|
|
- A_VSUB,
|
|
|
- A_VSQRT,
|
|
|
- A_VNEG,
|
|
|
- A_VCVT,
|
|
|
- A_VABS:
|
|
|
- begin
|
|
|
- if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
- RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp') then
|
|
|
- Result:=true;
|
|
|
- end
|
|
|
- else
|
|
|
- ;
|
|
|
+
|
|
|
+ if Result then
|
|
|
+ Exit;
|
|
|
end;
|
|
|
+ end;
|
|
|
+ {
|
|
|
+ This removes the first mov from
|
|
|
+ mov rX,...
|
|
|
+ mov rX,...
|
|
|
+ }
|
|
|
+ if taicpu(hpfar1).opcode=A_MOV then
|
|
|
+ begin
|
|
|
+ hp1 := p;
|
|
|
+ while MatchInstruction(hpfar1, A_MOV, [taicpu(hp1).condition], [taicpu(hp1).oppostfix]) and
|
|
|
+ (taicpu(hpfar1).ops = 2) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(hpfar1).oper[0]^) and
|
|
|
+ { don't remove the first mov if the second is a mov rX,rX }
|
|
|
+ not(MatchOperand(taicpu(hpfar1).oper[0]^, taicpu(hpfar1).oper[1]^)) do
|
|
|
+ begin
|
|
|
+ { Defer removing the first p until after the while loop }
|
|
|
+ if p <> hp1 then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: MovMov done', hp1);
|
|
|
+ asml.remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ end;
|
|
|
+ hp1:=hpfar1;
|
|
|
+ GetNextInstruction(hpfar1,hpfar1);
|
|
|
+ result:=true;
|
|
|
+ if not assigned(hpfar1) then
|
|
|
+ Break;
|
|
|
+ end;
|
|
|
+
|
|
|
+ if Result then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: MovMov done', p);
|
|
|
+ RemoveCurrentp(p);
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+
|
|
|
+ if RedundantMovProcess(p,hpfar1) then
|
|
|
+ begin
|
|
|
+ Result:=true;
|
|
|
+ { p might not point at a mov anymore }
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+
|
|
|
+ { Fold the very common sequence
|
|
|
+ mov regA, regB
|
|
|
+ ldr* regA, [regA]
|
|
|
+ to
|
|
|
+ ldr* regA, [regB]
|
|
|
+ CAUTION! If this one is successful p might not be a mov instruction anymore!
|
|
|
+ }
|
|
|
+ if
|
|
|
+ // Make sure that Thumb code doesn't propagate a high register into a reference
|
|
|
+ (
|
|
|
+ (
|
|
|
+ GenerateThumbCode and
|
|
|
+ (getsupreg(taicpu(p).oper[1]^.reg) < RS_R8)
|
|
|
+ ) or (not GenerateThumbCode)
|
|
|
+ ) and
|
|
|
+ (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
+ (taicpu(p).oppostfix = PF_NONE) and
|
|
|
+ MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], []) and
|
|
|
+ (taicpu(hpfar1).oper[1]^.typ = top_ref) and
|
|
|
+ { We can change the base register only when the instruction uses AM_OFFSET }
|
|
|
+ ((taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
|
|
|
+ ((taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
|
|
|
+ ) and
|
|
|
+ not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
|
|
|
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: MovLdr2Ldr done', hpfar1);
|
|
|
+ if (taicpu(hpfar1).oper[1]^.ref^.addressmode = AM_OFFSET) and
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
|
|
|
+ taicpu(hpfar1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
|
|
|
+
|
|
|
+ if taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
|
|
|
+ taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
|
|
|
+
|
|
|
+ dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
|
|
|
+ if Assigned(dealloc) then
|
|
|
+ begin
|
|
|
+ asml.remove(dealloc);
|
|
|
+ asml.InsertAfter(dealloc,hpfar1);
|
|
|
+ end;
|
|
|
+
|
|
|
+ if not Assigned(hp1) then
|
|
|
+ GetNextInstruction(p, hp1);
|
|
|
+
|
|
|
+ RemoveCurrentP(p, hp1);
|
|
|
+
|
|
|
+ result:=true;
|
|
|
+ Exit;
|
|
|
+ end
|
|
|
+ end
|
|
|
+
|
|
|
+ { 3-operand mov optimisations }
|
|
|
+ else if (taicpu(p).ops = 3) then
|
|
|
+ begin
|
|
|
+
|
|
|
+ if (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
|
|
|
+ (taicpu(hpfar1).ops>=1) and
|
|
|
+ (taicpu(hpfar1).oper[0]^.typ=top_reg) and
|
|
|
+ (not RegModifiedBetween(taicpu(hpfar1).oper[0]^.reg, p, hpfar1)) and
|
|
|
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
|
|
|
+ begin
|
|
|
+ if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
|
|
|
+ MatchInstruction(hpfar1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
+ (taicpu(hpfar1).ops=3) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
|
|
|
+ (taicpu(hpfar1).oper[2]^.typ = top_const) and
|
|
|
+ { Check if the AND actually would only mask out bits being already zero because of the shift
|
|
|
+ }
|
|
|
+ ((($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm) and taicpu(hpfar1).oper[2]^.val) =
|
|
|
+ ($ffffffff shr taicpu(p).oper[2]^.shifterop^.shiftimm)) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: LsrAnd2Lsr done', hpfar1);
|
|
|
+ taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
|
|
|
+ asml.remove(hpfar1);
|
|
|
+ hpfar1.free;
|
|
|
+ result:=true;
|
|
|
+ Exit;
|
|
|
+ end
|
|
|
+ else if MatchInstruction(hpfar1, A_BIC, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
+ (taicpu(hpfar1).ops=3) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^) and
|
|
|
+ (taicpu(hpfar1).oper[2]^.typ = top_const) and
|
|
|
+ { Check if the BIC actually would only mask out bits being already zero because of the shift }
|
|
|
+ (taicpu(hpfar1).oper[2]^.val<>0) and
|
|
|
+ (BsfDWord(taicpu(hpfar1).oper[2]^.val)>=32-taicpu(p).oper[2]^.shifterop^.shiftimm) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: LsrBic2Lsr done', hpfar1);
|
|
|
+ taicpu(p).oper[0]^.reg:=taicpu(hpfar1).oper[0]^.reg;
|
|
|
+ asml.remove(hpfar1);
|
|
|
+ hpfar1.free;
|
|
|
+ result:=true;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ { This folds shifterops into following instructions
|
|
|
+ mov r0, r1, lsl #8
|
|
|
+ add r2, r3, r0
|
|
|
+
|
|
|
+ to
|
|
|
+
|
|
|
+ add r2, r3, r1, lsl #8
|
|
|
+ CAUTION! If this one is successful p might not be a mov instruction anymore!
|
|
|
+ }
|
|
|
+ if (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
+ (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
+ (taicpu(p).oppostfix = PF_NONE) and
|
|
|
+ MatchInstruction(hpfar1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
|
|
|
+ A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
|
|
|
+ A_CMP, A_CMN],
|
|
|
+ [taicpu(p).condition], [PF_None]) and
|
|
|
+ (not ((GenerateThumb2Code) and
|
|
|
+ (taicpu(hpfar1).opcode in [A_SBC]) and
|
|
|
+ (((taicpu(hpfar1).ops=3) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[1]^.reg)) or
|
|
|
+ ((taicpu(hpfar1).ops=2) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[0]^.reg))))) and
|
|
|
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) and
|
|
|
+ (taicpu(hpfar1).ops >= 2) and
|
|
|
+ {Currently we can't fold into another shifterop}
|
|
|
+ (taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^.typ = top_reg) and
|
|
|
+ {Folding rrx is problematic because of the C-Flag, as we currently can't check
|
|
|
+ NR_DEFAULTFLAGS for modification}
|
|
|
+ (
|
|
|
+ {Everything is fine if we don't use RRX}
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
|
|
|
+ (
|
|
|
+ {If it is RRX, then check if we're just accessing the next instruction}
|
|
|
+ Assigned(hp1) and
|
|
|
+ (hpfar1 = hp1)
|
|
|
+ )
|
|
|
+ ) and
|
|
|
+ { reg1 must not be modified in between }
|
|
|
+ not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
|
|
|
+ { The shifterop can contain a register, which must not be modified }
|
|
|
+ (
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
|
|
|
+ not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hpfar1))
|
|
|
+ ) and
|
|
|
+ (
|
|
|
+ {Only ONE of the two src operands is allowed to match}
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-2]^) xor
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[taicpu(hpfar1).ops-1]^)
|
|
|
+ ) then
|
|
|
+ begin
|
|
|
+ if taicpu(hpfar1).opcode in [A_TST, A_TEQ, A_CMN] then
|
|
|
+ I2:=0
|
|
|
+ else
|
|
|
+ I2:=1;
|
|
|
+ for I:=I2 to taicpu(hpfar1).ops-1 do
|
|
|
+ if MatchOperand(taicpu(p).oper[0]^, taicpu(hpfar1).oper[I]^.reg) then
|
|
|
+ begin
|
|
|
+ { If the parameter matched on the second op from the RIGHT
|
|
|
+ we have to switch the parameters, this will not happen for CMP
|
|
|
+ where we're only evaluating the rightmost parameter
|
|
|
+ }
|
|
|
+ if I <> taicpu(hpfar1).ops-1 then
|
|
|
+ begin
|
|
|
+ {The SUB operators need to be changed when we swap parameters}
|
|
|
+ case taicpu(hpfar1).opcode of
|
|
|
+ A_SUB: tempop:=A_RSB;
|
|
|
+ A_SBC: tempop:=A_RSC;
|
|
|
+ A_RSB: tempop:=A_SUB;
|
|
|
+ A_RSC: tempop:=A_SBC;
|
|
|
+ else tempop:=taicpu(hpfar1).opcode;
|
|
|
+ end;
|
|
|
+ if taicpu(hpfar1).ops = 3 then
|
|
|
+ hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
|
|
|
+ taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[2]^.reg,
|
|
|
+ taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
|
|
|
+ else
|
|
|
+ hp2:=taicpu.op_reg_reg_shifterop(tempop,
|
|
|
+ taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
|
|
|
+ taicpu(p).oper[2]^.shifterop^);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ if taicpu(hpfar1).ops = 3 then
|
|
|
+ hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hpfar1).opcode,
|
|
|
+ taicpu(hpfar1).oper[0]^.reg, taicpu(hpfar1).oper[1]^.reg,
|
|
|
+ taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
|
|
|
+ else
|
|
|
+ hp2:=taicpu.op_reg_reg_shifterop(taicpu(hpfar1).opcode,
|
|
|
+ taicpu(hpfar1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
|
|
|
+ taicpu(p).oper[2]^.shifterop^);
|
|
|
+ if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
|
|
|
+ AllocRegBetween(taicpu(p).oper[2]^.shifterop^.rs,p,hpfar1,UsedRegs);
|
|
|
+ AllocRegBetween(taicpu(p).oper[1]^.reg,p,hpfar1,UsedRegs);
|
|
|
+ asml.insertbefore(hp2, hpfar1);
|
|
|
+ asml.remove(hpfar1);
|
|
|
+ hpfar1.free;
|
|
|
+ DebugMsg('Peephole Optimization: FoldShiftProcess done', hp2);
|
|
|
+
|
|
|
+ if not Assigned(hp1) then
|
|
|
+ GetNextInstruction(p, hp1)
|
|
|
+ else if hp1 = hpfar1 then
|
|
|
+ { If hp1 = hpfar1, then it's a dangling pointer }
|
|
|
+ hp1 := hp2;
|
|
|
+
|
|
|
+ RemoveCurrentP(p, hp1);
|
|
|
+ Result:=true;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
end;
|
|
|
- else
|
|
|
- ;
|
|
|
- end;
|
|
|
+ {
|
|
|
+ Fold
|
|
|
+ mov r1, r1, lsl #2
|
|
|
+ ldr/ldrb r0, [r0, r1]
|
|
|
+ to
|
|
|
+ ldr/ldrb r0, [r0, r1, lsl #2]
|
|
|
+
|
|
|
+ XXX: This still needs some work, as we quite often encounter something like
|
|
|
+ mov r1, r2, lsl #2
|
|
|
+ add r2, r3, #imm
|
|
|
+ ldr r0, [r2, r1]
|
|
|
+ which can't be folded because r2 is overwritten between the shift and the ldr.
|
|
|
+ We could try to shuffle the registers around and fold it into.
|
|
|
+ add r1, r3, #imm
|
|
|
+ ldr r0, [r1, r2, lsl #2]
|
|
|
+ }
|
|
|
+ if (not(GenerateThumbCode)) and
|
|
|
+ { thumb2 allows only lsl #0..#3 }
|
|
|
+ (not(GenerateThumb2Code) or
|
|
|
+ ((taicpu(p).oper[2]^.shifterop^.shiftimm in [0..3]) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftmode=SM_LSL)
|
|
|
+ )
|
|
|
+ ) and
|
|
|
+ (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
+ (taicpu(p).oper[2]^.typ = top_shifterop) and
|
|
|
+ { RRX is tough to handle, because it requires tracking the C-Flag,
|
|
|
+ it is also extremely unlikely to be emitted this way}
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) and
|
|
|
+ (taicpu(p).oper[2]^.shifterop^.shiftimm <> 0) and
|
|
|
+ (taicpu(p).oppostfix = PF_NONE) and
|
|
|
+ {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
|
|
|
+ (MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
|
|
|
+ (GenerateThumb2Code and
|
|
|
+ MatchInstruction(hpfar1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
|
|
|
+ ) and
|
|
|
+ (
|
|
|
+ {If this is address by offset, one of the two registers can be used}
|
|
|
+ ((taicpu(hpfar1).oper[1]^.ref^.addressmode=AM_OFFSET) and
|
|
|
+ (
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
|
|
|
+ )
|
|
|
+ ) or
|
|
|
+ {For post and preindexed only the index register can be used}
|
|
|
+ ((taicpu(hpfar1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
|
|
|
+ (
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
|
|
|
+ ) and
|
|
|
+ (not GenerateThumb2Code)
|
|
|
+ )
|
|
|
+ ) and
|
|
|
+ { Only fold if both registers are used. Otherwise we are folding p with itself }
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.index<>NR_NO) and
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.base<>NR_NO) and
|
|
|
+ { Only fold if there isn't another shifterop already, and offset is zero. }
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.offset = 0) and
|
|
|
+ (taicpu(hpfar1).oper[1]^.ref^.shiftmode = SM_None) and
|
|
|
+ not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hpfar1)) and
|
|
|
+ RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hpfar1)) then
|
|
|
+ begin
|
|
|
+ { If the register we want to do the shift for resides in base, we need to swap that}
|
|
|
+ if (taicpu(hpfar1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
|
|
|
+ taicpu(hpfar1).oper[1]^.ref^.base := taicpu(hpfar1).oper[1]^.ref^.index;
|
|
|
+ taicpu(hpfar1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
|
|
|
+ taicpu(hpfar1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
|
|
|
+ taicpu(hpfar1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
|
|
|
+ DebugMsg('Peephole Optimization: FoldShiftLdrStr done', hpfar1);
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result:=true;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ {
|
|
|
+ Often we see shifts and then a superfluous mov to another register
|
|
|
+ In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
|
|
|
+ }
|
|
|
+ if RemoveSuperfluousMove(p, hpfar1, 'MovMov2Mov') then
|
|
|
+ Result:=true;
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1MVN(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1: tai;
|
|
|
+ begin
|
|
|
+ {
|
|
|
+ change
|
|
|
+ mvn reg2,reg1
|
|
|
+ and reg3,reg4,reg2
|
|
|
+ dealloc reg2
|
|
|
+ to
|
|
|
+ bic reg3,reg4,reg1
|
|
|
+ }
|
|
|
+ Result := False;
|
|
|
+ if (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
+ GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
|
|
|
+ MatchInstruction(hp1,A_AND,[],[]) and
|
|
|
+ (((taicpu(hp1).ops=3) and
|
|
|
+ (taicpu(hp1).oper[2]^.typ=top_reg) and
|
|
|
+ (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
|
|
|
+ MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
|
|
|
+ ((taicpu(hp1).ops=2) and
|
|
|
+ (taicpu(hp1).oper[1]^.typ=top_reg) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
|
|
|
+ assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
|
|
|
+ { reg1 must not be modified in between }
|
|
|
+ not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
|
|
|
+ begin
|
|
|
+ DebugMsg('Peephole Optimization: MvnAnd2Bic done', p);
|
|
|
+ taicpu(hp1).opcode:=A_BIC;
|
|
|
+
|
|
|
+ if taicpu(hp1).ops=3 then
|
|
|
+ begin
|
|
|
+ if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
|
|
|
+ taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
|
|
|
+
|
|
|
+ taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
|
|
|
+
|
|
|
+ RemoveCurrentp(p);
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1VMov(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1: tai;
|
|
|
+ begin
|
|
|
+ {
|
|
|
+ change
|
|
|
+ vmov reg0,reg1,reg2
|
|
|
+ vmov reg1,reg2,reg0
|
|
|
+ into
|
|
|
+ vmov reg0,reg1,reg2
|
|
|
+
|
|
|
+ can be applied regardless of whether reg0 or reg2 is the vfp register
|
|
|
+ }
|
|
|
+ Result := False;
|
|
|
+ if (taicpu(p).ops = 3) then
|
|
|
+ while GetNextInstruction(p, hp1) and
|
|
|
+ MatchInstruction(hp1, A_VMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
|
|
|
+ (taicpu(hp1).ops = 3) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[2]^) and
|
|
|
+ MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) and
|
|
|
+ MatchOperand(taicpu(p).oper[2]^, taicpu(hp1).oper[1]^) do
|
|
|
+ begin
|
|
|
+ asml.Remove(hp1);
|
|
|
+ hp1.free;
|
|
|
+ DebugMsg('Peephole Optimization: VMovVMov2VMov done', p);
|
|
|
+ { Can we do it again? }
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.OptPass1VOp(var p: tai): Boolean;
|
|
|
+ var
|
|
|
+ hp1: tai;
|
|
|
+ begin
|
|
|
+ Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
|
|
|
+ RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp');
|
|
|
+ end;
|
|
|
+
|
|
|
+
|
|
|
+ function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
|
|
|
+ begin
|
|
|
+ result := false;
|
|
|
+ if p.typ = ait_instruction then
|
|
|
+ begin
|
|
|
+ case taicpu(p).opcode of
|
|
|
+ A_CMP:
|
|
|
+ Result := OptPass1CMP(p);
|
|
|
+ A_STR:
|
|
|
+ Result := OptPass1STR(p);
|
|
|
+ A_LDR:
|
|
|
+ Result := OptPass1LDR(p);
|
|
|
+ A_MOV:
|
|
|
+ Result := OptPass1MOV(p);
|
|
|
+ A_AND:
|
|
|
+ Result := OptPass1And(p);
|
|
|
+ A_ADD,
|
|
|
+ A_SUB:
|
|
|
+ Result := OptPass1ADDSUB(p);
|
|
|
+ A_MUL:
|
|
|
+ Result := OptPass1MUL(p);
|
|
|
+ A_ADC,
|
|
|
+ A_RSB,
|
|
|
+ A_RSC,
|
|
|
+ A_SBC,
|
|
|
+ A_BIC,
|
|
|
+ A_EOR,
|
|
|
+ A_ORR,
|
|
|
+ A_MLA,
|
|
|
+ A_MLS,
|
|
|
+ A_QADD,A_QADD16,A_QADD8,
|
|
|
+ A_QSUB,A_QSUB16,A_QSUB8,
|
|
|
+ A_QDADD,A_QDSUB,A_QASX,A_QSAX,
|
|
|
+ A_SHADD16,A_SHADD8,A_UHADD16,A_UHADD8,
|
|
|
+ A_SHSUB16,A_SHSUB8,A_UHSUB16,A_UHSUB8,
|
|
|
+ A_PKHTB,A_PKHBT,
|
|
|
+ A_SMUAD,A_SMUSD:
|
|
|
+ Result := OptPass1DataCheckMov(p);
|
|
|
+{$ifdef dummy}
|
|
|
+ A_MVN:
|
|
|
+ Result := OptPass1MVN(p);
|
|
|
+{$endif dummy}
|
|
|
+ A_UXTB:
|
|
|
+ Result := OptPass1UXTB(p);
|
|
|
+ A_UXTH:
|
|
|
+ Result := OptPass1UXTH(p);
|
|
|
+ A_SXTB:
|
|
|
+ Result := OptPass1SXTB(p);
|
|
|
+ A_SXTH:
|
|
|
+ Result := OptPass1SXTH(p);
|
|
|
+ A_STM:
|
|
|
+ Result := OptPass1STM(p);
|
|
|
+ A_VMOV:
|
|
|
+ Result := OptPass1VMov(p);
|
|
|
+ A_VLDR,
|
|
|
+ A_VADD,
|
|
|
+ A_VMUL,
|
|
|
+ A_VDIV,
|
|
|
+ A_VSUB,
|
|
|
+ A_VSQRT,
|
|
|
+ A_VNEG,
|
|
|
+ A_VCVT,
|
|
|
+ A_VABS:
|
|
|
+ Result := OptPass1VOp(p);
|
|
|
+ else
|
|
|
+ ;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
end;
|
|
|
|
|
|
|