|
@@ -2752,7 +2752,7 @@ unit aoptx86;
|
|
GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
|
|
GetNextInstruction_p, TempRegUsed, CrossJump: Boolean;
|
|
PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
|
|
PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
|
|
NewSize: topsize;
|
|
NewSize: topsize;
|
|
- CurrentReg, ActiveReg: TRegister;
|
|
|
|
|
|
+ p_SourceReg, p_TargetReg, NewMMReg: TRegister;
|
|
SourceRef, TargetRef: TReference;
|
|
SourceRef, TargetRef: TReference;
|
|
MovAligned, MovUnaligned: TAsmOp;
|
|
MovAligned, MovUnaligned: TAsmOp;
|
|
ThisRef: TReference;
|
|
ThisRef: TReference;
|
|
@@ -2777,110 +2777,115 @@ unit aoptx86;
|
|
if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
|
|
if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then
|
|
Exit;
|
|
Exit;
|
|
|
|
|
|
- { Look for:
|
|
|
|
- mov %reg1,%reg2
|
|
|
|
- ??? %reg2,r/m
|
|
|
|
- Change to:
|
|
|
|
- mov %reg1,%reg2
|
|
|
|
- ??? %reg1,r/m
|
|
|
|
- }
|
|
|
|
- if MatchOpType(taicpu(p), top_reg, top_reg) then
|
|
|
|
- begin
|
|
|
|
- CurrentReg := taicpu(p).oper[1]^.reg;
|
|
|
|
|
|
+ { Prevent compiler warnings }
|
|
|
|
+ p_TargetReg := NR_NO;
|
|
|
|
|
|
- if RegReadByInstruction(CurrentReg, hp1) and
|
|
|
|
- DeepMOVOpt(taicpu(p), taicpu(hp1)) then
|
|
|
|
|
|
+ if taicpu(p).oper[1]^.typ = top_reg then
|
|
|
|
+ begin
|
|
|
|
+ { Saves on a large number of dereferences }
|
|
|
|
+ p_TargetReg := taicpu(p).oper[1]^.reg;
|
|
|
|
+
|
|
|
|
+ { Look for:
|
|
|
|
+ mov %reg1,%reg2
|
|
|
|
+ ??? %reg2,r/m
|
|
|
|
+ Change to:
|
|
|
|
+ mov %reg1,%reg2
|
|
|
|
+ ??? %reg1,r/m
|
|
|
|
+ }
|
|
|
|
+ if taicpu(p).oper[0]^.typ = top_reg then
|
|
begin
|
|
begin
|
|
- { A change has occurred, just not in p }
|
|
|
|
- Result := True;
|
|
|
|
|
|
+ if RegReadByInstruction(p_TargetReg, hp1) and
|
|
|
|
+ DeepMOVOpt(taicpu(p), taicpu(hp1)) then
|
|
|
|
+ begin
|
|
|
|
+ { A change has occurred, just not in p }
|
|
|
|
+ Result := True;
|
|
|
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
|
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
|
|
|
|
- if not RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs) and
|
|
|
|
- { Just in case something didn't get modified (e.g. an
|
|
|
|
- implicit register) }
|
|
|
|
- not RegReadByInstruction(CurrentReg, hp1) then
|
|
|
|
- begin
|
|
|
|
- { We can remove the original MOV }
|
|
|
|
- DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
|
|
|
|
- RemoveCurrentp(p, hp1);
|
|
|
|
|
|
+ if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and
|
|
|
|
+ { Just in case something didn't get modified (e.g. an
|
|
|
|
+ implicit register) }
|
|
|
|
+ not RegReadByInstruction(p_TargetReg, hp1) then
|
|
|
|
+ begin
|
|
|
|
+ { We can remove the original MOV }
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
|
|
|
|
+ RemoveCurrentp(p, hp1);
|
|
|
|
|
|
- { UsedRegs got updated by RemoveCurrentp }
|
|
|
|
- Result := True;
|
|
|
|
- Exit;
|
|
|
|
- end;
|
|
|
|
|
|
+ { UsedRegs got updated by RemoveCurrentp }
|
|
|
|
+ Result := True;
|
|
|
|
+ Exit;
|
|
|
|
+ end;
|
|
|
|
|
|
- { If we know a MOV instruction has become a null operation, we might as well
|
|
|
|
- get rid of it now to save time. }
|
|
|
|
- if (taicpu(hp1).opcode = A_MOV) and
|
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
|
- SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
|
|
|
|
- { Just being a register is enough to confirm it's a null operation }
|
|
|
|
- (taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
|
|
- begin
|
|
|
|
|
|
+ { If we know a MOV instruction has become a null operation, we might as well
|
|
|
|
+ get rid of it now to save time. }
|
|
|
|
+ if (taicpu(hp1).opcode = A_MOV) and
|
|
|
|
+ (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
|
+ SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
|
|
|
|
+ { Just being a register is enough to confirm it's a null operation }
|
|
|
|
+ (taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
|
|
+ begin
|
|
|
|
|
|
- Result := True;
|
|
|
|
|
|
+ Result := True;
|
|
|
|
|
|
- { Speed-up to reduce a pipeline stall... if we had something like...
|
|
|
|
|
|
+ { Speed-up to reduce a pipeline stall... if we had something like...
|
|
|
|
|
|
- movl %eax,%edx
|
|
|
|
- movw %dx,%ax
|
|
|
|
|
|
+ movl %eax,%edx
|
|
|
|
+ movw %dx,%ax
|
|
|
|
|
|
- ... the second instruction would change to movw %ax,%ax, but
|
|
|
|
- given that it is now %ax that's active rather than %eax,
|
|
|
|
- penalties might occur due to a partial register write, so instead,
|
|
|
|
- change it to a MOVZX instruction when optimising for speed.
|
|
|
|
- }
|
|
|
|
- if not (cs_opt_size in current_settings.optimizerswitches) and
|
|
|
|
- IsMOVZXAcceptable and
|
|
|
|
- (taicpu(hp1).opsize < taicpu(p).opsize)
|
|
|
|
|
|
+ ... the second instruction would change to movw %ax,%ax, but
|
|
|
|
+ given that it is now %ax that's active rather than %eax,
|
|
|
|
+ penalties might occur due to a partial register write, so instead,
|
|
|
|
+ change it to a MOVZX instruction when optimising for speed.
|
|
|
|
+ }
|
|
|
|
+ if not (cs_opt_size in current_settings.optimizerswitches) and
|
|
|
|
+ IsMOVZXAcceptable and
|
|
|
|
+ (taicpu(hp1).opsize < taicpu(p).opsize)
|
|
{$ifdef x86_64}
|
|
{$ifdef x86_64}
|
|
- { operations already implicitly set the upper 64 bits to zero }
|
|
|
|
- and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
|
|
|
|
|
|
+ { 32-bit operations already implicitly set the upper 32 bits of the 64-bit register to zero }
|
|
|
|
+ and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
|
|
{$endif x86_64}
|
|
{$endif x86_64}
|
|
- then
|
|
|
|
- begin
|
|
|
|
- CurrentReg := taicpu(hp1).oper[1]^.reg;
|
|
|
|
-
|
|
|
|
- DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
|
|
|
|
- case taicpu(p).opsize of
|
|
|
|
- S_W:
|
|
|
|
- if taicpu(hp1).opsize = S_B then
|
|
|
|
- taicpu(hp1).opsize := S_BL
|
|
|
|
- else
|
|
|
|
- InternalError(2020012911);
|
|
|
|
- S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
|
|
|
|
- case taicpu(hp1).opsize of
|
|
|
|
- S_B:
|
|
|
|
- taicpu(hp1).opsize := S_BL;
|
|
|
|
|
|
+ then
|
|
|
|
+ begin
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
|
|
|
|
+ case taicpu(p).opsize of
|
|
S_W:
|
|
S_W:
|
|
- taicpu(hp1).opsize := S_WL;
|
|
|
|
|
|
+ if taicpu(hp1).opsize = S_B then
|
|
|
|
+ taicpu(hp1).opsize := S_BL
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020012911);
|
|
|
|
+ S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
|
|
|
|
+ case taicpu(hp1).opsize of
|
|
|
|
+ S_B:
|
|
|
|
+ taicpu(hp1).opsize := S_BL;
|
|
|
|
+ S_W:
|
|
|
|
+ taicpu(hp1).opsize := S_WL;
|
|
|
|
+ else
|
|
|
|
+ InternalError(2020012912);
|
|
|
|
+ end;
|
|
else
|
|
else
|
|
- InternalError(2020012912);
|
|
|
|
|
|
+ InternalError(2020012910);
|
|
end;
|
|
end;
|
|
- else
|
|
|
|
- InternalError(2020012910);
|
|
|
|
- end;
|
|
|
|
|
|
|
|
- taicpu(hp1).opcode := A_MOVZX;
|
|
|
|
- taicpu(hp1).oper[1]^.reg := newreg(getregtype(CurrentReg), getsupreg(CurrentReg), R_SUBD)
|
|
|
|
- end
|
|
|
|
- else
|
|
|
|
- begin
|
|
|
|
- GetNextInstruction_p := GetNextInstruction(hp1, hp2);
|
|
|
|
- DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
|
|
|
|
- RemoveInstruction(hp1);
|
|
|
|
|
|
+ taicpu(hp1).opcode := A_MOVZX;
|
|
|
|
+ setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
|
|
|
|
+ end
|
|
|
|
+ else
|
|
|
|
+ begin
|
|
|
|
+ GetNextInstruction_p := GetNextInstruction(hp1, hp2);
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
|
|
|
|
+ RemoveInstruction(hp1);
|
|
|
|
|
|
- { The instruction after what was hp1 is now the immediate next instruction,
|
|
|
|
- so we can continue to make optimisations if it's present }
|
|
|
|
- if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
|
|
|
|
- Exit;
|
|
|
|
|
|
+ { The instruction after what was hp1 is now the immediate next instruction,
|
|
|
|
+ so we can continue to make optimisations if it's present }
|
|
|
|
+ if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
|
|
|
|
+ Exit;
|
|
|
|
|
|
- hp1 := hp2;
|
|
|
|
|
|
+ hp1 := hp2;
|
|
|
|
+ end;
|
|
end;
|
|
end;
|
|
- end;
|
|
|
|
|
|
|
|
|
|
+ end;
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
@@ -3375,15 +3380,15 @@ unit aoptx86;
|
|
if (taicpu(p).oper[1]^.typ = top_reg) and
|
|
if (taicpu(p).oper[1]^.typ = top_reg) and
|
|
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
|
|
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
|
|
begin
|
|
begin
|
|
- CurrentReg := taicpu(p).oper[1]^.reg;
|
|
|
|
|
|
+ { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
{ we have
|
|
{ we have
|
|
mov x, %treg
|
|
mov x, %treg
|
|
mov %treg, y
|
|
mov %treg, y
|
|
}
|
|
}
|
|
- if not(RegInOp(CurrentReg, taicpu(hp1).oper[1]^)) then
|
|
|
|
- if not(RegUsedAfterInstruction(CurrentReg, hp1, TmpUsedRegs)) then
|
|
|
|
|
|
+ if not(RegInOp(p_TargetReg, taicpu(hp1).oper[1]^)) then
|
|
|
|
+ if not(RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs)) then
|
|
{ we've got
|
|
{ we've got
|
|
|
|
|
|
mov x, %treg
|
|
mov x, %treg
|
|
@@ -3485,8 +3490,8 @@ unit aoptx86;
|
|
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
|
|
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
|
|
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
|
|
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
|
|
begin
|
|
begin
|
|
- CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
|
|
|
|
- if CurrentReg <> NR_NO then
|
|
|
|
|
|
+ NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
|
|
|
|
+ if NewMMReg <> NR_NO then
|
|
begin
|
|
begin
|
|
{ Remember that the offsets are 8 ahead }
|
|
{ Remember that the offsets are 8 ahead }
|
|
if ((SourceRef.offset mod 16) = 8) and
|
|
if ((SourceRef.offset mod 16) = 8) and
|
|
@@ -3500,7 +3505,7 @@ unit aoptx86;
|
|
taicpu(p).opcode := MovUnaligned;
|
|
taicpu(p).opcode := MovUnaligned;
|
|
|
|
|
|
taicpu(p).opsize := S_XMM;
|
|
taicpu(p).opsize := S_XMM;
|
|
- taicpu(p).oper[1]^.reg := CurrentReg;
|
|
|
|
|
|
+ taicpu(p).oper[1]^.reg := NewMMReg;
|
|
|
|
|
|
if ((TargetRef.offset mod 16) = 8) and
|
|
if ((TargetRef.offset mod 16) = 8) and
|
|
(
|
|
(
|
|
@@ -3513,9 +3518,9 @@ unit aoptx86;
|
|
taicpu(hp1).opcode := MovUnaligned;
|
|
taicpu(hp1).opcode := MovUnaligned;
|
|
|
|
|
|
taicpu(hp1).opsize := S_XMM;
|
|
taicpu(hp1).opsize := S_XMM;
|
|
- taicpu(hp1).oper[0]^.reg := CurrentReg;
|
|
|
|
|
|
+ taicpu(hp1).oper[0]^.reg := NewMMReg;
|
|
|
|
|
|
- DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
|
|
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
|
|
|
|
|
|
RemoveInstruction(hp2);
|
|
RemoveInstruction(hp2);
|
|
RemoveInstruction(hp3);
|
|
RemoveInstruction(hp3);
|
|
@@ -3541,8 +3546,8 @@ unit aoptx86;
|
|
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
|
|
RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
|
|
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
|
|
not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
|
|
begin
|
|
begin
|
|
- CurrentReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
|
|
|
|
- if CurrentReg <> NR_NO then
|
|
|
|
|
|
+ NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
|
|
|
|
+ if NewMMReg <> NR_NO then
|
|
begin
|
|
begin
|
|
{ hp2 and hp3 are the starting offsets, so mod = 0 this time }
|
|
{ hp2 and hp3 are the starting offsets, so mod = 0 this time }
|
|
if ((SourceRef.offset mod 16) = 0) and
|
|
if ((SourceRef.offset mod 16) = 0) and
|
|
@@ -3556,7 +3561,7 @@ unit aoptx86;
|
|
taicpu(hp2).opcode := MovUnaligned;
|
|
taicpu(hp2).opcode := MovUnaligned;
|
|
|
|
|
|
taicpu(hp2).opsize := S_XMM;
|
|
taicpu(hp2).opsize := S_XMM;
|
|
- taicpu(hp2).oper[1]^.reg := CurrentReg;
|
|
|
|
|
|
+ taicpu(hp2).oper[1]^.reg := NewMMReg;
|
|
|
|
|
|
if ((TargetRef.offset mod 16) = 0) and
|
|
if ((TargetRef.offset mod 16) = 0) and
|
|
(
|
|
(
|
|
@@ -3569,9 +3574,9 @@ unit aoptx86;
|
|
taicpu(hp3).opcode := MovUnaligned;
|
|
taicpu(hp3).opcode := MovUnaligned;
|
|
|
|
|
|
taicpu(hp3).opsize := S_XMM;
|
|
taicpu(hp3).opsize := S_XMM;
|
|
- taicpu(hp3).oper[0]^.reg := CurrentReg;
|
|
|
|
|
|
+ taicpu(hp3).oper[0]^.reg := NewMMReg;
|
|
|
|
|
|
- DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(CurrentReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
|
|
|
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
|
|
|
|
|
|
RemoveInstruction(hp1);
|
|
RemoveInstruction(hp1);
|
|
RemoveCurrentP(p, hp2);
|
|
RemoveCurrentP(p, hp2);
|
|
@@ -3799,37 +3804,37 @@ unit aoptx86;
|
|
}
|
|
}
|
|
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
|
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
|
begin
|
|
begin
|
|
- CurrentReg := taicpu(p).oper[0]^.reg;
|
|
|
|
- ActiveReg := taicpu(p).oper[1]^.reg;
|
|
|
|
|
|
+ p_SourceReg := taicpu(p).oper[0]^.reg;
|
|
|
|
+ { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
|
|
|
|
|
if (taicpu(hp1).oper[0]^.typ = top_ref) { The other operand will be a register } and
|
|
if (taicpu(hp1).oper[0]^.typ = top_ref) { The other operand will be a register } and
|
|
- (taicpu(hp1).oper[1]^.reg = CurrentReg) and
|
|
|
|
- RegInRef(CurrentReg, taicpu(hp1).oper[0]^.ref^) and
|
|
|
|
|
|
+ (taicpu(hp1).oper[1]^.reg = p_SourceReg) and
|
|
|
|
+ RegInRef(p_SourceReg, taicpu(hp1).oper[0]^.ref^) and
|
|
GetNextInstruction(hp1, hp2) and
|
|
GetNextInstruction(hp1, hp2) and
|
|
MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
|
|
MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
|
|
(taicpu(hp2).oper[0]^.typ = top_ref) { The other operand will be a register } then
|
|
(taicpu(hp2).oper[0]^.typ = top_ref) { The other operand will be a register } then
|
|
begin
|
|
begin
|
|
SourceRef := taicpu(hp2).oper[0]^.ref^;
|
|
SourceRef := taicpu(hp2).oper[0]^.ref^;
|
|
- if RegInRef(ActiveReg, SourceRef) and
|
|
|
|
|
|
+ if RegInRef(p_TargetReg, SourceRef) and
|
|
{ If %reg1 also appears in the second reference, then it will
|
|
{ If %reg1 also appears in the second reference, then it will
|
|
not refer to the same memory block as the first reference }
|
|
not refer to the same memory block as the first reference }
|
|
- not RegInRef(CurrentReg, SourceRef) then
|
|
|
|
|
|
+ not RegInRef(p_SourceReg, SourceRef) then
|
|
begin
|
|
begin
|
|
{ Check to see if the references match if %reg2 is changed to %reg1 }
|
|
{ Check to see if the references match if %reg2 is changed to %reg1 }
|
|
- if SourceRef.base = ActiveReg then
|
|
|
|
- SourceRef.base := CurrentReg;
|
|
|
|
|
|
+ if SourceRef.base = p_TargetReg then
|
|
|
|
+ SourceRef.base := p_SourceReg;
|
|
|
|
|
|
- if SourceRef.index = ActiveReg then
|
|
|
|
- SourceRef.index := CurrentReg;
|
|
|
|
|
|
+ if SourceRef.index = p_TargetReg then
|
|
|
|
+ SourceRef.index := p_SourceReg;
|
|
|
|
|
|
{ RefsEqual also checks to ensure both references are non-volatile }
|
|
{ RefsEqual also checks to ensure both references are non-volatile }
|
|
if RefsEqual(taicpu(hp1).oper[0]^.ref^, SourceRef) then
|
|
if RefsEqual(taicpu(hp1).oper[0]^.ref^, SourceRef) then
|
|
begin
|
|
begin
|
|
- taicpu(hp2).loadreg(0, CurrentReg);
|
|
|
|
|
|
+ taicpu(hp2).loadreg(0, p_SourceReg);
|
|
|
|
|
|
DebugMsg(SPeepholeOptimization + 'Optimised register duplication and memory read (MovMovMov2MovMovMov)', p);
|
|
DebugMsg(SPeepholeOptimization + 'Optimised register duplication and memory read (MovMovMov2MovMovMov)', p);
|
|
Result := True;
|
|
Result := True;
|
|
- if taicpu(hp2).oper[1]^.reg = ActiveReg then
|
|
|
|
|
|
+ if taicpu(hp2).oper[1]^.reg = p_TargetReg then
|
|
begin
|
|
begin
|
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5a done', p);
|
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5a done', p);
|
|
RemoveCurrentP(p, hp1);
|
|
RemoveCurrentP(p, hp1);
|
|
@@ -3842,7 +3847,7 @@ unit aoptx86;
|
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
|
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
|
|
|
|
|
- if not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then
|
|
|
|
|
|
+ if not RegUsedAfterInstruction(p_TargetReg, hp2, TmpUsedRegs) then
|
|
begin
|
|
begin
|
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5b done', p);
|
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 5b done', p);
|
|
RemoveCurrentP(p, hp1);
|
|
RemoveCurrentP(p, hp1);
|
|
@@ -3871,8 +3876,7 @@ unit aoptx86;
|
|
{ Initialise CrossJump (if it becomes True at any point, it will remain True) }
|
|
{ Initialise CrossJump (if it becomes True at any point, it will remain True) }
|
|
CrossJump := (taicpu(hp1).opcode = A_Jcc);
|
|
CrossJump := (taicpu(hp1).opcode = A_Jcc);
|
|
|
|
|
|
- { Saves on a large number of dereferences }
|
|
|
|
- ActiveReg := taicpu(p).oper[1]^.reg;
|
|
|
|
|
|
+ { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
|
|
|
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
@@ -3882,16 +3886,16 @@ unit aoptx86;
|
|
else
|
|
else
|
|
JumpTracking := nil;
|
|
JumpTracking := nil;
|
|
|
|
|
|
- while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,JumpTracking,CrossJump) and
|
|
|
|
|
|
+ while GetNextInstructionUsingRegCond(hp3,hp2,p_TargetReg,JumpTracking,CrossJump) and
|
|
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
|
|
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
|
|
(hp2.typ=ait_instruction) do
|
|
(hp2.typ=ait_instruction) do
|
|
begin
|
|
begin
|
|
case taicpu(hp2).opcode of
|
|
case taicpu(hp2).opcode of
|
|
A_POP:
|
|
A_POP:
|
|
- if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) then
|
|
|
|
|
|
+ if MatchOperand(taicpu(hp2).oper[0]^,p_TargetReg) then
|
|
begin
|
|
begin
|
|
if not CrossJump and
|
|
if not CrossJump and
|
|
- not RegUsedBetween(ActiveReg, p, hp2) then
|
|
|
|
|
|
+ not RegUsedBetween(p_TargetReg, p, hp2) then
|
|
begin
|
|
begin
|
|
{ We can remove the original MOV since the register
|
|
{ We can remove the original MOV since the register
|
|
wasn't used between it and its popping from the stack }
|
|
wasn't used between it and its popping from the stack }
|
|
@@ -3905,7 +3909,7 @@ unit aoptx86;
|
|
Break;
|
|
Break;
|
|
end;
|
|
end;
|
|
A_MOV:
|
|
A_MOV:
|
|
- if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
|
|
|
|
|
|
+ if MatchOperand(taicpu(hp2).oper[0]^,p_TargetReg) and
|
|
((taicpu(p).oper[0]^.typ=top_const) or
|
|
((taicpu(p).oper[0]^.typ=top_const) or
|
|
((taicpu(p).oper[0]^.typ=top_reg) and
|
|
((taicpu(p).oper[0]^.typ=top_reg) and
|
|
not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2))
|
|
not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp2))
|
|
@@ -3924,8 +3928,8 @@ unit aoptx86;
|
|
|
|
|
|
TempRegUsed :=
|
|
TempRegUsed :=
|
|
CrossJump { Assume the register is in use if it crossed a conditional jump } or
|
|
CrossJump { Assume the register is in use if it crossed a conditional jump } or
|
|
- RegReadByInstruction(ActiveReg, hp3) or
|
|
|
|
- RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
|
|
|
|
|
|
+ RegReadByInstruction(p_TargetReg, hp3) or
|
|
|
|
+ RegUsedAfterInstruction(p_TargetReg, hp2, TmpUsedRegs);
|
|
|
|
|
|
case taicpu(p).oper[0]^.typ Of
|
|
case taicpu(p).oper[0]^.typ Of
|
|
top_reg:
|
|
top_reg:
|
|
@@ -3938,17 +3942,17 @@ unit aoptx86;
|
|
|
|
|
|
mov %reg, y
|
|
mov %reg, y
|
|
}
|
|
}
|
|
- CurrentReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
|
|
|
|
|
|
+ p_SourceReg := taicpu(p).oper[0]^.reg; { Saves on a handful of pointer dereferences }
|
|
RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
|
|
RegName1 := debug_regname(taicpu(hp2).oper[0]^.reg);
|
|
- if MatchOperand(taicpu(hp2).oper[1]^, CurrentReg) then
|
|
|
|
|
|
+ if MatchOperand(taicpu(hp2).oper[1]^, p_SourceReg) then
|
|
begin
|
|
begin
|
|
{ %reg = y - remove hp2 completely (doing it here instead of relying on
|
|
{ %reg = y - remove hp2 completely (doing it here instead of relying on
|
|
the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
|
|
the "mov %reg,%reg" optimisation might cut down on a pass iteration) }
|
|
|
|
|
|
if TempRegUsed then
|
|
if TempRegUsed then
|
|
begin
|
|
begin
|
|
- DebugMsg(SPeepholeOptimization + debug_regname(CurrentReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
|
|
|
|
- AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
|
|
|
|
|
|
+ DebugMsg(SPeepholeOptimization + debug_regname(p_SourceReg) + ' = ' + RegName1 + '; removed unnecessary instruction (MovMov2MovNop 6b}',hp2);
|
|
|
|
+ AllocRegBetween(p_SourceReg, p, hp2, UsedRegs);
|
|
{ Set the start of the next GetNextInstructionUsingRegCond search
|
|
{ Set the start of the next GetNextInstructionUsingRegCond search
|
|
to start at the entry right before hp2 (which is about to be removed) }
|
|
to start at the entry right before hp2 (which is about to be removed) }
|
|
hp3 := tai(hp2.Previous);
|
|
hp3 := tai(hp2.Previous);
|
|
@@ -3971,19 +3975,19 @@ unit aoptx86;
|
|
end
|
|
end
|
|
else
|
|
else
|
|
begin
|
|
begin
|
|
- AllocRegBetween(CurrentReg, p, hp2, UsedRegs);
|
|
|
|
- taicpu(hp2).loadReg(0, CurrentReg);
|
|
|
|
|
|
+ AllocRegBetween(p_SourceReg, p, hp2, UsedRegs);
|
|
|
|
+ taicpu(hp2).loadReg(0, p_SourceReg);
|
|
|
|
|
|
- DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(CurrentReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
|
|
|
|
|
|
+ DebugMsg(SPeepholeOptimization + RegName1 + ' = ' + debug_regname(p_SourceReg) + '; changed to minimise pipeline stall (MovMov2Mov 6a}',hp2);
|
|
|
|
|
|
{ Check to see if the register also appears in the reference }
|
|
{ Check to see if the register also appears in the reference }
|
|
if (taicpu(hp2).oper[1]^.typ = top_ref) then
|
|
if (taicpu(hp2).oper[1]^.typ = top_ref) then
|
|
- ReplaceRegisterInRef(taicpu(hp2).oper[1]^.ref^, ActiveReg, CurrentReg);
|
|
|
|
|
|
+ ReplaceRegisterInRef(taicpu(hp2).oper[1]^.ref^, p_TargetReg, p_SourceReg);
|
|
|
|
|
|
{ Don't remove the first instruction if the temporary register is in use }
|
|
{ Don't remove the first instruction if the temporary register is in use }
|
|
if not TempRegUsed and
|
|
if not TempRegUsed and
|
|
{ ReplaceRegisterInRef won't actually replace the register if it's a different size }
|
|
{ ReplaceRegisterInRef won't actually replace the register if it's a different size }
|
|
- not RegInOp(ActiveReg, taicpu(hp2).oper[1]^) then
|
|
|
|
|
|
+ not RegInOp(p_TargetReg, taicpu(hp2).oper[1]^) then
|
|
begin
|
|
begin
|
|
DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
|
|
DebugMsg(SPeepholeOptimization + 'MovMov2Mov 6 done',p);
|
|
RemoveCurrentP(p, hp1);
|
|
RemoveCurrentP(p, hp1);
|
|
@@ -4037,11 +4041,11 @@ unit aoptx86;
|
|
end;
|
|
end;
|
|
end
|
|
end
|
|
else
|
|
else
|
|
- if MatchOperand(taicpu(hp2).oper[1]^, ActiveReg) then
|
|
|
|
|
|
+ if MatchOperand(taicpu(hp2).oper[1]^, p_TargetReg) then
|
|
begin
|
|
begin
|
|
if not CrossJump and
|
|
if not CrossJump and
|
|
- not RegUsedBetween(ActiveReg, p, hp2) and
|
|
|
|
- not RegReadByInstruction(ActiveReg, hp2) then
|
|
|
|
|
|
+ not RegUsedBetween(p_TargetReg, p, hp2) and
|
|
|
|
+ not RegReadByInstruction(p_TargetReg, hp2) then
|
|
begin
|
|
begin
|
|
{ Register is not used before it is overwritten }
|
|
{ Register is not used before it is overwritten }
|
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
|
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
|
|
@@ -4067,8 +4071,8 @@ unit aoptx86;
|
|
end;
|
|
end;
|
|
A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
|
|
A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
|
|
if MatchOpType(taicpu(hp2), top_reg, top_reg) and
|
|
if MatchOpType(taicpu(hp2), top_reg, top_reg) and
|
|
- MatchOperand(taicpu(hp2).oper[0]^, ActiveReg) and
|
|
|
|
- SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, ActiveReg) then
|
|
|
|
|
|
+ MatchOperand(taicpu(hp2).oper[0]^, p_TargetReg) and
|
|
|
|
+ SuperRegistersEqual(taicpu(hp2).oper[1]^.reg, p_TargetReg) then
|
|
begin
|
|
begin
|
|
{
|
|
{
|
|
Change from:
|
|
Change from:
|
|
@@ -4108,10 +4112,10 @@ unit aoptx86;
|
|
{ Also catches MOV/S/Z instructions that aren't modified }
|
|
{ Also catches MOV/S/Z instructions that aren't modified }
|
|
if taicpu(p).oper[0]^.typ = top_reg then
|
|
if taicpu(p).oper[0]^.typ = top_reg then
|
|
begin
|
|
begin
|
|
- CurrentReg := taicpu(p).oper[0]^.reg;
|
|
|
|
|
|
+ p_SourceReg := taicpu(p).oper[0]^.reg;
|
|
if
|
|
if
|
|
- not RegModifiedByInstruction(CurrentReg, hp3) and
|
|
|
|
- not RegModifiedBetween(CurrentReg, hp3, hp2) and
|
|
|
|
|
|
+ not RegModifiedByInstruction(p_SourceReg, hp3) and
|
|
|
|
+ not RegModifiedBetween(p_SourceReg, hp3, hp2) and
|
|
DeepMOVOpt(taicpu(p), taicpu(hp2)) then
|
|
DeepMOVOpt(taicpu(p), taicpu(hp2)) then
|
|
begin
|
|
begin
|
|
Result := True;
|
|
Result := True;
|
|
@@ -4120,7 +4124,7 @@ unit aoptx86;
|
|
implicit register). Also, if it does read from this
|
|
implicit register). Also, if it does read from this
|
|
register, then there's no longer an advantage to
|
|
register, then there's no longer an advantage to
|
|
changing the register on subsequent instructions.}
|
|
changing the register on subsequent instructions.}
|
|
- if not RegReadByInstruction(ActiveReg, hp2) then
|
|
|
|
|
|
+ if not RegReadByInstruction(p_TargetReg, hp2) then
|
|
begin
|
|
begin
|
|
{ If a conditional jump was crossed, do not delete
|
|
{ If a conditional jump was crossed, do not delete
|
|
the original MOV no matter what }
|
|
the original MOV no matter what }
|
|
@@ -4128,7 +4132,7 @@ unit aoptx86;
|
|
{ RegEndOfLife returns True if the register is
|
|
{ RegEndOfLife returns True if the register is
|
|
deallocated before the next instruction or has
|
|
deallocated before the next instruction or has
|
|
been loaded with a new value }
|
|
been loaded with a new value }
|
|
- RegEndOfLife(ActiveReg, taicpu(hp2)) then
|
|
|
|
|
|
+ RegEndOfLife(p_TargetReg, taicpu(hp2)) then
|
|
begin
|
|
begin
|
|
{ We can remove the original MOV }
|
|
{ We can remove the original MOV }
|
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
|
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
|
|
@@ -4138,7 +4142,7 @@ unit aoptx86;
|
|
Exit;
|
|
Exit;
|
|
end;
|
|
end;
|
|
|
|
|
|
- if not RegModifiedByInstruction(ActiveReg, hp2) then
|
|
|
|
|
|
+ if not RegModifiedByInstruction(p_TargetReg, hp2) then
|
|
begin
|
|
begin
|
|
{ See if there's more we can optimise }
|
|
{ See if there's more we can optimise }
|
|
hp3 := hp2;
|
|
hp3 := hp2;
|
|
@@ -4566,10 +4570,10 @@ unit aoptx86;
|
|
}
|
|
}
|
|
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
|
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
|
begin
|
|
begin
|
|
- CurrentReg := taicpu(p).oper[0]^.reg;
|
|
|
|
- ActiveReg := taicpu(p).oper[1]^.reg;
|
|
|
|
|
|
+ p_SourceReg := taicpu(p).oper[0]^.reg;
|
|
|
|
+ { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
TransferUsedRegs(TmpUsedRegs);
|
|
- if not RegUsedAfterInstruction(CurrentReg, p, TmpUsedRegs) and
|
|
|
|
|
|
+ if not RegUsedAfterInstruction(p_SourceReg, p, TmpUsedRegs) and
|
|
GetLastInstruction(p, hp2) and
|
|
GetLastInstruction(p, hp2) and
|
|
(hp2.typ = ait_instruction) and
|
|
(hp2.typ = ait_instruction) and
|
|
{ Have to make sure it's an instruction that only reads from
|
|
{ Have to make sure it's an instruction that only reads from
|
|
@@ -4578,25 +4582,21 @@ unit aoptx86;
|
|
(taicpu(hp2).ops = 2) and
|
|
(taicpu(hp2).ops = 2) and
|
|
(insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2]) and
|
|
(insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2]) and
|
|
(taicpu(hp2).oper[1]^.typ = top_reg) and
|
|
(taicpu(hp2).oper[1]^.typ = top_reg) and
|
|
- (taicpu(hp2).oper[1]^.reg = CurrentReg) then
|
|
|
|
|
|
+ (taicpu(hp2).oper[1]^.reg = p_SourceReg) then
|
|
begin
|
|
begin
|
|
case taicpu(hp2).opcode of
|
|
case taicpu(hp2).opcode of
|
|
A_FSTSW, A_FNSTSW,
|
|
A_FSTSW, A_FNSTSW,
|
|
A_IN, A_INS, A_OUT, A_OUTS,
|
|
A_IN, A_INS, A_OUT, A_OUTS,
|
|
- A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS,
|
|
|
|
|
|
+ A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
|
|
{ These routines have explicit operands, but they are restricted in
|
|
{ These routines have explicit operands, but they are restricted in
|
|
what they can be (e.g. IN and OUT can only read from AL, AX or
|
|
what they can be (e.g. IN and OUT can only read from AL, AX or
|
|
EAX. }
|
|
EAX. }
|
|
- A_CMOVcc:
|
|
|
|
- { CMOV is not valid either because then CurrentReg will depend
|
|
|
|
- on an unknown value if the condition is False and hence is
|
|
|
|
- not a pure write }
|
|
|
|
;
|
|
;
|
|
else
|
|
else
|
|
begin
|
|
begin
|
|
DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p);
|
|
DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p);
|
|
- taicpu(hp2).oper[1]^.reg := ActiveReg;
|
|
|
|
- AllocRegBetween(ActiveReg, hp2, p, TmpUsedRegs);
|
|
|
|
|
|
+ taicpu(hp2).oper[1]^.reg := p_TargetReg;
|
|
|
|
+ AllocRegBetween(p_TargetReg, hp2, p, TmpUsedRegs);
|
|
RemoveCurrentp(p, hp1);
|
|
RemoveCurrentp(p, hp1);
|
|
Result := True;
|
|
Result := True;
|
|
Exit;
|
|
Exit;
|