|
@@ -211,6 +211,7 @@ unit aoptx86;
|
|
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
|
|
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
|
|
|
|
|
|
function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
|
|
function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
|
|
|
|
+ function TrySwapMovOp(var p, hp1: tai): Boolean;
|
|
function TrySwapMovCmp(var p, hp1: tai): Boolean;
|
|
function TrySwapMovCmp(var p, hp1: tai): Boolean;
|
|
|
|
|
|
{ Processor-dependent reference optimisation }
|
|
{ Processor-dependent reference optimisation }
|
|
@@ -8453,10 +8454,10 @@ unit aoptx86;
|
|
Break;
|
|
Break;
|
|
|
|
|
|
case taicpu(hp2).opcode of
|
|
case taicpu(hp2).opcode of
|
|
- A_MOVSS:
|
|
|
|
|
|
+ A_MOVSD:
|
|
begin
|
|
begin
|
|
if taicpu(hp2).ops = 0 then
|
|
if taicpu(hp2).ops = 0 then
|
|
- { Wrong MOVSS }
|
|
|
|
|
|
+ { Wrong MOVSD }
|
|
Break;
|
|
Break;
|
|
Inc(Count);
|
|
Inc(Count);
|
|
if Count >= 5 then
|
|
if Count >= 5 then
|
|
@@ -8475,7 +8476,7 @@ unit aoptx86;
|
|
A_MOVZX,
|
|
A_MOVZX,
|
|
A_MOVAPS,
|
|
A_MOVAPS,
|
|
A_MOVUPS,
|
|
A_MOVUPS,
|
|
- A_MOVSD,
|
|
|
|
|
|
+ A_MOVSS,
|
|
A_MOVAPD,
|
|
A_MOVAPD,
|
|
A_MOVUPD,
|
|
A_MOVUPD,
|
|
A_MOVDQA,
|
|
A_MOVDQA,
|
|
@@ -8626,41 +8627,38 @@ unit aoptx86;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
|
|
- function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
|
|
|
|
|
|
+ const
|
|
|
|
+ WriteOp: array[0..3] of set of TInsChange = (
|
|
|
|
+ [Ch_Wop1, Ch_RWop1, Ch_Mop1],
|
|
|
|
+ [Ch_Wop2, Ch_RWop2, Ch_Mop2],
|
|
|
|
+ [Ch_Wop3, Ch_RWop3, Ch_Mop3],
|
|
|
|
+ [Ch_Wop4, Ch_RWop4, Ch_Mop4]);
|
|
|
|
+
|
|
|
|
+ RegWriteFlags: array[0..7] of set of TInsChange = (
|
|
|
|
+ { The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
|
|
|
|
+ [Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
|
|
|
|
+ [Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
|
|
|
|
+ [Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
|
|
|
|
+ [Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
|
|
|
|
+ [Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
|
|
|
|
+ [Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
|
|
|
|
+ [Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
|
|
|
|
+ [Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ function TX86AsmOptimizer.TrySwapMovOp(var p, hp1: tai): Boolean;
|
|
var
|
|
var
|
|
hp2: tai;
|
|
hp2: tai;
|
|
X: Integer;
|
|
X: Integer;
|
|
- const
|
|
|
|
- WriteOp: array[0..3] of set of TInsChange = (
|
|
|
|
- [Ch_Wop1, Ch_RWop1, Ch_Mop1],
|
|
|
|
- [Ch_Wop2, Ch_RWop2, Ch_Mop2],
|
|
|
|
- [Ch_Wop3, Ch_RWop3, Ch_Mop3],
|
|
|
|
- [Ch_Wop4, Ch_RWop4, Ch_Mop4]);
|
|
|
|
-
|
|
|
|
- RegWriteFlags: array[0..7] of set of TInsChange = (
|
|
|
|
- { The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
|
|
|
|
- [Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
|
|
|
|
- [Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
|
|
|
|
- [Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
|
|
|
|
- [Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
|
|
|
|
- [Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
|
|
|
|
- [Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
|
|
|
|
- [Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
|
|
|
|
- [Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
|
|
|
|
-
|
|
|
|
begin
|
|
begin
|
|
{ If we have something like:
|
|
{ If we have something like:
|
|
- cmp ###,%reg1
|
|
|
|
- mov 0,%reg2
|
|
|
|
|
|
+ op ###,###
|
|
|
|
+ mov ###,###
|
|
|
|
|
|
- And no modified registers are shared, move the instruction to before
|
|
|
|
- the comparison as this means it can be optimised without worrying
|
|
|
|
- about the FLAGS register. (CMP/MOV is generated by
|
|
|
|
- "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
|
|
|
|
|
|
+ Try to move the MOV instruction to before OP as long as OP and MOV don't
|
|
|
|
+ interfere with regard to what they write to.
|
|
|
|
|
|
- As long as the second instruction doesn't use the flags or one of the
|
|
|
|
- registers used by CMP or TEST (also check any references that use the
|
|
|
|
- registers), then it can be moved prior to the comparison.
|
|
|
|
|
|
+ NOTE: p must be a 2-operand instruction
|
|
}
|
|
}
|
|
|
|
|
|
Result := False;
|
|
Result := False;
|
|
@@ -8672,12 +8670,12 @@ unit aoptx86;
|
|
{ NOP is a pipeline fence, likely marking the beginning of the function
|
|
{ NOP is a pipeline fence, likely marking the beginning of the function
|
|
epilogue, so drop out. Similarly, drop out if POP or RET are
|
|
epilogue, so drop out. Similarly, drop out if POP or RET are
|
|
encountered }
|
|
encountered }
|
|
- if MatchInstruction(hp1, A_NOP, A_POP, []) then
|
|
|
|
|
|
+ if MatchInstruction(hp1, A_NOP, A_POP, A_RET, []) then
|
|
Exit;
|
|
Exit;
|
|
|
|
|
|
- if (taicpu(hp1).opcode = A_MOVSS) and
|
|
|
|
|
|
+ if (taicpu(hp1).opcode = A_MOVSD) and
|
|
(taicpu(hp1).ops = 0) then
|
|
(taicpu(hp1).ops = 0) then
|
|
- { Wrong MOVSS }
|
|
|
|
|
|
+ { Wrong MOVSD }
|
|
Exit;
|
|
Exit;
|
|
|
|
|
|
{ Check for writes to specific registers first }
|
|
{ Check for writes to specific registers first }
|
|
@@ -8705,6 +8703,25 @@ unit aoptx86;
|
|
Exit;
|
|
Exit;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
+ { Check p to make sure it doesn't write to something that affects hp1 }
|
|
|
|
+
|
|
|
|
+ { Check for writes to specific registers first }
|
|
|
|
+ { EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP in that order }
|
|
|
|
+ for X := 0 to 7 do
|
|
|
|
+ if (RegWriteFlags[X] * InsProp[taicpu(p).opcode].Ch <> [])
|
|
|
|
+ and RegInInstruction(newreg(R_INTREGISTER, TSuperRegister(X), R_SUBWHOLE), hp1) then
|
|
|
|
+ Exit;
|
|
|
|
+
|
|
|
|
+ for X := 0 to taicpu(p).ops - 1 do
|
|
|
|
+ begin
|
|
|
|
+ { Check to see if this operand writes to something }
|
|
|
|
+ if ((WriteOp[X] * InsProp[taicpu(p).opcode].Ch) <> []) and
|
|
|
|
+ { And matches something in hp1 }
|
|
|
|
+ (taicpu(p).oper[X]^.typ = top_reg) and
|
|
|
|
+ RegInInstruction(taicpu(p).oper[X]^.reg, hp1) then
|
|
|
|
+ Exit;
|
|
|
|
+ end;
|
|
|
|
+
|
|
{ The instruction can be safely moved }
|
|
{ The instruction can be safely moved }
|
|
asml.Remove(hp1);
|
|
asml.Remove(hp1);
|
|
|
|
|
|
@@ -8712,6 +8729,17 @@ unit aoptx86;
|
|
can be optimised into "xor %reg,%reg" later }
|
|
can be optimised into "xor %reg,%reg" later }
|
|
if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp2) then
|
|
if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp2) then
|
|
asml.InsertBefore(hp1, hp2)
|
|
asml.InsertBefore(hp1, hp2)
|
|
|
|
+
|
|
|
|
+ { Failing that, try to insert after the last instruction where the
|
|
|
|
+ FLAGS register is not yet in use }
|
|
|
|
+ else if GetLastInstruction(p, hp2) and
|
|
|
|
+ (
|
|
|
|
+ (hp2.typ <> ait_instruction) or
|
|
|
|
+ { Don't insert after an instruction that uses the flags when p doesn't use them }
|
|
|
|
+ RegInInstruction(NR_DEFAULTFLAGS, p) or
|
|
|
|
+ not RegInInstruction(NR_DEFAULTFLAGS, hp2)
|
|
|
|
+ ) then
|
|
|
|
+ asml.InsertAfter(hp1, hp2)
|
|
else
|
|
else
|
|
{ Note, if p.Previous is nil (even if it should logically never be the
|
|
{ Note, if p.Previous is nil (even if it should logically never be the
|
|
case), FindRegAllocBackward immediately exits with False and so we
|
|
case), FindRegAllocBackward immediately exits with False and so we
|
|
@@ -8721,26 +8749,90 @@ unit aoptx86;
|
|
|
|
|
|
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);
|
|
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);
|
|
|
|
|
|
|
|
+ { We can't trust UsedRegs because we're looking backwards, although we
|
|
|
|
+ know the registers are allocated after p at the very least, so manually
|
|
|
|
+ create tai_regalloc objects if needed }
|
|
for X := 0 to taicpu(hp1).ops - 1 do
|
|
for X := 0 to taicpu(hp1).ops - 1 do
|
|
case taicpu(hp1).oper[X]^.typ of
|
|
case taicpu(hp1).oper[X]^.typ of
|
|
top_reg:
|
|
top_reg:
|
|
- AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
|
|
|
|
|
|
+ begin
|
|
|
|
+ asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.reg, nil), hp1);
|
|
|
|
+ IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.reg, UsedRegs);
|
|
|
|
+ AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
|
|
|
|
+ end;
|
|
top_ref:
|
|
top_ref:
|
|
begin
|
|
begin
|
|
if taicpu(hp1).oper[X]^.ref^.base <> NR_NO then
|
|
if taicpu(hp1).oper[X]^.ref^.base <> NR_NO then
|
|
- AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
|
|
|
|
|
|
+ begin
|
|
|
|
+ asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.ref^.base, nil), hp1);
|
|
|
|
+ IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.ref^.base, UsedRegs);
|
|
|
|
+ AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
|
|
|
|
+ end;
|
|
if taicpu(hp1).oper[X]^.ref^.index <> NR_NO then
|
|
if taicpu(hp1).oper[X]^.ref^.index <> NR_NO then
|
|
- AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
|
|
|
|
|
|
+ begin
|
|
|
|
+ asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.ref^.index, nil), hp1);
|
|
|
|
+ IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.ref^.index, UsedRegs);
|
|
|
|
+ AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
|
|
|
|
+ end;
|
|
end;
|
|
end;
|
|
else
|
|
else
|
|
;
|
|
;
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
+ Result := True;
|
|
|
|
+ end;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
|
|
|
|
+ var
|
|
|
|
+ hp2: tai;
|
|
|
|
+ X: Integer;
|
|
|
|
+ begin
|
|
|
|
+ { If we have something like:
|
|
|
|
+ cmp ###,%reg1
|
|
|
|
+ mov 0,%reg2
|
|
|
|
+
|
|
|
|
+ And no modified registers are shared, move the instruction to before
|
|
|
|
+ the comparison as this means it can be optimised without worrying
|
|
|
|
+ about the FLAGS register. (CMP/MOV is generated by
|
|
|
|
+ "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
|
|
|
|
+
|
|
|
|
+ As long as the second instruction doesn't use the flags or one of the
|
|
|
|
+ registers used by CMP or TEST (also check any references that use the
|
|
|
|
+ registers), then it can be moved prior to the comparison.
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ Result := False;
|
|
|
|
+ if not TrySwapMovOp(p, hp1) then
|
|
|
|
+ Exit;
|
|
|
|
+
|
|
if taicpu(hp1).opcode = A_LEA then
|
|
if taicpu(hp1).opcode = A_LEA then
|
|
{ The flags will be overwritten by the CMP/TEST instruction }
|
|
{ The flags will be overwritten by the CMP/TEST instruction }
|
|
ConvertLEA(taicpu(hp1));
|
|
ConvertLEA(taicpu(hp1));
|
|
|
|
|
|
Result := True;
|
|
Result := True;
|
|
|
|
+
|
|
|
|
+ { Can we move it one further back? }
|
|
|
|
+ if GetLastInstruction(hp1, hp2) and (hp2.typ = ait_instruction) and
|
|
|
|
+ { Check to see if CMP/TEST is a comparison against zero }
|
|
|
|
+ (
|
|
|
|
+ (
|
|
|
|
+ (taicpu(p).opcode = A_CMP) and
|
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, 0)
|
|
|
|
+ ) or
|
|
|
|
+ (
|
|
|
|
+ (taicpu(p).opcode = A_TEST) and
|
|
|
|
+ (
|
|
|
|
+ OpsEqual(taicpu(p).oper[0]^, taicpu(p).oper[1]^) or
|
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, -1)
|
|
|
|
+ )
|
|
|
|
+ )
|
|
|
|
+ ) and
|
|
|
|
+ { These instructions set the zero flag if the result is zero }
|
|
|
|
+ MatchInstruction(hp2, [A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_POPCNT, A_LZCNT], []) and
|
|
|
|
+ OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) then
|
|
|
|
+ { Looks like we can - if successful, this benefits PostPeepholeOptTestOr }
|
|
|
|
+ TrySwapMovOp(hp2, hp1);
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
|