@@ -3253,1060 +3253,1060 @@ unit aoptx86;
if GetNextInstruction_p and (hp1.typ = ait_instruction) then
while True do
- begin
- if (taicpu(hp1).opcode = A_AND) and
- MatchOpType(taicpu(hp1),top_const,top_reg) then
- begin
- { A change has occurred, just not in p }
- Include(OptsToCheck, aoc_ForceNewIteration);
- if MatchOperand(taicpu(hp1).oper[1]^, p_TargetReg) then
- begin
- case taicpu(p).opsize of
- S_L:
- if (taicpu(hp1).oper[0]^.val = $ffffffff) then
- begin
- { Optimize out:
- mov x, %reg
- and ffffffffh, %reg
- }
- DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
- RemoveInstruction(hp1);
- Result:=true;
- exit;
- end;
- S_Q: { TODO: Confirm if this is even possible }
- if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
- begin
- { Optimize out:
- mov x, %reg
- and ffffffffffffffffh, %reg
- }
- DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
- RemoveInstruction(hp1);
- Result:=true;
- exit;
- end;
- else
- ;
- end;
- if (
- { Make sure that if a reference is used, its registers
- are not modified in between }
- (
- (taicpu(p).oper[0]^.typ = top_reg) and
- not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)
- ) or
- (
- (taicpu(p).oper[0]^.typ = top_ref) and
- (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) and
- not RefModifiedBetween(taicpu(p).oper[0]^.ref^, topsize2memsize[taicpu(p).opsize] shr 3, p, hp1)
- )
- ) and
- GetNextInstruction(hp1,hp2) and
- MatchInstruction(hp2,A_TEST,[]) and
- (
- MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp2).oper[1]^) or
+ begin
+ if (taicpu(hp1).opcode = A_AND) and
+ MatchOpType(taicpu(hp1),top_const,top_reg) then
+ begin
+ { A change has occurred, just not in p }
+ Include(OptsToCheck, aoc_ForceNewIteration);
+ if MatchOperand(taicpu(hp1).oper[1]^, p_TargetReg) then
+ begin
+ case taicpu(p).opsize of
+ S_L:
+ if (taicpu(hp1).oper[0]^.val = $ffffffff) then
+ begin
+ { Optimize out:
+ mov x, %reg
+ and ffffffffh, %reg
+ }
+ DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
+ RemoveInstruction(hp1);
+ Result:=true;
+ exit;
+ end;
+ S_Q: { TODO: Confirm if this is even possible }
+ if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
+ begin
+ { Optimize out:
+ mov x, %reg
+ and ffffffffffffffffh, %reg
+ }
+ DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
+ RemoveInstruction(hp1);
+ Result:=true;
+ exit;
+ end;
+ else
+ ;
+ end;
+ if (
+ { Make sure that if a reference is used, its registers
+ are not modified in between }
+ (
+ (taicpu(p).oper[0]^.typ = top_reg) and
+ not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)
+ ) or
+ (
+ (taicpu(p).oper[0]^.typ = top_ref) and
+ (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) and
+ not RefModifiedBetween(taicpu(p).oper[0]^.ref^, topsize2memsize[taicpu(p).opsize] shr 3, p, hp1)
+ )
+ ) and
+ GetNextInstruction(hp1,hp2) and
+ MatchInstruction(hp2,A_TEST,[]) and
(
- { If the register being tested is smaller than the one
- that received a bitwise AND, permit it if the constant
- fits into the smaller size }
- (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
- SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(hp2).oper[1]^.reg) and
- (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[0]^.val >= 0) and
- (GetSubReg(taicpu(hp2).oper[1]^.reg) < GetSubReg(taicpu(hp1).oper[1]^.reg)) and
+ MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp2).oper[1]^) or
(
+ { If the register being tested is smaller than the one
+ that received a bitwise AND, permit it if the constant
+ fits into the smaller size }
+ (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
+ SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(hp2).oper[1]^.reg) and
+ (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[0]^.val >= 0) and
+ (GetSubReg(taicpu(hp2).oper[1]^.reg) < GetSubReg(taicpu(hp1).oper[1]^.reg)) and
(
- (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBL) and
- (taicpu(hp1).oper[0]^.val <= $FF)
- ) or
- (
- (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBW) and
- (taicpu(hp1).oper[0]^.val <= $FFFF)
+ (
+ (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBL) and
+ (taicpu(hp1).oper[0]^.val <= $FF)
+ ) or
+ (
+ (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBW) and
+ (taicpu(hp1).oper[0]^.val <= $FFFF)
{$ifdef x86_64}
- ) or
- (
- (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBD) and
- (taicpu(hp1).oper[0]^.val <= $FFFFFFFF)
+ ) or
+ (
+ (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBD) and
+ (taicpu(hp1).oper[0]^.val <= $FFFFFFFF)
{$endif x86_64}
+ )
)
)
- )
- ) and
- (
- MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^) or
- MatchOperand(taicpu(hp2).oper[0]^,-1)
- ) and
- GetNextInstruction(hp2,hp3) and
- MatchInstruction(hp3,A_Jcc,A_Setcc,[]) and
- (taicpu(hp3).condition in [C_E,C_NE]) then
- begin
- TransferUsedRegs(TmpUsedRegs);
- UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
- UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
- if not(RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp2, TmpUsedRegs)) then
- begin
- DebugMsg(SPeepholeOptimization + 'MovAndTest2Test done',p);
- taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
- taicpu(hp1).opcode:=A_TEST;
-
- { Shrink the TEST instruction down to the smallest possible size }
- case taicpu(hp1).oper[0]^.val of
- 0..255:
- if (taicpu(hp1).opsize <> S_B)
+ ) and
+ (
+ MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^) or
+ MatchOperand(taicpu(hp2).oper[0]^,-1)
+ ) and
+ GetNextInstruction(hp2,hp3) and
+ MatchInstruction(hp3,A_Jcc,A_Setcc,[]) and
+ (taicpu(hp3).condition in [C_E,C_NE]) then
+ begin
+ TransferUsedRegs(TmpUsedRegs);
+ UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
+ UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
+ if not(RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp2, TmpUsedRegs)) then
+ begin
+ DebugMsg(SPeepholeOptimization + 'MovAndTest2Test done',p);
+ taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
+ taicpu(hp1).opcode:=A_TEST;
+
+ { Shrink the TEST instruction down to the smallest possible size }
+ case taicpu(hp1).oper[0]^.val of
+ 0..255:
+ if (taicpu(hp1).opsize <> S_B)
{$ifndef x86_64}
- and (
- (taicpu(hp1).oper[1]^.typ <> top_reg) or
- { Cannot encode byte-sized ESI, EDI, EBP or ESP under i386 }
- (GetSupReg(taicpu(hp1).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])
- )
+ and (
+ (taicpu(hp1).oper[1]^.typ <> top_reg) or
+ { Cannot encode byte-sized ESI, EDI, EBP or ESP under i386 }
+ (GetSupReg(taicpu(hp1).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])
+ )
{$endif x86_64}
|
|
|
- then
|
|
|
- begin
|
|
|
- if taicpu(hp1).opsize <> taicpu(hp2).opsize then
|
|
|
- { Only print debug message if the TEST instruction
|
|
|
- is a different size before and after }
|
|
|
- DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testb to reduce instruction size (Test2Test 1a)' , p);
|
|
|
-
|
|
|
- taicpu(hp1).opsize := S_B;
|
|
|
- if (taicpu(hp1).oper[1]^.typ = top_reg) then
|
|
|
- setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBL);
|
|
|
- end;
|
|
|
- 256..65535:
|
|
|
- if (taicpu(hp1).opsize <> S_W) then
|
|
|
- begin
|
|
|
- if taicpu(hp1).opsize <> taicpu(hp2).opsize then
|
|
|
- { Only print debug message if the TEST instruction
|
|
|
- is a different size before and after }
|
|
|
- DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testw to reduce instruction size (Test2Test 1b)' , p);
|
|
|
-
|
|
|
- taicpu(hp1).opsize := S_W;
|
|
|
- if (taicpu(hp1).oper[1]^.typ = top_reg) then
|
|
|
- setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBW);
|
|
|
- end;
|
|
|
+ then
|
|
|
+ begin
|
|
|
+ if taicpu(hp1).opsize <> taicpu(hp2).opsize then
|
|
|
+ { Only print debug message if the TEST instruction
|
|
|
+ is a different size before and after }
|
|
|
+ DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testb to reduce instruction size (Test2Test 1a)' , p);
|
|
|
+
|
|
|
+ taicpu(hp1).opsize := S_B;
|
|
|
+ if (taicpu(hp1).oper[1]^.typ = top_reg) then
|
|
|
+ setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBL);
|
|
|
+ end;
|
|
|
+ 256..65535:
|
|
|
+ if (taicpu(hp1).opsize <> S_W) then
|
|
|
+ begin
|
|
|
+ if taicpu(hp1).opsize <> taicpu(hp2).opsize then
|
|
|
+ { Only print debug message if the TEST instruction
|
|
|
+ is a different size before and after }
|
|
|
+ DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testw to reduce instruction size (Test2Test 1b)' , p);
|
|
|
+
|
|
|
+ taicpu(hp1).opsize := S_W;
|
|
|
+ if (taicpu(hp1).oper[1]^.typ = top_reg) then
|
|
|
+ setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBW);
|
|
|
+ end;
|
|
|
{$ifdef x86_64}
|
|
|
- 65536..$7FFFFFFF:
|
|
|
- if (taicpu(hp1).opsize <> S_L) then
|
|
|
- begin
|
|
|
- if taicpu(hp1).opsize <> taicpu(hp2).opsize then
|
|
|
- { Only print debug message if the TEST instruction
|
|
|
- is a different size before and after }
|
|
|
- DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testl to reduce instruction size (Test2Test 1c)' , p);
|
|
|
-
|
|
|
- taicpu(hp1).opsize := S_L;
|
|
|
- if (taicpu(hp1).oper[1]^.typ = top_reg) then
|
|
|
- setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
|
|
|
- end;
|
|
|
+ 65536..$7FFFFFFF:
|
|
|
+ if (taicpu(hp1).opsize <> S_L) then
|
|
|
+ begin
|
|
|
+ if taicpu(hp1).opsize <> taicpu(hp2).opsize then
|
|
|
+ { Only print debug message if the TEST instruction
|
|
|
+ is a different size before and after }
|
|
|
+ DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testl to reduce instruction size (Test2Test 1c)' , p);
|
|
|
+
|
|
|
+ taicpu(hp1).opsize := S_L;
|
|
|
+ if (taicpu(hp1).oper[1]^.typ = top_reg) then
|
|
|
+ setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
|
|
|
+ end;
|
|
|
{$endif x86_64}
|
|
|
- else
|
|
|
- ;
|
|
|
- end;
|
|
|
-
|
|
|
- RemoveInstruction(hp2);
|
|
|
- RemoveCurrentP(p);
|
|
|
- Result:=true;
|
|
|
- exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
- end;
|
|
|
-
|
|
|
- if IsMOVZXAcceptable and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
- (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
|
|
|
- (getsupreg(p_TargetReg) = getsupreg(taicpu(hp1).oper[1]^.reg))
|
|
|
- then
|
|
|
- begin
|
|
|
- InputVal := debug_operstr(taicpu(p).oper[0]^);
|
|
|
- MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
|
|
|
+ else
|
|
|
+ ;
|
|
|
+ end;
|
|
|
|
|
|
- case taicpu(p).opsize of
|
|
|
- S_B:
|
|
|
- if (taicpu(hp1).oper[0]^.val = $ff) then
|
|
|
- begin
|
|
|
- { Convert:
|
|
|
- movb x, %regl movb x, %regl
|
|
|
- andw ffh, %regw andl ffh, %regd
|
|
|
- To:
|
|
|
- movzbw x, %regd movzbl x, %regd
|
|
|
+ RemoveInstruction(hp2);
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result:=true;
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- (Identical registers, just different sizes)
|
|
|
- }
|
|
|
- RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
|
|
|
- RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
|
|
|
+ if IsMOVZXAcceptable and
|
|
|
+ (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
+ (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
|
|
|
+ (getsupreg(p_TargetReg) = getsupreg(taicpu(hp1).oper[1]^.reg))
|
|
|
+ then
|
|
|
+ begin
|
|
|
+ InputVal := debug_operstr(taicpu(p).oper[0]^);
|
|
|
+ MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val);
|
|
|
|
|
|
- case taicpu(hp1).opsize of
|
|
|
- S_W: NewSize := S_BW;
|
|
|
- S_L: NewSize := S_BL;
|
|
|
+ case taicpu(p).opsize of
|
|
|
+ S_B:
|
|
|
+ if (taicpu(hp1).oper[0]^.val = $ff) then
|
|
|
+ begin
|
|
|
+ { Convert:
|
|
|
+ movb x, %regl movb x, %regl
|
|
|
+ andw ffh, %regw andl ffh, %regd
|
|
|
+ To:
|
|
|
+ movzbw x, %regd movzbl x, %regd
|
|
|
+
|
|
|
+ (Identical registers, just different sizes)
|
|
|
+ }
|
|
|
+ RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
|
|
|
+ RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
|
|
|
+
|
|
|
+ case taicpu(hp1).opsize of
|
|
|
+ S_W: NewSize := S_BW;
|
|
|
+ S_L: NewSize := S_BL;
|
|
|
{$ifdef x86_64}
|
|
|
- S_Q: NewSize := S_BQ;
|
|
|
+ S_Q: NewSize := S_BQ;
|
|
|
{$endif x86_64}
|
|
|
- else
|
|
|
- InternalError(2018011510);
|
|
|
- end;
|
|
|
- end
|
|
|
- else
|
|
|
- NewSize := S_NO;
|
|
|
- S_W:
|
|
|
- if (taicpu(hp1).oper[0]^.val = $ffff) then
|
|
|
- begin
|
|
|
- { Convert:
|
|
|
- movw x, %regw
|
|
|
- andl ffffh, %regd
|
|
|
- To:
|
|
|
- movzwl x, %regd
|
|
|
-
|
|
|
- (Identical registers, just different sizes)
|
|
|
- }
|
|
|
- RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
|
|
|
- RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
|
|
|
-
|
|
|
- case taicpu(hp1).opsize of
|
|
|
- S_L: NewSize := S_WL;
|
|
|
+ else
|
|
|
+ InternalError(2018011510);
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ NewSize := S_NO;
|
|
|
+ S_W:
|
|
|
+ if (taicpu(hp1).oper[0]^.val = $ffff) then
|
|
|
+ begin
|
|
|
+ { Convert:
|
|
|
+ movw x, %regw
|
|
|
+ andl ffffh, %regd
|
|
|
+ To:
|
|
|
+ movzwl x, %regd
|
|
|
+
|
|
|
+ (Identical registers, just different sizes)
|
|
|
+ }
|
|
|
+ RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
|
|
|
+ RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
|
|
|
+
|
|
|
+ case taicpu(hp1).opsize of
|
|
|
+ S_L: NewSize := S_WL;
|
|
|
{$ifdef x86_64}
|
|
|
- S_Q: NewSize := S_WQ;
|
|
|
+ S_Q: NewSize := S_WQ;
|
|
|
{$endif x86_64}
|
|
|
- else
|
|
|
- InternalError(2018011511);
|
|
|
- end;
|
|
|
- end
|
|
|
+ else
|
|
|
+ InternalError(2018011511);
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ NewSize := S_NO;
|
|
|
else
|
|
|
NewSize := S_NO;
|
|
|
- else
|
|
|
- NewSize := S_NO;
|
|
|
- end;
|
|
|
-
|
|
|
- if NewSize <> S_NO then
|
|
|
- begin
|
|
|
- PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
|
|
|
-
|
|
|
- { The actual optimization }
|
|
|
- taicpu(p).opcode := A_MOVZX;
|
|
|
- taicpu(p).changeopsize(NewSize);
|
|
|
- taicpu(p).loadoper(1, taicpu(hp1).oper[1]^);
|
|
|
- { Make sure we deal with any reference counts that were increased }
|
|
|
- if taicpu(hp1).oper[1]^.typ = top_ref then
|
|
|
- begin
|
|
|
- if Assigned(taicpu(hp1).oper[1]^.ref^.symbol) then
|
|
|
- taicpu(hp1).oper[1]^.ref^.symbol.decrefs;
|
|
|
- if Assigned(taicpu(hp1).oper[1]^.ref^.relsymbol) then
|
|
|
- taicpu(hp1).oper[1]^.ref^.relsymbol.decrefs;
|
|
|
- end;
|
|
|
+ end;
|
|
|
|
|
|
- { Safeguard if "and" is followed by a conditional command }
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegsBetween(TmpUsedRegs, tai(p.next), hp1);
|
|
|
+ if NewSize <> S_NO then
|
|
|
+ begin
|
|
|
+ PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1;
|
|
|
+
|
|
|
+ { The actual optimization }
|
|
|
+ taicpu(p).opcode := A_MOVZX;
|
|
|
+ taicpu(p).changeopsize(NewSize);
|
|
|
+ taicpu(p).loadoper(1, taicpu(hp1).oper[1]^);
|
|
|
+ { Make sure we deal with any reference counts that were increased }
|
|
|
+ if taicpu(hp1).oper[1]^.typ = top_ref then
|
|
|
+ begin
|
|
|
+ if Assigned(taicpu(hp1).oper[1]^.ref^.symbol) then
|
|
|
+ taicpu(hp1).oper[1]^.ref^.symbol.decrefs;
|
|
|
+ if Assigned(taicpu(hp1).oper[1]^.ref^.relsymbol) then
|
|
|
+ taicpu(hp1).oper[1]^.ref^.relsymbol.decrefs;
|
|
|
+ end;
|
|
|
|
|
|
- if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
|
|
|
- begin
|
|
|
- { At this point, the "and" command is effectively equivalent to
|
|
|
- "test %reg,%reg". This will be handled separately by the
|
|
|
- Peephole Optimizer. [Kit] }
|
|
|
+ { Safeguard if "and" is followed by a conditional command }
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegsBetween(TmpUsedRegs, tai(p.next), hp1);
|
|
|
|
|
|
- DebugMsg(SPeepholeOptimization + PreMessage +
|
|
|
- ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
|
|
|
- end
|
|
|
- else
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
|
|
|
- ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
|
|
|
+ if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then
|
|
|
+ begin
|
|
|
+ { At this point, the "and" command is effectively equivalent to
|
|
|
+ "test %reg,%reg". This will be handled separately by the
|
|
|
+ Peephole Optimizer. [Kit] }
|
|
|
|
|
|
- RemoveInstruction(hp1);
|
|
|
- end;
|
|
|
+ DebugMsg(SPeepholeOptimization + PreMessage +
|
|
|
+ ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 +
|
|
|
+ ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p);
|
|
|
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
+ RemoveInstruction(hp1);
|
|
|
+ end;
|
|
|
|
|
|
- { Go through DeepMOVOpt again (jump to "while True do") }
|
|
|
- Continue;
|
|
|
- end;
|
|
|
- end;
|
|
|
- end;
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
|
|
|
- if taicpu(p).oper[0]^.typ = top_reg then
|
|
|
- begin
|
|
|
- p_SourceReg := taicpu(p).oper[0]^.reg;
|
|
|
+ { Go through DeepMOVOpt again (jump to "while True do") }
|
|
|
+ Continue;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- { Look for:
|
|
|
- mov %reg1,%reg2
|
|
|
- ??? %reg2,r/m
|
|
|
- Change to:
|
|
|
- mov %reg1,%reg2
|
|
|
- ??? %reg1,r/m
|
|
|
- }
|
|
|
- if RegReadByInstruction(p_TargetReg, hp1) and
|
|
|
- not RegModifiedBetween(p_SourceReg, p, hp1) and
|
|
|
- DeepMOVOpt(taicpu(p), taicpu(hp1)) then
|
|
|
- begin
|
|
|
- { A change has occurred, just not in p }
|
|
|
- Include(OptsToCheck, aoc_ForceNewIteration);
|
|
|
+ if taicpu(p).oper[0]^.typ = top_reg then
|
|
|
+ begin
|
|
|
+ p_SourceReg := taicpu(p).oper[0]^.reg;
|
|
|
+
|
|
|
+ { Look for:
|
|
|
+ mov %reg1,%reg2
|
|
|
+ ??? %reg2,r/m
|
|
|
+ Change to:
|
|
|
+ mov %reg1,%reg2
|
|
|
+ ??? %reg1,r/m
|
|
|
+ }
|
|
|
+ if RegReadByInstruction(p_TargetReg, hp1) and
|
|
|
+ not RegModifiedBetween(p_SourceReg, p, hp1) and
|
|
|
+ DeepMOVOpt(taicpu(p), taicpu(hp1)) then
|
|
|
+ begin
|
|
|
+ { A change has occurred, just not in p }
|
|
|
+ Include(OptsToCheck, aoc_ForceNewIteration);
|
|
|
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
|
|
|
- if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and
|
|
|
- { Just in case something didn't get modified (e.g. an
|
|
|
- implicit register) }
|
|
|
- not RegReadByInstruction(p_TargetReg, hp1) then
|
|
|
- begin
|
|
|
- { We can remove the original MOV }
|
|
|
- DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
|
|
|
- RemoveCurrentP(p);
|
|
|
+ if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and
|
|
|
+ { Just in case something didn't get modified (e.g. an
|
|
|
+ implicit register) }
|
|
|
+ not RegReadByInstruction(p_TargetReg, hp1) then
|
|
|
+ begin
|
|
|
+ { We can remove the original MOV }
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p);
|
|
|
+ RemoveCurrentP(p);
|
|
|
|
|
|
- { UsedRegs got updated by RemoveCurrentp }
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
- end;
|
|
|
+ { UsedRegs got updated by RemoveCurrentp }
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
|
|
|
- { If we know a MOV instruction has become a null operation, we might as well
|
|
|
- get rid of it now to save time. }
|
|
|
- if (taicpu(hp1).opcode = A_MOV) and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
- SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
|
|
|
- { Just being a register is enough to confirm it's a null operation }
|
|
|
- (taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
|
- begin
|
|
|
+ { If we know a MOV instruction has become a null operation, we might as well
|
|
|
+ get rid of it now to save time. }
|
|
|
+ if (taicpu(hp1).opcode = A_MOV) and
|
|
|
+ (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
+ SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and
|
|
|
+ { Just being a register is enough to confirm it's a null operation }
|
|
|
+ (taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
|
+ begin
|
|
|
|
|
|
- Result := True;
|
|
|
+ Result := True;
|
|
|
|
|
|
- { Speed-up to reduce a pipeline stall... if we had something like...
|
|
|
+ { Speed-up to reduce a pipeline stall... if we had something like...
|
|
|
|
|
|
- movl %eax,%edx
|
|
|
- movw %dx,%ax
|
|
|
+ movl %eax,%edx
|
|
|
+ movw %dx,%ax
|
|
|
|
|
|
- ... the second instruction would change to movw %ax,%ax, but
|
|
|
- given that it is now %ax that's active rather than %eax,
|
|
|
- penalties might occur due to a partial register write, so instead,
|
|
|
- change it to a MOVZX instruction when optimising for speed.
|
|
|
- }
|
|
|
- if not (cs_opt_size in current_settings.optimizerswitches) and
|
|
|
- IsMOVZXAcceptable and
|
|
|
- (taicpu(hp1).opsize < taicpu(p).opsize)
|
|
|
+ ... the second instruction would change to movw %ax,%ax, but
|
|
|
+ given that it is now %ax that's active rather than %eax,
|
|
|
+ penalties might occur due to a partial register write, so instead,
|
|
|
+ change it to a MOVZX instruction when optimising for speed.
|
|
|
+ }
|
|
|
+ if not (cs_opt_size in current_settings.optimizerswitches) and
|
|
|
+ IsMOVZXAcceptable and
|
|
|
+ (taicpu(hp1).opsize < taicpu(p).opsize)
|
|
|
{$ifdef x86_64}
|
|
|
- { operations already implicitly set the upper 64 bits to zero }
|
|
|
- and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
|
|
|
+ { operations already implicitly set the upper 64 bits to zero }
|
|
|
+ and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q))
|
|
|
{$endif x86_64}
|
|
|
- then
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
|
|
|
- case taicpu(p).opsize of
|
|
|
- S_W:
|
|
|
- if taicpu(hp1).opsize = S_B then
|
|
|
- taicpu(hp1).opsize := S_BL
|
|
|
- else
|
|
|
- InternalError(2020012911);
|
|
|
- S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
|
|
|
- case taicpu(hp1).opsize of
|
|
|
- S_B:
|
|
|
- taicpu(hp1).opsize := S_BL;
|
|
|
- S_W:
|
|
|
- taicpu(hp1).opsize := S_WL;
|
|
|
+ then
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1);
|
|
|
+ case taicpu(p).opsize of
|
|
|
+ S_W:
|
|
|
+ if taicpu(hp1).opsize = S_B then
|
|
|
+ taicpu(hp1).opsize := S_BL
|
|
|
else
|
|
|
- InternalError(2020012912);
|
|
|
- end;
|
|
|
- else
|
|
|
- InternalError(2020012910);
|
|
|
- end;
|
|
|
+ InternalError(2020012911);
|
|
|
+ S_L{$ifdef x86_64}, S_Q{$endif x86_64}:
|
|
|
+ case taicpu(hp1).opsize of
|
|
|
+ S_B:
|
|
|
+ taicpu(hp1).opsize := S_BL;
|
|
|
+ S_W:
|
|
|
+ taicpu(hp1).opsize := S_WL;
|
|
|
+ else
|
|
|
+ InternalError(2020012912);
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ InternalError(2020012910);
|
|
|
+ end;
|
|
|
|
|
|
- taicpu(hp1).opcode := A_MOVZX;
|
|
|
- setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
|
|
|
- end
|
|
|
- else
|
|
|
- begin
|
|
|
- GetNextInstruction_p := GetNextInstruction(hp1, hp2);
|
|
|
- DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
|
|
|
- RemoveInstruction(hp1);
|
|
|
+ taicpu(hp1).opcode := A_MOVZX;
|
|
|
+ setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ GetNextInstruction_p := GetNextInstruction(hp1, hp2);
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1);
|
|
|
+ RemoveInstruction(hp1);
|
|
|
|
|
|
- { The instruction after what was hp1 is now the immediate next instruction,
|
|
|
- so we can continue to make optimisations if it's present }
|
|
|
- if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
|
|
|
- Exit;
|
|
|
+ { The instruction after what was hp1 is now the immediate next instruction,
|
|
|
+ so we can continue to make optimisations if it's present }
|
|
|
+ if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then
|
|
|
+ Exit;
|
|
|
|
|
|
- hp1 := hp2;
|
|
|
- end;
|
|
|
- end;
|
|
|
- end;
|
|
|
+ hp1 := hp2;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
{$ifdef x86_64}
|
|
|
- { Change:
|
|
|
- movl %reg1l,%reg2l
|
|
|
- movq %reg2q,%reg3q (%reg1 <> %reg3)
|
|
|
+ { Change:
|
|
|
+ movl %reg1l,%reg2l
|
|
|
+ movq %reg2q,%reg3q (%reg1 <> %reg3)
|
|
|
|
|
|
- To:
|
|
|
- movl %reg1l,%reg2l
|
|
|
- movl %reg1l,%reg3l (Upper 32 bits of %reg3q will be zero)
|
|
|
-
|
|
|
- If %reg1 = %reg3, convert to:
|
|
|
- movl %reg1l,%reg2l
|
|
|
- andl %reg1l,%reg1l
|
|
|
- }
|
|
|
- if (taicpu(p).opsize = S_L) and MatchInstruction(hp1,A_MOV,[S_Q]) and
|
|
|
- not RegModifiedBetween(p_SourceReg, p, hp1) and
|
|
|
- MatchOpType(taicpu(hp1), top_reg, top_reg) and
|
|
|
- SuperRegistersEqual(p_TargetReg, taicpu(hp1).oper[0]^.reg) then
|
|
|
- begin
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
+ To:
|
|
|
+ movl %reg1l,%reg2l
|
|
|
+ movl %reg1l,%reg3l (Upper 32 bits of %reg3q will be zero)
|
|
|
|
|
|
- taicpu(hp1).opsize := S_L;
|
|
|
- taicpu(hp1).loadreg(0, p_SourceReg);
|
|
|
- setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
|
|
|
+ If %reg1 = %reg3, convert to:
|
|
|
+ movl %reg1l,%reg2l
|
|
|
+ andl %reg1l,%reg1l
|
|
|
+ }
|
|
|
+ if (taicpu(p).opsize = S_L) and MatchInstruction(hp1,A_MOV,[S_Q]) and
|
|
|
+ not RegModifiedBetween(p_SourceReg, p, hp1) and
|
|
|
+ MatchOpType(taicpu(hp1), top_reg, top_reg) and
|
|
|
+ SuperRegistersEqual(p_TargetReg, taicpu(hp1).oper[0]^.reg) then
|
|
|
+ begin
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
|
|
|
- AllocRegBetween(p_SourceReg, p, hp1, UsedRegs);
|
|
|
+ taicpu(hp1).opsize := S_L;
|
|
|
+ taicpu(hp1).loadreg(0, p_SourceReg);
|
|
|
+ setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
|
|
|
|
|
|
- if (p_SourceReg = taicpu(hp1).oper[1]^.reg) then
|
|
|
- begin
|
|
|
- { %reg1 = %reg3 }
|
|
|
- DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlAndl 1)', hp1);
|
|
|
- taicpu(hp1).opcode := A_AND;
|
|
|
- end
|
|
|
- else
|
|
|
- begin
|
|
|
- { %reg1 <> %reg3 }
|
|
|
- DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlMovl 1)', hp1);
|
|
|
- end;
|
|
|
+ AllocRegBetween(p_SourceReg, p, hp1, UsedRegs);
|
|
|
|
|
|
- if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) then
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Mov2Nop 8 done', p);
|
|
|
- RemoveCurrentP(p);
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
- end
|
|
|
- else
|
|
|
- begin
|
|
|
- { Initial instruction wasn't actually changed }
|
|
|
- Include(OptsToCheck, aoc_ForceNewIteration);
|
|
|
+ if (p_SourceReg = taicpu(hp1).oper[1]^.reg) then
|
|
|
+ begin
|
|
|
+ { %reg1 = %reg3 }
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlAndl 1)', hp1);
|
|
|
+ taicpu(hp1).opcode := A_AND;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ { %reg1 <> %reg3 }
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlMovl 1)', hp1);
|
|
|
+ end;
|
|
|
|
|
|
- { if %reg1 = %reg3, don't do the long-distance lookahead that
|
|
|
- appears below since %reg1 has technically changed }
|
|
|
- if taicpu(hp1).opcode = A_AND then
|
|
|
+ if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) then
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Mov2Nop 8 done', p);
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result := True;
|
|
|
Exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ { Initial instruction wasn't actually changed }
|
|
|
+ Include(OptsToCheck, aoc_ForceNewIteration);
|
|
|
+
|
|
|
+ { if %reg1 = %reg3, don't do the long-distance lookahead that
|
|
|
+ appears below since %reg1 has technically changed }
|
|
|
+ if taicpu(hp1).opcode = A_AND then
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
{$endif x86_64}
|
|
|
- end
|
|
|
- else if taicpu(p).oper[0]^.typ = top_const then
|
|
|
- begin
|
|
|
- if (taicpu(hp1).opcode = A_OR) and
|
|
|
- (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
- MatchOperand(taicpu(p).oper[0]^, 0) and
|
|
|
- MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) then
|
|
|
- begin
|
|
|
- { mov 0, %reg
|
|
|
- or ###,%reg
|
|
|
- Change to (only if the flags are not used):
|
|
|
- mov ###,%reg
|
|
|
- }
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
- DoOptimisation := True;
|
|
|
+ end
|
|
|
+ else if taicpu(p).oper[0]^.typ = top_const then
|
|
|
+ begin
|
|
|
+ if (taicpu(hp1).opcode = A_OR) and
|
|
|
+ (taicpu(p).oper[1]^.typ = top_reg) and
|
|
|
+ MatchOperand(taicpu(p).oper[0]^, 0) and
|
|
|
+ MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) then
|
|
|
+ begin
|
|
|
+ { mov 0, %reg
|
|
|
+ or ###,%reg
|
|
|
+ Change to (only if the flags are not used):
|
|
|
+ mov ###,%reg
|
|
|
+ }
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
+ DoOptimisation := True;
|
|
|
|
|
|
- { Even if the flags are used, we might be able to do the optimisation
|
|
|
- if the conditions are predictable }
|
|
|
- if RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
|
|
|
- begin
|
|
|
- { Only perform if ### = %reg (the same register) or equal to 0,
|
|
|
- so %reg is guaranteed to still have a value of zero }
|
|
|
- if MatchOperand(taicpu(hp1).oper[0]^, 0) or
|
|
|
- MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) then
|
|
|
- begin
|
|
|
- hp2 := hp1;
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
|
|
- while RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
|
|
|
- GetNextInstruction(hp2, hp3) do
|
|
|
- begin
|
|
|
- { Don't continue modifying if the flags state is getting changed }
|
|
|
- if RegModifiedByInstruction(NR_DEFAULTFLAGS, hp3) then
|
|
|
- Break;
|
|
|
+ { Even if the flags are used, we might be able to do the optimisation
|
|
|
+ if the conditions are predictable }
|
|
|
+ if RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
|
|
|
+ begin
|
|
|
+ { Only perform if ### = %reg (the same register) or equal to 0,
|
|
|
+ so %reg is guaranteed to still have a value of zero }
|
|
|
+ if MatchOperand(taicpu(hp1).oper[0]^, 0) or
|
|
|
+ MatchOperand(taicpu(hp1).oper[0]^, taicpu(hp1).oper[1]^.reg) then
|
|
|
+ begin
|
|
|
+ hp2 := hp1;
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
|
|
+ while RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
|
|
|
+ GetNextInstruction(hp2, hp3) do
|
|
|
+ begin
|
|
|
+ { Don't continue modifying if the flags state is getting changed }
|
|
|
+ if RegModifiedByInstruction(NR_DEFAULTFLAGS, hp3) then
|
|
|
+ Break;
|
|
|
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
|
|
|
- if MatchInstruction(hp3, A_Jcc, A_SETcc, A_CMOVcc, []) then
|
|
|
- begin
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
|
|
|
+ if MatchInstruction(hp3, A_Jcc, A_SETcc, A_CMOVcc, []) then
|
|
|
+ begin
|
|
|
|
|
|
- if condition_in(C_E, taicpu(hp3).condition) or (taicpu(hp3).condition in [C_NC, C_NS, C_NO]) then
|
|
|
- begin
|
|
|
- { Condition is always true }
|
|
|
- case taicpu(hp3).opcode of
|
|
|
- A_Jcc:
|
|
|
- begin
|
|
|
- { Check for jump shortcuts before we destroy the condition }
|
|
|
- hp4 := hp3;
|
|
|
- DoJumpOptimizations(hp3, TempBool);
|
|
|
- { Make sure hp3 hasn't changed }
|
|
|
- if (hp4 = hp3) then
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Condition is always true (jump made unconditional)', hp3);
|
|
|
- MakeUnconditional(taicpu(hp3));
|
|
|
- end;
|
|
|
- Result := True;
|
|
|
- end;
|
|
|
- A_CMOVcc:
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Condition is always true (CMOVcc -> MOV)', hp3);
|
|
|
- taicpu(hp3).opcode := A_MOV;
|
|
|
- taicpu(hp3).condition := C_None;
|
|
|
- Result := True;
|
|
|
- end;
|
|
|
- A_SETcc:
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Condition is always true (changed to MOV 1)', hp3);
|
|
|
- { Convert "set(c) %reg" instruction to "movb 1,%reg" }
|
|
|
- taicpu(hp3).opcode := A_MOV;
|
|
|
- taicpu(hp3).ops := 2;
|
|
|
- taicpu(hp3).condition := C_None;
|
|
|
- taicpu(hp3).opsize := S_B;
|
|
|
- taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
|
|
|
- taicpu(hp3).loadconst(0, 1);
|
|
|
- Result := True;
|
|
|
- end;
|
|
|
- else
|
|
|
- InternalError(2021090701);
|
|
|
- end;
|
|
|
- end
|
|
|
- else if (taicpu(hp3).condition in [C_A, C_B, C_C, C_G, C_L, C_NE, C_NZ, C_O, C_S]) then
|
|
|
- begin
|
|
|
- { Condition is always false }
|
|
|
- case taicpu(hp3).opcode of
|
|
|
- A_Jcc:
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Condition is always false (jump removed)', hp3);
|
|
|
- TAsmLabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
|
|
|
- RemoveInstruction(hp3);
|
|
|
- Result := True;
|
|
|
- { Since hp3 was deleted, hp2 must not be updated }
|
|
|
- Continue;
|
|
|
- end;
|
|
|
- A_CMOVcc:
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Condition is always false (conditional load removed)', hp3);
|
|
|
- RemoveInstruction(hp3);
|
|
|
- Result := True;
|
|
|
- { Since hp3 was deleted, hp2 must not be updated }
|
|
|
- Continue;
|
|
|
- end;
|
|
|
- A_SETcc:
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Condition is always false (changed to MOV 0)', hp3);
|
|
|
- { Convert "set(c) %reg" instruction to "movb 0,%reg" }
|
|
|
- taicpu(hp3).opcode := A_MOV;
|
|
|
- taicpu(hp3).ops := 2;
|
|
|
- taicpu(hp3).condition := C_None;
|
|
|
- taicpu(hp3).opsize := S_B;
|
|
|
- taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
|
|
|
- taicpu(hp3).loadconst(0, 0);
|
|
|
- Result := True;
|
|
|
- end;
|
|
|
- else
|
|
|
- InternalError(2021090702);
|
|
|
- end;
|
|
|
- end
|
|
|
- else
|
|
|
- { Uncertain what to do - don't optimise (although optimise other conditional statements if present) }
|
|
|
- DoOptimisation := False;
|
|
|
- end;
|
|
|
+ if condition_in(C_E, taicpu(hp3).condition) or (taicpu(hp3).condition in [C_NC, C_NS, C_NO]) then
|
|
|
+ begin
|
|
|
+ { Condition is always true }
|
|
|
+ case taicpu(hp3).opcode of
|
|
|
+ A_Jcc:
|
|
|
+ begin
|
|
|
+ { Check for jump shortcuts before we destroy the condition }
|
|
|
+ hp4 := hp3;
|
|
|
+ DoJumpOptimizations(hp3, TempBool);
|
|
|
+ { Make sure hp3 hasn't changed }
|
|
|
+ if (hp4 = hp3) then
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Condition is always true (jump made unconditional)', hp3);
|
|
|
+ MakeUnconditional(taicpu(hp3));
|
|
|
+ end;
|
|
|
+ Result := True;
|
|
|
+ end;
|
|
|
+ A_CMOVcc:
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Condition is always true (CMOVcc -> MOV)', hp3);
|
|
|
+ taicpu(hp3).opcode := A_MOV;
|
|
|
+ taicpu(hp3).condition := C_None;
|
|
|
+ Result := True;
|
|
|
+ end;
|
|
|
+ A_SETcc:
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Condition is always true (changed to MOV 1)', hp3);
|
|
|
+ { Convert "set(c) %reg" instruction to "movb 1,%reg" }
|
|
|
+ taicpu(hp3).opcode := A_MOV;
|
|
|
+ taicpu(hp3).ops := 2;
|
|
|
+ taicpu(hp3).condition := C_None;
|
|
|
+ taicpu(hp3).opsize := S_B;
|
|
|
+ taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
|
|
|
+ taicpu(hp3).loadconst(0, 1);
|
|
|
+ Result := True;
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ InternalError(2021090701);
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ else if (taicpu(hp3).condition in [C_A, C_B, C_C, C_G, C_L, C_NE, C_NZ, C_O, C_S]) then
|
|
|
+ begin
|
|
|
+ { Condition is always false }
|
|
|
+ case taicpu(hp3).opcode of
|
|
|
+ A_Jcc:
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Condition is always false (jump removed)', hp3);
|
|
|
+ TAsmLabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
|
|
|
+ RemoveInstruction(hp3);
|
|
|
+ Result := True;
|
|
|
+ { Since hp3 was deleted, hp2 must not be updated }
|
|
|
+ Continue;
|
|
|
+ end;
|
|
|
+ A_CMOVcc:
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Condition is always false (conditional load removed)', hp3);
|
|
|
+ RemoveInstruction(hp3);
|
|
|
+ Result := True;
|
|
|
+ { Since hp3 was deleted, hp2 must not be updated }
|
|
|
+ Continue;
|
|
|
+ end;
|
|
|
+ A_SETcc:
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Condition is always false (changed to MOV 0)', hp3);
|
|
|
+ { Convert "set(c) %reg" instruction to "movb 0,%reg" }
|
|
|
+ taicpu(hp3).opcode := A_MOV;
|
|
|
+ taicpu(hp3).ops := 2;
|
|
|
+ taicpu(hp3).condition := C_None;
|
|
|
+ taicpu(hp3).opsize := S_B;
|
|
|
+ taicpu(hp3).loadreg(1,taicpu(hp3).oper[0]^.reg);
|
|
|
+ taicpu(hp3).loadconst(0, 0);
|
|
|
+ Result := True;
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ InternalError(2021090702);
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ { Uncertain what to do - don't optimise (although optimise other conditional statements if present) }
|
|
|
+ DoOptimisation := False;
|
|
|
+ end;
|
|
|
|
|
|
- hp2 := hp3;
|
|
|
- end;
|
|
|
+ hp2 := hp3;
|
|
|
+ end;
|
|
|
|
|
|
- if DoOptimisation then
|
|
|
- begin
|
|
|
- UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
|
|
|
- if RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
|
|
|
- { Flags are still in use - don't optimise }
|
|
|
- DoOptimisation := False;
|
|
|
- end;
|
|
|
- end
|
|
|
- else
|
|
|
- DoOptimisation := False;
|
|
|
- end;
|
|
|
+ if DoOptimisation then
|
|
|
+ begin
|
|
|
+ UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
|
|
|
+ if RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) then
|
|
|
+ { Flags are still in use - don't optimise }
|
|
|
+ DoOptimisation := False;
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ else
|
|
|
+ DoOptimisation := False;
|
|
|
+ end;
|
|
|
|
|
|
- if DoOptimisation then
|
|
|
- begin
|
|
|
+ if DoOptimisation then
|
|
|
+ begin
|
|
|
{$ifdef x86_64}
|
|
|
- { OR only supports 32-bit sign-extended constants for 64-bit
|
|
|
- instructions, so compensate for this if the constant is
|
|
|
- encoded as a value greater than or equal to 2^31 }
|
|
|
- if (taicpu(hp1).opsize = S_Q) and
|
|
|
- (taicpu(hp1).oper[0]^.typ = top_const) and
|
|
|
- (taicpu(hp1).oper[0]^.val >= $80000000) then
|
|
|
- taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val or $FFFFFFFF00000000;
|
|
|
+ { OR only supports 32-bit sign-extended constants for 64-bit
|
|
|
+ instructions, so compensate for this if the constant is
|
|
|
+ encoded as a value greater than or equal to 2^31 }
|
|
|
+ if (taicpu(hp1).opsize = S_Q) and
|
|
|
+ (taicpu(hp1).oper[0]^.typ = top_const) and
|
|
|
+ (taicpu(hp1).oper[0]^.val >= $80000000) then
|
|
|
+ taicpu(hp1).oper[0]^.val := taicpu(hp1).oper[0]^.val or $FFFFFFFF00000000;
|
|
|
{$endif x86_64}
|
|
|
- DebugMsg(SPeepholeOptimization + 'MOV 0 / OR -> MOV', p);
|
|
|
- taicpu(hp1).opcode := A_MOV;
|
|
|
- RemoveCurrentP(p);
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
- end
|
|
|
- else if
|
|
|
- { oper[0] is a reference }
|
|
|
- (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) then
|
|
|
- begin
|
|
|
- if MatchInstruction(hp1,A_LEA,[S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
|
|
|
- begin
|
|
|
- if ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
|
|
|
- (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
|
|
|
- ) or
|
|
|
- (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
|
|
|
- (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
|
|
|
- )
|
|
|
- ) and
|
|
|
- not RegModifiedBetween(Taicpu(hp1).oper[1]^.reg, p, hp1) then
|
|
|
- { mov ref,reg1
|
|
|
- lea (reg1,reg2),reg2
|
|
|
-
|
|
|
- to
|
|
|
+ DebugMsg(SPeepholeOptimization + 'MOV 0 / OR -> MOV', p);
|
|
|
+ taicpu(hp1).opcode := A_MOV;
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ else if
|
|
|
+ { oper[0] is a reference }
|
|
|
+ (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) then
|
|
|
+ begin
|
|
|
+ if MatchInstruction(hp1,A_LEA,[S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
|
|
|
+ begin
|
|
|
+ if ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
|
|
|
+ (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
|
|
|
+ ) or
|
|
|
+ (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
|
|
|
+ (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
|
|
|
+ )
|
|
|
+ ) and
|
|
|
+ not RegModifiedBetween(Taicpu(hp1).oper[1]^.reg, p, hp1) then
|
|
|
+ { mov ref,reg1
|
|
|
+ lea (reg1,reg2),reg2
|
|
|
|
|
|
- add ref,reg2 }
|
|
|
- begin
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
+ to
|
|
|
|
|
|
- { If the flags register is in use, don't change the instruction to an
|
|
|
- ADD otherwise this will scramble the flags. [Kit] }
|
|
|
- if not RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
|
|
|
- { reg1 may not be used afterwards }
|
|
|
- not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
|
|
|
- begin
|
|
|
- Taicpu(hp1).opcode:=A_ADD;
|
|
|
- Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
|
|
|
- DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
|
|
|
- RemoveCurrentp(p);
|
|
|
- result:=true;
|
|
|
- exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
+ add ref,reg2 }
|
|
|
+ begin
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
+
|
|
|
+ { If the flags register is in use, don't change the instruction to an
|
|
|
+ ADD otherwise this will scramble the flags. [Kit] }
|
|
|
+ if not RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
|
|
|
+ { reg1 may not be used afterwards }
|
|
|
+ not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
|
|
|
+ begin
|
|
|
+ Taicpu(hp1).opcode:=A_ADD;
|
|
|
+ Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
|
|
|
+ DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
|
|
|
+ RemoveCurrentp(p);
|
|
|
+ result:=true;
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- { If the LEA instruction can be converted into an arithmetic instruction,
|
|
|
- it may be possible to then fold it in the next optimisation. }
|
|
|
- if ConvertLEA(taicpu(hp1)) then
|
|
|
- Include(OptsToCheck, aoc_ForceNewIteration);
|
|
|
- end;
|
|
|
+ { If the LEA instruction can be converted into an arithmetic instruction,
|
|
|
+ it may be possible to then fold it in the next optimisation. }
|
|
|
+ if ConvertLEA(taicpu(hp1)) then
|
|
|
+ Include(OptsToCheck, aoc_ForceNewIteration);
|
|
|
+ end;
|
|
|
|
|
|
- {
|
|
|
- mov ref,reg0
|
|
|
- <op> reg0,reg1
|
|
|
- dealloc reg0
|
|
|
+ {
|
|
|
+ mov ref,reg0
|
|
|
+ <op> reg0,reg1
|
|
|
+ dealloc reg0
|
|
|
|
|
|
- to
|
|
|
+ to
|
|
|
|
|
|
- <op> ref,reg1
|
|
|
- }
|
|
|
- if MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
|
|
- (taicpu(hp1).oper[0]^.reg = p_TargetReg) and
|
|
|
- MatchInstruction(hp1, [A_AND, A_OR, A_XOR, A_ADD, A_SUB, A_CMP, A_TEST, A_CMOVcc, A_BSR, A_BSF, A_POPCNT, A_LZCNT], [taicpu(p).opsize]) and
|
|
|
- not SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, p_TargetReg) and
|
|
|
- not RefModifiedBetween(taicpu(p).oper[0]^.ref^, topsize2memsize[taicpu(p).opsize] shr 3, p, hp1) then
|
|
|
- begin
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
- if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) then
|
|
|
- begin
|
|
|
- taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
|
|
|
+ <op> ref,reg1
|
|
|
+ }
|
|
|
+ if MatchOpType(taicpu(hp1),top_reg,top_reg) and
|
|
|
+ (taicpu(hp1).oper[0]^.reg = p_TargetReg) and
|
|
|
+ MatchInstruction(hp1, [A_AND, A_OR, A_XOR, A_ADD, A_SUB, A_CMP, A_TEST, A_CMOVcc, A_BSR, A_BSF, A_POPCNT, A_LZCNT], [taicpu(p).opsize]) and
|
|
|
+ not SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, p_TargetReg) and
|
|
|
+ not RefModifiedBetween(taicpu(p).oper[0]^.ref^, topsize2memsize[taicpu(p).opsize] shr 3, p, hp1) then
|
|
|
+ begin
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
+ if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) then
|
|
|
+ begin
|
|
|
+ taicpu(hp1).loadref(0,taicpu(p).oper[0]^.ref^);
|
|
|
|
|
|
- { loadref increases the reference count, so decrement it again }
|
|
|
- if Assigned(taicpu(p).oper[0]^.ref^.symbol) then
|
|
|
- taicpu(p).oper[0]^.ref^.symbol.decrefs;
|
|
|
- if Assigned(taicpu(p).oper[0]^.ref^.relsymbol) then
|
|
|
- taicpu(p).oper[0]^.ref^.relsymbol.decrefs;
|
|
|
+ { loadref increases the reference count, so decrement it again }
|
|
|
+ if Assigned(taicpu(p).oper[0]^.ref^.symbol) then
|
|
|
+ taicpu(p).oper[0]^.ref^.symbol.decrefs;
|
|
|
+ if Assigned(taicpu(p).oper[0]^.ref^.relsymbol) then
|
|
|
+ taicpu(p).oper[0]^.ref^.relsymbol.decrefs;
|
|
|
|
|
|
- DebugMsg(SPeepholeOptimization + 'MovOp2Op done',hp1);
|
|
|
+ DebugMsg(SPeepholeOptimization + 'MovOp2Op done',hp1);
|
|
|
|
|
|
- { See if we can remove the allocation of reg0 }
|
|
|
- if not RegInRef(p_TargetReg, taicpu(p).oper[0]^.ref^) then
|
|
|
- TryRemoveRegAlloc(p_TargetReg, p, hp1);
|
|
|
+ { See if we can remove the allocation of reg0 }
|
|
|
+ if not RegInRef(p_TargetReg, taicpu(p).oper[0]^.ref^) then
|
|
|
+ TryRemoveRegAlloc(p_TargetReg, p, hp1);
|
|
|
|
|
|
- RemoveCurrentp(p);
|
|
|
- Result:=true;
|
|
|
- exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
- end;
|
|
|
+ RemoveCurrentp(p);
|
|
|
+ Result:=true;
|
|
|
+ exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
- { Depending on the DeepMOVOpt above, it may turn out that hp1 completely
|
|
|
- overwrites the original destination register. e.g.
|
|
|
+ { Depending on the DeepMOVOpt above, it may turn out that hp1 completely
|
|
|
+ overwrites the original destination register. e.g.
|
|
|
|
|
|
- movl ###,%reg2d
|
|
|
- movslq ###,%reg2q (### doesn't have to be the same as the first one)
|
|
|
+ movl ###,%reg2d
|
|
|
+ movslq ###,%reg2q (### doesn't have to be the same as the first one)
|
|
|
|
|
|
- In this case, we can remove the MOV (Go to "Mov2Nop 5" below)
|
|
|
- }
|
|
|
- if MatchInstruction(hp1, [A_LEA, A_MOV, A_MOVSX, A_MOVZX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}], []) and
|
|
|
- (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
- Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[1]^.reg) then
|
|
|
- begin
|
|
|
- if RegInOp(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^) then
|
|
|
- begin
|
|
|
- if (taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
|
- case taicpu(p).oper[0]^.typ of
|
|
|
- top_const:
|
|
|
- { We have something like:
|
|
|
+ In this case, we can remove the MOV (Go to "Mov2Nop 5" below)
|
|
|
+ }
|
|
|
+ if MatchInstruction(hp1, [A_LEA, A_MOV, A_MOVSX, A_MOVZX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}], []) and
|
|
|
+ (taicpu(hp1).oper[1]^.typ = top_reg) and
|
|
|
+ Reg1WriteOverwritesReg2Entirely(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[1]^.reg) then
|
|
|
+ begin
|
|
|
+ if RegInOp(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^) then
|
|
|
+ begin
|
|
|
+ if (taicpu(hp1).oper[0]^.typ = top_reg) then
|
|
|
+ case taicpu(p).oper[0]^.typ of
|
|
|
+ top_const:
|
|
|
+ { We have something like:
|
|
|
|
|
|
- movb $x, %regb
|
|
|
- movzbl %regb,%regd
|
|
|
+ movb $x, %regb
|
|
|
+ movzbl %regb,%regd
|
|
|
|
|
|
- Change to:
|
|
|
+ Change to:
|
|
|
|
|
|
- movl $x, %regd
|
|
|
- }
|
|
|
- begin
|
|
|
- case taicpu(hp1).opsize of
|
|
|
- S_BW:
|
|
|
- begin
|
|
|
- convert_mov_value(A_MOVSX, $FF);
|
|
|
- setsubreg(taicpu(p).oper[1]^.reg, R_SUBW);
|
|
|
- taicpu(p).opsize := S_W;
|
|
|
- end;
|
|
|
- S_BL:
|
|
|
- begin
|
|
|
- convert_mov_value(A_MOVSX, $FF);
|
|
|
- setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
|
|
|
- taicpu(p).opsize := S_L;
|
|
|
- end;
|
|
|
- S_WL:
|
|
|
- begin
|
|
|
- convert_mov_value(A_MOVSX, $FFFF);
|
|
|
- setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
|
|
|
- taicpu(p).opsize := S_L;
|
|
|
- end;
|
|
|
+ movl $x, %regd
|
|
|
+ }
|
|
|
+ begin
|
|
|
+ case taicpu(hp1).opsize of
|
|
|
+ S_BW:
|
|
|
+ begin
|
|
|
+ convert_mov_value(A_MOVSX, $FF);
|
|
|
+ setsubreg(taicpu(p).oper[1]^.reg, R_SUBW);
|
|
|
+ taicpu(p).opsize := S_W;
|
|
|
+ end;
|
|
|
+ S_BL:
|
|
|
+ begin
|
|
|
+ convert_mov_value(A_MOVSX, $FF);
|
|
|
+ setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
|
|
|
+ taicpu(p).opsize := S_L;
|
|
|
+ end;
|
|
|
+ S_WL:
|
|
|
+ begin
|
|
|
+ convert_mov_value(A_MOVSX, $FFFF);
|
|
|
+ setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
|
|
|
+ taicpu(p).opsize := S_L;
|
|
|
+ end;
|
|
|
{$ifdef x86_64}
|
|
|
- S_BQ:
|
|
|
- begin
|
|
|
- convert_mov_value(A_MOVSX, $FF);
|
|
|
- setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
|
|
|
- taicpu(p).opsize := S_Q;
|
|
|
- end;
|
|
|
- S_WQ:
|
|
|
- begin
|
|
|
- convert_mov_value(A_MOVSX, $FFFF);
|
|
|
- setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
|
|
|
- taicpu(p).opsize := S_Q;
|
|
|
- end;
|
|
|
- S_LQ:
|
|
|
- begin
|
|
|
- convert_mov_value(A_MOVSXD, $FFFFFFFF); { Note it's MOVSXD, not MOVSX }
|
|
|
- setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
|
|
|
- taicpu(p).opsize := S_Q;
|
|
|
- end;
|
|
|
+ S_BQ:
|
|
|
+ begin
|
|
|
+ convert_mov_value(A_MOVSX, $FF);
|
|
|
+ setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
|
|
|
+ taicpu(p).opsize := S_Q;
|
|
|
+ end;
|
|
|
+ S_WQ:
|
|
|
+ begin
|
|
|
+ convert_mov_value(A_MOVSX, $FFFF);
|
|
|
+ setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
|
|
|
+ taicpu(p).opsize := S_Q;
|
|
|
+ end;
|
|
|
+ S_LQ:
|
|
|
+ begin
|
|
|
+ convert_mov_value(A_MOVSXD, $FFFFFFFF); { Note it's MOVSXD, not MOVSX }
|
|
|
+ setsubreg(taicpu(p).oper[1]^.reg, R_SUBQ);
|
|
|
+ taicpu(p).opsize := S_Q;
|
|
|
+ end;
|
|
|
{$endif x86_64}
|
|
|
- else
|
|
|
- { If hp1 was a MOV instruction, it should have been
|
|
|
- optimised already }
|
|
|
- InternalError(2020021001);
|
|
|
+ else
|
|
|
+ { If hp1 was a MOV instruction, it should have been
|
|
|
+ optimised already }
|
|
|
+ InternalError(2020021001);
|
|
|
+ end;
|
|
|
+ DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 2 done',p);
|
|
|
+ RemoveInstruction(hp1);
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
end;
|
|
|
- DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 2 done',p);
|
|
|
- RemoveInstruction(hp1);
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
- end;
|
|
|
- top_ref:
|
|
|
- begin
|
|
|
- { We have something like:
|
|
|
+ top_ref:
|
|
|
+ begin
|
|
|
+ { We have something like:
|
|
|
|
|
|
- movb mem, %regb
|
|
|
- movzbl %regb,%regd
|
|
|
+ movb mem, %regb
|
|
|
+ movzbl %regb,%regd
|
|
|
|
|
|
- Change to:
|
|
|
+ Change to:
|
|
|
|
|
|
- movzbl mem, %regd
|
|
|
- }
|
|
|
- if (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) and (IsMOVZXAcceptable or (taicpu(hp1).opcode<>A_MOVZX)) then
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 1 done',p);
|
|
|
-
|
|
|
- taicpu(p).opcode := taicpu(hp1).opcode;
|
|
|
- taicpu(p).opsize := taicpu(hp1).opsize;
|
|
|
- taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg;
|
|
|
+ movzbl mem, %regd
|
|
|
+ }
|
|
|
+ if (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) and (IsMOVZXAcceptable or (taicpu(hp1).opcode<>A_MOVZX)) then
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'MovMovXX2MovXX 1 done',p);
|
|
|
|
|
|
- RemoveInstruction(hp1);
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
- else
|
|
|
- if (taicpu(hp1).opcode <> A_MOV) and (taicpu(hp1).opcode <> A_LEA) then
|
|
|
- { Just to make a saving, since there are no more optimisations with MOVZX and MOVSX/D }
|
|
|
- Exit;
|
|
|
- end;
|
|
|
- end
|
|
|
- { The RegInOp check makes sure that movl r/m,%reg1l; movzbl (%reg1l),%reg1l"
|
|
|
- and "movl r/m,%reg1; leal $1(%reg1,%reg2),%reg1" etc. are not incorrectly
|
|
|
- optimised }
|
|
|
- else
|
|
|
- begin
|
|
|
- DebugMsg(SPeepholeOptimization + 'Mov2Nop 5 done',p);
|
|
|
- RemoveCurrentP(p);
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
+ taicpu(p).opcode := taicpu(hp1).opcode;
|
|
|
+ taicpu(p).opsize := taicpu(hp1).opsize;
|
|
|
+ taicpu(p).oper[1]^.reg := taicpu(hp1).oper[1]^.reg;
|
|
|
|
|
|
- if (taicpu(hp1).opcode = A_MOV) and
|
|
|
- MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
|
|
|
- begin
|
|
|
- { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
|
|
- TransferUsedRegs(TmpUsedRegs);
|
|
|
- UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
- { we have
|
|
|
- mov x, %treg
|
|
|
- mov %treg, y
|
|
|
- }
|
|
|
- if not(RegInOp(p_TargetReg, taicpu(hp1).oper[1]^)) then
|
|
|
- if not(RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs)) then
|
|
|
- begin
|
|
|
- { we've got
|
|
|
+ RemoveInstruction(hp1);
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
+ else
|
|
|
+ if (taicpu(hp1).opcode <> A_MOV) and (taicpu(hp1).opcode <> A_LEA) then
|
|
|
+ { Just to make a saving, since there are no more optimisations with MOVZX and MOVSX/D }
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end
|
|
|
+ { The RegInOp check makes sure that movl r/m,%reg1l; movzbl (%reg1l),%reg1l"
|
|
|
+ and "movl r/m,%reg1; leal $1(%reg1,%reg2),%reg1" etc. are not incorrectly
|
|
|
+ optimised }
|
|
|
+ else
|
|
|
+ begin
|
|
|
+ DebugMsg(SPeepholeOptimization + 'Mov2Nop 5 done',p);
|
|
|
+ RemoveCurrentP(p);
|
|
|
+ Result := True;
|
|
|
+ Exit;
|
|
|
+ end;
|
|
|
+ end;
|
|
|
|
|
|
+ if (taicpu(hp1).opcode = A_MOV) and
|
|
|
+ MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
|
|
|
+ begin
|
|
|
+ { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
|
|
|
+ TransferUsedRegs(TmpUsedRegs);
|
|
|
+ UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1);
|
|
|
+ { we have
|
|
|
mov x, %treg
|
|
|
mov %treg, y
|
|
|
+ }
|
|
|
+ if not(RegInOp(p_TargetReg, taicpu(hp1).oper[1]^)) then
|
|
|
+ if not(RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs)) then
|
|
|
+ begin
|
|
|
+ { we've got
|
|
|
|
|
|
- with %treg is not used after }
|
|
|
- case taicpu(p).oper[0]^.typ Of
|
|
|
- { top_reg is covered by DeepMOVOpt }
|
|
|
- top_const:
|
|
|
- begin
|
|
|
- { change
|
|
|
- mov const, %treg
|
|
|
- mov %treg, y
|
|
|
-
|
|
|
- to
|
|
|
+ mov x, %treg
|
|
|
+ mov %treg, y
|
|
|
|
|
|
- mov const, y
|
|
|
- }
|
|
|
-{$ifdef x86_64}
|
|
|
- if (taicpu(hp1).oper[1]^.typ=top_reg) or
|
|
|
- ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
|
|
|
-{$endif x86_64}
|
|
|
- begin
|
|
|
- taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
|
|
|
- DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done', hp1);
|
|
|
- RemoveCurrentP(p);
|
|
|
- Result := True;
|
|
|
- Exit;
|
|
|
- end;
|
|
|
- end;
|
|
|
- top_ref:
|
|
|
- case taicpu(hp1).oper[1]^.typ of
|
|
|
- top_reg:
|
|
|
+ with %treg is not used after }
|
|
|
+ case taicpu(p).oper[0]^.typ Of
|
|
|
+ { top_reg is covered by DeepMOVOpt }
|
|
|
+ top_const:
|
|
|
+ begin
|
|
|
{ change
|
|
|
- mov mem, %treg
|
|
|
- mov %treg, %reg
|
|
|
+ mov const, %treg
|
|
|
+ mov %treg, y
|
|
|
|
|
|
- to
|
|
|
+ to
|
|
|
|
|
|
- mov mem, %reg"
|
|
|
+ mov const, y
|
|
|
}
- if not RegUsedBetween(taicpu(hp1).oper[1]^.reg, p, hp1) then
+{$ifdef x86_64}
+ if (taicpu(hp1).oper[1]^.typ=top_reg) or
+ ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
+{$endif x86_64}
 begin
- taicpu(p).loadreg(1, taicpu(hp1).oper[1]^.reg);
- DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3a done', p);
- AllocRegBetween(taicpu(hp1).oper[1]^.reg, p, hp1, UsedRegs);
- RemoveInstruction(hp1);
+ taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
+ DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done', hp1);
+ RemoveCurrentP(p);
 Result := True;
 Exit;
- end
- else if
- { Make sure that if a reference is used, its
- registers are not modified in between }
- not RefModifiedBetween(taicpu(p).oper[0]^.ref^, topsize2memsize[taicpu(p).opsize] shr 3, p, hp1) then
- begin
- if (taicpu(p).oper[0]^.ref^.base <> NR_NO){$ifdef x86_64} and (taicpu(p).oper[0]^.ref^.base <> NR_RIP){$endif x86_64} then
- AllocRegBetween(taicpu(p).oper[0]^.ref^.base, p, hp1, UsedRegs);
- if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[0]^.ref^.base) then
- AllocRegBetween(taicpu(p).oper[0]^.ref^.index, p, hp1, UsedRegs);
+ end;
+ end;
+ top_ref:
+ case taicpu(hp1).oper[1]^.typ of
+ top_reg:
+ { change
+ mov mem, %treg
+ mov %treg, %reg

- taicpu(hp1).loadref(0, taicpu(p).oper[0]^.ref^);
+ to

- if Assigned(taicpu(p).oper[0]^.ref^.symbol) then
- taicpu(p).oper[0]^.ref^.symbol.decrefs;
- if Assigned(taicpu(p).oper[0]^.ref^.relsymbol) then
- taicpu(p).oper[0]^.ref^.relsymbol.decrefs;
+ mov mem, %reg"
+ }
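+ { Illustration with hypothetical operands: "movl 4(%rbx),%edx; movl %edx,%eax"
+ becomes "movl 4(%rbx),%eax", provided %eax is not used between the two
+ instructions and %edx is not used afterwards }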
+ if not RegUsedBetween(taicpu(hp1).oper[1]^.reg, p, hp1) then
+ begin
+ taicpu(p).loadreg(1, taicpu(hp1).oper[1]^.reg);
+ DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3a done', p);
+ AllocRegBetween(taicpu(hp1).oper[1]^.reg, p, hp1, UsedRegs);
+ RemoveInstruction(hp1);
+ Result := True;
+ Exit;
+ end
+ else if
+ { Make sure that if a reference is used, its
+ registers are not modified in between }
+ not RefModifiedBetween(taicpu(p).oper[0]^.ref^, topsize2memsize[taicpu(p).opsize] shr 3, p, hp1) then
+ begin
+ if (taicpu(p).oper[0]^.ref^.base <> NR_NO){$ifdef x86_64} and (taicpu(p).oper[0]^.ref^.base <> NR_RIP){$endif x86_64} then
+ AllocRegBetween(taicpu(p).oper[0]^.ref^.base, p, hp1, UsedRegs);
+ if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[0]^.ref^.base) then
+ AllocRegBetween(taicpu(p).oper[0]^.ref^.index, p, hp1, UsedRegs);

- DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done', hp1);
- RemoveCurrentP(p);
- Result := True;
- Exit;
- end;
- top_ref:
- if not RegInRef(p_TargetReg, taicpu(p).oper[0]^.ref^) then
- begin
+ taicpu(hp1).loadref(0, taicpu(p).oper[0]^.ref^);
+
+ if Assigned(taicpu(p).oper[0]^.ref^.symbol) then
+ taicpu(p).oper[0]^.ref^.symbol.decrefs;
+ if Assigned(taicpu(p).oper[0]^.ref^.relsymbol) then
+ taicpu(p).oper[0]^.ref^.relsymbol.decrefs;
+
+ DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done', hp1);
+ RemoveCurrentP(p);
+ Result := True;
+ Exit;
+ end;
+ top_ref:
+ if not RegInRef(p_TargetReg, taicpu(p).oper[0]^.ref^) then
+ begin
 {$ifdef x86_64}
- { Look for the following to simplify:
-
- mov x(mem1), %reg
- mov %reg, y(mem2)
- mov x+8(mem1), %reg
- mov %reg, y+8(mem2)
-
- Change to:
- movdqu x(mem1), %xmmreg
- movdqu %xmmreg, y(mem2)
-
- ...but only as long as the memory blocks don't overlap
- }
- SourceRef := taicpu(p).oper[0]^.ref^;
- TargetRef := taicpu(hp1).oper[1]^.ref^;
- if (taicpu(p).opsize = S_Q) and
- not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and
- GetNextInstruction(hp1, hp2) and
- MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
- MatchOpType(taicpu(hp2), top_ref, top_reg) then
- begin
- { Delay calling GetNextInstruction(hp2, hp3) for as long as possible }
+ { Look for the following to simplify:
+
+ mov x(mem1), %reg
+ mov %reg, y(mem2)
+ mov x+8(mem1), %reg
+ mov %reg, y+8(mem2)
+
+ Change to:
+ movdqu x(mem1), %xmmreg
+ movdqu %xmmreg, y(mem2)
+
+ ...but only as long as the memory blocks don't overlap
+ }
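+ { Illustration with hypothetical operands (the XMM register stands for
+ whichever free register GetMMRegisterBetween picks):
+ movq (%rsi),%rax movdqu (%rsi),%xmm0
+ movq %rax,(%rdi) ==> movdqu %xmm0,(%rdi)
+ movq 8(%rsi),%rax
+ movq %rax,8(%rdi) }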
+ SourceRef := taicpu(p).oper[0]^.ref^;
+ TargetRef := taicpu(hp1).oper[1]^.ref^;
+ if (taicpu(p).opsize = S_Q) and
+ not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and
+ GetNextInstruction(hp1, hp2) and
+ MatchInstruction(hp2, A_MOV, [taicpu(p).opsize]) and
+ MatchOpType(taicpu(hp2), top_ref, top_reg) then
+ begin
+ { Delay calling GetNextInstruction(hp2, hp3) for as long as possible }

- UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
+ UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));

- Inc(SourceRef.offset, 8);
+ Inc(SourceRef.offset, 8);

- if UseAVX then
- begin
- MovAligned := A_VMOVDQA;
- MovUnaligned := A_VMOVDQU;
- end
- else
- begin
- MovAligned := A_MOVDQA;
- MovUnaligned := A_MOVDQU;
- end;
-
- if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) and
- not RefsMightOverlap(taicpu(p).oper[0]^.ref^, TargetRef, 16) then
- begin
- UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
- Inc(TargetRef.offset, 8);
- if GetNextInstruction(hp2, hp3) and
- MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
- MatchOpType(taicpu(hp3), top_reg, top_ref) and
- (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
- RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
- not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
- begin
- NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
- if NewMMReg <> NR_NO then
- begin
- { Remember that the offsets are 8 ahead }
- if ((SourceRef.offset mod 16) = 8) and
- (
- { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
- (SourceRef.base = current_procinfo.framepointer) or
- ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
- ) then
- taicpu(p).opcode := MovAligned
- else
- taicpu(p).opcode := MovUnaligned;
-
- taicpu(p).opsize := S_XMM;
- taicpu(p).oper[1]^.reg := NewMMReg;
-
- if ((TargetRef.offset mod 16) = 8) and
- (
- { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
- (TargetRef.base = current_procinfo.framepointer) or
- ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
- ) then
- taicpu(hp1).opcode := MovAligned
- else
- taicpu(hp1).opcode := MovUnaligned;
-
- taicpu(hp1).opsize := S_XMM;
- taicpu(hp1).oper[0]^.reg := NewMMReg;
-
- DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
-
- RemoveInstruction(hp2);
- RemoveInstruction(hp3);
- Result := True;
- Exit;
- end;
- end;
- end
- else
- begin
- { See if the next references are 8 less rather than 8 greater }
-
- Dec(SourceRef.offset, 16); { -8 the other way }
- if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) then
- begin
- UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
- Dec(TargetRef.offset, 8); { Only 8, not 16, as it wasn't incremented unlike SourceRef }
- if not RefsMightOverlap(SourceRef, TargetRef, 16) and
- GetNextInstruction(hp2, hp3) and
- MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
- MatchOpType(taicpu(hp3), top_reg, top_ref) and
- (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
- RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
- not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
- begin
- NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
- if NewMMReg <> NR_NO then
- begin
- { hp2 and hp3 are the starting offsets, so mod = 0 this time }
- if ((SourceRef.offset mod 16) = 0) and
- (
- { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
- (SourceRef.base = current_procinfo.framepointer) or
- ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
- ) then
- taicpu(hp2).opcode := MovAligned
- else
- taicpu(hp2).opcode := MovUnaligned;
-
- taicpu(hp2).opsize := S_XMM;
- taicpu(hp2).oper[1]^.reg := NewMMReg;
-
- if ((TargetRef.offset mod 16) = 0) and
- (
- { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
- (TargetRef.base = current_procinfo.framepointer) or
- ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
- ) then
- taicpu(hp3).opcode := MovAligned
- else
- taicpu(hp3).opcode := MovUnaligned;
-
- taicpu(hp3).opsize := S_XMM;
- taicpu(hp3).oper[0]^.reg := NewMMReg;
-
- DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
-
- RemoveInstruction(hp1);
- RemoveCurrentP(p);
- Result := True;
- Exit;
- end;
- end;
- end;
- end;
- end;
+ if UseAVX then
+ begin
+ MovAligned := A_VMOVDQA;
+ MovUnaligned := A_VMOVDQU;
+ end
+ else
+ begin
+ MovAligned := A_MOVDQA;
+ MovUnaligned := A_MOVDQU;
+ end;
+
+ if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) and
+ not RefsMightOverlap(taicpu(p).oper[0]^.ref^, TargetRef, 16) then
+ begin
+ UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
+ Inc(TargetRef.offset, 8);
+ if GetNextInstruction(hp2, hp3) and
+ MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
+ MatchOpType(taicpu(hp3), top_reg, top_ref) and
+ (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
+ RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
+ not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
+ begin
+ NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
+ if NewMMReg <> NR_NO then
+ begin
+ { Remember that the offsets are 8 ahead }
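+ { i.e. the original source offset x is 16-byte aligned here, because
+ SourceRef.offset currently holds x+8 after the Inc above }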
+ if ((SourceRef.offset mod 16) = 8) and
+ (
+ { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
+ (SourceRef.base = current_procinfo.framepointer) or
+ ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
+ ) then
+ taicpu(p).opcode := MovAligned
+ else
+ taicpu(p).opcode := MovUnaligned;
+
+ taicpu(p).opsize := S_XMM;
+ taicpu(p).oper[1]^.reg := NewMMReg;
+
+ if ((TargetRef.offset mod 16) = 8) and
+ (
+ { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
+ (TargetRef.base = current_procinfo.framepointer) or
+ ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
+ ) then
+ taicpu(hp1).opcode := MovAligned
+ else
+ taicpu(hp1).opcode := MovUnaligned;
+
+ taicpu(hp1).opsize := S_XMM;
+ taicpu(hp1).oper[0]^.reg := NewMMReg;
+
+ DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 1)', p);
+
+ RemoveInstruction(hp2);
+ RemoveInstruction(hp3);
+ Result := True;
+ Exit;
+ end;
+ end;
+ end
+ else
+ begin
+ { See if the next references are 8 less rather than 8 greater }
+
+ Dec(SourceRef.offset, 16); { -8 the other way }
+ if RefsEqual(SourceRef, taicpu(hp2).oper[0]^.ref^) then
+ begin
+ UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
+ Dec(TargetRef.offset, 8); { Only 8, not 16, as it wasn't incremented unlike SourceRef }
+ if not RefsMightOverlap(SourceRef, TargetRef, 16) and
+ GetNextInstruction(hp2, hp3) and
+ MatchInstruction(hp3, A_MOV, [taicpu(p).opsize]) and
+ MatchOpType(taicpu(hp3), top_reg, top_ref) and
+ (taicpu(hp2).oper[1]^.reg = taicpu(hp3).oper[0]^.reg) and
+ RefsEqual(TargetRef, taicpu(hp3).oper[1]^.ref^) and
+ not RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp3, TmpUsedRegs) then
+ begin
+ NewMMReg := GetMMRegisterBetween(R_SUBMMX, UsedRegs, p, hp3);
+ if NewMMReg <> NR_NO then
+ begin
+ { hp2 and hp3 are the starting offsets, so mod = 0 this time }
+ if ((SourceRef.offset mod 16) = 0) and
+ (
+ { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
+ (SourceRef.base = current_procinfo.framepointer) or
+ ((SourceRef.alignment >= 16) and ((SourceRef.alignment mod 16) = 0))
+ ) then
+ taicpu(hp2).opcode := MovAligned
+ else
+ taicpu(hp2).opcode := MovUnaligned;
+
+ taicpu(hp2).opsize := S_XMM;
+ taicpu(hp2).oper[1]^.reg := NewMMReg;
+
+ if ((TargetRef.offset mod 16) = 0) and
+ (
+ { Base pointer is always aligned (stack pointer won't be if there's no stack frame) }
+ (TargetRef.base = current_procinfo.framepointer) or
+ ((TargetRef.alignment >= 16) and ((TargetRef.alignment mod 16) = 0))
+ ) then
+ taicpu(hp3).opcode := MovAligned
+ else
+ taicpu(hp3).opcode := MovUnaligned;
+
+ taicpu(hp3).opsize := S_XMM;
+ taicpu(hp3).oper[0]^.reg := NewMMReg;
+
+ DebugMsg(SPeepholeOptimization + 'Used ' + debug_regname(NewMMReg) + ' to merge a pair of memory moves (MovMovMovMov2MovdqMovdq 2)', p);
+
+ RemoveInstruction(hp1);
+ RemoveCurrentP(p);
+ Result := True;
+ Exit;
+ end;
+ end;
+ end;
+ end;
+ end;
 {$endif x86_64}
- end;
- else
- { The write target should be a reg or a ref }
- InternalError(2021091601);
- end;
- else
- ;
+ end;
+ else
+ { The write target should be a reg or a ref }
+ InternalError(2021091601);
+ end;
+ else
+ ;
+ end;
+ end
+ else if (taicpu(p).oper[0]^.typ = top_const) and
+ { %treg is used afterwards, but all eventualities other
+ than the first MOV instruction being a constant are
+ covered by DeepMOVOpt, so only check for that }
+ (
+ { For MOV operations, a size saving is only made if the register/const is byte-sized }
+ not (cs_opt_size in current_settings.optimizerswitches) or
+ (taicpu(hp1).opsize = S_B)
+ ) and
+ (
+ (taicpu(hp1).oper[1]^.typ = top_reg) or
+ ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
+ ) then
+ begin
+ DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
+ taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
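+ { Illustration with hypothetical operands: "movl $3,%edx; movl %edx,%ecx"
+ becomes "movl $3,%edx; movl $3,%ecx", so the second move no longer has to
+ wait for %edx to be written }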
+ Include(OptsToCheck, aoc_ForceNewIteration);
 end;
- end
- else if (taicpu(p).oper[0]^.typ = top_const) and
- { %treg is used afterwards, but all eventualities other
- than the first MOV instruction being a constant are
- covered by DeepMOVOpt, so only check for that }
- (
- { For MOV operations, a size saving is only made if the register/const is byte-sized }
- not (cs_opt_size in current_settings.optimizerswitches) or
- (taicpu(hp1).opsize = S_B)
- ) and
- (
- (taicpu(hp1).oper[1]^.typ = top_reg) or
- ((taicpu(p).oper[0]^.val >= low(longint)) and (taicpu(p).oper[0]^.val <= high(longint)))
- ) then
- begin
- DebugMsg(SPeepholeOptimization + debug_operstr(taicpu(hp1).oper[0]^) + ' = $' + debug_tostr(taicpu(p).oper[0]^.val) + '; changed to minimise pipeline stall (MovMov2Mov 6b)',hp1);
- taicpu(hp1).loadconst(0, taicpu(p).oper[0]^.val);
- Include(OptsToCheck, aoc_ForceNewIteration);
- end;
- end;
- Break;
- end;
+ end;
+ Break;
+ end;
 end;

 if taicpu(p).oper[0]^.typ = top_reg then