Browse Source

* x86-64: Extended the movl/movq optimisation to cover more distance

J. Gareth "Curious Kit" Moreton 2 years ago
parent
commit
f3f9c68ddb
1 changed files with 93 additions and 29 deletions
  1. 93 29
      compiler/x86/aoptx86.pas

+ 93 - 29
compiler/x86/aoptx86.pas

@@ -4125,13 +4125,13 @@ unit aoptx86;
             if (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
               begin
                 { %reg1 = %reg3 }
-                DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlAndl)', hp1);
+                DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlAndl 1)', hp1);
                 taicpu(hp1).opcode := A_AND;
               end
             else
               begin
                 { %reg1 <> %reg3 }
-                DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlMovl)', hp1);
+                DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlMovl 1)', hp1);
               end;
 
             if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
@@ -4332,35 +4332,99 @@ unit aoptx86;
                               Internalerror(2019103001);
                           end;
                       end
-                    else
-                      if MatchOperand(taicpu(hp2).oper[1]^, p_TargetReg) then
-                        begin
-                          if not CrossJump and
-                            not RegUsedBetween(p_TargetReg, p, hp2) and
-                            not RegReadByInstruction(p_TargetReg, hp2) then
-                            begin
-                              { Register is not used before it is overwritten }
-                              DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
-                              RemoveCurrentp(p, hp1);
-                              Result := True;
-                              Exit;
-                            end;
+                    else if MatchOperand(taicpu(hp2).oper[1]^, p_TargetReg) then
+                      begin
+                        if not CrossJump and
+                          not RegUsedBetween(p_TargetReg, p, hp2) and
+                          not RegReadByInstruction(p_TargetReg, hp2) then
+                          begin
+                            { Register is not used before it is overwritten }
+                            DebugMsg(SPeepholeOptimization + 'Mov2Nop 3a done',p);
+                            RemoveCurrentp(p, hp1);
+                            Result := True;
+                            Exit;
+                          end;
 
-                          if (taicpu(p).oper[0]^.typ = top_const) and
-                            (taicpu(hp2).oper[0]^.typ = top_const) then
-                            begin
-                              if taicpu(p).oper[0]^.val = taicpu(hp2).oper[0]^.val then
-                                begin
-                                  { Same value - register hasn't changed }
-                                  DebugMsg(SPeepholeOptimization + 'Mov2Nop 2 done', hp2);
-                                  RemoveInstruction(hp2);
-                                  Result := True;
+                        if (taicpu(p).oper[0]^.typ = top_const) and
+                          (taicpu(hp2).oper[0]^.typ = top_const) then
+                          begin
+                            if taicpu(p).oper[0]^.val = taicpu(hp2).oper[0]^.val then
+                              begin
+                                { Same value - register hasn't changed }
+                                DebugMsg(SPeepholeOptimization + 'Mov2Nop 2 done', hp2);
+                                RemoveInstruction(hp2);
+                                Result := True;
+
+                                { See if there's more we can optimise }
+                                Continue;
+                              end;
+                          end;
+{$ifdef x86_64}
+                      end
+                    { Change:
+                        movl %reg1l,%reg2l
+                        ...
+                        movq %reg2q,%reg3q  (%reg1 <> %reg3)
+
+                      To:
+                        movl %reg1l,%reg2l
+                        ...
+                        movl %reg1l,%reg3l  (Upper 32 bits of %reg3q will be zero)
+
+                      If %reg1 = %reg3, convert to:
+                        movl %reg1l,%reg2l
+                        ...
+                        andl %reg1l,%reg1l
+                    }
+                    else if (taicpu(p).opsize = S_L) and MatchInstruction(hp2,A_MOV,[S_Q]) and
+                      (taicpu(p).oper[0]^.typ = top_reg) and
+                      MatchOpType(taicpu(hp2), top_reg, top_reg) and
+                      SuperRegistersEqual(p_TargetReg, taicpu(hp2).oper[0]^.reg) and
+                      not RegModifiedBetween(p_TargetReg, p, hp2) then
+                      begin
+                        TempRegUsed :=
+                          CrossJump { Assume the register is in use if it crossed a conditional jump } or
+                          RegReadByInstruction(p_TargetReg, hp3) or
+                          RegUsedAfterInstruction(p_TargetReg, hp2, TmpUsedRegs);
+
+                        taicpu(hp2).opsize := S_L;
+                        taicpu(hp2).loadreg(0, taicpu(p).oper[0]^.reg);
+                        setsubreg(taicpu(hp2).oper[1]^.reg, R_SUBD);
+
+                        AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp2, UsedRegs);
+
+                        if (taicpu(p).oper[0]^.reg = taicpu(hp2).oper[1]^.reg) then
+                          begin
+                            { %reg1 = %reg3 }
+                            DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlAndl 2)', hp2);
+                            taicpu(hp2).opcode := A_AND;
+                          end
+                        else
+                          begin
+                            { %reg1 <> %reg3 }
+                            DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlMovl 2)', hp2);
+                          end;
+
+                        if not TempRegUsed then
+                          begin
+                            DebugMsg(SPeepholeOptimization + 'Mov2Nop 8a done', p);
+                            RemoveCurrentP(p, hp1);
+                            Result := True;
+                            Exit;
+                          end
+                        else
+                          begin
+                            { Initial instruction wasn't actually changed }
+                            Include(OptsToCheck, aoc_ForceNewIteration);
+
+                            { if %reg1 = %reg3, don't do the long-distance lookahead that
+                              appears below since %reg1 has technically changed }
+                            if taicpu(hp2).opcode = A_AND then
+                              Break;
+                          end;
+{$endif x86_64}
+                      end;
 
-                                  { See if there's more we can optimise }
-                                  Continue;
-                                end;
-                            end;
-                        end;
                   A_MOVZX, A_MOVSX{$ifdef x86_64}, A_MOVSXD{$endif x86_64}:
                     if MatchOpType(taicpu(hp2), top_reg, top_reg) and
                       MatchOperand(taicpu(hp2).oper[0]^, p_TargetReg) and