Browse Source

* x86-64: movl/movq -> movl/movl zero-extension optimisation

J. Gareth "Curious Kit" Moreton 2 years ago
parent
commit
42b052eb83
1 changed file with 60 additions and 1 deletion
  1. 60 1
      compiler/x86/aoptx86.pas

+ 60 - 1
compiler/x86/aoptx86.pas

@@ -4095,6 +4095,65 @@ unit aoptx86;
               end;
               end;
           end;
           end;
 
 
+{$ifdef x86_64}
+        { Change:
+            movl %reg1l,%reg2l
+            movq %reg2q,%reg3q  (%reg1 <> %reg3)
+
+          To:
+            movl %reg1l,%reg2l
+            movl %reg1l,%reg3l  (Upper 32 bits of %reg3q will be zero)
+
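+          If %reg1 = %reg3, convert to:
+            movl %reg1l,%reg2l
+            andl %reg1l,%reg1l  (Upper 32 bits of %reg1q will be zero)
+
+          NOTE(review): unlike MOV, AND writes the flags; no flags-liveness
+          check is visible in this block - confirm this is safe.
+        }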
+        if (taicpu(p).opsize = S_L) and MatchInstruction(hp1,A_MOV,[S_Q]) and
+          MatchOpType(taicpu(p), top_reg, top_reg) and
+          MatchOpType(taicpu(hp1), top_reg, top_reg) and
+          SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp1).oper[0]^.reg) then
+          begin
+            TransferUsedRegs(TmpUsedRegs);
+            UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
+
+            taicpu(hp1).opsize := S_L;
+            taicpu(hp1).loadreg(0, taicpu(p).oper[0]^.reg);
+            setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD);
+
+            AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
+
+            if (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
+              begin
+                { %reg1 = %reg3 }
+                DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlAndl)', hp1);
+                taicpu(hp1).opcode := A_AND;
+              end
+            else
+              begin
+                { %reg1 <> %reg3 }
+                DebugMsg(SPeepholeOptimization + 'Made 32-to-64-bit zero extension more efficient (MovlMovq2MovlMovl)', hp1);
+              end;
+
+            if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
+              begin
+                DebugMsg(SPeepholeOptimization + 'Mov2Nop 8 done', p);
+                RemoveCurrentP(p, hp1);
+                Result := True;
+                Exit;
+              end
+            else
+              begin
+                { The initial instruction (p) wasn't actually changed - only
+                  hp1 was - so flag a new iteration instead of setting Result }
+                Include(OptsToCheck, aoc_ForceNewIteration);
+
+                { if %reg1 = %reg3, don't do the long-distance lookahead that
+                  appears below since %reg1 has technically changed }
+                if taicpu(hp1).opcode = A_AND then
+                  Exit;
+              end;
+          end;
+{$endif x86_64}
+
         { search further than the next instruction for a mov (as long as it's not a jump) }
         { search further than the next instruction for a mov (as long as it's not a jump) }
         if not is_calljmpuncondret(taicpu(hp1).opcode) and
         if not is_calljmpuncondret(taicpu(hp1).opcode) and
           { check as much as possible before the expensive GetNextInstructionUsingRegCond call }
           { check as much as possible before the expensive GetNextInstructionUsingRegCond call }
@@ -4339,7 +4398,7 @@ unit aoptx86;
                         Exit;
                         Exit;
                       end;
                       end;
                   else
                   else
-                    { Move down to the MatchOpType if-block below };
+                    { Move down to the if-block below };
                 end;
                 end;
 
 
                 { Also catches MOV/S/Z instructions that aren't modified }
                 { Also catches MOV/S/Z instructions that aren't modified }