Browse Source

* patch by J. Gareth Moreton: x86 MOVZX/CMP optimisation, resolves #38882

git-svn-id: trunk@49366 -
florian 4 years ago
parent
commit
884d24e321
1 changed files with 126 additions and 12 deletions
  1. 126 12
      compiler/x86/aoptx86.pas

+ 126 - 12
compiler/x86/aoptx86.pas

@@ -5156,22 +5156,19 @@ unit aoptx86;
         InstrMax, Index: Integer;
         UpperLimit, TrySmallerLimit: TCgInt;
 
+        PreMessage: string;
+
         { Data flow analysis }
         TestValMin, TestValMax: TCgInt;
-        SmallerOverflow: Boolean;
+        SmallerOverflow, FirstCheck: Boolean;
 
       begin
         Result := False;
         p_removed := False;
 
-        { This is anything but quick! }
-        if not(cs_opt_level2 in current_settings.optimizerswitches) then
-          Exit;
-
         SetLength(InstrList, 0);
         InstrMax := -1;
         ThisReg := taicpu(p).oper[1]^.reg;
-        hp1 := p;
 
         case taicpu(p).opsize of
           S_BW, S_BL:
@@ -5193,6 +5190,33 @@ unit aoptx86;
             InternalError(2020112301);
         end;
 
+        { With MinSize and MaxSize set, we can check some other optimisations
+          first, before attempting the expensive data flow analysis }
+        FirstCheck := GetNextInstructionUsingReg(p, hp1, ThisReg) and
+          (hp1.typ = ait_instruction) and
+          (
+            { Under -O1 and -O2, GetNextInstructionUsingReg may return an
+              instruction that doesn't actually contain ThisReg }
+            (cs_opt_level3 in current_settings.optimizerswitches) or
+            RegInInstruction(ThisReg, hp1)
+          );
+
+        { Data-flow analysis won't get anywhere since there was no instruction match }
+        if not FirstCheck then
+          Exit;
+
+        { Check for:
+            movz ##,%reg
+            cmp  $y,%reg
+            (%reg deallocated)
+
+          Change register size so movz becomes mov (see the A_CMP case below)
+        }
+
+        { This is anything but quick! }
+        if not(cs_opt_level2 in current_settings.optimizerswitches) then
+          Exit;
+
         TestValMin := 0;
         TestValMax := UpperLimit;
         TrySmallerLimit := UpperLimit;
@@ -5200,15 +5224,20 @@ unit aoptx86;
         SmallerOverflow := False;
         RegChanged := False;
 
-        while GetNextInstructionUsingReg(hp1, hp1, ThisReg) and
-          (hp1.typ = ait_instruction) and
+        while FirstCheck { No need to waste checking for the next instruction again } or
           (
-            { Under -O1 and -O2, GetNextInstructionUsingReg may return an
-              instruction that doesn't actually contain ThisReg }
-            (cs_opt_level3 in current_settings.optimizerswitches) or
-            RegInInstruction(ThisReg, hp1)
+            GetNextInstructionUsingReg(hp1, hp1, ThisReg) and
+            (hp1.typ = ait_instruction) and
+            (
+              { Under -O1 and -O2, GetNextInstructionUsingReg may return an
+                instruction that doesn't actually contain ThisReg }
+              (cs_opt_level3 in current_settings.optimizerswitches) or
+              RegInInstruction(ThisReg, hp1)
+            )
           ) do
           begin
+            FirstCheck := False;
+
             case taicpu(hp1).opcode of
               A_INC,A_DEC:
                 begin
@@ -5228,6 +5257,91 @@ unit aoptx86;
                     end;
                 end;
 
+              A_CMP:
+                begin
+                  { Smallest signed value for MinSize }
+                  TrySmallerLimit := not (UpperLimit shr 1);
+
+                  if (taicpu(hp1).oper[1]^.typ <> top_reg) or
+                    { Has to be an exact match on the register }
+                    (taicpu(hp1).oper[1]^.reg <> ThisReg) or
+                    (taicpu(hp1).oper[0]^.typ <> top_const) or
+                    { Make sure the comparison value is not smaller than the
+                      smallest allowed signed value for the minimum size (e.g.
+                      -128 for 8-bit) }
+                    (taicpu(hp1).oper[0]^.val < TrySmallerLimit) then
+                    Break;
+
+                  TestValMin := TestValMin - taicpu(hp1).oper[0]^.val;
+                  TestValMax := TestValMax - taicpu(hp1).oper[0]^.val;
+
+                  if (TestValMin < TrySmallerLimit) or (TestValMax < TrySmallerLimit) or
+                    (TestValMin > UpperLimit) or (TestValMax > UpperLimit) then
+                    { Overflow }
+                    Break;
+
+                  { Check to see if the active register is used afterwards }
+                  TransferUsedRegs(TmpUsedRegs);
+                  IncludeRegInUsedRegs(ThisReg, TmpUsedRegs);
+                  if not RegUsedAfterInstruction(ThisReg, hp1, TmpUsedRegs) then
+                    begin
+                      case MinSize of
+                        S_B:
+                          TargetSubReg := R_SUBL;
+                        S_W:
+                          TargetSubReg := R_SUBW;
+                        else
+                          InternalError(2021051002);
+                      end;
+
+                      { Update the register to its new size }
+                      ThisReg := newreg(R_INTREGISTER, getsupreg(ThisReg), TargetSubReg);
+
+                      taicpu(hp1).oper[1]^.reg := ThisReg;
+                      taicpu(hp1).opsize := MinSize;
+
+                      { Convert the input MOVZX to a MOV }
+                      if (taicpu(p).oper[0]^.typ = top_reg) and
+                        SuperRegistersEqual(taicpu(p).oper[0]^.reg, ThisReg) then
+                        begin
+                          { Or remove it completely! }
+                          DebugMsg(SPeepholeOptimization + 'Movzx2Nop 1a', p);
+                          RemoveCurrentP(p);
+                          p_removed := True;
+                        end
+                      else
+                        begin
+                          DebugMsg(SPeepholeOptimization + 'Movzx2Mov 1a', p);
+                          taicpu(p).opcode := A_MOV;
+                          taicpu(p).oper[1]^.reg := ThisReg;
+                          taicpu(p).opsize := MinSize;
+                        end;
+
+                      if (InstrMax >= 0) then
+                        begin
+                          for Index := 0 to InstrMax do
+                            begin
+
+                              { The last operand of each InstrList entry must be a register.  Note
+                                that if p_removed is true, the original MOV/Z was removed, and removing
+                                an AND instruction may not be safe if it appears first }
+                              if (InstrList[Index].oper[InstrList[Index].ops - 1]^.typ <> top_reg) then
+                                InternalError(2020112311);
+
+                              if InstrList[Index].oper[0]^.typ = top_reg then
+                                InstrList[Index].oper[0]^.reg := ThisReg;
+
+                              InstrList[Index].oper[InstrList[Index].ops - 1]^.reg := ThisReg;
+                              InstrList[Index].opsize := MinSize;
+                            end;
+
+                        end;
+
+                      Result := True;
+                      Exit;
+                    end;
+                end;
+
               { OR and XOR are not included because they can too easily fool
                 the data flow analysis (they can cause non-linear behaviour) }
               A_ADD,A_SUB,A_AND,A_SHL,A_SHR: