Browse Source

Refactoring procedures in aoptx86 to gain speed boosts

J. Gareth "Curious Kit" Moreton 3 years ago
parent
commit
d209d3ba84
1 changed file with 25 additions and 31 deletions
  1. 25 31
      compiler/x86/aoptx86.pas

+ 25 - 31
compiler/x86/aoptx86.pas

@@ -50,7 +50,7 @@ unit aoptx86;
         OptsToCheck : set of TOptsToCheck;
         OptsToCheck : set of TOptsToCheck;
         function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
         function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
         function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
         function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
-        function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
+        class function RegReadByInstruction(reg : TRegister; hp : tai) : boolean; static;
         function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
         function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
         function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
         function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
 
 
@@ -98,12 +98,12 @@ unit aoptx86;
         function GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
         function GetMMRegisterBetween(RegSize: TSubRegister; var AUsedRegs: TAllUsedRegs; p, hp: tai): TRegister;
 
 
         { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
         { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
-        function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
+        class function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean; static;
         { checks whether reading the value in reg1 depends on the value of reg2. This
         { checks whether reading the value in reg1 depends on the value of reg2. This
           is very similar to SuperRegisterEquals, except it takes into account that
           is very similar to SuperRegisterEquals, except it takes into account that
           R_SUBH and R_SUBL are independent (e.g. reading from AL does not
           R_SUBH and R_SUBL are independent (e.g. reading from AL does not
           depend on the value in AH). }
           depend on the value in AH). }
-        function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
+        class function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean; static;
 
 
         { Replaces all references to AOldReg in a memory reference to ANewReg }
         { Replaces all references to AOldReg in a memory reference to ANewReg }
         class function ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean; static;
         class function ReplaceRegisterInRef(var ref: TReference; const AOldReg, ANewReg: TRegister): Boolean; static;
@@ -353,13 +353,12 @@ unit aoptx86;
     function RefsEqual(const r1, r2: treference): boolean;
     function RefsEqual(const r1, r2: treference): boolean;
       begin
       begin
         RefsEqual :=
         RefsEqual :=
-          (r1.offset = r2.offset) and
-          (r1.segment = r2.segment) and (r1.base = r2.base) and
-          (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
           (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
           (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
           (r1.relsymbol = r2.relsymbol) and
           (r1.relsymbol = r2.relsymbol) and
-          (r1.volatility=[]) and
-          (r2.volatility=[]);
+          (r1.segment = r2.segment) and (r1.base = r2.base) and
+          (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
+          (r1.offset = r2.offset) and
+          (r1.volatility + r2.volatility = []);
       end;
       end;
 
 
 
 
@@ -476,7 +475,7 @@ unit aoptx86;
     end;
     end;
 
 
 
 
-  function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
+  class function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
     var
     var
       p: taicpu;
       p: taicpu;
       opcount: longint;
       opcount: longint;
@@ -1069,7 +1068,6 @@ unit aoptx86;
         Currentp: tai;
         Currentp: tai;
         Breakout: Boolean;
         Breakout: Boolean;
       begin
       begin
-        { TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
         Result := NR_NO;
         Result := NR_NO;
         RegSet :=
         RegSet :=
           paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption) +
           paramanager.get_volatile_registers_int(current_procinfo.procdef.proccalloption) +
@@ -1143,7 +1141,6 @@ unit aoptx86;
         Currentp: tai;
         Currentp: tai;
         Breakout: Boolean;
         Breakout: Boolean;
       begin
       begin
-        { TODO: Currently, only the volatile registers are checked - can this be extended to use any register the procedure has preserved? }
         Result := NR_NO;
         Result := NR_NO;
         RegSet :=
         RegSet :=
           paramanager.get_volatile_registers_mm(current_procinfo.procdef.proccalloption) +
           paramanager.get_volatile_registers_mm(current_procinfo.procdef.proccalloption) +
@@ -1198,7 +1195,7 @@ unit aoptx86;
       end;
       end;
 
 
 
 
-    function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
+    class function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
       begin
       begin
         if not SuperRegistersEqual(reg1,reg2) then
         if not SuperRegistersEqual(reg1,reg2) then
           exit(false);
           exit(false);
@@ -1235,7 +1232,7 @@ unit aoptx86;
       end;
       end;
 
 
 
 
-    function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
+    class function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
       begin
       begin
         if not SuperRegistersEqual(reg1,reg2) then
         if not SuperRegistersEqual(reg1,reg2) then
           exit(false);
           exit(false);
@@ -1426,7 +1423,7 @@ unit aoptx86;
 
 
     function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
     function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
       var
       var
-        p: taicpu absolute hp;
+        p: taicpu absolute hp; { Implicit typecast }
         i: Integer;
         i: Integer;
       begin
       begin
         Result := False;
         Result := False;
@@ -1434,7 +1431,6 @@ unit aoptx86;
            (hp.typ <> ait_instruction) then
            (hp.typ <> ait_instruction) then
          Exit;
          Exit;
 
 
-//        p := taicpu(hp);
         Prefetch(insprop[p.opcode]);
         Prefetch(insprop[p.opcode]);
         if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
         if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
           with insprop[p.opcode] do
           with insprop[p.opcode] do
@@ -1442,12 +1438,14 @@ unit aoptx86;
               case getsubreg(reg) of
               case getsubreg(reg) of
                 R_SUBW,R_SUBD,R_SUBQ:
                 R_SUBW,R_SUBD,R_SUBQ:
                   Result:=
                   Result:=
-                    RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
-                    RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
-                    RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
-                    RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
-                    RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
-                    RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
+                    { ZF, CF, OF, SF, PF and AF must all be set in some way (ordered so the most
+                      uncommon flags are checked first) }
+                    ([Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags] * Ch <> []) and
+                    ([Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch <> []) and
+                    ([Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch <> []) and
+                    ([Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch <> []) and
+                    ([Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch <> []) and
+                    ([Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch <> []);
                 R_SUBFLAGCARRY:
                 R_SUBFLAGCARRY:
                   Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
                   Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
                 R_SUBFLAGPARITY:
                 R_SUBFLAGPARITY:
@@ -1465,10 +1463,7 @@ unit aoptx86;
                 R_SUBFLAGDIRECTION:
                 R_SUBFLAGDIRECTION:
                   Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
                   Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
                 else
                 else
-                  begin
-                  writeln(getsubreg(reg));
                   internalerror(2017050501);
                   internalerror(2017050501);
-                  end;
               end;
               end;
               exit;
               exit;
             end;
             end;
@@ -1731,7 +1726,7 @@ unit aoptx86;
                             end;
                             end;
                         end;
                         end;
 
 
-                      { Don't do these ones first in case an input operand is equal to an explicit output registers }
+                      { Don't do these ones first in case an input operand is equal to an explicit output register }
                       case getsupreg(reg) of
                       case getsupreg(reg) of
                         RS_EAX:
                         RS_EAX:
                           if ([Ch_WEAX{$ifdef x86_64},Ch_WRAX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EAX, reg) then
                           if ([Ch_WEAX{$ifdef x86_64},Ch_WRAX{$endif x86_64}]*Ch<>[]) and Reg1WriteOverwritesReg2Entirely(NR_EAX, reg) then
@@ -5002,7 +4997,8 @@ unit aoptx86;
                       ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
                       ((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
                       ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
                       ((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
                       ((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
                       ((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
-                      (taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
+                      { Segment register of p.oper[0]^.ref will be NR_NO already }
+                      (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
 {$ifdef x86_64}
 {$ifdef x86_64}
                       and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
                       and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
                       and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
                       and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
@@ -5053,10 +5049,9 @@ unit aoptx86;
             if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
             if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
               (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
               (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
               (taicpu(p).oper[0]^.ref^.relsymbol = nil) and
               (taicpu(p).oper[0]^.ref^.relsymbol = nil) and
-              (taicpu(p).oper[0]^.ref^.segment = NR_NO) and
               (taicpu(p).oper[0]^.ref^.symbol = nil) and
               (taicpu(p).oper[0]^.ref^.symbol = nil) and
               (taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
               (taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
-              (taicpu(hp1).oper[0]^.ref^.segment = NR_NO) and
+              { Since we're merging two LEA instructions, the segment registers don't matter }
               (taicpu(hp1).oper[0]^.ref^.symbol = nil) and
               (taicpu(hp1).oper[0]^.ref^.symbol = nil) and
               (
               (
                 (taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
                 (taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
@@ -5855,7 +5850,7 @@ unit aoptx86;
                     if (TmpRef.base = NR_NO) and
                     if (TmpRef.base = NR_NO) and
                        (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                        (taicpu(hp1).oper[0]^.ref^.symbol=nil) and
                        (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
                        (taicpu(hp1).oper[0]^.ref^.relsymbol=nil) and
-                       (taicpu(hp1).oper[0]^.ref^.segment=NR_NO) and
+                       { Segment register isn't a concern here }
                        ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                        ((taicpu(hp1).oper[0]^.ref^.scalefactor=0) or
                        (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                        (taicpu(hp1).oper[0]^.ref^.scalefactor*tmpref.scalefactor<=8)) then
                       begin
                       begin
@@ -12200,7 +12195,6 @@ unit aoptx86;
            (taicpu(p).oper[0]^.ref^.offset=-24))  and
            (taicpu(p).oper[0]^.ref^.offset=-24))  and
           (taicpu(p).oper[0]^.ref^.symbol=nil) and
           (taicpu(p).oper[0]^.ref^.symbol=nil) and
           (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
           (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
-          (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
           (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
           (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
           GetNextInstruction(p, hp1) and
           GetNextInstruction(p, hp1) and
           { Take a copy of hp1 }
           { Take a copy of hp1 }
@@ -12217,7 +12211,7 @@ unit aoptx86;
           (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
           (taicpu(hp2).oper[0]^.ref^.index=NR_NO) and
           (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
           (taicpu(hp2).oper[0]^.ref^.symbol=nil) and
           (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
           (taicpu(hp2).oper[0]^.ref^.relsymbol=nil) and
-          (taicpu(hp2).oper[0]^.ref^.segment=NR_NO) and
+          { Segment register will be NR_NO }
           (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
           (taicpu(hp2).oper[1]^.reg=NR_STACK_POINTER_REG) and
           GetNextInstruction(hp2, hp3) and
           GetNextInstruction(hp2, hp3) and
           { trick to skip label }
           { trick to skip label }