Kaynağa Gözat

+ factored out TX86AsmOptimizer.PrePeepholeOptIMUL, used now by x86-64 and i386
* generalized and simplified the code

git-svn-id: trunk@40162 -

florian 6 yıl önce
ebeveyn
işleme
59d5d6ec95

+ 7 - 0
compiler/aoptutils.pas

@@ -31,6 +31,7 @@ unit aoptutils;
 
     function MatchOpType(const p : taicpu;type0: toptype) : Boolean;
     function MatchOpType(const p : taicpu;type0,type1 : toptype) : Boolean;
+    function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean;
 
     { skips all labels and returns the next "real" instruction }
     function SkipLabels(hp: tai; var hp2: tai): boolean;
@@ -49,6 +50,12 @@ unit aoptutils;
       end;
 
 
+    function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean;
+      begin
+        Result:=(p.ops=3) and (p.oper[0]^.typ=type0) and (p.oper[1]^.typ=type1) and (p.oper[2]^.typ=type1);
+      end;
+
+
     { skips all labels and returns the next "real" instruction }
     function SkipLabels(hp: tai; var hp2: tai): boolean;
       begin

+ 2 - 199
compiler/i386/aoptcpu.pas

@@ -169,205 +169,8 @@ begin
               end;
             case taicpu(p).opcode Of
               A_IMUL:
-                {changes certain "imul const, %reg"'s to lea sequences}
-                begin
-                  if (taicpu(p).oper[0]^.typ = Top_Const) and
-                     (taicpu(p).oper[1]^.typ = Top_Reg) and
-                     (taicpu(p).opsize = S_L) then
-                    if (taicpu(p).oper[0]^.val = 1) then
-                      if (taicpu(p).ops = 2) then
-                       {remove "imul $1, reg"}
-                        begin
-                          hp1 := tai(p.Next);
-                          asml.remove(p);
-                          p.free;
-                          p := hp1;
-                          continue;
-                        end
-                      else
-                       {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
-                        begin
-                          hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
-                          InsertLLItem(p.previous, p.next, hp1);
-                          p.free;
-                          p := hp1;
-                        end
-                    else if
-                     ((taicpu(p).ops <= 2) or
-                      (taicpu(p).oper[2]^.typ = Top_Reg)) and
-                     (taicpu(p).oper[0]^.val <= 12) and
-                     not(cs_opt_size in current_settings.optimizerswitches) and
-                     (not(GetNextInstruction(p, hp1)) or
-                       {GetNextInstruction(p, hp1) and}
-                       not((tai(hp1).typ = ait_instruction) and
-                           ((taicpu(hp1).opcode=A_Jcc) and
-                            (taicpu(hp1).condition in [C_O,C_NO])))) then
-                      begin
-                        reference_reset(tmpref,1,[]);
-                        case taicpu(p).oper[0]^.val Of
-                          3: begin
-                             {imul 3, reg1, reg2 to
-                                lea (reg1,reg1,2), reg2
-                              imul 3, reg1 to
-                                lea (reg1,reg1,2), reg1}
-                               TmpRef.base := taicpu(p).oper[1]^.reg;
-                               TmpRef.index := taicpu(p).oper[1]^.reg;
-                               TmpRef.ScaleFactor := 2;
-                               if (taicpu(p).ops = 2) then
-                                 hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
-                               else
-                                 hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                               InsertLLItem(p.previous, p.next, hp1);
-                               p.free;
-                               p := hp1;
-                            end;
-                         5: begin
-                            {imul 5, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                             imul 5, reg1 to
-                               lea (reg1,reg1,4), reg1}
-                              TmpRef.base := taicpu(p).oper[1]^.reg;
-                              TmpRef.index := taicpu(p).oper[1]^.reg;
-                              TmpRef.ScaleFactor := 4;
-                              if (taicpu(p).ops = 2) then
-                                hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
-                              else
-                                hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                              InsertLLItem(p.previous, p.next, hp1);
-                              p.free;
-                              p := hp1;
-                            end;
-                         6: begin
-                            {imul 6, reg1, reg2 to
-                               lea (,reg1,2), reg2
-                               lea (reg2,reg1,4), reg2
-                             imul 6, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               add reg1, reg1}
-                              if (current_settings.optimizecputype <= cpu_386) then
-                                begin
-                                  TmpRef.index := taicpu(p).oper[1]^.reg;
-                                  if (taicpu(p).ops = 3) then
-                                    begin
-                                      TmpRef.base := taicpu(p).oper[2]^.reg;
-                                      TmpRef.ScaleFactor := 4;
-                                      hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                    end
-                                  else
-                                    begin
-                                      hp1 :=  taicpu.op_reg_reg(A_ADD, S_L,
-                                        taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
-                                    end;
-                                  InsertLLItem(p, p.next, hp1);
-                                  reference_reset(tmpref,2,[]);
-                                  TmpRef.index := taicpu(p).oper[1]^.reg;
-                                  TmpRef.ScaleFactor := 2;
-                                  if (taicpu(p).ops = 3) then
-                                    begin
-                                      TmpRef.base := NR_NO;
-                                      hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
-                                        taicpu(p).oper[2]^.reg);
-                                    end
-                                  else
-                                    begin
-                                      TmpRef.base := taicpu(p).oper[1]^.reg;
-                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                    end;
-                                  InsertLLItem(p.previous, p.next, hp1);
-                                  p.free;
-                                  p := tai(hp1.next);
-                                end
-                            end;
-                          9: begin
-                             {imul 9, reg1, reg2 to
-                                lea (reg1,reg1,8), reg2
-                              imul 9, reg1 to
-                                lea (reg1,reg1,8), reg1}
-                               TmpRef.base := taicpu(p).oper[1]^.reg;
-                               TmpRef.index := taicpu(p).oper[1]^.reg;
-                               TmpRef.ScaleFactor := 8;
-                               if (taicpu(p).ops = 2) then
-                                 hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
-                               else
-                                 hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                               InsertLLItem(p.previous, p.next, hp1);
-                               p.free;
-                               p := hp1;
-                             end;
-                         10: begin
-                            {imul 10, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                               add reg2, reg2
-                             imul 10, reg1 to
-                               lea (reg1,reg1,4), reg1
-                               add reg1, reg1}
-                               if (current_settings.optimizecputype <= cpu_386) then
-                                 begin
-                                   if (taicpu(p).ops = 3) then
-                                     hp1 :=  taicpu.op_reg_reg(A_ADD, S_L,
-                                       taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
-                                   else
-                                     hp1 := taicpu.op_reg_reg(A_ADD, S_L,
-                                       taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
-                                   InsertLLItem(p, p.next, hp1);
-                                   TmpRef.base := taicpu(p).oper[1]^.reg;
-                                   TmpRef.index := taicpu(p).oper[1]^.reg;
-                                   TmpRef.ScaleFactor := 4;
-                                   if (taicpu(p).ops = 3) then
-                                      hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
-                                    else
-                                      hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                   InsertLLItem(p.previous, p.next, hp1);
-                                   p.free;
-                                   p := tai(hp1.next);
-                                 end
-                             end;
-                         12: begin
-                            {imul 12, reg1, reg2 to
-                               lea (,reg1,4), reg2
-                               lea (reg2,reg1,8), reg2
-                             imul 12, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               lea (,reg1,4), reg1}
-                               if (current_settings.optimizecputype <= cpu_386)
-                                 then
-                                   begin
-                                     TmpRef.index := taicpu(p).oper[1]^.reg;
-                                     if (taicpu(p).ops = 3) then
-                                       begin
-                                         TmpRef.base := taicpu(p).oper[2]^.reg;
-                                         TmpRef.ScaleFactor := 8;
-                                         hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                                       end
-                                     else
-                                       begin
-                                         TmpRef.base := NR_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                       end;
-                                     InsertLLItem(p, p.next, hp1);
-                                     reference_reset(tmpref,2,[]);
-                                     TmpRef.index := taicpu(p).oper[1]^.reg;
-                                     if (taicpu(p).ops = 3) then
-                                       begin
-                                         TmpRef.base := NR_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                                       end
-                                     else
-                                       begin
-                                         TmpRef.base := taicpu(p).oper[1]^.reg;
-                                         TmpRef.ScaleFactor := 2;
-                                         hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                       end;
-                                     InsertLLItem(p.previous, p.next, hp1);
-                                     p.free;
-                                     p := tai(hp1.next);
-                                   end
-                             end
-                        end;
-                      end;
-                end;
+                if PrePeepholeOptIMUL(p) then
+                  Continue;
               A_SAR,A_SHR:
                 if PrePeepholeOptSxx(p) then
                   continue;

+ 86 - 0
compiler/x86/aoptx86.pas

@@ -57,6 +57,7 @@ unit aoptx86;
         function DoSubAddOpt(var p : tai) : Boolean;
 
         function PrePeepholeOptSxx(var p : tai) : boolean;
+        function PrePeepholeOptIMUL(var p : tai) : boolean;
 
         function OptPass1AND(var p : tai) : boolean;
         function OptPass1VMOVAP(var p : tai) : boolean;
@@ -718,6 +719,91 @@ unit aoptx86;
       end;
 
 
+    function TX86AsmOptimizer.PrePeepholeOptIMUL(var p : tai) : boolean;
+      var
+        opsize : topsize;
+        hp1 : tai;
+        tmpref : treference;
+        hp2 : taicpu;
+        ShiftValue : Cardinal;
+        BaseValue : TCGInt;
+      begin
+        result:=false;
+        opsize:=taicpu(p).opsize;
+        { changes certain "imul const, %reg"'s to lea sequences }
+        if (MatchOpType(taicpu(p),top_const,top_reg) or
+            MatchOpType(taicpu(p),top_const,top_reg,top_reg)) and
+           (opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) then
+          if (taicpu(p).oper[0]^.val = 1) then
+            if (taicpu(p).ops = 2) then
+             { remove "imul $1, reg" }
+              begin
+                hp1 := tai(p.Next);
+                asml.remove(p);
+                DebugMsg(SPeepholeOptimization + 'Imul2Nop done',p);
+                p.free;
+                p := hp1;
+                result:=true;
+              end
+            else
+             { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
+              begin
+                hp1 := taicpu.Op_Reg_Reg(A_MOV, opsize, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
+                InsertLLItem(p.previous, p.next, hp1);
+                DebugMsg(SPeepholeOptimization + 'Imul2Mov done',p);
+                p.free;
+                p := hp1;
+              end
+          else if
+           ((taicpu(p).ops <= 2) or
+            (taicpu(p).oper[2]^.typ = Top_Reg)) and
+           not(cs_opt_size in current_settings.optimizerswitches) and
+           (not(GetNextInstruction(p, hp1)) or
+             not((tai(hp1).typ = ait_instruction) and
+                 ((taicpu(hp1).opcode=A_Jcc) and
+                  (taicpu(hp1).condition in [C_O,C_NO])))) then
+            begin
+              {
+                imul X, reg1, reg2 to
+                  lea (reg1,reg1,Y), reg2
+                  shl ZZ,reg2
+                imul XX, reg1 to
+                  lea (reg1,reg1,YY), reg1
+                  shl ZZ,reg2
+
+                This optimziation makes sense for pretty much every x86, except the VIA Nano3000: it has IMUL latency 2, lea/shl pair as well,
+                it does not exist as a separate optimization target in FPC though.
+
+                This optimziation can be applied as long as only two bits are set in the constant and those two bits are separated by
+                at most two zeros
+              }
+              reference_reset(tmpref,1,[]);
+              if (PopCnt(QWord(taicpu(p).oper[0]^.val))=2) and (BsrQWord(taicpu(p).oper[0]^.val)-BsfQWord(taicpu(p).oper[0]^.val)<=3) then
+                begin
+                  ShiftValue:=BsfQWord(taicpu(p).oper[0]^.val);
+                  BaseValue:=taicpu(p).oper[0]^.val shr ShiftValue;
+                  TmpRef.base := taicpu(p).oper[1]^.reg;
+                  TmpRef.index := taicpu(p).oper[1]^.reg;
+                  if not(BaseValue in [3,5,9]) then
+                    Internalerror(2018110101);
+                  TmpRef.ScaleFactor := BaseValue-1;
+                  if (taicpu(p).ops = 2) then
+                    hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[1]^.reg)
+                  else
+                    hp1 := taicpu.op_ref_reg(A_LEA, opsize, TmpRef, taicpu(p).oper[2]^.reg);
+                  AsmL.InsertAfter(hp1,p);
+                  DebugMsg(SPeepholeOptimization + 'Imul2LeaShl done',p);
+                  AsmL.Remove(p);
+                  taicpu(hp1).fileinfo:=taicpu(p).fileinfo;
+                  p.free;
+                  p := hp1;
+                  if ShiftValue>0 then
+                    AsmL.InsertAfter(taicpu.op_const_reg(A_SHL, opsize, ShiftValue, taicpu(hp1).oper[1]^.reg),hp1);
+              end;
+            end;
+      end;
+
+
     function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
       var
         p: taicpu;

+ 2 - 0
compiler/x86_64/aoptcpu.pas

@@ -51,6 +51,8 @@ uses
           ait_instruction:
             begin
               case taicpu(p).opcode of
+                A_IMUL:
+                  result:=PrePeepholeOptIMUL(p);
                 A_SAR,A_SHR:
                   result:=PrePeepholeOptSxx(p);
               end;