Browse Source

+ implemented x86-64 mov optimization to test register usage tracking framework

git-svn-id: trunk@20892 -
florian 13 years ago
parent
commit
fc673340fe
1 changed files with 91 additions and 25 deletions
  1. 91 25
      compiler/x86_64/aoptcpu.pas

+ 91 - 25
compiler/x86_64/aoptcpu.pas

@@ -41,6 +41,7 @@ uses
   cutils,
   verbose,
   cgbase, cgutils,
+  aoptobj,
   aasmbase, aasmdata, aasmcpu;
 
 function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
@@ -49,11 +50,11 @@ begin
   case hp1.opcode of
     A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
       isFoldableArithOp :=
+        (taicpu(hp1).oper[1]^.typ = top_reg) and
+        (taicpu(hp1).oper[1]^.reg = reg) and
         ((taicpu(hp1).oper[0]^.typ = top_const) or
         ((taicpu(hp1).oper[0]^.typ = top_reg) and
-        (taicpu(hp1).oper[0]^.reg<>reg))) and
-        (taicpu(hp1).oper[1]^.typ = top_reg) and
-        (taicpu(hp1).oper[1]^.reg = reg);
+        (taicpu(hp1).oper[0]^.reg<>reg)));
     A_INC, A_DEC:
       isFoldableArithOp :=
         (taicpu(hp1).oper[0]^.typ = top_reg) and
@@ -65,6 +66,8 @@ function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
 var
   next1: tai;
   hp1, hp2: tai;
+  GetNextIntruction_p : boolean;
+  TmpUsedRegs : TAllUsedRegs;
 begin
   Result := False;
   case p.typ of
@@ -92,7 +95,7 @@ begin
                 taicpu(hp1).oper[0]^.val);
               asml.remove(p);
               p.Free;
-              p := hp1;
+              p:=hp1;
             end;
 (*                      else
   {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
@@ -108,8 +111,11 @@ begin
         A_MOV:
         { removes superfluous And's after mov's }
           begin
+            if not(cs_opt_level3 in current_settings.optimizerswitches) then
+              exit;
+            GetNextIntruction_p:=GetNextInstruction(p, hp1);
             if (taicpu(p).oper[1]^.typ = top_reg) and
-               GetNextInstruction(p, hp1) and
+               GetNextIntruction_p and
                (tai(hp1).typ = ait_instruction) and
                (taicpu(hp1).opcode = A_AND) and
                (taicpu(hp1).oper[0]^.typ = top_const) and
@@ -122,7 +128,65 @@ begin
                       asml.remove(hp1);
                       hp1.free;
                     end;
-              end;
+              end
+            else if (taicpu(p).oper[1]^.typ = top_reg) and
+              GetNextIntruction_p and
+              (hp1.typ = ait_instruction) and
+              GetNextInstruction(hp1, hp2) and
+              (hp2.typ = ait_instruction) and
+              (taicpu(hp2).opcode = A_MOV) and
+              (taicpu(hp2).oper[0]^.typ = top_reg) and
+              OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
+              (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
+               ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
+                IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
+              ) then
+              { change   movsX/movzX    reg/ref, reg2             }
+              {          add/sub/or/... reg3/$const, reg2         }
+              {          mov            reg2, reg/ref             }
+              { to       add/sub/or/... reg3/$const, reg/ref      }
+              begin
+                CopyUsedRegs(TmpUsedRegs);
+                UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
+                If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
+                  begin
+                    { by example:
+                        movswl  %si,%eax        movswl  %si,%eax      p
+                        decl    %eax            addl    %edx,%eax     hp1
+                        movw    %ax,%si         movw    %ax,%si       hp2
+                      ->
+                        movswl  %si,%eax        movswl  %si,%eax      p
+                        decw    %eax            addw    %edx,%eax     hp1
+                        movw    %ax,%si         movw    %ax,%si       hp2
+                    }
+                    taicpu(hp1).changeopsize(taicpu(hp2).opsize);
+                    {
+                      ->
+                        movswl  %si,%eax        movswl  %si,%eax      p
+                        decw    %si             addw    %dx,%si       hp1
+                        movw    %ax,%si         movw    %ax,%si       hp2
+                    }
+                    case taicpu(hp1).ops of
+                      1:
+                        taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
+                      2:
+                        taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
+                      else
+                        internalerror(2008042701);
+                    end;
+                    {
+                      ->
+                        decw    %si             addw    %dx,%si       p
+                    }
+                    asml.remove(p);
+                    asml.remove(hp2);
+                    p.Free;
+                    hp2.Free;
+                    p := hp1;
+                 end;
+                ReleaseUsedRegs(TmpUsedRegs);
+              end
           end;
         A_MOVSX,
         A_MOVZX:
@@ -190,26 +254,28 @@ begin
               (taicpu(hp1).oper[0]^.typ = top_const) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
-              case taicpu(p).opsize of
-                S_BL, S_BW, S_BQ:
-                  if (taicpu(hp1).oper[0]^.val = $ff) then
-                    begin
-                      asml.remove(hp1);
-                      hp1.Free;
-                    end;
-                S_WL, S_WQ:
-                  if (taicpu(hp1).oper[0]^.val = $ffff) then
-                    begin
-                      asml.remove(hp1);
-                      hp1.Free;
-                    end;
-                S_LQ:
-                  if (taicpu(hp1).oper[0]^.val = $ffffffff) then
-                    begin
-                      asml.remove(hp1);
-                      hp1.Free;
+                begin
+                  case taicpu(p).opsize of
+                    S_BL, S_BW, S_BQ:
+                      if (taicpu(hp1).oper[0]^.val = $ff) then
+                        begin
+                          asml.remove(hp1);
+                          hp1.Free;
+                        end;
+                    S_WL, S_WQ:
+                      if (taicpu(hp1).oper[0]^.val = $ffff) then
+                        begin
+                          asml.remove(hp1);
+                          hp1.Free;
+                        end;
+                    S_LQ:
+                      if (taicpu(hp1).oper[0]^.val = $ffffffff) then
+                        begin
+                          asml.remove(hp1);
+                          hp1.Free;
+                        end;
                     end;
-                end;
+               end;
             { changes some movzx constructs to faster synonyms (all examples
               are given with eax/ax, but are also valid for other registers)}
             if (taicpu(p).oper[1]^.typ = top_reg) then