Răsfoiți Sursa

* enable Lea2AddBase and Lea2AddIndex in TX86AsmOptimizer.PostPeepholeOptLea as we have flag tracking now
* some flag allocations fixed

git-svn-id: trunk@38501 -

florian 7 ani în urmă
părinte
comite
9b18e39c81

+ 3 - 0
compiler/i386/aoptcpu.pas

@@ -1054,6 +1054,9 @@ begin
               A_CALL:
                 if PostPeepHoleOptCall(p) then
                   Continue;
+              A_LEA:
+                if PostPeepholeOptLea(p) then
+                  Continue;
               A_CMP:
                 if PostPeepholeOptCmp(p) then
                   Continue;

+ 31 - 30
compiler/x86/aoptx86.pas

@@ -83,6 +83,7 @@ unit aoptx86;
         function PostPeepholeOptCmp(var p : tai) : Boolean;
         function PostPeepholeOptTestOr(var p : tai) : Boolean;
         function PostPeepholeOptCall(var p : tai) : Boolean;
+        function PostPeepholeOptLea(const p : tai) : Boolean;
 
         procedure OptReferences;
       end;
@@ -668,7 +669,10 @@ unit aoptx86;
                 R_SUBFLAGDIRECTION:
                   Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
                 else
+                  begin
+                  writeln(getsubreg(reg));
                   internalerror(2017050501);
+                  end;
               end;
               exit;
             end;
@@ -1948,36 +1952,6 @@ unit aoptx86;
               end;
             ReleaseUsedRegs(TmpUsedRegs);
           end;
-
-(*
-        This is unsafe, lea doesn't modify the flags but "add"
-        does. This breaks webtbs/tw15694.pp. The above
-        transformations are also unsafe, but they don't seem to
-        be triggered by code that FPC generators (or that at
-        least does not occur in the tests...). This needs to be
-        fixed by checking for the liveness of the flags register.
-
-        else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
-          begin
-            hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
-              taicpu(p).oper[0]^.ref^.base);
-            InsertLLItem(asml,p.previous,p.next, hp1);
-            DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',hp1);
-            p.free;
-            p:=hp1;
-            continue;
-          end
-        else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
-          begin
-            hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
-              taicpu(p).oper[0]^.ref^.index);
-            InsertLLItem(asml,p.previous,p.next,hp1);
-            DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',hp1);
-            p.free;
-            p:=hp1;
-            continue;
-          end
-*)
       end;
 
 
@@ -2988,6 +2962,33 @@ unit aoptx86;
       end;
 
 
+    function TX86AsmOptimizer.PostPeepholeOptLea(const p : tai) : Boolean; { Rewrites the LEA in p, in place, into an ADD when one of the two patterns below matches; returns true only if p was rewritten. }
+      begin
+        Result:=false; { default: p left untouched }
+        if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and { ADD modifies the flags while LEA does not, so only proceed when the flags register is not in UsedRegs }
+          MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and { reference base must equal the destination register (operand 1); NR_INVALID presumably means "any index" - TODO confirm in MatchReference }
+          (taicpu(p).oper[0]^.ref^.index<>NR_NO) then { an index register must actually be present }
+          begin
+            taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base); { operand 1 := base of the reference; operand 0 is loaded last because both calls read from its reference }
+            taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index); { operand 0 := index of the reference }
+            taicpu(p).opcode:=A_ADD; { same instruction object, opcode switched to ADD }
+            DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
+            result:=true;
+          end
+
+        else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and { same flags-register guard as in the first pattern }
+          MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and { mirror case: reference index must equal the destination register; base presumably unconstrained - TODO confirm in MatchReference }
+          (taicpu(p).oper[0]^.ref^.base<>NR_NO) then { a base register must actually be present }
+          begin
+            taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index); { operand 1 := index of the reference; again read before operand 0 is loaded }
+            taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base); { operand 0 := base of the reference }
+            taicpu(p).opcode:=A_ADD; { same instruction object, opcode switched to ADD }
+            DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
+            result:=true;
+          end;
+      end;
+
+
     function TX86AsmOptimizer.PostPeepholeOptMov(const p : tai) : Boolean;
       var
         Value, RegName: string;

+ 2 - 0
compiler/x86/cgx86.pas

@@ -2529,11 +2529,13 @@ unit cgx86;
             exit;
           end;
 {$endif x86_64}
+        cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
         if (a = 0) then
           list.concat(taicpu.op_reg_reg(A_TEST,tcgsize2opsize[size],reg,reg))
         else
           list.concat(taicpu.op_const_reg(A_CMP,tcgsize2opsize[size],a,reg));
         a_jmp_cond(list,cmp_op,l);
+        cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
       end;
 
 

+ 1 - 0
compiler/x86/nx86add.pas

@@ -138,6 +138,7 @@ unit nx86add;
                       spilling, while 'test %reg,%reg' still requires loading into register.
                       If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                       peephole optimizer (this optimization is currently available only for i386). }
+                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
 {$ifdef i386}
                    emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
 {$else i386}

+ 2 - 0
compiler/x86_64/aoptcpu.pas

@@ -161,6 +161,8 @@ uses
                   Result:=PostPeepholeOptXor(p);
                 A_CALL:
                   Result:=PostPeepholeOptCall(p);
+                A_LEA:
+                  Result:=PostPeepholeOptLea(p);
               end;
             end;
         end;