Browse Source

* factored out TX86AsmOptimizer.PostPeepholeOptCall
+ use TX86AsmOptimizer.PostPeepholeOptCall on x86-64

git-svn-id: trunk@38278 -

florian 7 years ago
parent
commit
91514da267
3 changed files with 57 additions and 41 deletions
  1. 2 41
      compiler/i386/aoptcpu.pas
  2. 53 0
      compiler/x86/aoptx86.pas
  3. 2 0
      compiler/x86_64/aoptcpu.pas

+ 2 - 41
compiler/i386/aoptcpu.pas

@@ -1179,47 +1179,8 @@ begin
               end;
               end;
             case taicpu(p).opcode Of
             case taicpu(p).opcode Of
               A_CALL:
               A_CALL:
-                begin
-                  { don't do this on modern CPUs, this really hurts them due to
-                    broken call/ret pairing }
-                  if (current_settings.optimizecputype < cpu_Pentium2) and
-                     not(cs_create_pic in current_settings.moduleswitches) and
-                     GetNextInstruction(p, hp1) and
-                     (hp1.typ = ait_instruction) and
-                     (taicpu(hp1).opcode = A_JMP) and
-                     ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
-                    begin
-                      hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
-                      InsertLLItem(p.previous, p, hp2);
-                      taicpu(p).opcode := A_JMP;
-                      taicpu(p).is_jmp := true;
-                      asml.remove(hp1);
-                      hp1.free;
-                    end
-                  { replace
-                      call   procname
-                      ret
-                    by
-                      jmp    procname
-
-                    this should never hurt except when pic is used, not sure
-                    how to handle it then
-
-                    but do it only on level 4 because it destroys stack back traces
-                  }
-                  else if (cs_opt_level4 in current_settings.optimizerswitches) and
-                     not(cs_create_pic in current_settings.moduleswitches) and
-                     GetNextInstruction(p, hp1) and
-                     (hp1.typ = ait_instruction) and
-                     (taicpu(hp1).opcode = A_RET) and
-                     (taicpu(hp1).ops=0) then
-                    begin
-                      taicpu(p).opcode := A_JMP;
-                      taicpu(p).is_jmp := true;
-                      asml.remove(hp1);
-                      hp1.free;
-                    end;
-                end;
+                if PostPeepHoleOptCall(p) then
+                  Continue;
               A_CMP:
               A_CMP:
                 if PostPeepholeOptCmp(p) then
                 if PostPeepholeOptCmp(p) then
                   Continue;
                   Continue;

+ 53 - 0
compiler/x86/aoptx86.pas

@@ -82,6 +82,7 @@ unit aoptx86;
 {$endif}
 {$endif}
         function PostPeepholeOptCmp(var p : tai) : Boolean;
         function PostPeepholeOptCmp(var p : tai) : Boolean;
         function PostPeepholeOptTestOr(var p : tai) : Boolean;
         function PostPeepholeOptTestOr(var p : tai) : Boolean;
+        function PostPeepholeOptCall(var p : tai) : Boolean;
 
 
         procedure OptReferences;
         procedure OptReferences;
       end;
       end;
@@ -3075,6 +3076,58 @@ unit aoptx86;
           taicpu(p).loadoper(0,taicpu(p).oper[1]^);
           taicpu(p).loadoper(0,taicpu(p).oper[1]^);
       end;
       end;
 
 
+
+    { Post-peephole optimization applied to a CALL instruction at p.
+
+      Two rewrites are attempted, in this order:
+
+        1. (non-x86_64 targets only) "call X; jmp Y" becomes "push Y; jmp X".
+           Only done when optimizing for CPUs older than the Pentium II and
+           when no PIC code is generated.
+        2. "call X; ret" (ret without operands) becomes "jmp X".
+           Only done at optimization level 4 and when no PIC code is generated.
+
+      Returns true if one of the rewrites was applied, false otherwise. }
+    function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
+      var
+        hp1 : tai;
+{$ifndef x86_64}
+        { only needed by the push/jmp rewrite, which is not compiled on x86_64 }
+        hp2 : taicpu;
+{$endif x86_64}
+      begin
+        Result:=false;
+{$ifndef x86_64}
+        { don't do this on modern CPUs, this really hurts them due to
+          broken call/ret pairing }
+        if (current_settings.optimizecputype < cpu_Pentium2) and
+           not(cs_create_pic in current_settings.moduleswitches) and
+           GetNextInstruction(p, hp1) and
+           MatchInstruction(hp1,A_JMP,[S_NO]) and
+           MatchOpType(taicpu(hp1),top_ref) and
+           (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
+          begin
+            { push the jump target in front of the call, then turn the call
+              itself into a jump and drop the now redundant jmp }
+            hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
+            InsertLLItem(p.previous, p, hp2);
+            taicpu(p).opcode := A_JMP;
+            taicpu(p).is_jmp := true;
+            asml.remove(hp1);
+            hp1.free;
+            Result:=true;
+          end
+        else
+{$endif x86_64}
+        { replace
+            call   procname
+            ret
+          by
+            jmp    procname
+
+          this should never hurt except when pic is used, not sure
+          how to handle it then
+
+          but do it only on level 4 because it destroys stack back traces
+        }
+        if (cs_opt_level4 in current_settings.optimizerswitches) and
+          not(cs_create_pic in current_settings.moduleswitches) and
+          GetNextInstruction(p, hp1) and
+          MatchInstruction(hp1,A_RET,[S_NO]) and
+          (taicpu(hp1).ops=0) then
+          begin
+            taicpu(p).opcode := A_JMP;
+            taicpu(p).is_jmp := true;
+            asml.remove(hp1);
+            hp1.free;
+            Result:=true;
+          end;
+      end;
+
+
 {$ifdef x86_64}
 {$ifdef x86_64}
     function TX86AsmOptimizer.PostPeepholeOptMovzx(const p : tai) : Boolean;
     function TX86AsmOptimizer.PostPeepholeOptMovzx(const p : tai) : Boolean;
       var
       var

+ 2 - 0
compiler/x86_64/aoptcpu.pas

@@ -157,6 +157,8 @@ uses
                   Result:=PostPeepholeOptTestOr(p);
                   Result:=PostPeepholeOptTestOr(p);
                 A_XOR:
                 A_XOR:
                   Result:=PostPeepholeOptXor(p);
                   Result:=PostPeepholeOptXor(p);
+                A_CALL:
+                  Result:=PostPeepholeOptCall(p);
               end;
               end;
             end;
             end;
         end;
         end;