Browse Source

* adapt optimizer to handle push/pop for stack alignment
* adapt test

git-svn-id: trunk@45477 -

florian 5 years ago
parent
commit
809fb83334
3 changed files with 77 additions and 19 deletions
  1. 74 18
      compiler/x86/aoptx86.pas
  2. 2 0
      compiler/x86_64/aoptcpu.pas
  3. 1 1
      tests/test/tasm23b.pp

+ 74 - 18
compiler/x86/aoptx86.pas

@@ -73,6 +73,8 @@ unit aoptx86;
         }
         function GetNextInstructionUsingRegTrackingUse(Current: tai; out Next: tai; reg: TRegister): Boolean;
         function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
+      private
+        function SkipSimpleInstructions(var hp1: tai): Boolean;
       protected
         class function IsMOVZXAcceptable: Boolean; static; inline;
 
@@ -149,6 +151,7 @@ unit aoptx86;
         function PostPeepholeOptTestOr(var p : tai) : Boolean;
         function PostPeepholeOptCall(var p : tai) : Boolean;
         function PostPeepholeOptLea(var p : tai) : Boolean;
+        function PostPeepholeOptPush(var p: tai): Boolean;
 
         procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
 
@@ -5739,25 +5742,26 @@ unit aoptx86;
       end;
 
 
-    function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
+    { Advances hp1 (in place, it is a var parameter) with GetNextInstruction
+      for as long as hp1 is assigned and RegInInstruction reports that it does
+      not involve NR_STACK_POINTER_REG.
+
+      On return hp1 is either nil or the first entry for which
+      RegInInstruction(NR_STACK_POINTER_REG,hp1) is true; the result is true
+      exactly when hp1 is still assigned.
+
+      Only the stack pointer test is active: the opcode white list, the
+      operand type checks and the frame pointer test inside the condition are
+      commented out and kept for reference only. }
+    function TX86AsmOptimizer.SkipSimpleInstructions(var hp1 : tai) : Boolean;
+      begin
+        { we can skip all instructions not messing with the stack pointer }
+        while assigned(hp1) and {MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
+          A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
+          A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
+          A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
+          ({(taicpu(hp1).ops=0) or }
+           ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
+             (MatchOpType(taicpu(hp1),top_ref,top_reg))
+            ) and }
+            not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
+           )
+          ) do
+          GetNextInstruction(hp1,hp1);
+        { nil means the end of the list was reached without finding a stack pointer user }
+        Result:=assigned(hp1);
+      end;
 
-      function SkipSimpleInstructions(var hp1 : tai) : Boolean;
-        begin
-          { we can skip all instructions not messing with the stack pointer }
-          while assigned(hp1) and {MatchInstruction(taicpu(hp1),[A_LEA,A_MOV,A_MOVQ,A_MOVSQ,A_MOVSX,A_MOVSXD,A_MOVZX,
-            A_AND,A_OR,A_XOR,A_ADD,A_SHR,A_SHL,A_IMUL,A_SETcc,A_SAR,A_SUB,A_TEST,A_CMOVcc,
-            A_MOVSS,A_MOVSD,A_MOVAPS,A_MOVUPD,A_MOVAPD,A_MOVUPS,
-            A_VMOVSS,A_VMOVSD,A_VMOVAPS,A_VMOVUPD,A_VMOVAPD,A_VMOVUPS],[]) and}
-            ({(taicpu(hp1).ops=0) or }
-             ({(MatchOpType(taicpu(hp1),top_reg,top_reg) or MatchOpType(taicpu(hp1),top_const,top_reg) or
-               (MatchOpType(taicpu(hp1),top_ref,top_reg))
-              ) and }
-              not(RegInInstruction(NR_STACK_POINTER_REG,hp1)) { and not(RegInInstruction(NR_FRAME_POINTER_REG,hp1))}
-             )
-            ) do
-            GetNextInstruction(hp1,hp1);
-          Result:=assigned(hp1);
-        end;
+
+    function TX86AsmOptimizer.PostPeepholeOptLea(var p : tai) : Boolean;
 
       var
         hp1, hp2, hp3, hp4: tai;
@@ -5821,6 +5825,58 @@ unit aoptx86;
       end;
 
 
+    { Post peephole optimization for a push instruction.
+
+      p is expected to be an A_PUSH instruction (the x86_64 optimizer
+      dispatches here for A_PUSH only). If p starts the stack alignment
+      pattern described below, the call is turned into a jump, the
+      push/pop/ret are removed from the list, p is updated and true is
+      returned; otherwise nothing is changed and the result is false.
+
+      The pattern uses the 64 bit registers NR_RAX/NR_RCX, so the whole
+      implementation is only compiled for x86_64; this unit is shared with the
+      other x86 targets, where those register names are presumably not
+      declared (NOTE(review): confirm against the i386/i8086 builds). On those
+      targets the method simply returns false. }
+    function TX86AsmOptimizer.PostPeepholeOptPush(var p : tai) : Boolean;
+{$ifdef x86_64}
+      var
+        hp1, hp2, hp3, hp4: tai;
+{$endif x86_64}
+      begin
+        Result:=false;
+{$ifdef x86_64}
+        { replace
+            push %rax
+            call   procname
+            pop %rcx
+            ret
+          by
+            jmp    procname
+
+          but do it only on level 4 because it destroys stack back traces
+
+          It relies on the fact that the sequence push rax/pop rcx is only used
+          for stack alignment, as rcx is volatile for all supported calling
+          conventions
+        }
+        if (cs_opt_level4 in current_settings.optimizerswitches) and
+          MatchOpType(taicpu(p),top_reg) and
+          (taicpu(p).oper[0]^.reg=NR_RAX) and
+          GetNextInstruction(p, hp1) and
+          { Take a copy of hp1: hp4 keeps the entry directly following the push,
+            it is handed to RemoveCurrentP below once the push is dropped }
+          SetAndTest(hp1, hp4) and
+          { trick to skip label }
+          ((hp1.typ=ait_instruction) or GetNextInstruction(hp1, hp1)) and
+          { step over everything that does not touch the stack pointer }
+          SkipSimpleInstructions(hp1) and
+          MatchInstruction(hp1,A_CALL,[S_NO]) and
+          GetNextInstruction(hp1, hp2) and
+          { the pop must undo the push, so it has to use the same operand size }
+          MatchInstruction(hp2,A_POP,[taicpu(p).opsize]) and
+          MatchOpType(taicpu(hp2),top_reg) and
+          (taicpu(hp2).oper[0]^.reg=NR_RCX) and
+          GetNextInstruction(hp2, hp3) and
+          { trick to skip label }
+          ((hp3.typ=ait_instruction) or GetNextInstruction(hp3, hp3)) and
+          { only a plain ret without operand, i.e. one which does not release parameters }
+          MatchInstruction(hp3,A_RET,[S_NO]) and
+          (taicpu(hp3).ops=0) then
+          begin
+            { turn the call into a tail jump }
+            taicpu(hp1).opcode := A_JMP;
+            taicpu(hp1).is_jmp := true;
+            { the matched sequence is push/call/pop/ret, name it accordingly }
+            DebugMsg(SPeepholeOptimization + 'PushCallPopRet2Jmp done',p);
+            { drop the push ... }
+            RemoveCurrentP(p, hp4);
+            { ... the pop ... }
+            AsmL.Remove(hp2);
+            hp2.free;
+            { ... and the ret }
+            AsmL.Remove(hp3);
+            hp3.free;
+            Result:=true;
+          end;
+{$endif x86_64}
+      end;
+
+
     function TX86AsmOptimizer.PostPeepholeOptMov(var p : tai) : Boolean;
       var
         Value, RegName: string;

+ 2 - 0
compiler/x86_64/aoptcpu.pas

@@ -194,6 +194,8 @@ uses
                   Result:=PostPeepholeOptCall(p);
                 A_LEA:
                   Result:=PostPeepholeOptLea(p);
+                A_PUSH:
+                  Result:=PostPeepholeOptPush(p);
                 else
                   ;
               end;

+ 1 - 1
tests/test/tasm23b.pp

@@ -6,7 +6,7 @@ program tasm23b;
 {$S-}
 
 const
-  t_size = 33;
+  t_size = 25;
 procedure t; assembler;
 asm
   mov eax, [rbx[5]][rdi][54][-17][45][4]      { mov eax, [rbx+rdi+5Bh] }