Browse Source

* use lea to adjust the stack pointer; on all modern CPUs this is as fast as or faster than add/sub

git-svn-id: trunk@25010 -
florian 12 years ago
parent
commit
94cf650d9a
3 changed files with 41 additions and 11 deletions
  1. 13 3
      compiler/i386/cgcpu.pas
  2. 17 7
      compiler/x86/cgx86.pas
  3. 11 1
      compiler/x86_64/cgcpu.pas

+ 13 - 3
compiler/i386/cgcpu.pas

@@ -293,6 +293,16 @@ unit cgcpu;
 
 
 
 
     procedure tcg386.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
     procedure tcg386.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
+
+      procedure increase_fp(a : tcgint); { local helper: appends one LEA to list that adds a to current_procinfo.framepointer; replaces the former OP_ADD emitted via cg.a_op_const_reg }
+        var
+          href : treference;
+        begin
+          reference_reset_base(href,current_procinfo.framepointer,a,0); { presumably builds the operand [framepointer+a]; last argument looks like an alignment - confirm against reference_reset_base }
+          { normally, lea is a better choice than an add; note that lea leaves the CPU flags untouched whereas add modifies them }
+          list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],href,current_procinfo.framepointer)); { destination is the same register used as base, so the register is adjusted in place }
+        end;
+
       var
       var
         stacksize : longint;
         stacksize : longint;
       begin
       begin
@@ -304,7 +314,7 @@ unit cgcpu;
         { remove stackframe }
         { remove stackframe }
         if not nostackframe then
         if not nostackframe then
           begin
           begin
-            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
+            if current_procinfo.framepointer=NR_STACK_POINTER_REG then
               begin
               begin
                 stacksize:=current_procinfo.calc_stackframe_size;
                 stacksize:=current_procinfo.calc_stackframe_size;
                 if (target_info.stackalign>4) and
                 if (target_info.stackalign>4) and
@@ -314,8 +324,8 @@ unit cgcpu;
                     { if you (think you) know what you are doing              }
                     { if you (think you) know what you are doing              }
                     (po_assembler in current_procinfo.procdef.procoptions)) then
                     (po_assembler in current_procinfo.procdef.procoptions)) then
                   stacksize := align(stacksize+sizeof(aint),target_info.stackalign) - sizeof(aint);
                   stacksize := align(stacksize+sizeof(aint),target_info.stackalign) - sizeof(aint);
-                if (stacksize<>0) then
-                  cg.a_op_const_reg(list,OP_ADD,OS_ADDR,stacksize,current_procinfo.framepointer);
+                if stacksize<>0 then
+                  increase_fp(stacksize);
               end
               end
             else
             else
               list.concat(Taicpu.op_none(A_LEAVE,S_NO));
               list.concat(Taicpu.op_none(A_LEAVE,S_NO));

+ 17 - 7
compiler/x86/cgx86.pas

@@ -2318,6 +2318,16 @@ unit cgx86;
 
 
 
 
     procedure tcgx86.g_stackpointer_alloc(list : TAsmList;localsize : longint);
     procedure tcgx86.g_stackpointer_alloc(list : TAsmList;localsize : longint);
+
+      procedure decrease_sp(a : tcgint); { local helper: appends one LEA to list that subtracts a from NR_STACK_POINTER_REG; replaces the former explicit A_SUB instructions }
+        var
+          href : treference;
+        begin
+          reference_reset_base(href,NR_STACK_POINTER_REG,-a,0); { negated displacement: presumably the operand is [sp-a] - confirm against reference_reset_base }
+          { normally, lea is a better choice than a sub to adjust the stack pointer; unlike sub, lea does not modify the CPU flags }
+          list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],href,NR_STACK_POINTER_REG)); { NOTE(review): the replaced code named S_L/NR_ESP and S_Q/NR_RSP explicitly; assumes OS_ADDR and NR_STACK_POINTER_REG resolve to the same per target - confirm }
+        end;
+
 {$ifdef x86}
 {$ifdef x86}
 {$ifndef NOTARGETWIN}
 {$ifndef NOTARGETWIN}
       var
       var
@@ -2338,7 +2348,7 @@ unit cgx86;
              begin
              begin
                if localsize div winstackpagesize<=5 then
                if localsize div winstackpagesize<=5 then
                  begin
                  begin
-                    list.concat(Taicpu.Op_const_reg(A_SUB,S_L,localsize-4,NR_ESP));
+                    decrease_sp(localsize-4);
                     for i:=1 to localsize div winstackpagesize do
                     for i:=1 to localsize div winstackpagesize do
                       begin
                       begin
                          reference_reset_base(href,NR_ESP,localsize-i*winstackpagesize,4);
                          reference_reset_base(href,NR_ESP,localsize-i*winstackpagesize,4);
@@ -2353,11 +2363,11 @@ unit cgx86;
                     list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
                     list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
                     list.concat(Taicpu.op_const_reg(A_MOV,S_L,localsize div winstackpagesize,NR_EDI));
                     list.concat(Taicpu.op_const_reg(A_MOV,S_L,localsize div winstackpagesize,NR_EDI));
                     a_label(list,again);
                     a_label(list,again);
-                    list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize-4,NR_ESP));
+                    decrease_sp(winstackpagesize-4);
                     list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
                     list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
                     list.concat(Taicpu.op_reg(A_DEC,S_L,NR_EDI));
                     list.concat(Taicpu.op_reg(A_DEC,S_L,NR_EDI));
                     a_jmp_cond(list,OC_NE,again);
                     a_jmp_cond(list,OC_NE,again);
-                    list.concat(Taicpu.op_const_reg(A_SUB,S_L,localsize mod winstackpagesize - 4,NR_ESP));
+                    decrease_sp(localsize mod winstackpagesize-4);
                     reference_reset_base(href,NR_ESP,localsize-4,4);
                     reference_reset_base(href,NR_ESP,localsize-4,4);
                     list.concat(Taicpu.op_ref_reg(A_MOV,S_L,href,NR_EDI));
                     list.concat(Taicpu.op_ref_reg(A_MOV,S_L,href,NR_EDI));
                     ungetcpuregister(list,NR_EDI);
                     ungetcpuregister(list,NR_EDI);
@@ -2375,7 +2385,7 @@ unit cgx86;
              begin
              begin
                if localsize div winstackpagesize<=5 then
                if localsize div winstackpagesize<=5 then
                  begin
                  begin
-                    list.concat(Taicpu.Op_const_reg(A_SUB,S_Q,localsize,NR_RSP));
+                    decrease_sp(localsize);
                     for i:=1 to localsize div winstackpagesize do
                     for i:=1 to localsize div winstackpagesize do
                       begin
                       begin
                          reference_reset_base(href,NR_RSP,localsize-i*winstackpagesize+4,4);
                          reference_reset_base(href,NR_RSP,localsize-i*winstackpagesize+4,4);
@@ -2390,19 +2400,19 @@ unit cgx86;
                     getcpuregister(list,NR_R10);
                     getcpuregister(list,NR_R10);
                     list.concat(Taicpu.op_const_reg(A_MOV,S_Q,localsize div winstackpagesize,NR_R10));
                     list.concat(Taicpu.op_const_reg(A_MOV,S_Q,localsize div winstackpagesize,NR_R10));
                     a_label(list,again);
                     a_label(list,again);
-                    list.concat(Taicpu.op_const_reg(A_SUB,S_Q,winstackpagesize,NR_RSP));
+                    decrease_sp(winstackpagesize);
                     reference_reset_base(href,NR_RSP,0,4);
                     reference_reset_base(href,NR_RSP,0,4);
                     list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
                     list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
                     list.concat(Taicpu.op_reg(A_DEC,S_Q,NR_R10));
                     list.concat(Taicpu.op_reg(A_DEC,S_Q,NR_R10));
                     a_jmp_cond(list,OC_NE,again);
                     a_jmp_cond(list,OC_NE,again);
-                    list.concat(Taicpu.op_const_reg(A_SUB,S_Q,localsize mod winstackpagesize,NR_RSP));
+                    decrease_sp(localsize mod winstackpagesize);
                     ungetcpuregister(list,NR_R10);
                     ungetcpuregister(list,NR_R10);
                  end
                  end
              end
              end
            else
            else
 {$endif NOTARGETWIN}
 {$endif NOTARGETWIN}
 {$endif x86_64}
 {$endif x86_64}
-            list.concat(Taicpu.Op_const_reg(A_SUB,tcgsize2opsize[OS_ADDR],localsize,NR_STACK_POINTER_REG));
+            decrease_sp(localsize);
          end;
          end;
       end;
       end;
 
 

+ 11 - 1
compiler/x86_64/cgcpu.pas

@@ -177,6 +177,16 @@ unit cgcpu;
 
 
 
 
     procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
     procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
+
+      procedure increase_sp(a : tcgint); { local helper: appends one LEA to list that adds a to NR_STACK_POINTER_REG; replaces the former OP_ADD emitted via cg.a_op_const_reg }
+        var
+          href : treference;
+        begin
+          reference_reset_base(href,NR_STACK_POINTER_REG,a,0); { presumably builds the operand [sp+a]; last argument looks like an alignment - confirm against reference_reset_base }
+          { normally, lea is a better choice than an add; note that lea leaves the CPU flags untouched whereas add modifies them }
+          list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],href,NR_STACK_POINTER_REG)); { destination equals the base register, so the stack pointer is adjusted in place }
+        end;
+
       var
       var
         href : treference;
         href : treference;
       begin
       begin
@@ -195,7 +205,7 @@ unit cgcpu;
                (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
                (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
               begin
               begin
                 if (current_procinfo.final_localsize<>0) then
                 if (current_procinfo.final_localsize<>0) then
-                  cg.a_op_const_reg(list,OP_ADD,OS_ADDR,current_procinfo.final_localsize,NR_STACK_POINTER_REG);
+                  increase_sp(current_procinfo.final_localsize);
                 if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
                 if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
                   list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
                   list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
               end
               end