瀏覽代碼

Better SP adjustments on entry/exit for ARM

If the needed adjustment is not expressible as a shifter constant, the old
code loaded a temporary register (fixed to r12) via a_load_const_reg and used
it to adjust the SP, resulting in:

mov r12, #44
orr r12, r12, #4096
sub sp, sp, r12

The new code will try to split the adjustment into 2 shifter constants and
will do two separate adjustments:

sub sp, sp, #44
sub sp, sp, #4096

If that doesn't work, we'll fall back to the old code. But that should
happen VERY rarely, only for stack frames bigger than 256k whose size is not
expressible in 2 shifter constants.

git-svn-id: trunk@21863 -
masta 13 年之前
父節點
當前提交
57b67dfa30
共有 1 個文件被更改,包括 22 次插入11 次删除
  1. 22 11
      compiler/arm/cgcpu.pas

+ 22 - 11
compiler/arm/cgcpu.pas

@@ -1409,6 +1409,7 @@ unit cgcpu;
          r7offset,
          stackmisalignment : pint;
          postfix: toppostfix;
+         imm1, imm2: DWord;
       begin
         LocalSize:=align(LocalSize,4);
         { call instruction does not put anything on the stack }
@@ -1536,18 +1537,24 @@ unit cgcpu;
                  (po_assembler in current_procinfo.procdef.procoptions))) then
               begin
                 localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
-                if not(is_shifter_const(localsize,shift)) then
+                if is_shifter_const(localsize,shift) then
                   begin
-                    if current_procinfo.framepointer=NR_STACK_POINTER_REG then
-                      a_reg_alloc(list,NR_R12);
-                    a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
-                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                     a_reg_dealloc(list,NR_R12);
+                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
+                  end
+                else if split_into_shifter_const(localsize, imm1, imm2) then
+                  begin
+                    a_reg_dealloc(list,NR_R12);
+                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
+                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
                   end
                 else
                   begin
+                    if current_procinfo.framepointer=NR_STACK_POINTER_REG then
+                      a_reg_alloc(list,NR_R12);
+                    a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
+                    list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                     a_reg_dealloc(list,NR_R12);
-                    list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                   end;
               end;
 
@@ -1614,6 +1621,7 @@ unit cgcpu;
          regs : tcpuregisterset;
          stackmisalignment: pint;
          mmpostfix: toppostfix;
+         imm1, imm2: DWord;
       begin
         if not(nostackframe) then
           begin
@@ -1745,16 +1753,19 @@ unit cgcpu;
                      (po_assembler in current_procinfo.procdef.procoptions))) then
                   begin
                     localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
-                    if not(is_shifter_const(LocalSize,shift)) then
+                    if is_shifter_const(LocalSize,shift) then
+                      list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize))
+                    else if split_into_shifter_const(localsize, imm1, imm2) then
+                      begin
+                        list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm1));
+                        list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,imm2));
+                      end
+                    else
                       begin
                         a_reg_alloc(list,NR_R12);
                         a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
                         list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
                         a_reg_dealloc(list,NR_R12);
-                      end
-                    else
-                      begin
-                        list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
                       end;
                   end;