浏览代码

* x86_64: reworked register saving/restoring to use PUSH/POP instructions for procedures with an RSP-based frame and for SEH finalization procedures. In these cases XMM registers are also saved/restored without involving tempgen. This prevents SEH finalization procedures from saving registers in the stack frame of their parent procedures, which fixes incorrect unwind bytecode (Mantis #24791). It also reduces executable size (for the compiler itself, by about 100 KB).

git-svn-id: trunk@25389 -
sergei 12 年之前
父节点
当前提交
e41149a7ec
共有 1 个文件被更改,包括 112 次插入9 次删除
  1. 112 9
      compiler/x86_64/cgcpu.pas

+ 112 - 9
compiler/x86_64/cgcpu.pas

@@ -40,9 +40,14 @@ unit cgcpu;
         procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
         procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override;
+        procedure g_save_registers(list: TAsmList);override;
+        procedure g_restore_registers(list: TAsmList);override;
 
         procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
         procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
+      private
+        function use_push: boolean;
+        function saved_xmm_reg_size: longint;
       end;
 
     procedure create_codegen;
@@ -103,6 +108,29 @@ unit cgcpu;
       end;
 
 
+    { Reports whether the current procedure saves/restores its registers with
+      PUSH/POP: true when its frame pointer is the stack pointer register, or
+      when the procedure is an exception filter (potype_exceptfilter). }
+    function tcgx86_64.use_push: boolean;
+      begin
+        if current_procinfo.framepointer=NR_STACK_POINTER_REG then
+          result:=true
+        else
+          result:=(current_procinfo.procdef.proctypeoption=potype_exceptfilter);
+      end;
+
+
+    { Returns the number of bytes needed to store those registers from
+      saved_mm_registers that are marked as used in the current procedure
+      (one OS_VECTOR-sized slot per register). The result is 0 on targets
+      other than x86_64-win64 and when the procedure uses no MM registers. }
+    function tcgx86_64.saved_xmm_reg_size: longint;
+      var
+        idx: longint;
+      begin
+        result:=0;
+        if (target_info.system=system_x86_64_win64) and
+           uses_registers(R_MMREGISTER) then
+          for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
+            if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
+              result:=result+tcgsize2size[OS_VECTOR];
+      end;
+
+
     procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean);
       var
         hitem: tlinkedlistitem;
@@ -113,7 +141,31 @@ unit cgcpu;
         suppress_endprologue: boolean;
         stackmisalignment: longint;
         para: tparavarsym;
+        xmmsize: longint;
+
+      { Appends a PUSH of reg to list. On x86_64-win64 it additionally appends
+        the matching ash_pushreg SEH directive and sets pi_has_unwind_info in
+        the flags of the current procedure. }
+      procedure push_one_reg(reg: tregister);
+        begin
+          list.concat(taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],reg));
+          { nothing more to record on targets other than win64 }
+          if (target_info.system<>system_x86_64_win64) then
+            exit;
+          list.concat(cai_seh_directive.create_reg(ash_pushreg,reg));
+          include(current_procinfo.flags,pi_has_unwind_info);
+        end;
+
+      procedure push_regs; { pushes every register of saved_standard_registers that is marked in used_in_proc }
+        var
+          r: longint; { index into saved_standard_registers }
         begin
+          for r := low(saved_standard_registers) to high(saved_standard_registers) do
+            if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
+              begin
+                inc(stackmisalignment,sizeof(pint)); { account for the pushed slot in the enclosing procedure's stackmisalignment }
+                push_one_reg(newreg(R_INTREGISTER,saved_standard_registers[r],R_SUBWHOLE)); { emits the PUSH (plus the SEH directive on win64) }
+              end;
+        end;
+
+      begin
         hitem:=list.last;
         { pi_has_unwind_info may already be set at this point if there are
           SEH directives in assembler body. In this case, .seh_endprologue
@@ -127,17 +179,15 @@ unit cgcpu;
             stackmisalignment := sizeof(pint);
             list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
             if current_procinfo.framepointer=NR_STACK_POINTER_REG then
-              CGmessage(cg_d_stackframe_omited)
+              begin
+                push_regs;
+                CGmessage(cg_d_stackframe_omited);
+              end
             else
               begin
                 { push <frame_pointer> }
                 inc(stackmisalignment,sizeof(pint));
-                list.concat(Taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
-                if (target_info.system=system_x86_64_win64) then
-                  begin
-                    list.concat(cai_seh_directive.create_reg(ash_pushreg,NR_FRAME_POINTER_REG));
-                    include(current_procinfo.flags,pi_has_unwind_info);
-                  end;
+                push_one_reg(NR_FRAME_POINTER_REG);
                 { Return address and FP are both on stack }
                 current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
                 current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
@@ -145,6 +195,7 @@ unit cgcpu;
                   list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG))
                 else
                   begin
+                    push_regs;
                     { load framepointer from hidden $parentfp parameter }
                     para:=tparavarsym(current_procinfo.procdef.paras[0]);
                     if not (vo_is_parentfp in para.varoptions) then
@@ -170,6 +221,14 @@ unit cgcpu;
                 }
               end;
 
+            xmmsize:=saved_xmm_reg_size;
+            if use_push and (xmmsize<>0) then
+              begin
+                localsize:=align(localsize,target_info.stackalign)+xmmsize;
+                reference_reset_base(current_procinfo.save_regs_ref,NR_STACK_POINTER_REG,
+                  localsize-xmmsize,tcgsize2size[OS_VECTOR]);
+              end;
+
             { allocate stackframe space }
             if (localsize<>0) or
                ((target_info.stackalign>sizeof(pint)) and
@@ -188,6 +247,16 @@ unit cgcpu;
                     if localsize<>0 then
                       list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
                     include(current_procinfo.flags,pi_has_unwind_info);
+                    if use_push and (xmmsize<>0) then
+                      begin
+                        href:=current_procinfo.save_regs_ref;
+                        for r:=low(saved_mm_registers) to high(saved_mm_registers) do
+                          if saved_mm_registers[r] in rg[R_MMREGISTER].used_in_proc then
+                            begin
+                              a_loadmm_reg_ref(list,OS_VECTOR,OS_VECTOR,newreg(R_MMREGISTER,saved_mm_registers[r],R_SUBMMWHOLE),href,nil);
+                              inc(href.offset,tcgsize2size[OS_VECTOR]);
+                            end;
+                      end;
                   end;
                end;
           end;
@@ -209,6 +278,8 @@ unit cgcpu;
           since registers are not modified before they are saved, and saves do not
           change RSP, 'logically' all saves can happen at the end of prologue. }
         href:=current_procinfo.save_regs_ref;
+        if (not use_push) then
+          begin
             for r:=low(saved_standard_registers) to high(saved_standard_registers) do
               if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
                 begin
@@ -217,6 +288,7 @@ unit cgcpu;
                     href.offset+frame_offset));
                  inc(href.offset,sizeof(aint));
                 end;
+          end;
         if uses_registers(R_MMREGISTER) then
           begin
             if (href.offset mod tcgsize2size[OS_VECTOR])<>0 then
@@ -256,6 +328,8 @@ unit cgcpu;
 
       var
         href : treference;
+        hreg : tregister;
+        r : longint;
       begin
         { Release PIC register }
         if cs_create_pic in current_settings.moduleswitches then
@@ -268,11 +342,26 @@ unit cgcpu;
         { remove stackframe }
         if not nostackframe then
           begin
-            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
-               (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
+            if use_push then
               begin
+                if (saved_xmm_reg_size<>0) then
+                  begin
+                    href:=current_procinfo.save_regs_ref;
+                    for r:=low(saved_mm_registers) to high(saved_mm_registers) do
+                      if saved_mm_registers[r] in rg[R_MMREGISTER].used_in_proc then
+                        begin
+                          { Allocate register so the optimizer does not remove the load }
+                          hreg:=newreg(R_MMREGISTER,saved_mm_registers[r],R_SUBMMWHOLE);
+                          a_reg_alloc(list,hreg);
+                          a_loadmm_ref_reg(list,OS_VECTOR,OS_VECTOR,href,hreg,nil);
+                          inc(href.offset,tcgsize2size[OS_VECTOR]);
+                        end;
+                  end;
+
                 if (current_procinfo.final_localsize<>0) then
                   increase_sp(current_procinfo.final_localsize);
+                internal_restore_regs(list,true);
+
                 if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
                   list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
               end
@@ -300,6 +389,20 @@ unit cgcpu;
       end;
 
 
+    { Emits nothing when use_push is true; otherwise defers to the inherited
+      g_save_registers. }
+    procedure tcgx86_64.g_save_registers(list: TAsmList);
+      begin
+        if use_push then
+          exit;
+        inherited g_save_registers(list);
+      end;
+
+
+    { Emits nothing when use_push is true; otherwise defers to the inherited
+      g_restore_registers. }
+    procedure tcgx86_64.g_restore_registers(list: TAsmList);
+      begin
+        if use_push then
+          exit;
+        inherited g_restore_registers(list);
+      end;
+
+
     procedure tcgx86_64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
       var
         make_global : boolean;