Browse Source

* x86_64-win64: fixed stack calculations so that the 32-byte spilling area is included only once, and only for non-leaf procedures (previously it was included twice with -O- and three times with -O2, wasting large amounts of stack memory).

git-svn-id: trunk@17695 -
sergei 14 years ago
parent
commit
318a55b96c
2 changed files with 11 additions and 6 deletions
  1. 1 4
      compiler/x86_64/cpupara.pas
  2. 10 2
      compiler/x86_64/cpupi.pas

+ 1 - 4
compiler/x86_64/cpupara.pas

@@ -1234,10 +1234,7 @@ unit cpupara;
       begin
         intparareg:=0;
         mmparareg:=0;
-        if target_info.system=system_x86_64_win64 then
-          parasize:=4*8
-        else
-          parasize:=0;
+        parasize:=0;
         create_paraloc_info_intern(p,side,p.paras,intparareg,mmparareg,parasize,false);
         { Create Function result paraloc }
         create_funcretloc_info(p,side);

+ 10 - 2
compiler/x86_64/cpupi.pas

@@ -42,6 +42,7 @@ implementation
 
     uses
       systems,
+      globtype,
       globals,
       cutils,
       symconst,
@@ -54,7 +55,8 @@ implementation
           begin
             if not(po_assembler in procdef.procoptions) and
               (tg.direction > 0) then
-              tg.setfirsttemp(tg.direction*maxpushedparasize+4*8);
+            { the spilling area is needed only for non-leaf procedures }
+              tg.setfirsttemp(tg.direction*maxpushedparasize+(4*8*ord(pi_do_call in flags)));
           end
         else
           tg.setfirsttemp(tg.direction*maxpushedparasize);
@@ -75,7 +77,13 @@ implementation
         { RSP should be aligned on 16 bytes }
         result:=Align(tg.direction*tg.lasttemp+maxpushedparasize,16);
         if target_info.system=system_x86_64_win64 then
-          inc(result,4*8);
+          begin
+          { case tg.direction>0 is handled above in set_first_temp_offset
+            (tg.setfirsttemp also sets tg.lasttemp, therefore the 32 byte
+             spilling area is already included in result) }
+            if (tg.direction<0) and (pi_do_call in flags) then
+              inc(result,4*8);
+          end;
       end;