瀏覽代碼

Fixes for aarch64.
Merged revision(s) 39948, 39951, 39953 from trunk:
* Fixed interface delegation for aarch64 (tdel1 and tdel2 tests).
It was broken by r31676, which did not take r29953 into account. As a consequence, $self was passed in register x1 instead of x0 when the method result is of a managed type.
This fix enables changing the order of the $self and $result parameters only for the aarch64 AND llvm combination, where it is actually needed.
The issue with interface delegation still exists for aarch64-llvm and needs to be fixed separately, probably by applying llvm-specific workarounds as was done in r29953 for regular aarch64 targets.
........
* Fixed bug #17413 (and the tw17413 test) for aarch64 by adding an additional check.
........
* aarch64: Fixed loading of a function result which is returned in 4 32-bit MM registers to 2 64-bit int registers. Bug #30329.
........

git-svn-id: branches/fixes_3_2@40543 -

yury 6 年之前
父節點
當前提交
0dd9e4d6a5
共有 4 個文件被更改,包括 80 次插入32 次删除
  1. 8 2
      compiler/aarch64/cgcpu.pas
  2. 3 1
      compiler/ncgld.pas
  3. 63 22
      compiler/ncgutil.pas
  4. 6 7
      compiler/symconst.pas

+ 8 - 2
compiler/aarch64/cgcpu.pas

@@ -1040,13 +1040,19 @@ implementation
 
 
      procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       var
+         r : tregister;
        begin
          if not shufflescalar(shuffle) then
            internalerror(2014122802);
          if not(tcgsize2size[fromsize] in [4,8]) or
-            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+            (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
            internalerror(2014122804);
-         list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
+         if tcgsize2size[fromsize]<tcgsize2size[tosize] then
+           r:=makeregsize(intreg,fromsize)
+         else
+           r:=intreg;
+         list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
        end;
 
 

+ 3 - 1
compiler/ncgld.pas

@@ -112,7 +112,9 @@ implementation
                  { ... at the place we are looking for }
                  references_equal(tabstractnormalvarsym(tloadnode(n).symtableentry).localloc.reference,rr^.old^) and
                  { its address cannot have escaped the current routine }
-                 not(tabstractvarsym(tloadnode(n).symtableentry).addr_taken) then
+                 not(tabstractvarsym(tloadnode(n).symtableentry).addr_taken) and
+                 { it is not accessed in nested procedures }
+                 not(tabstractvarsym(tloadnode(n).symtableentry).different_scope) then
                 begin
                   { relocate variable }
                   tcgloadnode(n).changereflocation(rr^.new^);

+ 63 - 22
compiler/ncgutil.pas

@@ -704,11 +704,11 @@ implementation
         sizeleft  : aint;
         tempref   : treference;
         loadsize  : tcgint;
+        tempreg  : tregister;
 {$ifdef mips}
         //tmpreg   : tregister;
 {$endif mips}
 {$ifndef cpu64bitalu}
-        tempreg  : tregister;
         reg64    : tregister64;
 {$if defined(cpu8bitalu)}
         curparaloc : PCGParaLocation;
@@ -780,27 +780,68 @@ implementation
                       begin
                         if not assigned(paraloc^.next) then
                           internalerror(200410104);
-                        if (target_info.endian=ENDIAN_BIG) then
-                          begin
-                            { paraloc^ -> high
-                              paraloc^.next -> low }
-                            unget_para(paraloc^);
-                            gen_alloc_regloc(list,destloc,vardef);
-                            { reg->reg, alignment is irrelevant }
-                            cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reghi,8);
-                            unget_para(paraloc^.next^);
-                            cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^.next^,destloc.register128.reglo,8);
-                          end
-                        else
-                          begin
-                            { paraloc^ -> low
-                              paraloc^.next -> high }
-                            unget_para(paraloc^);
-                            gen_alloc_regloc(list,destloc,vardef);
-                            cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reglo,8);
-                            unget_para(paraloc^.next^);
-                            cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^.next^,destloc.register128.reghi,8);
-                          end;
+                        case tcgsize2size[paraloc^.size] of
+                          8:
+                            begin
+                              if (target_info.endian=ENDIAN_BIG) then
+                                begin
+                                  { paraloc^ -> high
+                                    paraloc^.next -> low }
+                                  unget_para(paraloc^);
+                                  gen_alloc_regloc(list,destloc,vardef);
+                                  { reg->reg, alignment is irrelevant }
+                                  cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reghi,8);
+                                  unget_para(paraloc^.next^);
+                                  cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^.next^,destloc.register128.reglo,8);
+                                end
+                              else
+                                begin
+                                  { paraloc^ -> low
+                                    paraloc^.next -> high }
+                                  unget_para(paraloc^);
+                                  gen_alloc_regloc(list,destloc,vardef);
+                                  cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reglo,8);
+                                  unget_para(paraloc^.next^);
+                                  cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^.next^,destloc.register128.reghi,8);
+                                end;
+                            end;
+                          4:
+                            begin
+                              { The 128-bit parameter is located in 4 32-bit MM registers.
+                                It is needed to copy them to 2 64-bit int registers.
+                                A code generator or a target cpu must support loading of a 32-bit MM register to
+                                a 64-bit int register, zero extending it. }
+                              if target_info.endian=ENDIAN_BIG then
+                                internalerror(2018101702);  // Big endian support not implemented yet
+                              gen_alloc_regloc(list,destloc,vardef);
+                              tempreg:=cg.getintregister(list,OS_64);
+                              // Low part of the 128-bit param
+                              unget_para(paraloc^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,tempreg,4);
+                              paraloc:=paraloc^.next;
+                              if paraloc=nil then
+                                internalerror(2018101703);
+                              unget_para(paraloc^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reglo,4);
+                              cg.a_op_const_reg(list,OP_SHL,OS_64,32,destloc.register128.reglo);
+                              cg.a_op_reg_reg(list,OP_OR,OS_64,tempreg,destloc.register128.reglo);
+                              // High part of the 128-bit param
+                              paraloc:=paraloc^.next;
+                              if paraloc=nil then
+                                internalerror(2018101704);
+                              unget_para(paraloc^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,tempreg,4);
+                              paraloc:=paraloc^.next;
+                              if paraloc=nil then
+                                internalerror(2018101705);
+                              unget_para(paraloc^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reghi,4);
+                              cg.a_op_const_reg(list,OP_SHL,OS_64,32,destloc.register128.reghi);
+                              cg.a_op_reg_reg(list,OP_OR,OS_64,tempreg,destloc.register128.reghi);
+                            end
+                          else
+                            internalerror(2018101701);
+                        end;
                       end;
                     LOC_REFERENCE:
                       begin

+ 6 - 7
compiler/symconst.pas

@@ -124,18 +124,17 @@ const
   paranr_blockselfpara = 1;
   paranr_parentfp = 2;
   paranr_parentfp_delphi_cc_leftright = 2;
-{$ifndef aarch64}
-  paranr_self = 3;
-  paranr_result = 4;
-{$else aarch64}
-  { on AArch64, the result parameter is passed in a special register, so its
-    order doesn't really matter -- except for LLVM, where the "sret" parameter
+{$if defined(aarch64) and defined(llvm)}
+  { for AArch64 on LLVM, the "sret" parameter
     must always be the first -> give it a higher number; can't do it for other
     platforms, because that would change the register assignment/parameter order
     and the current one is presumably Delphi-compatible }
   paranr_result = 3;
   paranr_self = 4;
-{$endif aarch64}
+{$else}
+  paranr_self = 3;
+  paranr_result = 4;
+{$endif}
   paranr_vmt = 5;
 
   { the implicit parameters for Objective-C methods need to come