浏览代码

+ initial work for tls-based threadvar support on arm-linux

git-svn-id: trunk@40267 -
florian 6 年之前
父节点
当前提交
9f16c34329

+ 1 - 0
.gitattributes

@@ -85,6 +85,7 @@ compiler/arm/narmcal.pas svneol=native#text/plain
 compiler/arm/narmcnv.pas svneol=native#text/plain
 compiler/arm/narmcon.pas svneol=native#text/plain
 compiler/arm/narminl.pas svneol=native#text/plain
+compiler/arm/narmld.pas svneol=native#text/pascal
 compiler/arm/narmmat.pas svneol=native#text/plain
 compiler/arm/narmmem.pas svneol=native#text/plain
 compiler/arm/narmset.pas svneol=native#text/plain

+ 6 - 2
compiler/aasmtai.pas

@@ -145,7 +145,11 @@ interface
           { offset of symbol's GOT slot in GOT }
           aitconst_got,
           { offset of symbol itself from GOT }
-          aitconst_gotoff_symbol
+          aitconst_gotoff_symbol,
+          { ARM TLS code }
+          aitconst_gottpoff,
+          aitconst_tpoff
+
         );
 
         tairealconsttype = (
@@ -1759,7 +1763,7 @@ implementation
       end;
 
 
-    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym,_endsym: tasmsymbol; _ofs: int64);
+    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym, _endsym: tasmsymbol; _ofs: int64);
        begin
          self.create_sym_offset(_sym,_ofs);
          consttype:=_typ;

+ 12 - 3
compiler/aggas.pas

@@ -347,9 +347,13 @@ implementation
             exit;
           end;
 
-        if (atype=sec_threadvar) and
-          (target_info.system in (systems_windows+systems_wince)) then
-          secname:='.tls';
+        if atype=sec_threadvar then
+          begin
+            if (target_info.system in (systems_windows+systems_wince)) then
+              secname:='.tls'
+            else if (target_info.system in systems_linux) then
+              secname:='.tbss';
+          end;
 
         { go32v2 stub only loads .text and .data sections, and allocates space for .bss.
           Thus, data which normally goes into .rodata and .rodata_norel sections must
@@ -943,6 +947,11 @@ implementation
                         WriteAixIntConst(tai_const(hp));
                       writer.AsmLn;
                     end;
+                 aitconst_gottpoff:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(gottpoff)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
 {$endif cpu64bitaddr}
                  aitconst_got:
                    begin

+ 12 - 1
compiler/arm/aoptcpu.pas

@@ -2516,6 +2516,15 @@ Implementation
 
   { TODO : schedule also forward }
   { TODO : schedule distance > 1 }
+
+    { returns true if p might be a load of a pc relative tls offset }
+    function PossibleTLSLoad(const p: tai) : boolean;
+      begin
+        Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
+          (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
+          (taicpu(p).oper[1]^.ref^.index=NR_PC)));
+      end;
+
     var
       hp1,hp2,hp3,hp4,hp5,insertpos : tai;
       list : TAsmList;
@@ -2572,7 +2581,9 @@ Implementation
             ) and
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
-             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) then
+             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
+            not(PossibleTLSLoad(p)) and
+            not(PossibleTLSLoad(hp1)) then
             begin
               hp3:=tai(p.Previous);
               hp5:=tai(p.next);

+ 20 - 4
compiler/arm/cgcpu.pas

@@ -107,13 +107,15 @@ unit cgcpu;
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
 
-        { clear out potential overflow bits from 8 or 16 bit operations  }
-        { the upper 24/16 bits of a register after an operation          }
+        { clear out potential overflow bits from 8 or 16 bit operations
+          the upper 24/16 bits of a register after an operation          }
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
 
         { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
         procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
 
+
+        procedure g_maybe_tls_init(list : TAsmList); override;
       end;
 
       { tcgarm is shared between normal arm and thumb-2 }
@@ -2114,7 +2116,7 @@ unit cgcpu;
                    end;
                end;
              end;
-        end;
+          end;
       end;
 
 
@@ -2476,6 +2478,8 @@ unit cgcpu;
                     a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
                 indirection_done:=true;
               end
+            else if ref.refaddr=addr_gottpoff then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
             else if (cs_create_pic in current_settings.moduleswitches) then
               if (tf_pic_uses_got in target_info.flags) then
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
@@ -3271,6 +3275,15 @@ unit cgcpu;
       end;
 
 
+    procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
+      begin
+        list.concat(tai_regalloc.alloc(NR_R0,nil));
+        a_call_name(list,'fpc_read_tp',false);
+        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
+        list.concat(tai_regalloc.dealloc(NR_R0,nil));
+      end;
+
+
     procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
       begin
         case op of
@@ -5026,7 +5039,10 @@ unit cgcpu;
                 cg.a_label(current_procinfo.aktlocaldata,l);
                 tmpref.symboldata:=current_procinfo.aktlocaldata.last;
 
-                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
+                if ref.refaddr=addr_gottpoff then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+                else
+                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
 
                 { load consts entry }
                 tmpref.symbol:=l;

+ 1 - 0
compiler/arm/cpunode.pas

@@ -38,6 +38,7 @@ unit cpunode;
        narmcal,
        narmmat,
        narminl,
+       narmld,
        narmcnv,
        narmcon,
        narmset,

+ 8 - 0
compiler/arm/cpupi.pas

@@ -49,6 +49,8 @@ unit cpupi;
           procedure generate_parameter_info;override;
           procedure allocate_got_register(list : TAsmList);override;
           procedure postprocess_code;override;
+
+          procedure allocate_tls_register(list : TAsmList);override;
        end;
 
 
@@ -276,6 +278,12 @@ unit cpupi;
         finalizearmcode(aktproccode,aktlocaldata);
       end;
 
+
+    procedure tcpuprocinfo.allocate_tls_register(list: TAsmList);
+      begin
+        current_procinfo.tlsoffset:=cg.getaddressregister(list);
+      end;
+
 begin
    cprocinfo:=tcpuprocinfo;
 end.

+ 97 - 0
compiler/arm/narmld.pas

@@ -0,0 +1,97 @@
+{
+    Copyright (c) 1998-2018 by Florian Klaempfl
+
+    Generate arm assembler for load nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmld;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      globtype,
+      symsym,
+      node,ncgld,pass_1,aasmbase;
+
+    type
+      tarmloadnode = class(tcgloadnode)
+         procedure generate_threadvar_access(gvs : tstaticvarsym); override;
+      end;
+
+
+implementation
+
+    uses
+      globals,verbose,
+      cgbase,cgobj,cgutils,
+      aasmdata,
+      systems,
+      symcpu,symdef,
+      nld,
+      cpubase,
+      parabase,
+      procinfo;
+
+{*****************************************************************************
+                            TI386LOADNODE
+*****************************************************************************}
+
+    procedure tarmloadnode.generate_threadvar_access(gvs: tstaticvarsym);
+      var
+        paraloc1 : tcgpara;
+        pd: tprocdef;
+        href: treference;
+        hregister : tregister;
+        handled: boolean;
+        l : TAsmLabel;
+      begin
+        handled:=false;
+        if tf_section_threadvars in target_info.flags then
+          begin
+            if target_info.system in [system_arm_linux] then
+              begin
+                if not(pi_uses_threadvar in current_procinfo.flags) then
+                  internalerror(2012012101);
+                current_asmdata.getjumplabel(l);
+                reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_DATA),-8,sizeof(AInt),[]);
+                href.refaddr:=addr_gottpoff;
+                href.relsymbol:=l;
+                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                cg.a_label(current_asmdata.CurrAsmList,l);
+                reference_reset(href,0,[]);
+                href.base:=NR_PC;
+                href.index:=hregister;
+                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister);
+                location.reference.base:=current_procinfo.tlsoffset;
+                location.reference.index:=hregister;
+                handled:=true;
+              end;
+          end;
+
+        if not handled then
+          inherited;
+      end;
+
+
+begin
+   cloadnode:=tarmloadnode;
+end.

+ 7 - 2
compiler/cgbase.pas

@@ -63,8 +63,6 @@ interface
        TCGNonRefLoc=low(TCGLoc)..pred(LOC_CREFERENCE);
        TCGRefLoc=LOC_CREFERENCE..LOC_REFERENCE;
 
-       { since we have only 16bit offsets, we need to be able to specify the high
-         and lower 16 bits of the address of a symbol of up to 64 bit }
        trefaddr = (
          addr_no,
          addr_full,
@@ -72,6 +70,8 @@ interface
          addr_pic_no_got
          {$IF defined(POWERPC) or defined(POWERPC64) or defined(SPARC) or defined(MIPS) or defined(SPARC64)}
          ,
+         { since we have only 16bit offsets, we need to be able to specify the high
+           and lower 16 bits of the address of a symbol of up to 64 bit }
          addr_low,         // bits 48-63
          addr_high,        // bits 32-47
          {$IF defined(POWERPC64)}
@@ -122,6 +122,11 @@ interface
          ,addr_gdop_hix22
          ,addr_gdop_lox22
          {$endif SPARC64}
+         {$IFDEF ARM}
+         ,addr_gottpoff
+         ,addr_tpoff
+         {$ENDIF}
+
          );
 
 

+ 8 - 0
compiler/cgobj.pas

@@ -437,6 +437,8 @@ unit cgobj;
 
           { initialize the pic/got register }
           procedure g_maybe_got_init(list: TAsmList); virtual;
+          { initialize the tls register if needed }
+          procedure g_maybe_tls_init(list : TAsmList); virtual;
           { allocallcpuregisters, a_call_name, deallocallcpuregisters sequence }
           procedure g_call(list: TAsmList; const s: string);
           { Generate code to exit an unwind-protected region. The default implementation
@@ -2781,6 +2783,12 @@ implementation
       begin
       end;
 
+
+    procedure tcg.g_maybe_tls_init(list: TAsmList);
+      begin
+      end;
+
+
     procedure tcg.g_call(list: TAsmList;const s: string);
       begin
         allocallcpuregisters(list);

+ 3 - 1
compiler/globtype.pas

@@ -700,7 +700,9 @@ interface
            for i8086 cpu huge memory model,
            as this changes SP register it requires special handling
            to restore DS segment register  }
-         pi_has_open_array_parameter
+         pi_has_open_array_parameter,
+         { subroutine uses threadvars }
+         pi_uses_threadvar
        );
        tprocinfoflags=set of tprocinfoflag;
 

+ 2 - 2
compiler/ncgld.pas

@@ -493,8 +493,8 @@ implementation
                          reference_reset_symbol(location.reference,current_asmdata.WeakRefAsmSymbol(gvs.mangledname,AT_DATA),0,location.reference.alignment,[])
                      end
                    else
-                     location:=gvs.localloc;
-                 end;
+                      location:=gvs.localloc;
+                  end;
 
                 { make const a LOC_CREFERENCE }
                 if (gvs.varspez=vs_const) and

+ 4 - 1
compiler/nld.pas

@@ -429,7 +429,10 @@ implementation
                   include(current_procinfo.flags,pi_needs_got);
                 { call to get address of threadvar }
                 if (vo_is_thread_var in tabstractvarsym(symtableentry).varoptions) then
-                  include(current_procinfo.flags,pi_do_call);
+                  begin
+                    include(current_procinfo.flags,pi_do_call);
+                    include(current_procinfo.flags,pi_uses_threadvar);
+                  end;
               end;
             procsym :
                 begin

+ 13 - 0
compiler/procinfo.pas

@@ -95,6 +95,11 @@ unit procinfo;
           got : tregister;
           CurrGOTLabel : tasmlabel;
 
+          { register containing the tlsoffset }
+          tlsoffset : tregister;
+          { reference label for tls addresses }
+          tlslabel : tasmlabel;
+
           { Holds the reference used to store all saved registers. }
           save_regs_ref : treference;
 
@@ -150,6 +155,9 @@ unit procinfo;
           { Allocate got register }
           procedure allocate_got_register(list: TAsmList);virtual;
 
+          { Allocate tls register }
+          procedure allocate_tls_register(list: TAsmList);virtual;
+
           { get frame pointer }
           procedure init_framepointer; virtual;
 
@@ -288,6 +296,11 @@ implementation
         { most os/cpu combo's don't use this yet, so not yet abstract }
       end;
 
+    procedure tprocinfo.allocate_tls_register(list : TAsmList);
+      begin
+      end;
+
+
     procedure tprocinfo.init_framepointer;
       begin
         { most targets use a constant, but some have a typed constant that must

+ 13 - 2
compiler/psub.pas

@@ -1451,6 +1451,9 @@ implementation
             { allocate got register if needed }
             allocate_got_register(aktproccode);
 
+            if pi_uses_threadvar in flags then
+              allocate_tls_register(aktproccode);
+
             { Allocate space in temp/registers for parast and localst }
             current_filepos:=entrypos;
             gen_alloc_symtable(aktproccode,procdef,procdef.parast);
@@ -1561,6 +1564,10 @@ implementation
                (got<>NR_NO) then
               cg.a_reg_sync(aktproccode,got);
 
+            if (pi_uses_threadvar in flags) and
+              (tlsoffset<>NR_NO) then
+              cg.a_reg_sync(aktproccode,tlsoffset);
+
             gen_free_symtable(aktproccode,procdef.localst);
             gen_free_symtable(aktproccode,procdef.parast);
 
@@ -1579,7 +1586,7 @@ implementation
               begin
                 current_filepos:=entrypos;
                 hlcg.gen_stack_check_call(templist);
-                aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist)
+                aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist);
               end;
 
             { this code (got loading) comes before everything which has }
@@ -1599,9 +1606,13 @@ implementation
             current_filepos:=entrypos;
             { load got if necessary }
             cg.g_maybe_got_init(templist);
-
             aktproccode.insertlistafter(headertai,templist);
 
+            if pi_uses_threadvar in flags then
+              cg.g_maybe_tls_init(templist);
+            aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist);
+
+
             { re-enable if more code at the end is ever generated here
             cg.set_regalloc_live_range_direction(rad_forward);
             }

+ 4 - 1
compiler/systems/i_linux.pas

@@ -589,7 +589,7 @@ unit i_linux;
             name         : 'Linux for ARMHF';
             shortname    : 'Linux';
             flags        : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive,
-                            tf_requires_proper_alignment,
+                            tf_requires_proper_alignment,tf_section_threadvars,
                             tf_smartlink_sections,tf_pic_uses_got,
                             tf_has_winlike_resources];
             cpu          : cpu_arm;
@@ -660,6 +660,9 @@ unit i_linux;
             shortname    : 'Linux';
             flags        : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive,
                             tf_requires_proper_alignment,
+{$ifdef tls_threadvars}
+                            tf_section_threadvars,
+{$endif tls_threadvars}
                             tf_smartlink_sections,tf_pic_uses_got,
                             tf_has_winlike_resources];
             cpu          : cpu_arm;

+ 3 - 1
compiler/utils/ppuutils/ppudump.pp

@@ -1354,7 +1354,9 @@ const
          (mask:pi_calls_c_varargs;
          str:' calls function with C-style varargs '),
          (mask:pi_has_open_array_parameter;
-         str:' has open array parameter ')
+         str:' has open array parameter '),
+         (mask:pi_uses_threadvar;
+         str:' uses threadvars ')
   );
 var
   procinfooptions : tprocinfoflags;

+ 9 - 0
rtl/arm/arm.inc

@@ -96,6 +96,15 @@ begin
 end;
 {$endif wince}
 
+{$ifdef linux}
+function fpc_read_tp : pointer; [public, alias: 'fpc_read_tp'];assembler; nostackframe;
+asm
+  // Helper is located at 0xffff0fe0
+  mvn r0,#0x0000f000 // mov r0, #0xffff0fff
+  sub pc,r0,#0x1f    // Jump to helper
+end;
+{$endif linux}
+
 {****************************************************************************
                        stack frame related stuff
 ****************************************************************************}