Browse Source

+ initial work for tls-based threadvar support on arm-linux

git-svn-id: trunk@40267 -
florian 6 years ago
parent
commit
9f16c34329

+ 1 - 0
.gitattributes

@@ -85,6 +85,7 @@ compiler/arm/narmcal.pas svneol=native#text/plain
 compiler/arm/narmcnv.pas svneol=native#text/plain
 compiler/arm/narmcnv.pas svneol=native#text/plain
 compiler/arm/narmcon.pas svneol=native#text/plain
 compiler/arm/narmcon.pas svneol=native#text/plain
 compiler/arm/narminl.pas svneol=native#text/plain
 compiler/arm/narminl.pas svneol=native#text/plain
+compiler/arm/narmld.pas svneol=native#text/pascal
 compiler/arm/narmmat.pas svneol=native#text/plain
 compiler/arm/narmmat.pas svneol=native#text/plain
 compiler/arm/narmmem.pas svneol=native#text/plain
 compiler/arm/narmmem.pas svneol=native#text/plain
 compiler/arm/narmset.pas svneol=native#text/plain
 compiler/arm/narmset.pas svneol=native#text/plain

+ 6 - 2
compiler/aasmtai.pas

@@ -145,7 +145,11 @@ interface
           { offset of symbol's GOT slot in GOT }
           { offset of symbol's GOT slot in GOT }
           aitconst_got,
           aitconst_got,
           { offset of symbol itself from GOT }
           { offset of symbol itself from GOT }
-          aitconst_gotoff_symbol
+          aitconst_gotoff_symbol,
+          { ARM TLS code }
+          aitconst_gottpoff,
+          aitconst_tpoff
+
         );
         );
 
 
         tairealconsttype = (
         tairealconsttype = (
@@ -1759,7 +1763,7 @@ implementation
       end;
       end;
 
 
 
 
-    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym,_endsym: tasmsymbol; _ofs: int64);
+    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym, _endsym: tasmsymbol; _ofs: int64);
        begin
        begin
          self.create_sym_offset(_sym,_ofs);
          self.create_sym_offset(_sym,_ofs);
          consttype:=_typ;
          consttype:=_typ;

+ 12 - 3
compiler/aggas.pas

@@ -347,9 +347,13 @@ implementation
             exit;
             exit;
           end;
           end;
 
 
-        if (atype=sec_threadvar) and
-          (target_info.system in (systems_windows+systems_wince)) then
-          secname:='.tls';
+        if atype=sec_threadvar then
+          begin
+            if (target_info.system in (systems_windows+systems_wince)) then
+              secname:='.tls'
+            else if (target_info.system in systems_linux) then
+              secname:='.tbss';
+          end;
 
 
         { go32v2 stub only loads .text and .data sections, and allocates space for .bss.
         { go32v2 stub only loads .text and .data sections, and allocates space for .bss.
           Thus, data which normally goes into .rodata and .rodata_norel sections must
           Thus, data which normally goes into .rodata and .rodata_norel sections must
@@ -943,6 +947,11 @@ implementation
                         WriteAixIntConst(tai_const(hp));
                         WriteAixIntConst(tai_const(hp));
                       writer.AsmLn;
                       writer.AsmLn;
                     end;
                     end;
+                 aitconst_gottpoff:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(gottpoff)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
 {$endif cpu64bitaddr}
 {$endif cpu64bitaddr}
                  aitconst_got:
                  aitconst_got:
                    begin
                    begin

+ 12 - 1
compiler/arm/aoptcpu.pas

@@ -2516,6 +2516,15 @@ Implementation
 
 
   { TODO : schedule also forward }
   { TODO : schedule also forward }
   { TODO : schedule distance > 1 }
   { TODO : schedule distance > 1 }
+
+    { Conservative check used by the load scheduler below: reports whether p
+      could be the pc relative LDR that fetches a tls offset, i.e. an LDR whose
+      memory operand combines PC with a second register. }
+    function PossibleTLSLoad(const p: tai) : boolean;
+      begin
+        Result:=false;
+        { only an LDR with a memory reference as second operand qualifies }
+        if p.typ<>ait_instruction then
+          exit;
+        if taicpu(p).opcode<>A_LDR then
+          exit;
+        if taicpu(p).oper[1]^.typ<>top_ref then
+          exit;
+        { PC has to be either base or index, the other register must be set as well }
+        if taicpu(p).oper[1]^.ref^.base=NR_PC then
+          Result:=taicpu(p).oper[1]^.ref^.index<>NR_NO
+        else
+          Result:=(taicpu(p).oper[1]^.ref^.base<>NR_NO) and
+            (taicpu(p).oper[1]^.ref^.index=NR_PC);
+      end;
+
     var
     var
       hp1,hp2,hp3,hp4,hp5,insertpos : tai;
       hp1,hp2,hp3,hp4,hp5,insertpos : tai;
       list : TAsmList;
       list : TAsmList;
@@ -2572,7 +2581,9 @@ Implementation
             ) and
             ) and
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
             ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
-             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) then
+             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
+            not(PossibleTLSLoad(p)) and
+            not(PossibleTLSLoad(hp1)) then
             begin
             begin
               hp3:=tai(p.Previous);
               hp3:=tai(p.Previous);
               hp5:=tai(p.next);
               hp5:=tai(p.next);

+ 20 - 4
compiler/arm/cgcpu.pas

@@ -107,13 +107,15 @@ unit cgcpu;
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
 
 
-        { clear out potential overflow bits from 8 or 16 bit operations  }
-        { the upper 24/16 bits of a register after an operation          }
+        { clear out potential overflow bits from 8 or 16 bit operations,
+          i.e. the upper 24/16 bits of a register after an operation }
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
 
 
         { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
         { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
         procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
         procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
 
 
+
+        procedure g_maybe_tls_init(list : TAsmList); override;
       end;
       end;
 
 
       { tcgarm is shared between normal arm and thumb-2 }
       { tcgarm is shared between normal arm and thumb-2 }
@@ -2114,7 +2116,7 @@ unit cgcpu;
                    end;
                    end;
                end;
                end;
              end;
              end;
-        end;
+          end;
       end;
       end;
 
 
 
 
@@ -2476,6 +2478,8 @@ unit cgcpu;
                     a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
                     a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
                 indirection_done:=true;
                 indirection_done:=true;
               end
               end
+            else if ref.refaddr=addr_gottpoff then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
             else if (cs_create_pic in current_settings.moduleswitches) then
             else if (cs_create_pic in current_settings.moduleswitches) then
               if (tf_pic_uses_got in target_info.flags) then
               if (tf_pic_uses_got in target_info.flags) then
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
@@ -3271,6 +3275,15 @@ unit cgcpu;
       end;
       end;
 
 
 
 
+    { Emits the tls setup code of a procedure: calls the rtl helper fpc_read_tp
+      and copies its result from R0 into current_procinfo.tlsoffset.
+
+      list: assembler list receiving the generated instructions.
+
+      Code is only generated for targets which really access threadvars through
+      the tls offset register. Without this guard every procedure using a
+      threadvar would call fpc_read_tp, also on arm targets which neither use
+      the register nor provide the helper in their rtl. }
+    procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
+      begin
+        { must match the target test in tarmloadnode.generate_threadvar_access,
+          the only place where tlsoffset is consumed }
+        if not(tf_section_threadvars in target_info.flags) or
+           not(target_info.system in [system_arm_linux]) then
+          exit;
+        { no register allocated for the tls offset, nothing to initialize }
+        if current_procinfo.tlsoffset=NR_NO then
+          exit;
+        { R0 carries the result of fpc_read_tp, so keep it allocated around
+          the call and the following move }
+        list.concat(tai_regalloc.alloc(NR_R0,nil));
+        a_call_name(list,'fpc_read_tp',false);
+        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
+        list.concat(tai_regalloc.dealloc(NR_R0,nil));
+      end;
+
+
     procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
     procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
       begin
       begin
         case op of
         case op of
@@ -5026,7 +5039,10 @@ unit cgcpu;
                 cg.a_label(current_procinfo.aktlocaldata,l);
                 cg.a_label(current_procinfo.aktlocaldata,l);
                 tmpref.symboldata:=current_procinfo.aktlocaldata.last;
                 tmpref.symboldata:=current_procinfo.aktlocaldata.last;
 
 
-                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
+                if ref.refaddr=addr_gottpoff then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+                else
+                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
 
 
                 { load consts entry }
                 { load consts entry }
                 tmpref.symbol:=l;
                 tmpref.symbol:=l;

+ 1 - 0
compiler/arm/cpunode.pas

@@ -38,6 +38,7 @@ unit cpunode;
        narmcal,
        narmcal,
        narmmat,
        narmmat,
        narminl,
        narminl,
+       narmld,
        narmcnv,
        narmcnv,
        narmcon,
        narmcon,
        narmset,
        narmset,

+ 8 - 0
compiler/arm/cpupi.pas

@@ -49,6 +49,8 @@ unit cpupi;
           procedure generate_parameter_info;override;
           procedure generate_parameter_info;override;
           procedure allocate_got_register(list : TAsmList);override;
           procedure allocate_got_register(list : TAsmList);override;
           procedure postprocess_code;override;
           procedure postprocess_code;override;
+
+          procedure allocate_tls_register(list : TAsmList);override;
        end;
        end;
 
 
 
 
@@ -276,6 +278,12 @@ unit cpupi;
         finalizearmcode(aktproccode,aktlocaldata);
         finalizearmcode(aktproccode,aktlocaldata);
       end;
       end;
 
 
+
+    { Reserves an address register which holds the tls offset for the whole
+      procedure; the register is stored in current_procinfo.tlsoffset.
+      list: assembler list passed on to the register allocation. }
+    procedure tcpuprocinfo.allocate_tls_register(list: TAsmList);
+      begin
+        { NOTE(review): writes through current_procinfo instead of self,
+          presumably both are the same object when this is called - confirm }
+        current_procinfo.tlsoffset:=cg.getaddressregister(list);
+      end;
+
 begin
 begin
    cprocinfo:=tcpuprocinfo;
    cprocinfo:=tcpuprocinfo;
 end.
 end.

+ 97 - 0
compiler/arm/narmld.pas

@@ -0,0 +1,97 @@
+{
+    Copyright (c) 1998-2018 by Florian Klaempfl
+
+    Generate arm assembler for load nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmld;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      globtype,
+      symsym,
+      node,ncgld,pass_1,aasmbase;
+
+    type
+      { arm specific load node, only the code generation for threadvar
+        accesses differs from the generic tcgloadnode }
+      tarmloadnode = class(tcgloadnode)
+         { generates the access to the threadvar gvs, uses a tls based
+           sequence on arm-linux and the inherited code otherwise }
+         procedure generate_threadvar_access(gvs : tstaticvarsym); override;
+      end;
+
+
+implementation
+
+    uses
+      globals,verbose,
+      cgbase,cgobj,cgutils,
+      aasmdata,
+      systems,
+      symcpu,symdef,
+      nld,
+      cpubase,
+      parabase,
+      procinfo;
+
+{*****************************************************************************
+                            TARMLOADNODE
+*****************************************************************************}
+
+    { Sets up location.reference for an access to the threadvar gvs.
+
+      On arm-linux with section based threadvars the following is generated:
+
+           ldr  r1,<pool entry>    pool entry: sym(gottpoff)+(.-l-8)
+        l: ldr  r2,[pc,r1]
+
+      and the variable is then addressed as [tlsoffset,r2], tlsoffset being the
+      register initialized at procedure entry. All other configurations are
+      handled by the inherited implementation.
+
+      gvs: static variable symbol of the threadvar being accessed. }
+    procedure tarmloadnode.generate_threadvar_access(gvs: tstaticvarsym);
+      var
+        href: treference;
+        hregister : tregister;
+        handled: boolean;
+        l : TAsmLabel;
+      begin
+        handled:=false;
+        if tf_section_threadvars in target_info.flags then
+          begin
+            if target_info.system in [system_arm_linux] then
+              begin
+                { the flag is expected to be set during the first pass,
+                  otherwise no tls offset register has been set up }
+                if not(pi_uses_threadvar in current_procinfo.flags) then
+                  internalerror(2012012101);
+                current_asmdata.getjumplabel(l);
+                { the -8 presumably compensates that pc reads as the address of
+                  the instruction at l plus 8 in arm mode - TODO confirm for thumb }
+                reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_DATA),-8,sizeof(AInt),[]);
+                href.refaddr:=addr_gottpoff;
+                { the pool entry is written relative to l }
+                href.relsymbol:=l;
+                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                { l marks the pc relative load using the value loaded above }
+                cg.a_label(current_asmdata.CurrAsmList,l);
+                reference_reset(href,0,[]);
+                href.base:=NR_PC;
+                href.index:=hregister;
+                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister);
+                { address of the variable: tls offset register plus loaded value }
+                location.reference.base:=current_procinfo.tlsoffset;
+                location.reference.index:=hregister;
+                handled:=true;
+              end;
+          end;
+
+        { everything else uses the generic threadvar code }
+        if not handled then
+          inherited;
+      end;
+
+
+begin
+   { register the arm specific load node class }
+   cloadnode:=tarmloadnode;
+end.

+ 7 - 2
compiler/cgbase.pas

@@ -63,8 +63,6 @@ interface
        TCGNonRefLoc=low(TCGLoc)..pred(LOC_CREFERENCE);
        TCGNonRefLoc=low(TCGLoc)..pred(LOC_CREFERENCE);
        TCGRefLoc=LOC_CREFERENCE..LOC_REFERENCE;
        TCGRefLoc=LOC_CREFERENCE..LOC_REFERENCE;
 
 
-       { since we have only 16bit offsets, we need to be able to specify the high
-         and lower 16 bits of the address of a symbol of up to 64 bit }
        trefaddr = (
        trefaddr = (
          addr_no,
          addr_no,
          addr_full,
          addr_full,
@@ -72,6 +70,8 @@ interface
          addr_pic_no_got
          addr_pic_no_got
          {$IF defined(POWERPC) or defined(POWERPC64) or defined(SPARC) or defined(MIPS) or defined(SPARC64)}
          {$IF defined(POWERPC) or defined(POWERPC64) or defined(SPARC) or defined(MIPS) or defined(SPARC64)}
          ,
          ,
+         { since we have only 16bit offsets, we need to be able to specify the high
+           and lower 16 bits of the address of a symbol of up to 64 bit }
          addr_low,         // bits 48-63
          addr_low,         // bits 48-63
          addr_high,        // bits 32-47
          addr_high,        // bits 32-47
          {$IF defined(POWERPC64)}
          {$IF defined(POWERPC64)}
@@ -122,6 +122,11 @@ interface
          ,addr_gdop_hix22
          ,addr_gdop_hix22
          ,addr_gdop_lox22
          ,addr_gdop_lox22
          {$endif SPARC64}
          {$endif SPARC64}
+         {$IFDEF ARM}
+         ,addr_gottpoff
+         ,addr_tpoff
+         {$ENDIF}
+
          );
          );
 
 
 
 

+ 8 - 0
compiler/cgobj.pas

@@ -437,6 +437,8 @@ unit cgobj;
 
 
           { initialize the pic/got register }
           { initialize the pic/got register }
           procedure g_maybe_got_init(list: TAsmList); virtual;
           procedure g_maybe_got_init(list: TAsmList); virtual;
+          { initialize the tls register if needed }
+          procedure g_maybe_tls_init(list : TAsmList); virtual;
           { allocallcpuregisters, a_call_name, deallocallcpuregisters sequence }
           { allocallcpuregisters, a_call_name, deallocallcpuregisters sequence }
           procedure g_call(list: TAsmList; const s: string);
           procedure g_call(list: TAsmList; const s: string);
           { Generate code to exit an unwind-protected region. The default implementation
           { Generate code to exit an unwind-protected region. The default implementation
@@ -2781,6 +2783,12 @@ implementation
       begin
       begin
       end;
       end;
 
 
+
+    procedure tcg.g_maybe_tls_init(list: TAsmList);
+      begin
+        { empty by default, code generators which need to initialize a tls
+          register override this method }
+      end;
+
+
     procedure tcg.g_call(list: TAsmList;const s: string);
     procedure tcg.g_call(list: TAsmList;const s: string);
       begin
       begin
         allocallcpuregisters(list);
         allocallcpuregisters(list);

+ 3 - 1
compiler/globtype.pas

@@ -700,7 +700,9 @@ interface
            for i8086 cpu huge memory model,
            for i8086 cpu huge memory model,
            as this changes SP register it requires special handling
            as this changes SP register it requires special handling
            to restore DS segment register  }
            to restore DS segment register  }
-         pi_has_open_array_parameter
+         pi_has_open_array_parameter,
+         { subroutine uses threadvars }
+         pi_uses_threadvar
        );
        );
        tprocinfoflags=set of tprocinfoflag;
        tprocinfoflags=set of tprocinfoflag;
 
 

+ 2 - 2
compiler/ncgld.pas

@@ -493,8 +493,8 @@ implementation
                          reference_reset_symbol(location.reference,current_asmdata.WeakRefAsmSymbol(gvs.mangledname,AT_DATA),0,location.reference.alignment,[])
                          reference_reset_symbol(location.reference,current_asmdata.WeakRefAsmSymbol(gvs.mangledname,AT_DATA),0,location.reference.alignment,[])
                      end
                      end
                    else
                    else
-                     location:=gvs.localloc;
-                 end;
+                      location:=gvs.localloc;
+                  end;
 
 
                 { make const a LOC_CREFERENCE }
                 { make const a LOC_CREFERENCE }
                 if (gvs.varspez=vs_const) and
                 if (gvs.varspez=vs_const) and

+ 4 - 1
compiler/nld.pas

@@ -429,7 +429,10 @@ implementation
                   include(current_procinfo.flags,pi_needs_got);
                   include(current_procinfo.flags,pi_needs_got);
                 { call to get address of threadvar }
                 { call to get address of threadvar }
                 if (vo_is_thread_var in tabstractvarsym(symtableentry).varoptions) then
                 if (vo_is_thread_var in tabstractvarsym(symtableentry).varoptions) then
-                  include(current_procinfo.flags,pi_do_call);
+                  begin
+                    include(current_procinfo.flags,pi_do_call);
+                    include(current_procinfo.flags,pi_uses_threadvar);
+                  end;
               end;
               end;
             procsym :
             procsym :
                 begin
                 begin

+ 13 - 0
compiler/procinfo.pas

@@ -95,6 +95,11 @@ unit procinfo;
           got : tregister;
           got : tregister;
           CurrGOTLabel : tasmlabel;
           CurrGOTLabel : tasmlabel;
 
 
+          { register containing the tlsoffset }
+          tlsoffset : tregister;
+          { reference label for tls addresses }
+          tlslabel : tasmlabel;
+
           { Holds the reference used to store all saved registers. }
           { Holds the reference used to store all saved registers. }
           save_regs_ref : treference;
           save_regs_ref : treference;
 
 
@@ -150,6 +155,9 @@ unit procinfo;
           { Allocate got register }
           { Allocate got register }
           procedure allocate_got_register(list: TAsmList);virtual;
           procedure allocate_got_register(list: TAsmList);virtual;
 
 
+          { Allocate tls register }
+          procedure allocate_tls_register(list: TAsmList);virtual;
+
           { get frame pointer }
           { get frame pointer }
           procedure init_framepointer; virtual;
           procedure init_framepointer; virtual;
 
 
@@ -288,6 +296,11 @@ implementation
         { most os/cpu combo's don't use this yet, so not yet abstract }
         { most os/cpu combo's don't use this yet, so not yet abstract }
       end;
       end;
 
 
+    procedure tprocinfo.allocate_tls_register(list : TAsmList);
+      begin
+        { empty by default, cpu specific procinfo classes which need a tls
+          register override this method }
+      end;
+
+
     procedure tprocinfo.init_framepointer;
     procedure tprocinfo.init_framepointer;
       begin
       begin
         { most targets use a constant, but some have a typed constant that must
         { most targets use a constant, but some have a typed constant that must

+ 13 - 2
compiler/psub.pas

@@ -1451,6 +1451,9 @@ implementation
             { allocate got register if needed }
             { allocate got register if needed }
             allocate_got_register(aktproccode);
             allocate_got_register(aktproccode);
 
 
+            if pi_uses_threadvar in flags then
+              allocate_tls_register(aktproccode);
+
             { Allocate space in temp/registers for parast and localst }
             { Allocate space in temp/registers for parast and localst }
             current_filepos:=entrypos;
             current_filepos:=entrypos;
             gen_alloc_symtable(aktproccode,procdef,procdef.parast);
             gen_alloc_symtable(aktproccode,procdef,procdef.parast);
@@ -1561,6 +1564,10 @@ implementation
                (got<>NR_NO) then
                (got<>NR_NO) then
               cg.a_reg_sync(aktproccode,got);
               cg.a_reg_sync(aktproccode,got);
 
 
+            if (pi_uses_threadvar in flags) and
+              (tlsoffset<>NR_NO) then
+              cg.a_reg_sync(aktproccode,tlsoffset);
+
             gen_free_symtable(aktproccode,procdef.localst);
             gen_free_symtable(aktproccode,procdef.localst);
             gen_free_symtable(aktproccode,procdef.parast);
             gen_free_symtable(aktproccode,procdef.parast);
 
 
@@ -1579,7 +1586,7 @@ implementation
               begin
               begin
                 current_filepos:=entrypos;
                 current_filepos:=entrypos;
                 hlcg.gen_stack_check_call(templist);
                 hlcg.gen_stack_check_call(templist);
-                aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist)
+                aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist);
               end;
               end;
 
 
             { this code (got loading) comes before everything which has }
             { this code (got loading) comes before everything which has }
@@ -1599,9 +1606,13 @@ implementation
             current_filepos:=entrypos;
             current_filepos:=entrypos;
             { load got if necessary }
             { load got if necessary }
             cg.g_maybe_got_init(templist);
             cg.g_maybe_got_init(templist);
-
             aktproccode.insertlistafter(headertai,templist);
             aktproccode.insertlistafter(headertai,templist);
 
 
+            if pi_uses_threadvar in flags then
+              cg.g_maybe_tls_init(templist);
+            aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist);
+
+
             { re-enable if more code at the end is ever generated here
             { re-enable if more code at the end is ever generated here
             cg.set_regalloc_live_range_direction(rad_forward);
             cg.set_regalloc_live_range_direction(rad_forward);
             }
             }

+ 4 - 1
compiler/systems/i_linux.pas

@@ -589,7 +589,7 @@ unit i_linux;
             name         : 'Linux for ARMHF';
             name         : 'Linux for ARMHF';
             shortname    : 'Linux';
             shortname    : 'Linux';
             flags        : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive,
             flags        : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive,
-                            tf_requires_proper_alignment,
+                            tf_requires_proper_alignment,tf_section_threadvars,
                             tf_smartlink_sections,tf_pic_uses_got,
                             tf_smartlink_sections,tf_pic_uses_got,
                             tf_has_winlike_resources];
                             tf_has_winlike_resources];
             cpu          : cpu_arm;
             cpu          : cpu_arm;
@@ -660,6 +660,9 @@ unit i_linux;
             shortname    : 'Linux';
             shortname    : 'Linux';
             flags        : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive,
             flags        : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive,
                             tf_requires_proper_alignment,
                             tf_requires_proper_alignment,
+{$ifdef tls_threadvars}
+                            tf_section_threadvars,
+{$endif tls_threadvars}
                             tf_smartlink_sections,tf_pic_uses_got,
                             tf_smartlink_sections,tf_pic_uses_got,
                             tf_has_winlike_resources];
                             tf_has_winlike_resources];
             cpu          : cpu_arm;
             cpu          : cpu_arm;

+ 3 - 1
compiler/utils/ppuutils/ppudump.pp

@@ -1354,7 +1354,9 @@ const
          (mask:pi_calls_c_varargs;
          (mask:pi_calls_c_varargs;
          str:' calls function with C-style varargs '),
          str:' calls function with C-style varargs '),
          (mask:pi_has_open_array_parameter;
          (mask:pi_has_open_array_parameter;
-         str:' has open array parameter ')
+         str:' has open array parameter '),
+         (mask:pi_uses_threadvar;
+         str:' uses threadvars ')
   );
   );
 var
 var
   procinfooptions : tprocinfoflags;
   procinfooptions : tprocinfoflags;

+ 9 - 0
rtl/arm/arm.inc

@@ -96,6 +96,15 @@ begin
 end;
 end;
 {$endif wince}
 {$endif wince}
 
 
+{$ifdef linux}
+{ Returns the value delivered by the helper at 0xffff0fe0, used by the compiler
+  as thread pointer for tls based threadvars.
+  NOTE(review): the address looks like the linux kernel user helper
+  __kuser_get_tls - confirm against the kernel documentation.
+  The routine has no stack frame and does not touch lr, it jumps to the helper
+  so the helper is expected to return directly to our caller. }
+function fpc_read_tp : pointer; [public, alias: 'fpc_read_tp'];assembler; nostackframe;
+asm
+  // Helper is located at 0xffff0fe0
+  mvn r0,#0x0000f000 // mov r0, #0xffff0fff
+  // 0xffff0fff - 0x1f = 0xffff0fe0
+  sub pc,r0,#0x1f    // Jump to helper
+end;
+{$endif linux}
+
 {****************************************************************************
 {****************************************************************************
                        stack frame related stuff
                        stack frame related stuff
 ****************************************************************************}
 ****************************************************************************}