Browse Source

* Enable a minor size optimization for the threadvar loading code when -Os is specified, or when PIC code is generated and PIC uses the GOT.

git-svn-id: trunk@35551 -
yury 8 years ago
parent
commit
9f3759bcb8
1 changed file with 25 additions and 5 deletions
  1. 25 5
      compiler/ncgld.pas

+ 25 - 5
compiler/ncgld.pas

@@ -265,13 +265,14 @@ implementation
         norelocatelab : tasmlabel;
         norelocatelab : tasmlabel;
         tvref,
         tvref,
         href : treference;
         href : treference;
-        hregister : tregister;
+        hregister, hreg_tv_rec : tregister;
         tv_rec : trecorddef;
         tv_rec : trecorddef;
         tv_index_field,
         tv_index_field,
         tv_non_mt_data_field: tsym;
         tv_non_mt_data_field: tsym;
         tmpresloc: tlocation;
         tmpresloc: tlocation;
         issystemunit,
         issystemunit,
         indirect : boolean;
         indirect : boolean;
+        size_opt : boolean;
       begin
       begin
          if (tf_section_threadvars in target_info.flags) then
          if (tf_section_threadvars in target_info.flags) then
            begin
            begin
@@ -291,6 +292,7 @@ implementation
                is available it is called to retrieve the address.
                is available it is called to retrieve the address.
                Otherwise the address is loaded with the symbol
                Otherwise the address is loaded with the symbol
              }
              }
+
              tv_rec:=get_threadvar_record(resultdef,tv_index_field,tv_non_mt_data_field);
              tv_rec:=get_threadvar_record(resultdef,tv_index_field,tv_non_mt_data_field);
              fieldptrdef:=cpointerdef.getreusable(resultdef);
              fieldptrdef:=cpointerdef.getreusable(resultdef);
              current_asmdata.getjumplabel(norelocatelab);
              current_asmdata.getjumplabel(norelocatelab);
@@ -314,6 +316,20 @@ implementation
                          (target_info.system in systems_indirect_var_imports) and
                          (target_info.system in systems_indirect_var_imports) and
                          (cs_imported_data in current_settings.localswitches) and
                          (cs_imported_data in current_settings.localswitches) and
                          not issystemunit;
                          not issystemunit;
+             if not(vo_is_weak_external in gvs.varoptions) then
+               reference_reset_symbol(tvref,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_DATA,use_indirect_symbol(gvs)),0,sizeof(pint),[])
+             else
+               reference_reset_symbol(tvref,current_asmdata.WeakRefAsmSymbol(gvs.mangledname,AT_DATA),0,sizeof(pint),[]);
+             { Enable the size optimization with -Os, or when PIC code is generated and PIC uses the GOT }
+             size_opt:=(cs_opt_size in current_settings.optimizerswitches)
+                       or ((cs_create_pic in current_settings.moduleswitches) and (tf_pic_uses_got in target_info.flags));
+             if size_opt then
+               begin
+                 { Load a pointer to the thread var record into a register. }
+                 { This register will be used in both multithreaded and non-multithreaded cases. }
+                 hreg_tv_rec:=hlcg.getaddressregister(current_asmdata.CurrAsmList,fieldptrdef);
+                 hlcg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,resultdef,fieldptrdef,tvref,hreg_tv_rec);
+               end;
              paraloc1.init;
              paraloc1.init;
              paramanager.getintparaloc(current_asmdata.CurrAsmList,tprocvardef(pvd),1,paraloc1);
              paramanager.getintparaloc(current_asmdata.CurrAsmList,tprocvardef(pvd),1,paraloc1);
              hregister:=hlcg.getaddressregister(current_asmdata.CurrAsmList,pvd);
              hregister:=hlcg.getaddressregister(current_asmdata.CurrAsmList,pvd);
@@ -323,15 +339,17 @@ implementation
              hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,pvd,pvd,href,hregister);
              hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,pvd,pvd,href,hregister);
              hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,pvd,OC_EQ,0,hregister,norelocatelab);
              hlcg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,pvd,OC_EQ,0,hregister,norelocatelab);
              { no, call it with the index of the threadvar as parameter }
              { no, call it with the index of the threadvar as parameter }
-             if not(vo_is_weak_external in gvs.varoptions) then
-               reference_reset_symbol(tvref,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_DATA,use_indirect_symbol(gvs)),0,sizeof(pint),[])
-             else
-               reference_reset_symbol(tvref,current_asmdata.WeakRefAsmSymbol(gvs.mangledname,AT_DATA),0,sizeof(pint),[]);
              href:=tvref;
              href:=tvref;
              hlcg.g_set_addr_nonbitpacked_field_ref(current_asmdata.CurrAsmList,
              hlcg.g_set_addr_nonbitpacked_field_ref(current_asmdata.CurrAsmList,
                tv_rec,
                tv_rec,
                tfieldvarsym(tv_index_field),href);
                tfieldvarsym(tv_index_field),href);
+             if size_opt then
+               hlcg.reference_reset_base(href,tfieldvarsym(tv_index_field).vardef,hreg_tv_rec,href.offset,href.alignment,[]);
              hlcg.a_load_ref_cgpara(current_asmdata.CurrAsmList,tfieldvarsym(tv_index_field).vardef,href,paraloc1);
              hlcg.a_load_ref_cgpara(current_asmdata.CurrAsmList,tfieldvarsym(tv_index_field).vardef,href,paraloc1);
+             { Dealloc the threadvar record register before calling the helper function to allow  }
+             { the register allocator to assign non-mandatory real registers for hreg_tv_rec. }
+             if size_opt then
+               cg.a_reg_dealloc(current_asmdata.CurrAsmList,hreg_tv_rec);
              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
              paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
              cg.allocallcpuregisters(current_asmdata.CurrAsmList);
              cg.allocallcpuregisters(current_asmdata.CurrAsmList);
              { result is the address of the threadvar }
              { result is the address of the threadvar }
@@ -358,6 +376,8 @@ implementation
                tfieldvarsym(tv_non_mt_data_field),href);
                tfieldvarsym(tv_non_mt_data_field),href);
              { load in the same "hregister" as above, so after this sequence
              { load in the same "hregister" as above, so after this sequence
                the address of the threadvar is always in hregister }
                the address of the threadvar is always in hregister }
+             if size_opt then
+               hlcg.reference_reset_base(href,fieldptrdef,hreg_tv_rec,href.offset,href.alignment,[]);
              hlcg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,resultdef,fieldptrdef,href,hregister);
              hlcg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,resultdef,fieldptrdef,href,hregister);
              hlcg.a_label(current_asmdata.CurrAsmList,endrelocatelab);
              hlcg.a_label(current_asmdata.CurrAsmList,endrelocatelab);