Selaa lähdekoodia

o patch from Sergej Gorelkin to improvement code generation for string literals
* Replaces linear search through assembler list by the hash lookup.
This considerably improves performance on large projects
(one example is winunits-jedi package, in which tcgstringconstnode.pass_generate_code
was top #1 in calltree, consuming about 12% IRefs).
* Enables reusing memory locations for widestring constants
(and in general, the same approach may be used for any other type of constants).
* Saves a sizeof(pointer) bytes per constant, by removing a location
which points to the string. This location is necessary for the
typed consts which may be modified, but redundant for string literals
because the language does not allow to modify string literals in any way.

git-svn-id: trunk@11657 -

florian 17 vuotta sitten
vanhempi
commit
9955d5b061
4 muutettua tiedostoa jossa 314 lisäystä ja 149 poistoa
  1. 18 0
      compiler/aasmdata.pas
  2. 244 4
      compiler/cclasses.pas
  3. 2 5
      compiler/ncgcnv.pas
  4. 50 140
      compiler/ncgcon.pas

+ 18 - 0
compiler/aasmdata.pas

@@ -66,6 +66,19 @@ interface
         al_end
       );
 
+      { Type of constant 'pools'. Currently contains only string types,
+        but may be extended with reals, sets, etc. }
+      
+      TConstPoolType = (
+         sp_invalid,
+         sp_conststr,
+         sp_shortstr,
+         sp_longstr,
+         sp_ansistr,
+         sp_widestr,
+         sp_unicodestr
+      );
+      
     const
       AsmListTypeStr : array[TAsmListType] of string[24] =(
         'al_begin',
@@ -126,6 +139,8 @@ interface
         { Assembler lists }
         AsmLists      : array[TAsmListType] of TAsmList;
         CurrAsmList   : TAsmList;
+        { hash tables for reusing constant storage }
+        ConstPools    : array[TConstPoolType] of THashSet;
         constructor create(const n:string);
         destructor  destroy;override;
         { asmsymbol }
@@ -293,6 +308,7 @@ implementation
     destructor TAsmData.destroy;
       var
         hal : TAsmListType;
+        hp  : TConstPoolType;
       begin
         { Symbols }
 {$ifdef MEMDEBUG}
@@ -321,6 +337,8 @@ implementation
 {$ifdef MEMDEBUG}
          memasmlists.stop;
 {$endif}
+         for hp := low(TConstPoolType) to high(TConstPoolType) do
+           ConstPools[hp].Free;
       end;
 
 

+ 244 - 4
compiler/cclasses.pas

@@ -459,7 +459,51 @@ type
        end;
 
 
+{******************************************************************
+   THashSet (keys not limited to ShortString, no indexed access)
+*******************************************************************}
+
+       PPHashSetItem = ^PHashSetItem;
+       PHashSetItem = ^THashSetItem;
+       THashSetItem = record
+         Next: PHashSetItem;
+         Key: Pointer;
+         KeyLength: Integer;
+         HashValue: LongWord;
+         Data: TObject;
+       end;
+
+       THashSet = class(TObject)
+       private
+         FCount: LongWord;
+         FBucketCount: LongWord;
+         FBucket: PPHashSetItem;
+         FOwnsObjects: Boolean;
+         FOwnsKeys: Boolean;
+         function Lookup(Key: Pointer; KeyLen: Integer; var Found: Boolean;
+           CanCreate: Boolean): PHashSetItem;
+         procedure Resize(NewCapacity: LongWord);
+       public
+         constructor Create(InitSize: Integer; OwnKeys, OwnObjects: Boolean);
+         destructor Destroy; override;
+         procedure Clear;
+         { finds an entry by key }
+         function Find(Key: Pointer; KeyLen: Integer): PHashSetItem;
+         { finds an entry, creates one if not exists }
+         function FindOrAdd(Key: Pointer; KeyLen: Integer;
+           var Found: Boolean): PHashSetItem;
+         { finds an entry, creates one if not exists }
+         function FindOrAdd(Key: Pointer; KeyLen: Integer): PHashSetItem;
+         { returns Data by given Key }
+         function Get(Key: Pointer; KeyLen: Integer): TObject;
+         { removes an entry, returns False if entry wasn't there }
+         function Remove(Entry: PHashSetItem): Boolean;
+         property Count: LongWord read FCount;
+      end;
+
+
     function FPHash(const s:shortstring):LongWord;
+    function FPHash(P: PChar; Len: Integer): LongWord;
 
 
 implementation
@@ -1043,7 +1087,7 @@ end;
         pmax:=@s[length(s)+1];
         while (p<pmax) do
           begin
-            result:=LongWord((result shl 5) - result) xor LongWord(P^);
+            result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor LongWord(P^);
             inc(p);
           end;
 {$ifdef overflowon}
@@ -1052,6 +1096,26 @@ end;
 {$endif}
       end;
 
+    function FPHash(P: PChar; Len: Integer): LongWord;
+      Var
+        pmax : pchar;
+      begin
+{$ifopt Q+}
+{$define overflowon}
+{$Q-}
+{$endif}
+        result:=0;
+        pmax:=p+len;
+        while (p<pmax) do
+          begin
+            result:=LongWord(LongInt(result shl 5) - LongInt(result)) xor LongWord(P^);
+            inc(p);
+          end;
+{$ifdef overflowon}
+{$Q+}
+{$undef overflowon}
+{$endif}
+      end;
 
 procedure TFPHashList.RaiseIndexError(Index : Integer);
 begin
@@ -2226,16 +2290,14 @@ end;
     function TCmdStrList.Find(const s:TCmdStr):TCmdStrListItem;
       var
         NewNode : TCmdStrListItem;
-        ups     : string;
       begin
         result:=nil;
         if s='' then
          exit;
-        ups:=upper(s);
         NewNode:=TCmdStrListItem(FFirst);
         while assigned(NewNode) do
          begin
-           if upper(NewNode.FPStr)=ups then
+           if SysUtils.CompareText(s, NewNode.FPStr)=0 then
             begin
               result:=NewNode;
               exit;
@@ -2521,4 +2583,182 @@ end;
          end;
       end;
 
+{****************************************************************************
+                                thashset
+****************************************************************************}
+
+    constructor THashSet.Create(InitSize: Integer; OwnKeys, OwnObjects: Boolean);
+      var
+        I: Integer;
+      begin
+        inherited Create;
+        FOwnsObjects := OwnObjects;
+        FOwnsKeys := OwnKeys;
+        I := 64;
+        while I < InitSize do I := I shl 1;
+        FBucketCount := I;
+        FBucket := AllocMem(I * sizeof(PHashSetItem));
+      end;
+
+
+    destructor THashSet.Destroy;
+      begin
+        Clear;
+        FreeMem(FBucket);
+        inherited Destroy;
+      end;
+
+
+    procedure THashSet.Clear;
+      var
+        I: Integer;
+        item, next: PHashSetItem;
+      begin
+        for I := 0 to FBucketCount-1 do
+        begin
+          item := FBucket[I];
+          while Assigned(item) do
+          begin
+            next := item^.Next;
+            if FOwnsObjects then
+              item^.Data.Free;
+            if FOwnsKeys then
+              FreeMem(item^.Key);
+            Dispose(item);
+            item := next;
+          end;
+        end;
+        FillChar(FBucket^, FBucketCount * sizeof(PHashSetItem), 0);
+      end;
+
+
+    function THashSet.Find(Key: Pointer; KeyLen: Integer): PHashSetItem;
+      var
+        Dummy: Boolean;
+      begin
+        Result := Lookup(Key, KeyLen, Dummy, False);
+      end;
+
+
+    function THashSet.FindOrAdd(Key: Pointer; KeyLen: Integer;
+        var Found: Boolean): PHashSetItem;
+      begin
+        Result := Lookup(Key, KeyLen, Found, True);
+      end;
+
+
+    function THashSet.FindOrAdd(Key: Pointer; KeyLen: Integer): PHashSetItem;
+      var
+        Dummy: Boolean;
+      begin
+        Result := Lookup(Key, KeyLen, Dummy, True);
+      end;
+
+
+    function THashSet.Get(Key: Pointer; KeyLen: Integer): TObject;
+      var
+        e: PHashSetItem;
+        Dummy: Boolean;
+      begin
+        e := Lookup(Key, KeyLen, Dummy, False);
+        if Assigned(e) then
+          Result := e^.Data
+        else
+          Result := nil;
+      end;
+
+
+    function THashSet.Lookup(Key: Pointer; KeyLen: Integer;
+      var Found: Boolean; CanCreate: Boolean): PHashSetItem;
+      var
+        Entry: PPHashSetItem;
+        h: LongWord;
+      begin
+        h := FPHash(Key, KeyLen);
+        Entry := @FBucket[h mod FBucketCount];
+        while Assigned(Entry^) and
+          not ((Entry^^.HashValue = h) and (Entry^^.KeyLength = KeyLen) and
+            (CompareByte(Entry^^.Key^, Key^, KeyLen) = 0)) do
+              Entry := @Entry^^.Next;
+        Found := Assigned(Entry^);
+        if Found or (not CanCreate) then
+          begin
+            Result := Entry^;
+            Exit;
+          end;
+        if FCount > FBucketCount then  { arbitrary limit, probably too high }
+          begin
+            { rehash and repeat search }
+            Resize(FBucketCount * 2);
+            Result := Lookup(Key, KeyLen, Found, CanCreate);
+          end
+        else
+          begin
+            New(Result);
+            if FOwnsKeys then
+            begin
+              GetMem(Result^.Key, KeyLen);
+              Move(Key^, Result^.Key^, KeyLen);
+            end
+            else
+              Result^.Key := Key;
+            Result^.KeyLength := KeyLen;
+            Result^.HashValue := h;
+            Result^.Data := nil;
+            Result^.Next := nil;
+            Inc(FCount);
+            Entry^ := Result;
+          end;
+        end;
+
+
+    procedure THashSet.Resize(NewCapacity: LongWord);
+      var
+        p, chain: PPHashSetItem;
+        i: Integer;
+        e, n: PHashSetItem;
+      begin
+        p := AllocMem(NewCapacity * sizeof(PHashSetItem));
+        for i := 0 to FBucketCount-1 do
+          begin
+            e := FBucket[i];
+            while Assigned(e) do
+            begin
+              chain := @p[e^.HashValue mod NewCapacity];
+              n := e^.Next;
+              e^.Next := chain^;
+              chain^ := e;
+              e := n;
+            end;
+          end;
+        FBucketCount := NewCapacity;
+        FreeMem(FBucket);
+        FBucket := p;
+      end;
+
+
+    function THashSet.Remove(Entry: PHashSetItem): Boolean;
+      var
+        chain: PPHashSetItem;
+      begin
+        chain := @FBucket[Entry^.HashValue mod FBucketCount];
+        while Assigned(chain^) do
+          begin
+            if chain^ = Entry then
+              begin
+                chain^ := Entry^.Next;
+                if FOwnsObjects then
+                  Entry^.Data.Free;
+                if FOwnsKeys then
+                  FreeMem(Entry^.Key);
+                Dispose(Entry);
+                Dec(FCount);
+                Result := True;
+                Exit;
+              end;
+            chain := @chain^^.Next;
+          end;
+        Result := False;
+      end;
+
 end.

+ 2 - 5
compiler/ncgcnv.pas

@@ -159,8 +159,7 @@ interface
                 end
                else
                 begin
-                  location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
-                  cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,left.location.reference,location.register);
+                  location_copy(location,left.location);
                 end;
              end;
            cst_longstring:
@@ -179,9 +178,7 @@ interface
                 end
                else
                 begin
-                  location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-                  cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_INT,left.location.reference,
-                    location.register);
+                  location_copy(location,left.location);
                 end;
              end;
          end;

+ 50 - 140
compiler/ncgcon.pas

@@ -71,7 +71,7 @@ implementation
       symconst,symdef,aasmbase,aasmtai,aasmdata,aasmcpu,defutil,
       cpuinfo,cpubase,
       cgbase,cgobj,cgutils,
-      ncgutil
+      ncgutil, cclasses
       ;
 
 
@@ -262,14 +262,24 @@ implementation
 
     procedure tcgstringconstnode.pass_generate_code;
       var
-         hp1,hp2 : tai;
          l1,
          lastlabel   : tasmlabel;
-         lastlabelhp : tai;
          pc       : pchar;
-         same_string : boolean;
-         l,j,
-         i,mylength  : longint;
+         l,i : longint;
+         href: treference;
+         pooltype: TConstPoolType;
+         pool: THashSet;
+         entry: PHashSetItem;
+         
+      const
+        PoolMap: array[tconststringtype] of TConstPoolType = (
+          sp_conststr,
+          sp_shortstr,
+          sp_longstr,
+          sp_ansistr,
+          sp_widestr,
+          sp_unicodestr
+        );
       begin
          { for empty ansistrings we could return a constant 0 }
          if (cst_type in [cst_ansistring,cst_widestring]) and (len=0) then
@@ -278,160 +288,49 @@ implementation
             location.value:=0;
             exit;
           end;
-         { return a constant reference in memory }
-         location_reset(location,LOC_CREFERENCE,def_cgsize(resultdef));
          { const already used ? }
-         lastlabel:=nil;
-         lastlabelhp:=nil;
          if not assigned(lab_str) then
            begin
-              if is_shortstring(resultdef) then
-                mylength:=len+2
+              pooltype := PoolMap[cst_type];
+              if current_asmdata.ConstPools[pooltype] = nil then
+                current_asmdata.ConstPools[pooltype] := THashSet.Create(64, True, False);
+              pool := current_asmdata.ConstPools[pooltype];
+           
+              if cst_type in [cst_widestring, cst_unicodestring] then
+                entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data, len*cwidechartype.size)
               else
-                mylength:=len+1;
-              { widestrings can't be reused yet }
-              if not(is_widestring(resultdef)) then
-                begin
-                  { tries to find an old entry }
-                  hp1:=tai(current_asmdata.asmlists[al_typedconsts].first);
-                  while assigned(hp1) do
-                    begin
-                       if hp1.typ=ait_label then
-                         begin
-                           lastlabel:=tai_label(hp1).labsym;
-                           lastlabelhp:=hp1;
-                         end
-                       else
-                         begin
-                            same_string:=false;
-                            if (hp1.typ=ait_string) and
-                               (lastlabel<>nil) and
-                               (tai_string(hp1).len=mylength) then
-                              begin
-                                 case cst_type of
-                                   cst_conststring:
-                                     begin
-                                       j:=0;
-                                       same_string:=true;
-                                       if len>0 then
-                                         begin
-                                           for i:=0 to len-1 do
-                                             begin
-                                               if tai_string(hp1).str[j]<>value_str[i] then
-                                                 begin
-                                                   same_string:=false;
-                                                   break;
-                                                 end;
-                                               inc(j);
-                                             end;
-                                         end;
-                                     end;
-                                   cst_shortstring:
-                                     begin
-                                       { if shortstring then check the length byte first and
-                                         set the start index to 1 }
-                                       if len=ord(tai_string(hp1).str[0]) then
-                                         begin
-                                           j:=1;
-                                           same_string:=true;
-                                           if len>0 then
-                                             begin
-                                               for i:=0 to len-1 do
-                                                begin
-                                                  if tai_string(hp1).str[j]<>value_str[i] then
-                                                   begin
-                                                     same_string:=false;
-                                                     break;
-                                                   end;
-                                                  inc(j);
-                                                end;
-                                             end;
-                                         end;
-                                     end;
-                                   cst_ansistring,
-                                   cst_widestring :
-                                     begin
-                                       { before the string the following sequence must be found:
-                                         <label>
-                                           constsymbol <datalabel>
-                                           constint -1
-                                           constint <len>
-                                         we must then return <label> to reuse
-                                       }
-                                       hp2:=tai(lastlabelhp.previous);
-                                       if assigned(hp2) and
-                                          (hp2.typ=ait_const) and
-                                          (tai_const(hp2).consttype=aitconst_aint) and
-                                          (tai_const(hp2).value=len) and
-                                          assigned(hp2.previous) and
-                                          (tai(hp2.previous).typ=ait_const) and
-                                          (tai_const(hp2.previous).consttype=aitconst_aint) and
-                                          (tai_const(hp2.previous).value=-1) and
-                                          assigned(hp2.previous.previous) and
-                                          (tai(hp2.previous.previous).typ=ait_const) and
-                                          (tai_const(hp2.previous.previous).consttype=aitconst_ptr) and
-                                          assigned(hp2.previous.previous.previous) and
-                                          (tai(hp2.previous.previous.previous).typ=ait_label) then
-                                         begin
-                                           lastlabel:=tai_label(hp2.previous.previous.previous).labsym;
-                                           same_string:=true;
-                                           j:=0;
-                                           if len>0 then
-                                             begin
-                                               for i:=0 to len-1 do
-                                                begin
-                                                  if tai_string(hp1).str[j]<>value_str[i] then
-                                                   begin
-                                                     same_string:=false;
-                                                     break;
-                                                   end;
-                                                  inc(j);
-                                                end;
-                                             end;
-                                         end;
-                                     end;
-                                 end;
-                                 { found ? }
-                                 if same_string then
-                                   begin
-                                     lab_str:=lastlabel;
-                                     break;
-                                   end;
-                              end;
-                            lastlabel:=nil;
-                         end;
-                       hp1:=tai(hp1.next);
-                    end;
-                end;
+                entry := pool.FindOrAdd(value_str, len);
+
+              lab_str := TAsmLabel(entry^.Data);  // is it needed anymore?
+
               { :-(, we must generate a new entry }
-              if not assigned(lab_str) then
+              if not assigned(entry^.Data) then
                 begin
                    current_asmdata.getdatalabel(lastlabel);
                    lab_str:=lastlabel;
+                   entry^.Data := lastlabel;
                    maybe_new_object_file(current_asmdata.asmlists[al_typedconsts]);
                    if (len=0) or
                       not(cst_type in [cst_ansistring,cst_widestring]) then
                      new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,lastlabel.name,const_align(sizeof(pint)))
                    else
                      new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata,lastlabel.name,const_align(sizeof(pint)));
-                   current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
                    { generate an ansi string ? }
                    case cst_type of
                       cst_ansistring:
                         begin
-                           { an empty ansi string is nil! }
                            if len=0 then
-                             current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(nil))
-                           else
+                             InternalError(2008032301)   { empty string should be handled above }
+                            else
                              begin
                                 current_asmdata.getdatalabel(l1);
-                                current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(l1));
+                                current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
                                 current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(-1));
                                 current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(len));
+                                current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
                                 { make sure the string doesn't get dead stripped if the header is referenced }
                                 if (target_info.system in systems_darwin) then
                                   current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,l1.name));
-                                current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
                                 { ... and vice versa }
                                 if (target_info.system in systems_darwin) then
                                   current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,lab_str.name));
@@ -444,14 +343,12 @@ implementation
                         end;
                       cst_widestring:
                         begin
-                           { an empty wide string is nil! }
                            if len=0 then
-                             current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(nil))
+                             InternalError(2008032302)   { empty string should be handled above }
                            else
                              begin
                                 current_asmdata.getdatalabel(l1);
-                                current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_sym(l1));
-
+                                current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
                                 { we use always UTF-16 coding for constants }
                                 { at least for now                          }
                                 { Consts.concat(Tai_const.Create_8bit(2)); }
@@ -462,10 +359,10 @@ implementation
                                     current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(-1));
                                     current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_pint(len*cwidechartype.size));
                                   end;
+                                current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
                                 { make sure the string doesn't get dead stripped if the header is referenced }
                                 if (target_info.system in systems_darwin) then
                                   current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,l1.name));
-                                current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
                                 { ... and vice versa }
                                 if (target_info.system in systems_darwin) then
                                   current_asmdata.asmlists[al_typedconsts].concat(tai_directive.create(asd_reference,lab_str.name));
@@ -477,6 +374,7 @@ implementation
                         end;
                       cst_shortstring:
                         begin
+                          current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
                           { truncate strings larger than 255 chars }
                           if len>255 then
                            l:=255
@@ -491,6 +389,7 @@ implementation
                         end;
                       cst_conststring:
                         begin
+                          current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(lastlabel));
                           { include terminating zero }
                           getmem(pc,len+1);
                           move(value_str^,pc[0],len);
@@ -500,7 +399,18 @@ implementation
                    end;
                 end;
            end;
-         location.reference.symbol:=lab_str;
+         if cst_type in [cst_ansistring, cst_widestring] then
+           begin
+             location_reset(location, LOC_CREGISTER, OS_ADDR);
+             reference_reset_symbol(href, lab_str, 0);
+             location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
+             cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,location.register);
+           end
+         else
+           begin
+             location_reset(location, LOC_CREFERENCE, def_cgsize(resultdef));
+             location.reference.symbol:=lab_str;
+           end;
       end;