Ver Fonte

Faster string concatenations.

Rika Ichinose há 1 ano atrás
pai
commit
854d944cfa
3 ficheiros alterados com 221 adições e 154 exclusões
  1. 79 91
      rtl/inc/astrings.inc
  2. 57 63
      rtl/inc/ustrings.inc
  3. 85 0
      rtl/java/jastrings.inc

+ 79 - 91
rtl/inc/astrings.inc

@@ -218,8 +218,8 @@ end;
 procedure fpc_AnsiStr_Concat (var DestS:RawByteString;const S1,S2 : RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
 Var
   S1Len, S2Len: SizeInt;
-  same : boolean;
   S1CP, S2CP, DestCP: TSystemCodePage;
+  OldDestP, NewDestP, RealDestP, Src : Pointer;
 begin
 {$ifdef FPC_HAS_CPSTRING}
   DestCP:=cp;
@@ -232,28 +232,25 @@ begin
   { if codepages are different then concat using unicodestring,
     but avoid conversions if either addend is empty (StringCodePage will return
     DefaultSystemCodePage in that case, which may differ from other addend/dest) }
-  if Length(S1)=0 then
-    S1CP:=DestCP
-  else
-    S1CP:=StringCodePage(S1);
-  S1CP:=TranslatePlaceholderCP(S1CP);
-  if Length(S2)=0 then
-    S2CP:=DestCP
-  else
-    S2CP:=StringCodePage(S2);
-  S2CP:=TranslatePlaceholderCP(S2CP);
+  S1CP:=DestCP;
+  if Length(S1)<>0 then
+    S1CP:=TranslatePlaceholderCP(StringCodePage(S1));
+  S2CP:=S1CP; { So if S2 is empty, S2CP = S1CP. }
+  if Length(S2)<>0 then
+    S2CP:=TranslatePlaceholderCP(StringCodePage(S2));
 {$ifdef FPC_HAS_CPSTRING}
   { if the result is rawbytestring and both strings have the same code page,
     keep that code page or keep the code page if the other string is empty }
   if cp=CP_NONE then
-    begin
-     if (S1CP=S2CP) or (Length(S2)=0) then
-       DestCP:=S1CP
-     else if Length(S1)=0 then
+   if S1CP=S2CP then { Includes the case of empty S2. }
+     DestCP:=S1CP
+   else if Length(S1)=0 then
+     begin
        DestCP:=S2CP;
-    end;
+       S1CP:=S2CP;
+     end;
 {$endif FPC_HAS_CPSTRING}
-  if ((S1CP<>DestCP) and (Length(s1)>0)) or ((S2CP<>DestCP) and (Length(s2)>0)) then
+  if (S1CP<>DestCP) or (S2CP<>DestCP) then
     begin
       ansistr_concat_complex(DestS,S1,S2,DestCP);
       exit;
@@ -269,31 +266,33 @@ begin
       DestS:=s1;
       exit;
     end;
-  S1Len:=Length(S1);
-  S2Len:=length(S2);
-  { Use Pointer() typecasts to prevent extra conversion code }
-  if Pointer(DestS)=Pointer(S1) then
+  S1Len:=PAnsiRec(Pointer(S1)-AnsiFirstOff)^.Len;
+  S2Len:=PAnsiRec(Pointer(S2)-AnsiFirstOff)^.Len;
+  OldDestP:=Pointer(DestS);
+  { Reallocate when possible; in the hope this will reuse the chunk more often than do a redundant copy. }
+  if Assigned(OldDestP) and (PAnsiRec(OldDestP-AnsiFirstOff)^.Ref=1) then
     begin
-      same:=Pointer(S1)=Pointer(S2);
-      SetLength(DestS,S1Len+S2Len);
-      if same then
-        fpc_pchar_ansistr_intern_charmove(PAnsiChar(DestS),0,DestS,S1Len,S2Len)
-      else
-        fpc_pchar_ansistr_intern_charmove(PAnsiChar(S2),0,DestS,S1Len,S2Len+1)
-    end
-  else if Pointer(DestS)=Pointer(S2) then
-    begin
-      SetLength(DestS,S1Len+S2Len);
-      fpc_pchar_ansistr_intern_charmove(PAnsiChar(DestS),0,DestS,S1Len,S2Len+1);
-      fpc_pchar_ansistr_intern_charmove(PAnsiChar(S1),0,DestS,0,S1Len);
+      RealDestP:=OldDestP-AnsiFirstOff;
+      NewDestP:=ReallocMem(RealDestP,AnsiFirstOff+1+S1Len+S2Len)+AnsiFirstOff;
+      { Copy S2 first, as in the case of OldDestP = Pointer(S2) it must be copied first and in other cases the order does not matter. }
+      Src:=Pointer(S2);
+      if Src=OldDestP then
+        Src:=NewDestP;
+      Move(Src^,PAnsiChar(NewDestP)[S1Len],S2Len);
+      if OldDestP<>Pointer(S1) then { Not an append, need to copy S1? }
+        Move(Pointer(S1)^,NewDestP^,S1Len);
     end
   else
     begin
-      SetLength(DestS,S1Len+S2Len);
-      fpc_pchar_ansistr_intern_charmove(PAnsiChar(S1),0,DestS,0,S1Len);
-      fpc_pchar_ansistr_intern_charmove(PAnsiChar(S2),0,DestS,S1Len,S2Len+1);
+      NewDestP:=NewAnsiString(S1Len+S2Len);
+      Move(Pointer(S1)^,NewDestP^,S1Len);
+      Move(Pointer(S2)^,PAnsiChar(NewDestP)[S1Len],S2Len);
+      fpc_ansistr_decr_ref(Pointer(DestS));
     end;
-  SetCodePage(DestS,DestCP,false);
+  PAnsiChar(NewDestP)[S1Len+S2Len]:=#0;
+  PAnsiRec(NewDestP-AnsiFirstOff)^.CodePage:=DestCP;
+  PAnsiRec(NewDestP-AnsiFirstOff)^.Len:=S1Len+S2Len;
+  Pointer(DestS):=NewDestP;
 end;
 {$endif FPC_HAS_ANSISTR_CONCAT}
 
@@ -315,15 +314,9 @@ end;
 
 procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
 Var
-  lowstart,
-  nonemptystart,
-  i           : ObjpasInt;
-  p,pc        : pointer;
-  Size,NewLen,
-  OldDestLen  : SizeInt;
-  destcopy    : pointer;
-  DestCP,
-  tmpCP       : TSystemCodePage;
+  lowstart,i,Size,NewLen : SizeInt;
+  p,pc,olddestp,newdestp,realdestp : pointer;
+  DestCP,tmpCP : TSystemCodePage;
 begin
 {$ifdef FPC_HAS_CPSTRING}
   DestCP:=cp;
@@ -367,55 +360,50 @@ begin
   for i:=lowstart to high(sarr) do
     inc(NewLen,length(sarr[i]));
   { In the case of the only nonempty string, either return it directly (if SetCodePage has nothing to do) or skip 1 allocation. }
-  if NewLen=length(sarr[lowstart]) then
-    begin
-      DestS:=sarr[lowstart];
-      { SetCodePage does the conversion (or at least uniquifying) if DestCP is not exactly the code page stored in the string header. Avoid if possible. }
-      if DestCP<>tmpCP then
-        SetCodePage(DestS,DestCP,True);
-      exit;
-    end;
-  destcopy:=nil;
-  nonemptystart:=lowstart;
-  { Check for another reuse, then we can't use
-    the append optimization }
-  if Length(DestS)<>0 then
+  if NewLen=PAnsiRec(Pointer(sarr[lowstart])-AnsiFirstOff)^.Len then
+    DestS:=sarr[lowstart]
+  else
     begin
-      if Pointer(DestS)=Pointer(sarr[lowstart]) then
-        inc(lowstart);
-      for i:=lowstart to high(sarr) do
+      olddestp:=pointer(dests);
+      { Reallocate when possible; in the hope this will reuse the chunk more often than do a redundant copy. }
+      if assigned(olddestp) and (PAnsiRec(olddestp-AnsiFirstOff)^.Ref=1) then
         begin
-          if Pointer(DestS)=Pointer(sarr[i]) then
-            begin
-              { if DestS is used somewhere in the middle of the expression,
-                we need to make sure the original string still exists after
-                we empty/modify DestS                                       }
-              destcopy:=pointer(dests);
-              fpc_AnsiStr_Incr_Ref(destcopy);
-              lowstart:=nonemptystart;
-              break;
-            end;
+          realdestp:=olddestp-AnsiFirstOff;
+          newdestp:=ReallocMem(realdestp,AnsiFirstOff+1+NewLen)+AnsiFirstOff;
+          { First string can be skipped if appending. }
+          if olddestp=pointer(sarr[lowstart]) then
+            inc(lowstart);
+        end
+      else
+        begin
+          { Create new string. }
+          olddestp:=nil; { This case is distinguished as "not assigned(olddestp)". Also prevents "if p=olddestp" in the loop below shared with the ReallocMem branch. }
+          newdestp:=NewAnsiString(NewLen);
         end;
+      { Copy strings from the last to the first, so that any occurrences of DestS can still read from the beginning of the reallocated DestS. }
+      pc:=newdestp+NewLen;
+      for i:=high(sarr) downto lowstart do
+        begin
+          p:=pointer(sarr[i]);
+          if not assigned(p) then
+            continue;
+          if p=olddestp then
+            { DestS occurred among the pieces in the ReallocMem case! Use the new pointer. Its header still conveniently contains the old DestS length. }
+            p:=newdestp;
+          Size:=PAnsiRec(p-AnsiFirstOff)^.Len;
+          dec(pc,size);
+          Move(p^,pc^,Size);
+        end;
+      if not assigned(olddestp) then
+        fpc_AnsiStr_Decr_Ref(pointer(DestS));
+      PAnsiChar(newdestp)[NewLen]:=#0;
+      PAnsiRec(newdestp-AnsiFirstOff)^.CodePage:=tmpCP;
+      PAnsiRec(newdestp-AnsiFirstOff)^.Len:=NewLen; { Careful, loop above relies on the old Len in the newdestp header. }
+      Pointer(DestS):=newdestp;
     end;
-  { Start with empty DestS if we start with concatting
-    the first (non-empty) array element }
-  if lowstart=nonemptystart then
-    DestS:='';
-  OldDestLen:=length(DestS);
-  SetLength(DestS,NewLen);
-  { Concat all strings, except the string we already
-    copied in DestS }
-  pc:=Pointer(DestS)+OldDestLen;
-  for i:=lowstart to high(sarr) do
-    begin
-      p:=pointer(sarr[i]);
-      Size:=length(ansistring(p));
-      Move(p^,pc^,Size);
-      inc(pc,size);
-    end;
-  SetCodePage(DestS,tmpCP,False);
-  SetCodePage(DestS,DestCP,True);
-  fpc_AnsiStr_Decr_Ref(destcopy);
+  { SetCodePage does the conversion (or at least uniquifying) if DestCP is not exactly the code page stored in the string header. Avoid if possible. }
+  if DestCP<>tmpCP then
+    SetCodePage(DestS,DestCP,True);
 end;
 {$endif FPC_HAS_ANSISTR_CONCAT_MULTI}
 

+ 57 - 63
rtl/inc/ustrings.inc

@@ -454,8 +454,8 @@ Procedure fpc_UnicodeStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name '
 {$define FPC_HAS_UNICODESTR_CONCAT}
 procedure fpc_UnicodeStr_Concat (var DestS:Unicodestring;const S1,S2 : UnicodeString); compilerproc;
 Var
-  Size,Location : SizeInt;
-  same : boolean;
+  S1Len,S2Len : SizeInt;
+  OldDestP,NewDestP,RealDestP,Src : Pointer;
 begin
   { only assign if s1 or s2 is empty }
   if Length(S1)=0 then
@@ -468,31 +468,32 @@ begin
       DestS:=s1;
       exit;
     end;
-  Location:=Length(S1);
-  Size:=length(S2);
-  { Use Pointer() typecasts to prevent extra conversion code }
-  if Pointer(DestS)=Pointer(S1) then
+  S1Len:=PUnicodeRec(Pointer(S1)-UnicodeFirstOff)^.Len;
+  S2Len:=PUnicodeRec(Pointer(S2)-UnicodeFirstOff)^.Len;
+  OldDestP:=Pointer(DestS);
+  { Reallocate when possible; in the hope this will reuse the chunk more often than do a redundant copy. }
+  if Assigned(OldDestP) and (PUnicodeRec(OldDestP-UnicodeFirstOff)^.Ref=1) then
     begin
-      same:=Pointer(S1)=Pointer(S2);
-      SetLength(DestS,Size+Location);
-      if same then
-        Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size)*sizeof(UnicodeChar))
-      else
-        Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
-    end
-  else if Pointer(DestS)=Pointer(S2) then
-    begin
-      SetLength(DestS,Size+Location);
-      Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
-      Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
+      RealDestP:=OldDestP-UnicodeFirstOff;
+      NewDestP:=ReallocMem(RealDestP,UnicodeFirstOff+sizeof(UnicodeChar)+(S1Len+S2Len)*sizeof(UnicodeChar))+UnicodeFirstOff;
+      { Copy S2 first, as in the case of OldDestP = Pointer(S2) it must be copied first and in other cases the order does not matter. }
+      Src:=Pointer(S2);
+      if Src=OldDestP then
+        Src:=NewDestP;
+      Move(Src^,PUnicodeChar(NewDestP)[S1Len],S2Len*sizeof(UnicodeChar));
+      if OldDestP<>Pointer(S1) then { Not an append, need to copy S1? }
+          Move(Pointer(S1)^,NewDestP^,S1Len*sizeof(UnicodeChar));
     end
   else
     begin
-      DestS:='';
-      SetLength(DestS,Size+Location);
-      Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(UnicodeChar));
-      Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(UnicodeChar))^,(Size+1)*sizeof(UnicodeChar));
+      NewDestP:=NewUnicodeString(S1Len+S2Len);
+      Move(Pointer(S1)^,NewDestP^,S1Len*sizeof(UnicodeChar));
+      Move(Pointer(S2)^,PUnicodeChar(NewDestP)[S1Len],S2Len*sizeof(UnicodeChar));
+      fpc_unicodestr_decr_ref(Pointer(DestS));
     end;
+  PUnicodeChar(NewDestP)[S1Len+S2Len]:=#0;
+  PUnicodeRec(NewDestP-UnicodeFirstOff)^.Len:=S1Len+S2Len;
+  Pointer(DestS):=NewDestP;
 end;
 {$endif FPC_HAS_UNICODESTR_CONCAT}
 
@@ -501,12 +502,8 @@ end;
 {$define FPC_HAS_UNICODESTR_CONCAT_MULTI}
 procedure fpc_UnicodeStr_Concat_multi (var DestS:Unicodestring;const sarr:array of Unicodestring); compilerproc;
 Var
-  i           : SizeInt;
-  p,pc        : pointer;
-  Size,NewLen : SizeInt;
-  lowstart,nonemptystart : SizeInt;
-  destcopy    : pointer;
-  OldDestLen  : SizeInt;
+  lowstart,i,Size,NewLen : SizeInt;
+  p,pc,OldDestP,NewDestP,RealDestP : pointer;
 begin
   lowstart:=low(sarr);
   { skip empty strings }
@@ -523,49 +520,46 @@ begin
   for i:=lowstart to high(sarr) do
     inc(NewLen,length(sarr[i]));
   { In the case of the only nonempty string, return it directly. }
-  if NewLen=length(sarr[lowstart]) then
+  if NewLen=PUnicodeRec(Pointer(sarr[lowstart])-UnicodeFirstOff)^.Len then
     begin
       DestS:=sarr[lowstart];
       exit;
     end;
-  destcopy:=nil;
-  nonemptystart:=lowstart;
-  if Pointer(DestS)=Pointer(sarr[lowstart]) then
-    inc(lowstart);
-  { Check for another reuse, then we can't use
-    the append optimization }
-  for i:=lowstart to high(sarr) do
+  OldDestP:=Pointer(DestS);
+  { Reallocate when possible; in the hope this will reuse the chunk more often than do a redundant copy. }
+  if Assigned(OldDestP) and (PUnicodeRec(OldDestP-UnicodeFirstOff)^.Ref=1) then
     begin
-      if Pointer(DestS)=Pointer(sarr[i]) then
-        begin
-          { if DestS is used somewhere in the middle of the expression,
-            we need to make sure the original string still exists after
-            we empty/modify DestS.
-            This trick only works with reference counted strings. Therefor
-            this optimization is disabled for WINLIKEUNICODESTRING }
-          destcopy:=pointer(dests);
-          fpc_UnicodeStr_Incr_Ref(destcopy);
-          lowstart:=nonemptystart;
-          break;
-        end;
+      RealDestP:=OldDestP-UnicodeFirstOff;
+      NewDestP:=ReallocMem(RealDestP,UnicodeFirstOff+sizeof(UnicodeChar)+NewLen*sizeof(UnicodeChar))+UnicodeFirstOff;
+      { First string can be skipped if appending. }
+      if OldDestP=Pointer(sarr[lowstart]) then
+        inc(lowstart);
+    end
+  else
+    begin
+      { Create new string. }
+      OldDestP:=nil; { This case is distinguished as "not assigned(olddestp)". Also prevents "if p=olddestp" in the loop below shared with the ReallocMem branch. }
+      NewDestP:=NewUnicodeString(NewLen);
     end;
-  { Start with empty DestS if we start with concatting
-    the first array element }
-  if lowstart=nonemptystart then
-    DestS:='';
-  OldDestLen:=length(DestS);
-  SetLength(DestS,NewLen);
-  { Concat all strings, except the string we already
-    copied in DestS }
-  pc:=Pointer(DestS)+OldDestLen*sizeof(UnicodeChar);
-  for i:=lowstart to high(sarr) do
+  { Copy strings from the last to the first, so that any occurrences of DestS can still read from the beginning of the reallocated DestS. }
+  pc:=NewDestP+NewLen*sizeof(UnicodeChar);
+  for i:=high(sarr) downto lowstart do
     begin
-      p:=pointer(sarr[i]);
-      Size:=length(unicodestring(p));
-      Move(p^,pc^,Size*sizeof(UnicodeChar));
-      inc(pc,size*sizeof(UnicodeChar));
+      p:=Pointer(sarr[i]);
+      if not Assigned(p) then
+        continue;
+      if p=OldDestP then
+        { DestS occurred among the pieces in the ReallocMem case! Use the new pointer. Its header still conveniently contains the old DestS length. }
+        p:=NewDestP;
+      Size:=PUnicodeRec(p-UnicodeFirstOff)^.Len*sizeof(UnicodeChar);
+      dec(pc,size);
+      Move(p^,pc^,Size);
     end;
-  fpc_UnicodeStr_Decr_Ref(destcopy);
+  if not assigned(OldDestP) then
+    fpc_UnicodeStr_Decr_Ref(Pointer(DestS));
+  PUnicodeChar(NewDestP)[NewLen]:=#0;
+  PUnicodeRec(NewDestP-UnicodeFirstOff)^.Len:=NewLen; { Careful, loop above relies on the old Len in the NewDestP header. }
+  Pointer(DestS):=NewDestP;
 end;
 {$endif FPC_HAS_UNICODESTR_CONCAT_MULTI}
 

+ 85 - 0
rtl/java/jastrings.inc

@@ -300,6 +300,91 @@ end;
 {$endif FPC_HAS_ANSISTR_CONCAT_COMPLEX}
 
 
+{$ifndef FPC_HAS_ANSISTR_CONCAT}
+{$define FPC_HAS_ANSISTR_CONCAT}
+procedure fpc_AnsiStr_Concat (var DestS:RawByteString;const S1,S2 : RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
+Var
+  S1Len, S2Len: SizeInt;
+  same : boolean;
+  S1CP, S2CP, DestCP: TSystemCodePage;
+begin
+{$ifdef FPC_HAS_CPSTRING}
+  DestCP:=cp;
+  if DestCp=CP_NONE then
+    DestCP:=DefaultSystemCodePage;
+{$else FPC_HAS_CPSTRING}
+  DestCP:=StringCodePage(DestS);
+{$endif FPC_HAS_CPSTRING}
+  DestCP:=TranslatePlaceholderCP(DestCP);
+  { if codepages are different then concat using unicodestring,
+    but avoid conversions if either addend is empty (StringCodePage will return
+    DefaultSystemCodePage in that case, which may differ from other addend/dest) }
+  if Length(S1)=0 then
+    S1CP:=DestCP
+  else
+    S1CP:=StringCodePage(S1);
+  S1CP:=TranslatePlaceholderCP(S1CP);
+  if Length(S2)=0 then
+    S2CP:=DestCP
+  else
+    S2CP:=StringCodePage(S2);
+  S2CP:=TranslatePlaceholderCP(S2CP);
+{$ifdef FPC_HAS_CPSTRING}
+  { if the result is rawbytestring and both strings have the same code page,
+    keep that code page or keep the code page if the other string is empty }
+  if cp=CP_NONE then
+    begin
+     if (S1CP=S2CP) or (Length(S2)=0) then
+       DestCP:=S1CP
+     else if Length(S1)=0 then
+       DestCP:=S2CP;
+    end;
+{$endif FPC_HAS_CPSTRING}
+  if ((S1CP<>DestCP) and (Length(s1)>0)) or ((S2CP<>DestCP) and (Length(s2)>0)) then
+    begin
+      ansistr_concat_complex(DestS,S1,S2,DestCP);
+      exit;
+    end;
+  { only assign if s1 or s2 is empty }
+  if (Length(S1)=0) then
+    begin
+      DestS:=s2;
+      exit;
+    end;
+  if (Length(S2)=0) then
+    begin
+      DestS:=s1;
+      exit;
+    end;
+  S1Len:=Length(S1);
+  S2Len:=length(S2);
+  { Use Pointer() typecasts to prevent extra conversion code }
+  if Pointer(DestS)=Pointer(S1) then
+    begin
+      same:=Pointer(S1)=Pointer(S2);
+      SetLength(DestS,S1Len+S2Len);
+      if same then
+        fpc_pchar_ansistr_intern_charmove(PAnsiChar(DestS),0,DestS,S1Len,S2Len)
+      else
+        fpc_pchar_ansistr_intern_charmove(PAnsiChar(S2),0,DestS,S1Len,S2Len+1)
+    end
+  else if Pointer(DestS)=Pointer(S2) then
+    begin
+      SetLength(DestS,S1Len+S2Len);
+      fpc_pchar_ansistr_intern_charmove(PAnsiChar(DestS),0,DestS,S1Len,S2Len+1);
+      fpc_pchar_ansistr_intern_charmove(PAnsiChar(S1),0,DestS,0,S1Len);
+    end
+  else
+    begin
+      SetLength(DestS,S1Len+S2Len);
+      fpc_pchar_ansistr_intern_charmove(PAnsiChar(S1),0,DestS,0,S1Len);
+      fpc_pchar_ansistr_intern_charmove(PAnsiChar(S2),0,DestS,S1Len,S2Len+1);
+    end;
+  SetCodePage(DestS,DestCP,false);
+end;
+{$endif FPC_HAS_ANSISTR_CONCAT}
+
+
 {$define FPC_HAS_ANSISTR_TO_ANSISTR}
 Function fpc_AnsiStr_To_AnsiStr (const S : RawByteString;cp : TSystemCodePage): RawByteString; compilerproc;
 {