瀏覽代碼

* when concatenating ansistrings, do not map CP_NONE (rawbytestring) to
CP_ACP (defaultsystemcodepage), because if all input strings have the
same code page then the result should also have that code page if it's
assigned to a rawbytestring rather than getting defaultsystemcodepage
* do not consider empty strings to determine the code page of the result
in fpc_AnsiStr_Concat_multi(), because that will cause a different
result than when using a sequence of fpc_AnsiStr_Concat() calls (it
ignores empty strings to determine the result code page) and it's also
slower
* do not consider the run time code page of the destination string in
fpc_AnsiStr_Concat(_multi)() because Delphi does not do so either. This
was introduced in r19118, probably to hide another bug
+ test

git-svn-id: branches/cpstrrtl@25143 -

Jonas Maebe 12 年之前
父節點
當前提交
1a560e9875
共有 7 個文件被更改,包括 171 次插入65 次删除
  1. 1 0
      .gitattributes
  2. 6 2
      compiler/nadd.pas
  3. 6 2
      compiler/nopt.pas
  4. 5 1
      compiler/symdef.pas
  5. 56 33
      rtl/inc/astrings.inc
  6. 48 27
      rtl/java/jastrings.inc
  7. 49 0
      tests/test/tcpstrconcat4.pp

+ 1 - 0
.gitattributes

@@ -11011,6 +11011,7 @@ tests/test/tcpstrchar2ansistr.pp svneol=native#text/plain
 tests/test/tcpstrconcat.pp svneol=native#text/plain
 tests/test/tcpstrconcat2.pp svneol=native#text/plain
 tests/test/tcpstrconcat3.pp svneol=native#text/plain
+tests/test/tcpstrconcat4.pp svneol=native#text/plain
 tests/test/tcpstrconcatmulti.pp svneol=native#text/plain
 tests/test/tcpstrconcatmulti2.pp svneol=native#text/plain
 tests/test/tcpstrpchar2ansistr.pp svneol=native#text/plain

+ 6 - 2
compiler/nadd.pas

@@ -2170,7 +2170,9 @@ implementation
                   if is_ansistring(resultdef) then
                     para:=ccallparanode.create(
                             cordconstnode.create(
-                              getparaencoding(resultdef),
+                              { don't use getparaencoding(), we have to know
+                                when the result is rawbytestring }
+                              tstringdef(resultdef).encoding,
                               u16inttype,
                               true
                             ),
@@ -2208,7 +2210,9 @@ implementation
                   if is_ansistring(resultdef) then
                     para:=ccallparanode.create(
                             cordconstnode.create(
-                              getparaencoding(resultdef),
+                              { don't use getparaencoding(), we have to know
+                                when the result is rawbytestring }
+                              tstringdef(resultdef).encoding,
                               u16inttype,
                               true
                             ),

+ 6 - 2
compiler/nopt.pas

@@ -349,7 +349,9 @@ begin
       if is_ansistring(p.resultdef) then
         para:=ccallparanode.create(
                 cordconstnode.create(
-                  getparaencoding(p.resultdef),
+                  { don't use getparaencoding(), we have to know
+                    when the result is rawbytestring }
+                  tstringdef(p.resultdef).encoding,
                   u16inttype,
                   true
                 ),
@@ -383,7 +385,9 @@ begin
       if is_ansistring(p.resultdef) then
         para:=ccallparanode.create(
                 cordconstnode.create(
-                  getparaencoding(p.resultdef),
+                  { don't use getparaencoding(), we have to know
+                    when the result is rawbytestring }
+                  tstringdef(p.resultdef).encoding,
                   u16inttype,
                   true
                 ),

+ 5 - 1
compiler/symdef.pas

@@ -1125,7 +1125,11 @@ implementation
     function getparaencoding(def:tdef):tstringencoding; inline;
       begin
         { don't pass CP_NONE encoding to internal functions
-          they expect 0 encoding instead }
+          they expect 0 encoding instead
+          exception: result of string concatenation, because if you pass the
+          result of a string concatenation to a rawbytestring, the result of
+          that concatenation shouldn't be converted to defaultsystemcodepage
+          if all strings have the same type }
         result:=tstringdef(def).encoding;
         if result=CP_NONE then
           result:=0

+ 56 - 33
rtl/inc/astrings.inc

@@ -214,10 +214,9 @@ Var
   S1CP, S2CP, DestCP: TSystemCodePage;
 begin
 {$ifdef FPC_HAS_CPSTRING}
-  if (Pointer(DestS)=nil) then
-    DestCP:=cp
-  else
-    DestCP:=StringCodePage(DestS);
+  DestCP:=cp;
+  if DestCp=CP_NONE then
+    DestCP:=DefaultSystemCodePage;
 {$else FPC_HAS_CPSTRING}
   DestCP:=StringCodePage(DestS);
 {$endif FPC_HAS_CPSTRING}
@@ -235,6 +234,13 @@ begin
   else
     S2CP:=StringCodePage(S2);
   S2CP:=TranslatePlaceholderCP(S2CP);
+{$ifdef FPC_HAS_CPSTRING}
+  { if the result is rawbytestring and both strings have the same code page,
+    keep that code page }
+  if (cp=CP_NONE) and
+     (S1CP=S2CP) then
+    DestCP:=S1CP;
+{$endif FPC_HAS_CPSTRING}
   if (S1CP<>DestCP) or (S2CP<>DestCP) then
     begin
       ansistr_concat_complex(DestS,S1,S2,DestCP);
@@ -284,15 +290,17 @@ end;
 {$define FPC_HAS_ANSISTR_CONCAT_MULTI}
 procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
 Var
-  lowstart,i  : Longint;
+  lowstart,
+  nonemptystart,
+  i           : Longint;
   p,pc        : pointer;
   Size,NewLen,
   OldDestLen  : SizeInt;
   destcopy    : pointer;
-  DestCP      : TSystemCodePage;
   U           : UnicodeString;
-  sameCP      : Boolean;
+  DestCP,
   tmpCP       : TSystemCodePage;
+  sameCP      : Boolean;
 begin
   if high(sarr)=0 then
     begin
@@ -300,20 +308,26 @@ begin
       exit;
     end;
 {$ifdef FPC_HAS_CPSTRING}
-  if (Pointer(DestS)=nil) then
-    DestCP:=cp
-  else
-    DestCP:=StringCodePage(DestS);
+  DestCP:=cp;
+  if DestCp=CP_NONE then
+    DestCP:=DefaultSystemCodePage;
 {$else FPC_HAS_CPSTRING}
   DestCP:=StringCodePage(DestS);
 {$endif FPC_HAS_CPSTRING}
   DestCP:=TranslatePlaceholderCP(DestCP);
   sameCP:=true;
   lowstart:=low(sarr);
-  for i:=lowstart to high(sarr) do
+  { skip empty strings }
+  while (lowstart<=high(sarr)) and
+        (sarr[lowstart]='') do
+    inc(lowstart);
+  tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[lowstart]));
+  for i:=lowstart+1 to high(sarr) do
     begin
-      tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[i]));
-      if (DestCP<>tmpCp) then
+      { ignore the code page of empty strings, it will always be
+        DefaultSystemCodePage but it doesn't matter for the outcome }
+      if (sarr[i]<>'') and
+         (tmpCP<>TranslatePlaceholderCP(StringCodePage(sarr[i]))) then
         begin
           sameCP:=false;
           break;
@@ -323,45 +337,52 @@ begin
     begin
       U:='';
       for i:=lowstart to high(sarr) do
-        U:=U+UnicodeString(sarr[i]);
+        if sarr[i]<>'' then
+          U:=U+UnicodeString(sarr[i]);
 
       DestS:='';
       widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(U)),DestS,DestCP,Length(U));
       exit;
     end;
-
+{$ifdef FPC_HAS_CPSTRING}
+  { if the result is rawbytestring and all strings have the same code page,
+    keep that code page }
+  if cp=CP_NONE then
+    DestCP:=tmpCP;
+{$endif FPC_HAS_CPSTRING}
   destcopy:=nil;
-  lowstart:=low(sarr);
-  if Pointer(DestS)=Pointer(sarr[lowstart]) then
-    inc(lowstart);
+  nonemptystart:=lowstart;
   { Check for another reuse, then we can't use
     the append optimization }
-  for i:=lowstart to high(sarr) do
+  if DestS<>'' then
     begin
-      if Pointer(DestS)=Pointer(sarr[i]) then
+      if Pointer(DestS)=Pointer(sarr[lowstart]) then
+        inc(lowstart);
+      for i:=lowstart to high(sarr) do
         begin
-          { if DestS is used somewhere in the middle of the expression,
-            we need to make sure the original string still exists after
-            we empty/modify DestS                                       }
-          destcopy:=pointer(dests);
-          fpc_AnsiStr_Incr_Ref(destcopy);
-          lowstart:=low(sarr);
-          break;
+          if Pointer(DestS)=Pointer(sarr[i]) then
+            begin
+              { if DestS is used somewhere in the middle of the expression,
+                we need to make sure the original string still exists after
+                we empty/modify DestS                                       }
+              destcopy:=pointer(dests);
+              fpc_AnsiStr_Incr_Ref(destcopy);
+              lowstart:=nonemptystart;
+              break;
+            end;
         end;
     end;
   { Start with empty DestS if we start with concatting
-    the first array element }
-  if lowstart=low(sarr) then
+    the first (non-empty) array element }
+  if lowstart=nonemptystart then
     DestS:='';
   OldDestLen:=length(DestS);
   { Calculate size of the result so we can do
     a single call to SetLength() }
   NewLen:=0;
-  for i:=low(sarr) to high(sarr) do
+  for i:=nonemptystart to high(sarr) do
     inc(NewLen,length(sarr[i]));
   SetLength(DestS,NewLen);
-  if (StringCodePage(DestS) <> DestCP) then
-    SetCodePage(DestS,DestCP,False);
   { Concat all strings, except the string we already
     copied in DestS }
   pc:=Pointer(DestS)+OldDestLen;
@@ -375,6 +396,8 @@ begin
           inc(pc,size);
         end;
     end;
+  SetCodePage(DestS,tmpCP,False);
+  SetCodePage(DestS,DestCP,True);
   fpc_AnsiStr_Decr_Ref(destcopy);
 end;
 {$endif FPC_HAS_ANSISTR_CONCAT_MULTI}

+ 48 - 27
rtl/java/jastrings.inc

@@ -334,15 +334,17 @@ Function fpc_AnsiStr_To_AnsiStr (const S : RawByteString;cp : TSystemCodePage):
 {$define FPC_HAS_ANSISTR_CONCAT_MULTI}
 procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
 Var
-  lowstart,i  : Longint;
+  lowstart,
+  nonemptystart,
+  i           : Longint;
   p           : pointer;
   Size,NewLen,
   OldDestLen  : SizeInt;
   destcopy    : RawByteString;
-  DestCP      : TSystemCodePage;
   U           : UnicodeString;
-  sameCP      : Boolean;
+  DestCP      : TSystemCodePage;
   tmpCP       : TSystemCodePage;
+  sameCP      : Boolean;
 begin
   if high(sarr)=0 then
     begin
@@ -350,20 +352,26 @@ begin
       exit;
     end;
 {$ifdef FPC_HAS_CPSTRING}
-  if (Pointer(DestS)=nil) then
-    DestCP:=cp
-  else
-    DestCP:=StringCodePage(DestS);
+  DestCP:=cp;
+  if DestCp=CP_NONE then
+    DestCP:=DefaultSystemCodePage;
 {$else FPC_HAS_CPSTRING}
   DestCP:=StringCodePage(DestS);
 {$endif FPC_HAS_CPSTRING}
   DestCP:=TranslatePlaceholderCP(DestCP);
   sameCP:=true;
   lowstart:=low(sarr);
-  for i:=lowstart to high(sarr) do
+  { skip empty strings }
+  while (lowstart<=high(sarr)) and
+        (sarr[lowstart]='') do
+    inc(lowstart);
+  tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[lowstart]));
+  for i:=lowstart+1 to high(sarr) do
     begin
-      tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[i]));
-      if (DestCP<>tmpCp) then
+      { ignore the code page of empty strings, it will always be
+        DefaultSystemCodePage but it doesn't matter for the outcome }
+      if (sarr[i]<>'') and
+         (tmpCP<>TranslatePlaceholderCP(StringCodePage(sarr[i]))) then
         begin
           sameCP:=false;
           break;
@@ -373,33 +381,43 @@ begin
     begin
       U:='';
       for i:=lowstart to high(sarr) do
-        U:=U+UnicodeString(sarr[i]);
+        if sarr[i]<>'' then
+          U:=U+UnicodeString(sarr[i]);
 
       DestS:='';
       widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(JLString(U).toCharArray),DestS,DestCP,Length(U));
       exit;
     end;
-
-  lowstart:=low(sarr);
-  if Pointer(DestS)=Pointer(sarr[lowstart]) then
-    inc(lowstart);
+  {$ifdef FPC_HAS_CPSTRING}
+    { if the result is rawbytestring and all strings have the same code page,
+      keep that code page }
+    if cp=CP_NONE then
+      DestCP:=tmpCP;
+  {$endif FPC_HAS_CPSTRING}
+
+  nonemptystart:=lowstart;
   { Check for another reuse, then we can't use
     the append optimization }
-  for i:=lowstart to high(sarr) do
+  if DestS<>'' then
     begin
-      if Pointer(DestS)=Pointer(sarr[i]) then
+      if Pointer(DestS)=Pointer(sarr[lowstart]) then
+        inc(lowstart);
+      for i:=lowstart to high(sarr) do
         begin
-          { if DestS is used somewhere in the middle of the expression,
-            we need to make sure the original string still exists after
-            we empty/modify DestS -- not necessary on JVM platform, ansistrings
-            are not explicitly refrence counted there }
-          lowstart:=low(sarr);
-          break;
+          if Pointer(DestS)=Pointer(sarr[i]) then
+            begin
+              { if DestS is used somewhere in the middle of the expression,
+                we need to make sure the original string still exists after
+                we empty/modify DestS -- not necessary on JVM platform, ansistrings
+                are not explicitly refrence counted there }
+              lowstart:=nonemptystart;
+              break;
+            end;
         end;
     end;
   { Start with empty DestS if we start with concatting
-    the first array element }
-  if lowstart=low(sarr) then
+    the first (non-empty) array element }
+  if lowstart=nonemptystart then
     DestS:='';
   OldDestLen:=length(DestS);
   { Calculate size of the result so we can do
@@ -408,8 +426,6 @@ begin
   for i:=low(sarr) to high(sarr) do
     inc(NewLen,length(sarr[i]));
   SetLength(DestS,NewLen);
-  if (StringCodePage(DestS) <> DestCP) then
-    SetCodePage(DestS,DestCP,False);
   { Concat all strings, except the string we already
     copied in DestS }
   NewLen:=OldDestLen;
@@ -423,6 +439,11 @@ begin
           inc(NewLen,size);
         end;
     end;
+  if NewLen<>0 then
+    begin
+      SetCodePage(DestS,tmpCP,False);
+      SetCodePage(DestS,DestCP,True);
+    end;
 end;
 
 

+ 49 - 0
tests/test/tcpstrconcat4.pp

@@ -0,0 +1,49 @@
+{$mode delphiunicode}
+
+type
+  tstr850 = type ansistring(850);
+  tstr866 = type ansistring(866);
+  tstr65001 = type ansistring(65001);
+
+procedure test;
+var
+  s1: tstr850;
+  s2: tstr866;
+  s3: tstr65001;
+  r: rawbytestring;
+begin
+  s1:='a';
+  s2:='b';
+  s3:='c';
+  r:='d';
+  r:=s1+s2;
+  writeln(stringcodepage(r));
+  if (stringcodepage(r)<>0) and
+     (stringcodepage(r)<>defaultsystemcodepage) then
+    halt(1);
+  setcodepage(r,850);
+  r:=s1+s2;
+  writeln(stringcodepage(r));
+  if (stringcodepage(r)<>0) and
+     (stringcodepage(r)<>defaultsystemcodepage) then
+    halt(2);
+  setcodepage(r,CP_ASCII);
+  r:=s1+s2;
+  writeln(stringcodepage(r));
+  if (stringcodepage(r)<>0) and
+     (stringcodepage(r)<>defaultsystemcodepage) then
+    halt(3);
+  r:=s1+s1;
+  writeln(stringcodepage(r));
+  if (stringcodepage(r)<>stringcodepage(s1)) then
+    halt(4);
+  r:=s2+s2;
+  writeln(stringcodepage(r));
+  if (stringcodepage(r)<>stringcodepage(s2)) then
+    halt(5);
+end;
+
+begin
+  test;
+end.
+