Browse Source

compiler:
- add helper function getansistringcodepage which returns explicitly set codepage or 0 in other case
- add helper function getansistringdef which return a def with explicitly set codepage or cansistringtype in other case
- change tstoreddef.createnai constructor to allow set codepage in constructor
- don't convert string constants to rawbytestring. if string constant already has a codepage - preserve it or convert to ansistring codepage (delphi compatible)
- don't perform string conversion from ansistring to strings with explicitly set codepage (by directive or by compiler switch) and vice versa (delphi compatible)
+ test which covers most of the cases

git-svn-id: trunk@19510 -

paul 13 years ago
parent
commit
c6ca9e5091

+ 1 - 0
.gitattributes

@@ -9978,6 +9978,7 @@ tests/test/tcpstr13.pp svneol=native#text/pascal
 tests/test/tcpstr14.pp svneol=native#text/pascal
 tests/test/tcpstr15.pp svneol=native#text/pascal
 tests/test/tcpstr16.pp svneol=native#text/pascal
+tests/test/tcpstr17.pp svneol=native#text/pascal
 tests/test/tcpstr2.pp svneol=native#text/plain
 tests/test/tcpstr2a.pp svneol=native#text/plain
 tests/test/tcpstr3.pp svneol=native#text/plain

+ 3 - 3
compiler/cresstr.pas

@@ -150,7 +150,7 @@ uses
           make_mangledname('RESSTR',current_module.localsymtable,'START'),AT_DATA,0));
 
         { Write unitname entry }
-        namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),tstringdef(cansistringtype).encoding,False);
+        namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),getansistringcodepage,False);
         current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(namelab));
         current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
         current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
@@ -166,12 +166,12 @@ uses
             new_section(current_asmdata.asmlists[al_const],sec_rodata,make_mangledname('RESSTR',current_module.localsymtable,'d_'+r.name),sizeof(pint));
             { Write default value }
             if assigned(R.value) and (R.len<>0) then
-              valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,tstringdef(cansistringtype).encoding,False)
+              valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,getansistringcodepage,False)
             else
               valuelab:=nil;
             { Append the name as a ansistring. }
             current_asmdata.asmlists[al_const].concat(cai_align.Create(const_align(sizeof(pint))));
-            namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),tstringdef(cansistringtype).encoding,False);
+            namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),getansistringcodepage,False);
 
             {
               Resourcestring index:

+ 6 - 1
compiler/defcmp.pas

@@ -366,10 +366,15 @@ implementation
                      else if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and
                              (tstringdef(def_from).stringtype=st_ansistring) then 
                       begin
+                        { don't convert ansistrings if any conditions is true:
+                          1) same encoding
+                          2) from explicit codepage ansistring to ansistring and vice versa
+                          3) from any ansistring to rawbytestring }
                         if (tstringdef(def_from).encoding=tstringdef(def_to).encoding) or
+                           ((tstringdef(def_to).encoding=0) and (tstringdef(def_from).encoding=getansistringcodepage)) or
+                           ((tstringdef(def_to).encoding=getansistringcodepage) and (tstringdef(def_from).encoding=0)) or
                            (tstringdef(def_to).encoding=globals.CP_NONE) then
                          begin
-                           //doconv := tc_string_2_string;
                            eq:=te_equal;
                          end
                         else

+ 11 - 0
compiler/defutil.pas

@@ -169,6 +169,9 @@ interface
     {# Returns true if p is an ansi string type }
     function is_ansistring(p : tdef) : boolean;
 
+    {# Returns true if p is an ansi string type with codepage 0 }
+    function is_rawbytestring(p : tdef) : boolean;
+
     {# Returns true if p is a long string type }
     function is_longstring(p : tdef) : boolean;
 
@@ -617,6 +620,14 @@ implementation
                         (tstringdef(p).stringtype=st_ansistring);
       end;
 
+    { true if p is an ansi string def with codepage CP_NONE }
+    function is_rawbytestring(p : tdef) : boolean;
+      begin
+        is_rawbytestring:=(p.typ=stringdef) and
+                       (tstringdef(p).stringtype=st_ansistring) and
+                       (tstringdef(p).encoding=globals.CP_NONE);
+      end;
+
     { true if p is an long string def }
     function is_longstring(p : tdef) : boolean;
       begin

+ 3 - 0
compiler/fmodule.pas

@@ -143,6 +143,7 @@ interface
         checkforwarddefs,
         deflist,
         symlist       : TFPObjectList;
+        ansistrdef    : tobject; { an ansistring def redefined for the current module }
         wpoinfo       : tunitwpoinfobase; { whole program optimization-related information that is generated during the current run for this unit }
         globalsymtable,           { pointer to the global symtable of this unit }
         localsymtable : TSymtable;{ pointer to the local symtable of this unit }
@@ -523,6 +524,7 @@ implementation
         derefdataintflen:=0;
         deflist:=TFPObjectList.Create(false);
         symlist:=TFPObjectList.Create(false);
+        ansistrdef:=nil;
         wpoinfo:=nil;
         checkforwarddefs:=TFPObjectList.Create(false);
         extendeddefs := TFPHashObjectList.Create(true);
@@ -634,6 +636,7 @@ implementation
         derefdata.free;
         deflist.free;
         symlist.free;
+        ansistrdef:=nil;
         wpoinfo.free;
         checkforwarddefs.free;
         globalsymtable.free;

+ 2 - 2
compiler/nadd.pas

@@ -1665,8 +1665,8 @@ implementation
                         inserttypeconv(left,rd)
                       else
                         begin
-                          inserttypeconv(left,cansistringtype);
-                          inserttypeconv(right,cansistringtype);
+                          inserttypeconv(left,getansistringdef);
+                          inserttypeconv(right,getansistringdef);
                         end;
                     end;
                   st_longstring :

+ 2 - 12
compiler/ncgcon.pas

@@ -258,7 +258,6 @@ implementation
          href: treference;
          pool: THashSet;
          entry: PHashSetItem;
-         cp: tstringencoding;
 
       const
         PoolMap: array[tconststringtype] of TConstPoolType = (
@@ -286,16 +285,7 @@ implementation
                 entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data,len*cwidechartype.size)
               else
               if cst_type = cst_ansistring then
-                begin
-                  cp:=tstringdef(resultdef).encoding;
-                  { force output of RawByteString constants in CP_ACP codepage }
-                  if cp=CP_NONE then
-                    cp:=0;
-                  { for delphiuncode mode output CP_ACP constants in the compiler codepage }
-                  if (cp=0) and (cs_explicit_codepage in current_settings.moduleswitches) then
-                    cp:=current_settings.sourcecodepage;
-                  entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,cp))
-                end
+                entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,tstringdef(resultdef).encoding))
               else
                 entry := pool.FindOrAdd(value_str,len);
 
@@ -310,7 +300,7 @@ implementation
                            if len=0 then
                              InternalError(2008032301)   { empty string should be handled above }
                            else
-                             lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,cp);
+                             lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,tstringdef(resultdef).encoding);
                         end;
                       cst_unicodestring,
                       cst_widestring:

+ 15 - 5
compiler/ncnv.pas

@@ -269,7 +269,12 @@ implementation
           remain too so that not too many/few bits are laoded }
         if equal_defs(p.resultdef,def) and
            not is_bitpacked_access(p) then
-          p.resultdef:=def
+          begin
+            { don't replace encoded string constants to rawbytestring encoding.
+              preserve the codepage }
+            if not (is_rawbytestring(def) and (p.nodetype=stringconstn)) then
+              p.resultdef:=def
+          end
         else
          begin
            case convtype of
@@ -598,7 +603,7 @@ implementation
            (p.nodetype=stringconstn) and
            { don't cast to AnsiString if already casted to Wide/UnicodeString, issue #18266 }
            (tstringconstnode(p).cst_type in [cst_conststring,cst_shortstring,cst_longstring]) then
-          p:=ctypeconvnode.create_internal(p,cansistringtype)
+          p:=ctypeconvnode.create_internal(p,getansistringdef)
         else
           case p.resultdef.typ of
             enumdef :
@@ -994,7 +999,7 @@ implementation
              else
                begin
                  if tstringconstnode(left).len>255 then
-                   inserttypeconv(left,cansistringtype)
+                   inserttypeconv(left,getansistringdef)
                  else
                    inserttypeconv(left,cshortstringtype);
                end;
@@ -1381,7 +1386,7 @@ implementation
               (is_widestring(left.resultdef) or
                is_unicodestring(left.resultdef)) then
              begin
-               inserttypeconv(left,cansistringtype);
+               inserttypeconv(left,getansistringdef);
                { the second pass of second_cstring_to_pchar expects a  }
                { strinconstn, but this may become a call to the        }
                { widestring manager in case left contains "high ascii" }
@@ -2286,8 +2291,13 @@ implementation
                 )
               ) then
               begin
-                tstringconstnode(left).changestringtype(resultdef);
+                { convert ansistring and rawbytestring constants to explicit source encoding if set }
+                if is_ansistring(resultdef) and ((tstringdef(resultdef).encoding=0)or(tstringdef(resultdef).encoding=globals.CP_NONE)) then
+                  tstringconstnode(left).changestringtype(getansistringdef)
+                else
+                  tstringconstnode(left).changestringtype(resultdef);
                 result:=left;
+                resultdef:=left.resultdef;
                 left:=nil;
                 exit;
               end;

+ 1 - 1
compiler/ncon.pas

@@ -926,7 +926,7 @@ implementation
           cst_shortstring :
             resultdef:=cshortstringtype;
           cst_ansistring :
-            resultdef:=cansistringtype;
+            resultdef:=getansistringdef;
           cst_unicodestring :
             resultdef:=cunicodestringtype;
           cst_widestring :

+ 3 - 3
compiler/ninl.pas

@@ -378,7 +378,7 @@ implementation
             if (tstringconstnode(n).len<=255) then
               inserttypeconv(n,cshortstringtype)
             else
-              inserttypeconv(n,cansistringtype)
+              inserttypeconv(n,getansistringdef)
           else if is_widechararray(n.resultdef) then
             inserttypeconv(n,cwidestringtype);
       end;
@@ -967,7 +967,7 @@ implementation
                 { (if you want to optimize to use shortstring, keep in mind that    }
                 {  readstr internally always uses ansistring, and to account for    }
                 {  chararrays with > 255 characters)                                }
-                inserttypeconv(filepara.left,cansistringtype);
+                inserttypeconv(filepara.left,getansistringdef);
                 filepara.resultdef:=filepara.left.resultdef;
                 if codegenerror then
                   exit;
@@ -2270,7 +2270,7 @@ implementation
                   case left.resultdef.typ of
                     variantdef:
                       begin
-                        inserttypeconv(left,cansistringtype);
+                        inserttypeconv(left,getansistringdef);
                       end;
 
                     stringdef :

+ 1 - 1
compiler/nld.pas

@@ -259,7 +259,7 @@ implementation
            constsym:
              begin
                if tconstsym(symtableentry).consttyp=constresourcestring then
-                 resultdef:=cansistringtype
+                 resultdef:=getansistringdef
                else
                  internalerror(22799);
              end;

+ 1 - 1
compiler/nmem.pas

@@ -782,7 +782,7 @@ implementation
             (tstringconstnode(left).cst_type=cst_conststring) then
            begin
              if tstringconstnode(left).len>255 then
-               inserttypeconv(left,cansistringtype)
+               inserttypeconv(left,getansistringdef)
              else
                inserttypeconv(left,cshortstringtype);
            end;

+ 2 - 2
compiler/pexpr.pas

@@ -133,7 +133,7 @@ implementation
          else
            begin
              if cs_ansistrings in current_settings.localswitches then
-               def:=cansistringtype
+               def:=getansistringdef
              else
                def:=cshortstringtype;
            end;
@@ -1608,7 +1608,7 @@ implementation
                       begin
                         p1:=cloadnode.create(srsym,srsymtable);
                         do_typecheckpass(p1);
-                        p1.resultdef:=cansistringtype;
+                        p1.resultdef:=getansistringdef;
                       end
                     else
                       p1:=genconstsymtree(tconstsym(srsym));

+ 1 - 1
compiler/psystem.pas

@@ -167,7 +167,7 @@ implementation
         cshortstringtype:=tstringdef.createshort(255);
         { should we give a length to the default long and ansi string definition ?? }
         clongstringtype:=tstringdef.createlong(-1);
-        cansistringtype:=tstringdef.createansi;
+        cansistringtype:=tstringdef.createansi(0);
         if target_info.system in systems_windows then
           cwidestringtype:=tstringdef.createwide
         else

+ 1 - 1
compiler/ptconst.pas

@@ -896,7 +896,7 @@ implementation
                       1:
                         begin
                           if (tstringconstnode(n).cst_type in [cst_unicodestring,cst_widestring]) then
-                            inserttypeconv(n,cansistringtype);
+                            inserttypeconv(n,getansistringdef);
                           if n.nodetype<>stringconstn then
                             internalerror(2010033003);
                           ca:=pointer(tstringconstnode(n).value_str);

+ 6 - 0
compiler/scanner.pas

@@ -354,6 +354,12 @@ implementation
               init_settings.sourcecodepage:=DefaultSystemCodePage;
               include(init_settings.moduleswitches,cs_explicit_codepage);
             end;
+          end
+        else
+          begin
+            exclude(current_settings.moduleswitches,cs_explicit_codepage);
+            if changeinit then
+              exclude(init_settings.moduleswitches,cs_explicit_codepage);
           end;
       end;
 

+ 37 - 3
compiler/symdef.pas

@@ -595,7 +595,7 @@ interface
           constructor loadshort(ppufile:tcompilerppufile);
           constructor createlong(l : asizeint);
           constructor loadlong(ppufile:tcompilerppufile);
-          constructor createansi;
+          constructor createansi(aencoding:tstringencoding);
           constructor loadansi(ppufile:tcompilerppufile);
           constructor createwide;
           constructor loadwide(ppufile:tcompilerppufile);
@@ -826,6 +826,9 @@ interface
 
     function use_vectorfpu(def : tdef) : boolean;
 
+    function getansistringcodepage:tstringencoding; inline;
+    function getansistringdef:tstringdef; inline;
+
 implementation
 
     uses
@@ -848,6 +851,37 @@ implementation
                                   Helpers
 ****************************************************************************}
 
+    function getansistringcodepage:tstringencoding; inline;
+      begin
+        if cs_explicit_codepage in current_settings.moduleswitches then
+          result:=current_settings.sourcecodepage
+        else
+          result:=0;
+      end;
+
+    function getansistringdef:tstringdef; inline;
+      begin
+        { if codepage is explicitly defined in this mudule we need to return
+          a replacement for ansistring def }
+        if cs_explicit_codepage in current_settings.moduleswitches then
+          begin
+            if not assigned(current_module) then
+              internalerror(2011101301);
+            { codepage can be redeclared only once per unit so we don't need a list of
+              redefined ansistring but only one pointer }
+            if not assigned(current_module.ansistrdef) then
+              begin
+                { if we did not create it yet we need to do this now }
+                symtablestack.push(current_module.localsymtable);
+                current_module.ansistrdef:=tstringdef.createansi(current_settings.sourcecodepage);
+                symtablestack.pop(current_module.localsymtable);
+              end;
+            result:=tstringdef(current_module.ansistrdef);
+          end
+        else
+          result:=tstringdef(cansistringtype);
+      end;
+
     function make_mangledname(const typeprefix:string;st:TSymtable;const suffix:string):string;
       var
         s,hs,
@@ -1448,11 +1482,11 @@ implementation
       end;
 
 
-    constructor tstringdef.createansi;
+    constructor tstringdef.createansi(aencoding:tstringencoding);
       begin
          inherited create(stringdef);
          stringtype:=st_ansistring;
-         encoding:=0;
+         encoding:=aencoding;
          len:=-1;
          savesize:=sizeof(pint);
       end;

+ 65 - 0
tests/test/tcpstr17.pp

@@ -0,0 +1,65 @@
+// to have correct test result with delphi set codepage option to 65001
+program tcpstr17;
+{$ifdef FPC}
+  {$mode delphi}
+  {$codepage utf8}
+{$endif}
+{$apptype console}
+type
+  TOEMStr = type AnsiString(866);
+{$ifndef FPC}
+  TSystemCodePage = Word;
+const
+  CP_UTF8 = 65001;
+{$endif}
+
+procedure TestCodeConvRaw(const s: rawbytestring; const CodePage: TSystemCodePage);
+begin
+  WriteLn(StringCodePage(s), ' ',s);
+  if CodePage <> StringCodePage(s) then
+    halt(1);
+end;
+
+procedure TestCodeConvAnsi(const s: ansistring; const CodePage: TSystemCodePage);
+begin
+  WriteLn(StringCodePage(s), ' ',s);
+  if CodePage <> StringCodePage(s) then
+    halt(2);
+end;
+
+procedure TestCodeConvUTF(const s: utf8string; const CodePage: TSystemCodePage);
+begin
+  WriteLn(StringCodePage(s), ' ',s);
+  if CodePage <> StringCodePage(s) then
+    halt(3);
+end;
+
+var
+  u: unicodestring;
+  u8: utf8string;
+  s: ansistring;
+  oemstr: TOEMStr;
+begin
+  u := #$0141#$00F3#$0064#$017A;
+  u8 := u;
+  TestCodeConvRaw(u8, CP_UTF8);
+  // if UTF8 codepage is set in options S will have UTF8 codepage
+  s := u8;
+  TestCodeConvRaw(s, CP_UTF8);
+  TestCodeConvAnsi(u8, CP_UTF8);
+  TestCodeConvAnsi(s, CP_UTF8);
+  // converts to 866
+  oemstr := u8;
+  TestCodeConvRaw(oemstr, 866);
+  TestCodeConvAnsi(oemstr, DefaultSystemCodePage);
+  s := 'test';
+  TestCodeConvRaw(s, CP_UTF8);
+  // converts to System codepage
+  s := oemstr;
+  TestCodeConvRaw(s, DefaultSystemCodePage);
+  TestCodeConvUTF(s, DefaultSystemCodePage);
+  // outputs in source codepage instead of OEM
+  TestCodeConvRaw('привет', CP_UTF8);
+  // outputs in OEM codepage
+  TestCodeConvRaw(TOEMStr('привет'), 866);
+end.