Browse Source

merge r17434 from cpstrnew branch by michael:
* Patch from Inoussa to fix constant strings with codepage

git-svn-id: trunk@19109 -

paul 14 năm trước cách đây
mục cha
commit
1db610ecbd

+ 1 - 0
.gitattributes

@@ -123,6 +123,7 @@ compiler/compinnr.inc svneol=native#text/plain
 compiler/comprsrc.pas svneol=native#text/plain
 compiler/constexp.pas svneol=native#text/x-pascal
 compiler/cp1251.pas svneol=native#text/plain
+compiler/cp1252.pp svneol=native#text/plain
 compiler/cp437.pas svneol=native#text/plain
 compiler/cp850.pas svneol=native#text/plain
 compiler/cp866.pas svneol=native#text/plain

+ 38 - 5
compiler/ccharset.pas

@@ -41,7 +41,8 @@ unit ccharset;
 
        punicodemap = ^tunicodemap;
        tunicodemap = record
-          cpname : string[20];
+          cpname : string[20];   
+          cp : word;           
           map : punicodecharmapping;
           lastchar : longint;
           next : punicodemap;
@@ -51,9 +52,10 @@ unit ccharset;
        tcp2unicode = class(tcsconvert)
        end;
 
-    function loadunicodemapping(const cpname,f : string) : punicodemap;
+    function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
     procedure registermapping(p : punicodemap);
-    function getmap(const s : string) : punicodemap;
+    function getmap(const s : string) : punicodemap; 
+    function getmap(cp : word) : punicodemap;     
     function mappingavailable(const s : string) : boolean;
     function getunicode(c : char;p : punicodemap) : tunicodechar;
     function getascii(c : tunicodechar;p : punicodemap) : string;
@@ -63,7 +65,7 @@ unit ccharset;
     var
        mappings : punicodemap;
 
-    function loadunicodemapping(const cpname,f : string) : punicodemap;
+    function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
 
       var
          data : punicodecharmapping;
@@ -158,6 +160,7 @@ unit ccharset;
          new(p);
          p^.lastchar:=lastchar;
          p^.cpname:=cpname;
+         p^.cp:=cp;
          p^.internalmap:=false;
          p^.next:=nil;
          p^.map:=data;
@@ -199,7 +202,37 @@ unit ccharset;
               hp:=hp^.next;
            end;
          getmap:=nil;
-      end;
+      end;    
+
+    function getmap(cp : word) : punicodemap;
+
+      var
+         hp : punicodemap;
+
+      const
+         mapcache : word = 0;
+         mapcachep : punicodemap = nil;
+
+      begin
+         if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
+           begin
+              getmap:=mapcachep;
+              exit;
+           end;
+         hp:=mappings;
+         while assigned(hp) do
+           begin
+              if hp^.cp=cp then
+                begin
+                   getmap:=hp;
+                   mapcache:=cp;
+                   mapcachep:=hp;
+                   exit;
+                end;
+              hp:=hp^.next;
+           end;
+         getmap:=nil;
+      end;   
 
     function mappingavailable(const s : string) : boolean;
 

+ 2 - 1
compiler/cp1251.pas

@@ -269,7 +269,8 @@ unit cp1251;
      );
 
      unicodemap : tunicodemap = (
-       cpname : 'cp1251';
+       cpname : 'cp1251'; 
+       cp : 1251;     
        map : @map;
        lastchar : 255;
        next : nil;

+ 282 - 0
compiler/cp1252.pp

@@ -0,0 +1,282 @@
+{ This is an automatically created file, so don't edit it }
+unit CP1252;
+
+  interface
+
+  implementation
+
+  uses
+     charset;
+
+  const
+     map : array[0..255] of tunicodecharmapping = (
+       (unicode : 0; flag : umf_noinfo; reserved: 0),
+       (unicode : 1; flag : umf_noinfo; reserved: 0),
+       (unicode : 2; flag : umf_noinfo; reserved: 0),
+       (unicode : 3; flag : umf_noinfo; reserved: 0),
+       (unicode : 4; flag : umf_noinfo; reserved: 0),
+       (unicode : 5; flag : umf_noinfo; reserved: 0),
+       (unicode : 6; flag : umf_noinfo; reserved: 0),
+       (unicode : 7; flag : umf_noinfo; reserved: 0),
+       (unicode : 8; flag : umf_noinfo; reserved: 0),
+       (unicode : 9; flag : umf_noinfo; reserved: 0),
+       (unicode : 10; flag : umf_noinfo; reserved: 0),
+       (unicode : 11; flag : umf_noinfo; reserved: 0),
+       (unicode : 12; flag : umf_noinfo; reserved: 0),
+       (unicode : 13; flag : umf_noinfo; reserved: 0),
+       (unicode : 14; flag : umf_noinfo; reserved: 0),
+       (unicode : 15; flag : umf_noinfo; reserved: 0),
+       (unicode : 16; flag : umf_noinfo; reserved: 0),
+       (unicode : 17; flag : umf_noinfo; reserved: 0),
+       (unicode : 18; flag : umf_noinfo; reserved: 0),
+       (unicode : 19; flag : umf_noinfo; reserved: 0),
+       (unicode : 20; flag : umf_noinfo; reserved: 0),
+       (unicode : 21; flag : umf_noinfo; reserved: 0),
+       (unicode : 22; flag : umf_noinfo; reserved: 0),
+       (unicode : 23; flag : umf_noinfo; reserved: 0),
+       (unicode : 24; flag : umf_noinfo; reserved: 0),
+       (unicode : 25; flag : umf_noinfo; reserved: 0),
+       (unicode : 26; flag : umf_noinfo; reserved: 0),
+       (unicode : 27; flag : umf_noinfo; reserved: 0),
+       (unicode : 28; flag : umf_noinfo; reserved: 0),
+       (unicode : 29; flag : umf_noinfo; reserved: 0),
+       (unicode : 30; flag : umf_noinfo; reserved: 0),
+       (unicode : 31; flag : umf_noinfo; reserved: 0),
+       (unicode : 32; flag : umf_noinfo; reserved: 0),
+       (unicode : 33; flag : umf_noinfo; reserved: 0),
+       (unicode : 34; flag : umf_noinfo; reserved: 0),
+       (unicode : 35; flag : umf_noinfo; reserved: 0),
+       (unicode : 36; flag : umf_noinfo; reserved: 0),
+       (unicode : 37; flag : umf_noinfo; reserved: 0),
+       (unicode : 38; flag : umf_noinfo; reserved: 0),
+       (unicode : 39; flag : umf_noinfo; reserved: 0),
+       (unicode : 40; flag : umf_noinfo; reserved: 0),
+       (unicode : 41; flag : umf_noinfo; reserved: 0),
+       (unicode : 42; flag : umf_noinfo; reserved: 0),
+       (unicode : 43; flag : umf_noinfo; reserved: 0),
+       (unicode : 44; flag : umf_noinfo; reserved: 0),
+       (unicode : 45; flag : umf_noinfo; reserved: 0),
+       (unicode : 46; flag : umf_noinfo; reserved: 0),
+       (unicode : 47; flag : umf_noinfo; reserved: 0),
+       (unicode : 48; flag : umf_noinfo; reserved: 0),
+       (unicode : 49; flag : umf_noinfo; reserved: 0),
+       (unicode : 50; flag : umf_noinfo; reserved: 0),
+       (unicode : 51; flag : umf_noinfo; reserved: 0),
+       (unicode : 52; flag : umf_noinfo; reserved: 0),
+       (unicode : 53; flag : umf_noinfo; reserved: 0),
+       (unicode : 54; flag : umf_noinfo; reserved: 0),
+       (unicode : 55; flag : umf_noinfo; reserved: 0),
+       (unicode : 56; flag : umf_noinfo; reserved: 0),
+       (unicode : 57; flag : umf_noinfo; reserved: 0),
+       (unicode : 58; flag : umf_noinfo; reserved: 0),
+       (unicode : 59; flag : umf_noinfo; reserved: 0),
+       (unicode : 60; flag : umf_noinfo; reserved: 0),
+       (unicode : 61; flag : umf_noinfo; reserved: 0),
+       (unicode : 62; flag : umf_noinfo; reserved: 0),
+       (unicode : 63; flag : umf_noinfo; reserved: 0),
+       (unicode : 64; flag : umf_noinfo; reserved: 0),
+       (unicode : 65; flag : umf_noinfo; reserved: 0),
+       (unicode : 66; flag : umf_noinfo; reserved: 0),
+       (unicode : 67; flag : umf_noinfo; reserved: 0),
+       (unicode : 68; flag : umf_noinfo; reserved: 0),
+       (unicode : 69; flag : umf_noinfo; reserved: 0),
+       (unicode : 70; flag : umf_noinfo; reserved: 0),
+       (unicode : 71; flag : umf_noinfo; reserved: 0),
+       (unicode : 72; flag : umf_noinfo; reserved: 0),
+       (unicode : 73; flag : umf_noinfo; reserved: 0),
+       (unicode : 74; flag : umf_noinfo; reserved: 0),
+       (unicode : 75; flag : umf_noinfo; reserved: 0),
+       (unicode : 76; flag : umf_noinfo; reserved: 0),
+       (unicode : 77; flag : umf_noinfo; reserved: 0),
+       (unicode : 78; flag : umf_noinfo; reserved: 0),
+       (unicode : 79; flag : umf_noinfo; reserved: 0),
+       (unicode : 80; flag : umf_noinfo; reserved: 0),
+       (unicode : 81; flag : umf_noinfo; reserved: 0),
+       (unicode : 82; flag : umf_noinfo; reserved: 0),
+       (unicode : 83; flag : umf_noinfo; reserved: 0),
+       (unicode : 84; flag : umf_noinfo; reserved: 0),
+       (unicode : 85; flag : umf_noinfo; reserved: 0),
+       (unicode : 86; flag : umf_noinfo; reserved: 0),
+       (unicode : 87; flag : umf_noinfo; reserved: 0),
+       (unicode : 88; flag : umf_noinfo; reserved: 0),
+       (unicode : 89; flag : umf_noinfo; reserved: 0),
+       (unicode : 90; flag : umf_noinfo; reserved: 0),
+       (unicode : 91; flag : umf_noinfo; reserved: 0),
+       (unicode : 92; flag : umf_noinfo; reserved: 0),
+       (unicode : 93; flag : umf_noinfo; reserved: 0),
+       (unicode : 94; flag : umf_noinfo; reserved: 0),
+       (unicode : 95; flag : umf_noinfo; reserved: 0),
+       (unicode : 96; flag : umf_noinfo; reserved: 0),
+       (unicode : 97; flag : umf_noinfo; reserved: 0),
+       (unicode : 98; flag : umf_noinfo; reserved: 0),
+       (unicode : 99; flag : umf_noinfo; reserved: 0),
+       (unicode : 100; flag : umf_noinfo; reserved: 0),
+       (unicode : 101; flag : umf_noinfo; reserved: 0),
+       (unicode : 102; flag : umf_noinfo; reserved: 0),
+       (unicode : 103; flag : umf_noinfo; reserved: 0),
+       (unicode : 104; flag : umf_noinfo; reserved: 0),
+       (unicode : 105; flag : umf_noinfo; reserved: 0),
+       (unicode : 106; flag : umf_noinfo; reserved: 0),
+       (unicode : 107; flag : umf_noinfo; reserved: 0),
+       (unicode : 108; flag : umf_noinfo; reserved: 0),
+       (unicode : 109; flag : umf_noinfo; reserved: 0),
+       (unicode : 110; flag : umf_noinfo; reserved: 0),
+       (unicode : 111; flag : umf_noinfo; reserved: 0),
+       (unicode : 112; flag : umf_noinfo; reserved: 0),
+       (unicode : 113; flag : umf_noinfo; reserved: 0),
+       (unicode : 114; flag : umf_noinfo; reserved: 0),
+       (unicode : 115; flag : umf_noinfo; reserved: 0),
+       (unicode : 116; flag : umf_noinfo; reserved: 0),
+       (unicode : 117; flag : umf_noinfo; reserved: 0),
+       (unicode : 118; flag : umf_noinfo; reserved: 0),
+       (unicode : 119; flag : umf_noinfo; reserved: 0),
+       (unicode : 120; flag : umf_noinfo; reserved: 0),
+       (unicode : 121; flag : umf_noinfo; reserved: 0),
+       (unicode : 122; flag : umf_noinfo; reserved: 0),
+       (unicode : 123; flag : umf_noinfo; reserved: 0),
+       (unicode : 124; flag : umf_noinfo; reserved: 0),
+       (unicode : 125; flag : umf_noinfo; reserved: 0),
+       (unicode : 126; flag : umf_noinfo; reserved: 0),
+       (unicode : 127; flag : umf_noinfo; reserved: 0),
+       (unicode : 8364; flag : umf_noinfo; reserved: 0),
+       (unicode : 65535; flag : umf_unused; reserved: 0),
+       (unicode : 8218; flag : umf_noinfo; reserved: 0),
+       (unicode : 402; flag : umf_noinfo; reserved: 0),
+       (unicode : 8222; flag : umf_noinfo; reserved: 0),
+       (unicode : 8230; flag : umf_noinfo; reserved: 0),
+       (unicode : 8224; flag : umf_noinfo; reserved: 0),
+       (unicode : 8225; flag : umf_noinfo; reserved: 0),
+       (unicode : 710; flag : umf_noinfo; reserved: 0),
+       (unicode : 8240; flag : umf_noinfo; reserved: 0),
+       (unicode : 352; flag : umf_noinfo; reserved: 0),
+       (unicode : 8249; flag : umf_noinfo; reserved: 0),
+       (unicode : 338; flag : umf_noinfo; reserved: 0),
+       (unicode : 65535; flag : umf_unused; reserved: 0),
+       (unicode : 381; flag : umf_noinfo; reserved: 0),
+       (unicode : 65535; flag : umf_unused; reserved: 0),
+       (unicode : 65535; flag : umf_unused; reserved: 0),
+       (unicode : 8216; flag : umf_noinfo; reserved: 0),
+       (unicode : 8217; flag : umf_noinfo; reserved: 0),
+       (unicode : 8220; flag : umf_noinfo; reserved: 0),
+       (unicode : 8221; flag : umf_noinfo; reserved: 0),
+       (unicode : 8226; flag : umf_noinfo; reserved: 0),
+       (unicode : 8211; flag : umf_noinfo; reserved: 0),
+       (unicode : 8212; flag : umf_noinfo; reserved: 0),
+       (unicode : 732; flag : umf_noinfo; reserved: 0),
+       (unicode : 8482; flag : umf_noinfo; reserved: 0),
+       (unicode : 353; flag : umf_noinfo; reserved: 0),
+       (unicode : 8250; flag : umf_noinfo; reserved: 0),
+       (unicode : 339; flag : umf_noinfo; reserved: 0),
+       (unicode : 65535; flag : umf_unused; reserved: 0),
+       (unicode : 382; flag : umf_noinfo; reserved: 0),
+       (unicode : 376; flag : umf_noinfo; reserved: 0),
+       (unicode : 160; flag : umf_noinfo; reserved: 0),
+       (unicode : 161; flag : umf_noinfo; reserved: 0),
+       (unicode : 162; flag : umf_noinfo; reserved: 0),
+       (unicode : 163; flag : umf_noinfo; reserved: 0),
+       (unicode : 164; flag : umf_noinfo; reserved: 0),
+       (unicode : 165; flag : umf_noinfo; reserved: 0),
+       (unicode : 166; flag : umf_noinfo; reserved: 0),
+       (unicode : 167; flag : umf_noinfo; reserved: 0),
+       (unicode : 168; flag : umf_noinfo; reserved: 0),
+       (unicode : 169; flag : umf_noinfo; reserved: 0),
+       (unicode : 170; flag : umf_noinfo; reserved: 0),
+       (unicode : 171; flag : umf_noinfo; reserved: 0),
+       (unicode : 172; flag : umf_noinfo; reserved: 0),
+       (unicode : 173; flag : umf_noinfo; reserved: 0),
+       (unicode : 174; flag : umf_noinfo; reserved: 0),
+       (unicode : 175; flag : umf_noinfo; reserved: 0),
+       (unicode : 176; flag : umf_noinfo; reserved: 0),
+       (unicode : 177; flag : umf_noinfo; reserved: 0),
+       (unicode : 178; flag : umf_noinfo; reserved: 0),
+       (unicode : 179; flag : umf_noinfo; reserved: 0),
+       (unicode : 180; flag : umf_noinfo; reserved: 0),
+       (unicode : 181; flag : umf_noinfo; reserved: 0),
+       (unicode : 182; flag : umf_noinfo; reserved: 0),
+       (unicode : 183; flag : umf_noinfo; reserved: 0),
+       (unicode : 184; flag : umf_noinfo; reserved: 0),
+       (unicode : 185; flag : umf_noinfo; reserved: 0),
+       (unicode : 186; flag : umf_noinfo; reserved: 0),
+       (unicode : 187; flag : umf_noinfo; reserved: 0),
+       (unicode : 188; flag : umf_noinfo; reserved: 0),
+       (unicode : 189; flag : umf_noinfo; reserved: 0),
+       (unicode : 190; flag : umf_noinfo; reserved: 0),
+       (unicode : 191; flag : umf_noinfo; reserved: 0),
+       (unicode : 192; flag : umf_noinfo; reserved: 0),
+       (unicode : 193; flag : umf_noinfo; reserved: 0),
+       (unicode : 194; flag : umf_noinfo; reserved: 0),
+       (unicode : 195; flag : umf_noinfo; reserved: 0),
+       (unicode : 196; flag : umf_noinfo; reserved: 0),
+       (unicode : 197; flag : umf_noinfo; reserved: 0),
+       (unicode : 198; flag : umf_noinfo; reserved: 0),
+       (unicode : 199; flag : umf_noinfo; reserved: 0),
+       (unicode : 200; flag : umf_noinfo; reserved: 0),
+       (unicode : 201; flag : umf_noinfo; reserved: 0),
+       (unicode : 202; flag : umf_noinfo; reserved: 0),
+       (unicode : 203; flag : umf_noinfo; reserved: 0),
+       (unicode : 204; flag : umf_noinfo; reserved: 0),
+       (unicode : 205; flag : umf_noinfo; reserved: 0),
+       (unicode : 206; flag : umf_noinfo; reserved: 0),
+       (unicode : 207; flag : umf_noinfo; reserved: 0),
+       (unicode : 208; flag : umf_noinfo; reserved: 0),
+       (unicode : 209; flag : umf_noinfo; reserved: 0),
+       (unicode : 210; flag : umf_noinfo; reserved: 0),
+       (unicode : 211; flag : umf_noinfo; reserved: 0),
+       (unicode : 212; flag : umf_noinfo; reserved: 0),
+       (unicode : 213; flag : umf_noinfo; reserved: 0),
+       (unicode : 214; flag : umf_noinfo; reserved: 0),
+       (unicode : 215; flag : umf_noinfo; reserved: 0),
+       (unicode : 216; flag : umf_noinfo; reserved: 0),
+       (unicode : 217; flag : umf_noinfo; reserved: 0),
+       (unicode : 218; flag : umf_noinfo; reserved: 0),
+       (unicode : 219; flag : umf_noinfo; reserved: 0),
+       (unicode : 220; flag : umf_noinfo; reserved: 0),
+       (unicode : 221; flag : umf_noinfo; reserved: 0),
+       (unicode : 222; flag : umf_noinfo; reserved: 0),
+       (unicode : 223; flag : umf_noinfo; reserved: 0),
+       (unicode : 224; flag : umf_noinfo; reserved: 0),
+       (unicode : 225; flag : umf_noinfo; reserved: 0),
+       (unicode : 226; flag : umf_noinfo; reserved: 0),
+       (unicode : 227; flag : umf_noinfo; reserved: 0),
+       (unicode : 228; flag : umf_noinfo; reserved: 0),
+       (unicode : 229; flag : umf_noinfo; reserved: 0),
+       (unicode : 230; flag : umf_noinfo; reserved: 0),
+       (unicode : 231; flag : umf_noinfo; reserved: 0),
+       (unicode : 232; flag : umf_noinfo; reserved: 0),
+       (unicode : 233; flag : umf_noinfo; reserved: 0),
+       (unicode : 234; flag : umf_noinfo; reserved: 0),
+       (unicode : 235; flag : umf_noinfo; reserved: 0),
+       (unicode : 236; flag : umf_noinfo; reserved: 0),
+       (unicode : 237; flag : umf_noinfo; reserved: 0),
+       (unicode : 238; flag : umf_noinfo; reserved: 0),
+       (unicode : 239; flag : umf_noinfo; reserved: 0),
+       (unicode : 240; flag : umf_noinfo; reserved: 0),
+       (unicode : 241; flag : umf_noinfo; reserved: 0),
+       (unicode : 242; flag : umf_noinfo; reserved: 0),
+       (unicode : 243; flag : umf_noinfo; reserved: 0),
+       (unicode : 244; flag : umf_noinfo; reserved: 0),
+       (unicode : 245; flag : umf_noinfo; reserved: 0),
+       (unicode : 246; flag : umf_noinfo; reserved: 0),
+       (unicode : 247; flag : umf_noinfo; reserved: 0),
+       (unicode : 248; flag : umf_noinfo; reserved: 0),
+       (unicode : 249; flag : umf_noinfo; reserved: 0),
+       (unicode : 250; flag : umf_noinfo; reserved: 0),
+       (unicode : 251; flag : umf_noinfo; reserved: 0),
+       (unicode : 252; flag : umf_noinfo; reserved: 0),
+       (unicode : 253; flag : umf_noinfo; reserved: 0),
+       (unicode : 254; flag : umf_noinfo; reserved: 0),
+       (unicode : 255; flag : umf_noinfo; reserved: 0)
+     );
+
+     unicodemap : tunicodemap = (
+       cpname : 'CP1252';
+       cp : 1252;
+       map : @map;
+       lastchar : 255;
+       next : nil;
+       internalmap : true
+     );
+
+  begin
+     registermapping(@unicodemap)
+  end.

+ 1 - 0
compiler/cp437.pas

@@ -270,6 +270,7 @@ unit cp437;
 
      unicodemap : tunicodemap = (
        cpname : 'cp437';
+       cp : 437;     
        map : @map[0];
        lastchar : 255;
        next : nil;

+ 2 - 1
compiler/cp850.pas

@@ -269,7 +269,8 @@ unit cp850;
      );
 
      unicodemap : tunicodemap = (
-       cpname : 'cp850';
+       cpname : 'cp850'; 
+       cp : 850;     
        map : @map[0];
        lastchar : 255;
        next : nil;

+ 2 - 1
compiler/cp866.pas

@@ -269,7 +269,8 @@ unit cp866;
      );
 
      unicodemap : tunicodemap = (
-       cpname : 'cp866';
+       cpname : 'cp866'; 
+       cp : 866;     
        map : @map;
        lastchar : 255;
        next : nil;

+ 3 - 2
compiler/cp8859_1.pas

@@ -269,8 +269,9 @@ unit cp8859_1;
      );
 
      unicodemap : tunicodemap = (
-       cpname : '8859-1';
-       map : @map[0];
+       cpname : '8859-1'; 
+       cp : 28591;     
+       map : @map[0]; 
        lastchar : 255;
        next : nil;
        internalmap : true

+ 2 - 1
compiler/cp8859_5.pas

@@ -269,7 +269,8 @@ unit cp8859_5;
      );
 
      unicodemap : tunicodemap = (
-       cpname : '8859-5';
+       cpname : '8859-5'; 
+       cp : 28595;     
        map : @map;
        lastchar : 255;
        next : nil;

+ 1 - 0
compiler/globals.pas

@@ -104,6 +104,7 @@ interface
 {$endif}
        CP_UTF8 = 65001;
        CP_UTF16 = 1200;
+       CP_NONE  = 65535;
 
 
     type

+ 39 - 7
compiler/ncnv.pas

@@ -62,6 +62,7 @@ interface
           function typecheck_cord_to_pointer : tnode;
           function typecheck_chararray_to_string : tnode;
           function typecheck_string_to_chararray : tnode;
+          function typecheck_string_to_string : tnode;
           function typecheck_char_to_string : tnode;
           function typecheck_char_to_chararray : tnode;
           function typecheck_int_to_real : tnode;
@@ -1030,11 +1031,11 @@ implementation
          { we can't do widechar to ansichar conversions at compile time, since }
          { this maps all non-ascii chars to '?' -> loses information           }
 
-         if (left.nodetype=ordconstn) and
+         if (left.nodetype=ordconstn) {and
             ((tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or
              (torddef(left.resultdef).ordtype=uchar) or
              { widechar >=128 is destroyed }
-             (tordconstnode(left).value.uvalue<128)) then
+             (tordconstnode(left).value.uvalue<128))} then
            begin
               if (tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) then
                begin
@@ -1112,6 +1113,36 @@ implementation
              end;
       end;
 
+    function ttypeconvnode.typecheck_string_to_string : tnode;
+      begin
+        result:=nil;
+        if (left.nodetype=stringconstn) and
+           (tstringdef(resultdef).stringtype in [st_ansistring,st_shortstring]) and
+           (tstringdef(left.resultdef).stringtype in [st_unicodestring,st_widestring]) then
+          begin
+            tstringconstnode(left).changestringtype(resultdef);
+            Result:=left;
+            left:=nil;
+          end
+        else if (tstringdef(resultdef).stringtype=st_ansistring) and
+                (tstringdef(left.resultdef).stringtype=st_ansistring) and
+                (tstringdef(resultdef).encoding<>tstringdef(left.resultdef).encoding) then
+          begin
+            result:=ccallnode.createinternres(
+                      'fpc_ansistr_to_ansistr',
+                      ccallparanode.create( 
+                        cordconstnode.create(
+                          tstringdef(resultdef).encoding,
+                          u16inttype,
+                          true
+                        ),
+                        ccallparanode.create(left,nil)
+                      ),
+                      resultdef
+                    );
+            left:=nil;
+          end;
+      end; 
 
     function ttypeconvnode.typecheck_char_to_chararray : tnode;
       begin
@@ -1133,11 +1164,12 @@ implementation
         hp : tordconstnode;
       begin
          result:=nil;
-         if (left.nodetype=ordconstn) and
+         if (left.nodetype=ordconstn) 
+         {and
             ((torddef(resultdef).ordtype<>uchar) or
              (torddef(left.resultdef).ordtype<>uwidechar) or
              { >= 128 is replaced by '?' currently -> loses information }
-             (tordconstnode(left).value.uvalue<128)) then
+             (tordconstnode(left).value.uvalue<128))} then
            begin
              if (torddef(resultdef).ordtype=uchar) and
                 (torddef(left.resultdef).ordtype=uwidechar) then
@@ -1677,7 +1709,7 @@ implementation
           {none} nil,
           {equal} nil,
           {not_possible} nil,
-          { string_2_string } nil,
+          { string_2_string } @ttypeconvnode.typecheck_string_to_string,
           { char_2_string } @ttypeconvnode.typecheck_char_to_string,
           { char_2_chararray } @ttypeconvnode.typecheck_char_to_chararray,
           { pchar_2_string } @ttypeconvnode.typecheck_pchar_to_string,
@@ -2205,9 +2237,9 @@ implementation
               (
                 ((not is_widechararray(left.resultdef) and
                   not is_wide_or_unicode_string(left.resultdef)) or
-                 (tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or
+                 (tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) {or
                  { non-ascii chars would be replaced with '?' -> loses info }
-                 not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str)))
+                 not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str))})
               ) then
               begin
                 tstringconstnode(left).changestringtype(resultdef);

+ 37 - 5
compiler/ncon.pas

@@ -982,6 +982,9 @@ implementation
       var
         pw : pcompilerwidestring;
         pc : pchar;
+        cp1 : tstringencoding;
+        cp2 : tstringencoding;
+        l,l2 : longint;
       begin
         if def.typ<>stringdef then
           internalerror(200510011);
@@ -999,11 +1002,40 @@ implementation
           if (cst_type in [cst_widestring,cst_unicodestring]) and
             not(tstringdef(def).stringtype in [st_widestring,st_unicodestring]) then
             begin
-              pw:=pcompilerwidestring(value_str);
-              getmem(pc,getlengthwidestring(pw)+1);
-              unicode2ascii(pw,pc);
-              donewidestring(pw);
-              value_str:=pc;
+              if (tstringdef(def).encoding=CP_UTF8) then 
+                begin
+                  pw:=pcompilerwidestring(value_str);
+                  l:=(getlengthwidestring(pw)*4)+1;
+                  getmem(pc,l);   
+                  l2:=UnicodeToUtf8(pc,l,PUnicodeChar(pw^.data),getlengthwidestring(pw));
+                  if (l<>l2) then
+                    begin
+                      ReAllocMem(pc,l2);
+                      len:=l2;
+                    end;   
+                  donewidestring(pw);
+                  value_str:=pc;
+                end
+              else
+                begin
+                  pw:=pcompilerwidestring(value_str);
+                  getmem(pc,getlengthwidestring(pw)+1);
+                  unicode2ascii(pw,pc,tstringdef(def).encoding);
+                  donewidestring(pw);
+                  value_str:=pc;
+                end;
+            end
+        else 
+          if (tstringdef(def).stringtype = st_ansistring) and
+             not(cst_type in [cst_widestring,cst_unicodestring]) then
+            begin
+              cp1:=tstringdef(def).encoding;
+              if (cst_type = cst_ansistring) then
+                cp2:=tstringdef(resultdef).encoding
+              else if (cst_type in [cst_shortstring,cst_conststring,cst_longstring]) then
+                cp2:=codepagebyname(current_settings.sourcecodepage);
+              if cpavailable(cp1) and cpavailable(cp2) then
+                changecodepage(value_str,len,cp1,value_str,cp2);
             end;
         cst_type:=st2cst[tstringdef(def).stringtype];
         resultdef:=def;

+ 9 - 3
compiler/options.pas

@@ -903,9 +903,15 @@ begin
                    end;
                  'm' :
                    begin
-                     unicodemapping:=loadunicodemapping(More,More+'.txt');
-                     if assigned(unicodemapping) then
-                       registermapping(unicodemapping)
+                     s:=ExtractFileDir(more);
+                     if TryStrToInt(ExtractFileName(more),j) then 
+                       begin
+                         unicodemapping:=loadunicodemapping(More,More+'.txt',j);
+                         if assigned(unicodemapping) then
+                           registermapping(unicodemapping)
+                         else
+                           IllegalPara(opt);
+                       end
                      else
                        IllegalPara(opt);
                    end;

+ 18 - 0
compiler/pstatmnt.pas

@@ -176,6 +176,24 @@ implementation
                        p:=ctypeconvnode.create(p,cwidechartype);
                        do_typecheckpass(p);
                     end;
+               end
+             else
+               begin
+                 if is_char(casedef) and is_widechar(p.resultdef) then
+                   begin
+                      if (p.nodetype=ordconstn) then
+                        begin
+                           p:=ctypeconvnode.create(p,cchartype);
+                           do_typecheckpass(p);
+                        end
+                      else if (p.nodetype=rangen) then
+                        begin
+                           trangenode(p).left:=ctypeconvnode.create(trangenode(p).left,cchartype);
+                           trangenode(p).right:=ctypeconvnode.create(trangenode(p).right,cchartype);
+                           do_typecheckpass(trangenode(p).left);
+                           do_typecheckpass(trangenode(p).right);
+                        end;
+                   end;
                end;
              hl1:=0;
              hl2:=0;

+ 2 - 0
compiler/ptconst.pas

@@ -231,6 +231,8 @@ implementation
                 end;
               uchar :
                 begin
+                   if is_constwidecharnode(n) then 
+                     inserttypeconv(n,cchartype); 
                    if is_constcharnode(n) or
                      ((m_delphi in current_settings.modeswitches) and
                       is_constwidecharnode(n) and

+ 13 - 0
compiler/scanner.pas

@@ -4171,6 +4171,19 @@ In case not, the value returned can be arbitrary.
                       break;
                    end;
                  until false;
+                 //------------------
+                 { convert existing string to an utf-8 string }
+                 if (not iswidestring) and 
+                    (current_settings.sourcecodepage<>default_settings.sourcecodepage) then
+                   begin
+                     if len>0 then
+                       ascii2unicode(@cstringpattern[1],len,patternw)
+                     else
+                       ascii2unicode(nil,len,patternw);
+                     iswidestring:=true;
+                     len:=0;
+                   end;                  
+                 //-------------------
                  { strings with length 1 become const chars }
                  if iswidestring then
                    begin

+ 68 - 16
compiler/widestr.pas

@@ -53,15 +53,21 @@ unit widestr;
     function asciichar2unicode(c : char) : tcompilerwidechar;
     function unicode2asciichar(c : tcompilerwidechar) : char;
     procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
-    procedure unicode2ascii(r : pcompilerwidestring;p : pchar);
+    procedure unicode2ascii(r : pcompilerwidestring;p : pchar;cp : tstringencoding);
     function hasnonasciichars(const p: pcompilerwidestring): boolean;
     function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
     function cpavailable(const s : string) : boolean;
+    function cpavailable(cp : word) : boolean;
+    procedure changecodepage(
+      s : pchar; l : SizeInt; scp : tstringencoding; 
+      d : pchar; dcp : tstringencoding
+    );
+    function codepagebyname(const s : string) : tstringencoding;
 
   implementation
 
     uses
-      cp8859_1,cp850,cp437,
+      cp8859_1,cp850,cp437,cp1252,
       { cyrillic code pages }
       cp1251,cp866,cp8859_5,
       globals,cutils;
@@ -173,11 +179,14 @@ unit widestr;
       end;
 
     function unicode2asciichar(c : tcompilerwidechar) : char;
-      begin
+      {begin
         if word(c)<128 then
           unicode2asciichar:=char(word(c))
          else
           unicode2asciichar:='?';
+      end;}
+      begin
+         Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
       end;
 
     procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
@@ -211,22 +220,28 @@ unit widestr;
            end;
       end;
 
-    procedure unicode2ascii(r : pcompilerwidestring;p:pchar);
-(*
+    procedure unicode2ascii(r : pcompilerwidestring;p:pchar;cp : tstringencoding);
       var
-         m : punicodemap;
-         i : longint;
-
+        m : punicodemap;
+        source : tcompilerwidecharptr;
+        dest   : pchar;
+        i      : longint;
       begin
-         m:=getmap(current_settings.sourcecodepage);
-         { should be a very good estimation :) }
-         setlengthwidestring(r,length(s));
+        if (cp = 0) or (cp=CP_NONE) then
+          m:=getmap(current_settings.sourcecodepage)
+        else
+          m:=getmap(cp);
          // !!!! MBCS
-         for i:=1 to length(s) do
-           begin
-           end;
+        source:=tcompilerwidecharptr(r^.data);
+        dest:=p;
+        for i:=1 to r^.len do
+         begin
+           dest^ := getascii(source^,m)[1];
+           inc(dest);
+           inc(source);
+         end;
       end;
-*)
+(*
       var
         source : tcompilerwidecharptr;
         dest   : pchar;
@@ -247,7 +262,7 @@ unit widestr;
            inc(source);
          end;
       end;
-
+*)
 
     function hasnonasciichars(const p: pcompilerwidestring): boolean;
       var
@@ -269,6 +284,43 @@ unit widestr;
     function cpavailable(const s : string) : boolean;
       begin
           cpavailable:=mappingavailable(lower(s));
+      end;  
+    
+    function cpavailable(cp : word) : boolean;
+      begin
+          cpavailable:=mappingavailable(cp);
+      end;     
+
+    procedure changecodepage(
+      s : pchar; l : SizeInt; scp : tstringencoding; 
+      d : pchar; dcp : tstringencoding
+    );
+      var
+        ms, md : punicodemap;
+        source : pchar;
+        dest   : pchar;
+        i      : longint;
+      begin
+        ms:=getmap(scp);
+        md:=getmap(dcp);
+        source:=s;
+        dest:=d;
+        for i:=1 to l do
+         begin
+           dest^ := getascii(getunicode(source^,ms),md)[1];
+           inc(dest);
+           inc(source);
+         end;
+      end;  
+
+    function codepagebyname(const s : string) : tstringencoding;
+      var
+        p : punicodemap;
+      begin
+        Result:=0;
+        p:=getmap(s);
+        if (p<>nil) then
+          Result:=p^.cp; 
       end;
 
 end.

+ 45 - 5
rtl/inc/charset.pp

@@ -39,6 +39,7 @@ unit charset;
        punicodemap = ^tunicodemap;
        tunicodemap = record
           cpname : string[20];
+          cp : word;
           map : punicodecharmapping;
           lastchar : longint;
           next : punicodemap;
@@ -48,10 +49,12 @@ unit charset;
        tcp2unicode = class(tcsconvert)
        end;
 
-    function loadunicodemapping(const cpname,f : string) : punicodemap;
+    function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
     procedure registermapping(p : punicodemap);
-    function getmap(const s : string) : punicodemap;
+    function getmap(const s : string) : punicodemap; 
+    function getmap(cp : word) : punicodemap;   
     function mappingavailable(const s : string) : boolean;
+    function mappingavailable(cp :word) : boolean;
     function getunicode(c : char;p : punicodemap) : tunicodechar;
     function getascii(c : tunicodechar;p : punicodemap) : string;
 
@@ -60,7 +63,7 @@ unit charset;
     var
        mappings : punicodemap;
 
-    function loadunicodemapping(const cpname,f : string) : punicodemap;
+    function loadunicodemapping(const cpname,f : string; cp :word) : punicodemap;
 
       var
          data : punicodecharmapping;
@@ -155,6 +158,7 @@ unit charset;
          new(p);
          p^.lastchar:=lastchar;
          p^.cpname:=cpname;
+         p^.cp:=cp;
          p^.internalmap:=false;
          p^.next:=nil;
          p^.map:=data;
@@ -196,6 +200,36 @@ unit charset;
               hp:=hp^.next;
            end;
          getmap:=nil;
+      end;////////
+
+    function getmap(cp : word) : punicodemap;
+
+      var
+         hp : punicodemap;
+
+      const
+         mapcache : word = 0;
+         mapcachep : punicodemap = nil;
+
+      begin
+         if (mapcache=cp) and assigned(mapcachep) and (mapcachep^.cp=cp) then
+           begin
+              getmap:=mapcachep;
+              exit;
+           end;
+         hp:=mappings;
+         while assigned(hp) do
+           begin
+              if hp^.cp=cp then
+                begin
+                   getmap:=hp;
+                   mapcache:=cp;
+                   mapcachep:=hp;
+                   exit;
+                end;
+              hp:=hp^.next;
+           end;
+         getmap:=nil;
       end;
 
     function mappingavailable(const s : string) : boolean;
@@ -204,6 +238,12 @@ unit charset;
          mappingavailable:=getmap(s)<>nil;
       end;
 
+    function mappingavailable(cp : word) : boolean;
+
+      begin
+         mappingavailable:=getmap(cp)<>nil;
+      end;
+
     function getunicode(c : char;p : punicodemap) : tunicodechar;
 
       begin
@@ -219,8 +259,8 @@ unit charset;
          i : longint;
 
       begin
-         { at least map to space }
-         getascii:=#32;
+         { at least map to '?' }
+         getascii:=#63;
          for i:=0 to p^.lastchar do
            if p^.map[i].unicode=c then
              begin

+ 7 - 6
rtl/inc/ustrings.inc

@@ -23,9 +23,8 @@
   a punicodechar that points to :
 
   @-8  : SizeInt for reference count;
-  @-4  : SizeInt for size; size=number of bytes, not the number of chars. Divide or multiply
-         with sizeof(UnicodeChar) to convert. This is needed to be compatible with Delphi and
-         Windows COM BSTR.
+  @-4  : SizeInt for size; size=number of chars. Multiply with
+         sizeof(UnicodeChar) to get the number of bytes. This is compatible with Delphi.
   @    : String + Terminating #0;
   Punicodechar(Unicodestring) is a valid typecast.
   So WS[i] is converted to the address @WS+i-1.
@@ -810,7 +809,9 @@ var
 begin
 {$ifndef FPC_HAS_CPSTRING}
   cp:=$ffff;
-{$endif FPC_HAS_CPSTRING}
+{$endif FPC_HAS_CPSTRING}     
+  if cp=$ffff then
+    cp:=DefaultSystemCodePage;
   widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, cp, 1);
 end;
 
@@ -1552,10 +1553,10 @@ begin
     exit;
   if PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref<>1 then
    begin
-     L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len div sizeof(UnicodeChar);
+     L:=PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.len;
      SNew:=NewUnicodeString (L);
      Move (PUnicodeChar(S)^,SNew^,(L+1)*sizeof(UnicodeChar));
-     PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L * sizeof(UnicodeChar);
+     PUnicodeRec(SNew-UnicodeFirstOff)^.len:=L;
      fpc_unicodestr_decr_ref (Pointer(S));  { Thread safe }
      pointer(S):=SNew;
      pointer(result):=SNew;

+ 11 - 4
utils/creumap.pp

@@ -20,8 +20,9 @@ program creumap;
   procedure doerror;
 
     begin
-       writeln('Usage: creumap <cpname>');
-       writeln('A mapping file called <cpname>.txt must be present');
+       writeln('Usage: creumap <cpname> <cpnumber>');
+       writeln('cpname : A mapping file called <cpname>.txt must be present');
+       writeln('cpnumber : the code page number');
        halt(1);
     end;
 
@@ -29,11 +30,16 @@ program creumap;
      p : punicodemap;
      i : longint;
      t : text;
+     e : word;
 
 begin
-   if paramcount<>1 then
+   if paramcount<>2 then
      doerror;
-   p:=loadunicodemapping(paramstr(1),paramstr(1)+'.txt');
+   Val(paramstr(2),i,e);
+   if e<>0 then
+     doerror;
+     
+   p:=loadunicodemapping(paramstr(1),paramstr(1)+'.txt',i);
    if p=nil then
      doerror;
    assign(t,paramstr(1)+'.pp');
@@ -69,6 +75,7 @@ begin
    writeln(t);
    writeln(t,'     unicodemap : tunicodemap = (');
    writeln(t,'       cpname : ''',p^.cpname,''';');
+   writeln(t,'       cp : ',p^.cp,';');
    writeln(t,'       map : @map;');
    writeln(t,'       lastchar : ',p^.lastchar,';');
    writeln(t,'       next : nil;');