Bladeren bron

merge r13481 from cpstrnew branch by florian
+ support parsing of strings with code page specification
+ added encoding and elementsize field to ansi- and unicodestring records
+ some basic rtl support routines for encoding aware strings
+ DefaultSystemCodePage
+ DefaultUnicodeCodePage
+ ppu writing/loading of code page aware strings

git-svn-id: trunk@19080 -

paul 14 jaren geleden
bovenliggende
commit
8a4634a7b1

+ 1 - 1
compiler/defcmp.pas

@@ -357,7 +357,7 @@ implementation
                           (tstringdef(def_from).len=tstringdef(def_to).len)) and
                           (tstringdef(def_from).len=tstringdef(def_to).len)) and
                          { for ansi- and unicodestrings also the encoding must match }
                          { for ansi- and unicodestrings also the encoding must match }
                          (not(tstringdef(def_from).stringtype in [st_ansistring,st_unicodestring]) or
                          (not(tstringdef(def_from).stringtype in [st_ansistring,st_unicodestring]) or
-                          (tstringdef(def_from).stringencoding=tstringdef(def_to).stringencoding))then
+                          (tstringdef(def_from).encoding=tstringdef(def_to).encoding))then
                         eq:=te_equal
                         eq:=te_equal
                      else
                      else
                        begin
                        begin

+ 3 - 0
compiler/globals.pas

@@ -102,6 +102,9 @@ interface
        MathPiExtended : textendedrec = (bytes : (64,0,201,15,218,162,33,104,194,53));
        MathPiExtended : textendedrec = (bytes : (64,0,201,15,218,162,33,104,194,53));
 {$endif FPC_LITTLE_ENDIAN}
 {$endif FPC_LITTLE_ENDIAN}
 {$endif}
 {$endif}
+       CP_UTF8 = 65001;
+       CP_UTF16 = 1200;
+
 
 
     type
     type
        tcodepagestring = string[20];
        tcodepagestring = string[20];

+ 1 - 1
compiler/globtype.pas

@@ -513,7 +513,7 @@ interface
       end;
       end;
   {$endif}
   {$endif}
 
 
-      tstringencoding = dword;
+      tstringencoding = word;
 
 
     const
     const
        { link options }
        { link options }

+ 4 - 1
compiler/msg/errore.msg

@@ -375,7 +375,7 @@ scanner_e_illegal_alignment_directive=02088_E_Illegal alignment directive
 #
 #
 # Parser
 # Parser
 #
 #
-# 03310 is the last used one
+# 03314 is the last used one
 #
 #
 % \section{Parser messages}
 % \section{Parser messages}
 % This section lists all parser messages. The parser takes care of the
 % This section lists all parser messages. The parser takes care of the
@@ -1400,6 +1400,9 @@ parser_e_mapping_no_implements=03312_E_Interface "$1" can't be delegated by "$2"
 % has to implement the interface directly. Delegation is not possible.
 % has to implement the interface directly. Delegation is not possible.
 parser_e_implements_no_mapping=03313_E_Interface "$1" can't have method resolutions, "$2" already delegates it
 parser_e_implements_no_mapping=03313_E_Interface "$1" can't have method resolutions, "$2" already delegates it
 % Method resoulution is only possible for interfaces that are implemented directly, not by delegation.
 % Method resoulution is only possible for interfaces that are implemented directly, not by delegation.
+parser_e_invalid_codepage=03314_E_Invalid codepage
+% When declaring a string with a given codepage, the range of valid codepages values is limited
+% to 0 to 65535.
 % \end{description}
 % \end{description}
 # Type Checking
 # Type Checking
 #
 #

+ 3 - 2
compiler/msgidx.inc

@@ -405,6 +405,7 @@ const
   parser_e_duplicate_implements_clause=03311;
   parser_e_duplicate_implements_clause=03311;
   parser_e_mapping_no_implements=03312;
   parser_e_mapping_no_implements=03312;
   parser_e_implements_no_mapping=03313;
   parser_e_implements_no_mapping=03313;
+  parser_e_invalid_codepage=03314;
   type_e_mismatch=04000;
   type_e_mismatch=04000;
   type_e_incompatible_types=04001;
   type_e_incompatible_types=04001;
   type_e_not_equal_types=04002;
   type_e_not_equal_types=04002;
@@ -903,9 +904,9 @@ const
   option_info=11024;
   option_info=11024;
   option_help_pages=11025;
   option_help_pages=11025;
 
 
-  MsgTxtSize = 61274;
+  MsgTxtSize = 61299;
 
 
   MsgIdxMax : array[1..20] of longint=(
   MsgIdxMax : array[1..20] of longint=(
-    26,89,314,104,85,54,112,23,202,63,
+    26,89,315,104,85,54,112,23,202,63,
     50,20,1,1,1,1,1,1,1,1
     50,20,1,1,1,1,1,1,1,1
   );
   );

File diff suppressed because it is too large
+ 235 - 234
compiler/msgtxt.inc


+ 61 - 36
compiler/pexpr.pas

@@ -96,51 +96,76 @@ implementation
       begin
       begin
          def:=cshortstringtype;
          def:=cshortstringtype;
          consume(_STRING);
          consume(_STRING);
-         if (token=_LECKKLAMMER) then
+         if token=_LECKKLAMMER then
            begin
            begin
-              if not(allowtypedef) then
-                Message(parser_e_no_local_para_def);
-              consume(_LECKKLAMMER);
-              p:=comp_expr(true,false);
-              if not is_constintnode(p) then
-                begin
-                  Message(parser_e_illegal_expression);
-                  { error recovery }
-                  consume(_RECKKLAMMER);
-                end
-              else
-                begin
-                 if (tordconstnode(p).value<=0) then
-                   begin
-                      Message(parser_e_invalid_string_size);
-                      tordconstnode(p).value:=255;
-                   end;
+             if not(allowtypedef) then
+               Message(parser_e_no_local_para_def);
+             consume(_LECKKLAMMER);
+             p:=comp_expr(true,false);
+             if not is_constintnode(p) then
+               begin
+                 Message(parser_e_illegal_expression);
+                 { error recovery }
                  consume(_RECKKLAMMER);
                  consume(_RECKKLAMMER);
-                 if tordconstnode(p).value>255 then
+               end
+             else
+               begin
+                if (tordconstnode(p).value<=0) then
                   begin
                   begin
-                    { longstring is currently unsupported (CEC)! }
-{                   t:=tstringdef.createlong(tordconstnode(p).value))}
                      Message(parser_e_invalid_string_size);
                      Message(parser_e_invalid_string_size);
                      tordconstnode(p).value:=255;
                      tordconstnode(p).value:=255;
-                     def:=tstringdef.createshort(int64(tordconstnode(p).value));
+                  end;
+                if tordconstnode(p).value>255 then
+                  begin
+                    { longstring is currently unsupported (CEC)! }
+{                    t:=tstringdef.createlong(tordconstnode(p).value))}
+                    Message(parser_e_invalid_string_size);
+                    tordconstnode(p).value:=255;
+                    def:=tstringdef.createshort(int64(tordconstnode(p).value));
                   end
                   end
+                else
+                  if tordconstnode(p).value<>255 then
+                    def:=tstringdef.createshort(int64(tordconstnode(p).value));
+                consume(_RECKKLAMMER);
+              end;
+             p.free;
+           end
+         else if token=_LSHARPBRACKET then
+           begin
+             if not(allowtypedef) then
+               Message(parser_e_no_local_para_def);
+             consume(_LSHARPBRACKET);
+             p:=comp_expr(true,false);
+             if not is_constintnode(p) then
+               begin
+                 Message(parser_e_illegal_expression);
+                 { error recovery }
+               end
+             else
+               begin
+                 if (tordconstnode(p).value<0) or (tordconstnode(p).value>65535) then
+                   begin
+                     Message(parser_e_invalid_codepage);
+                     tordconstnode(p).value:=0;
+                   end;
+                 if tordconstnode(p).value=CP_UTF16 then
+                   def:=tstringdef.createunicode
                  else
                  else
-                   if tordconstnode(p).value<>255 then
-                     def:=tstringdef.createshort(int64(tordconstnode(p).value));
+                   begin
+                     def:=tstringdef.createansi;
+                     tstringdef(def).encoding:=int64(tordconstnode(p).value);
+                   end;
+                 consume(_RSHARPBRACKET);
                end;
                end;
-              p.free;
+             p.free;
            end
            end
-        else if try_to_consume(_GT) then
-          begin
-            consume(_LT);
-          end
-          else
-            begin
-              if cs_ansistrings in current_settings.localswitches then
-                def:=cansistringtype
-              else
-                def:=cshortstringtype;
-            end;
+         else
+           begin
+             if cs_ansistrings in current_settings.localswitches then
+               def:=cansistringtype
+             else
+               def:=cshortstringtype;
+           end;
        end;
        end;
 
 
 
 

+ 6 - 0
compiler/symdef.pas

@@ -1457,6 +1457,7 @@ implementation
          inherited ppuload(stringdef,ppufile);
          inherited ppuload(stringdef,ppufile);
          stringtype:=st_ansistring;
          stringtype:=st_ansistring;
          len:=ppufile.getaint;
          len:=ppufile.getaint;
+         encoding:=ppufile.getword;
          savesize:=sizeof(pint);
          savesize:=sizeof(pint);
       end;
       end;
 
 
@@ -1483,6 +1484,7 @@ implementation
       begin
       begin
          inherited create(stringdef);
          inherited create(stringdef);
          stringtype:=st_unicodestring;
          stringtype:=st_unicodestring;
+         encoding:=CP_UTF16;
          len:=-1;
          len:=-1;
          savesize:=sizeof(pint);
          savesize:=sizeof(pint);
       end;
       end;
@@ -1493,6 +1495,7 @@ implementation
          inherited ppuload(stringdef,ppufile);
          inherited ppuload(stringdef,ppufile);
          stringtype:=st_unicodestring;
          stringtype:=st_unicodestring;
          len:=ppufile.getaint;
          len:=ppufile.getaint;
+         encoding:=ppufile.getword;
          savesize:=sizeof(pint);
          savesize:=sizeof(pint);
       end;
       end;
 
 
@@ -1502,6 +1505,7 @@ implementation
         result:=tstringdef.create(typ);
         result:=tstringdef.create(typ);
         result.typ:=stringdef;
         result.typ:=stringdef;
         tstringdef(result).stringtype:=stringtype;
         tstringdef(result).stringtype:=stringtype;
+        tstringdef(result).encoding:=encoding;
         tstringdef(result).len:=len;
         tstringdef(result).len:=len;
         tstringdef(result).savesize:=savesize;
         tstringdef(result).savesize:=savesize;
       end;
       end;
@@ -1529,6 +1533,8 @@ implementation
            end
            end
          else
          else
            ppufile.putaint(len);
            ppufile.putaint(len);
+         if stringtype in [st_ansistring,st_unicodestring] then
+           ppufile.putword(encoding);
          case stringtype of
          case stringtype of
             st_shortstring : ppufile.writeentry(ibshortstringdef);
             st_shortstring : ppufile.writeentry(ibshortstringdef);
             st_longstring : ppufile.writeentry(iblongstringdef);
             st_longstring : ppufile.writeentry(iblongstringdef);

+ 59 - 25
rtl/inc/astrings.inc

@@ -36,22 +36,26 @@
 Type
 Type
   PAnsiRec = ^TAnsiRec;
   PAnsiRec = ^TAnsiRec;
   TAnsiRec = Packed Record
   TAnsiRec = Packed Record
-    Ref,
-    Len   : SizeInt;
-    First : Char;
+    CodePage    : TSystemCodePage;
+    ElementSize : Word;
+{$ifdef CPU64}	
+    { align fields  }
+	Dummy       : DWord;
+{$endif CPU64}
+    Ref         : SizeInt;
+    Len         : SizeInt;
+    First       : Char;
   end;
   end;
 
 
 Const
 Const
   AnsiRecLen = SizeOf(TAnsiRec);
   AnsiRecLen = SizeOf(TAnsiRec);
-  FirstOff   = SizeOf(TAnsiRec)-1;
+  AnsiFirstOff = SizeOf(TAnsiRec)-1;
 
 
 
 
 {****************************************************************************
 {****************************************************************************
                     Internal functions, not in interface.
                     Internal functions, not in interface.
 ****************************************************************************}
 ****************************************************************************}
 
 
-
-
 Function NewAnsiString(Len : SizeInt) : Pointer;
 Function NewAnsiString(Len : SizeInt) : Pointer;
 {
 {
   Allocate a new AnsiString on the heap.
   Allocate a new AnsiString on the heap.
@@ -66,8 +70,10 @@ begin
    begin
    begin
      PAnsiRec(P)^.Ref:=1;         { Set reference count }
      PAnsiRec(P)^.Ref:=1;         { Set reference count }
      PAnsiRec(P)^.Len:=0;         { Initial length }
      PAnsiRec(P)^.Len:=0;         { Initial length }
+     PAnsiRec(P)^.CodePage:=DefaultSystemCodePage;
+     PAnsiRec(P)^.ElementSize:=SizeOf(AnsiChar);
      PAnsiRec(P)^.First:=#0;      { Terminating #0 }
      PAnsiRec(P)^.First:=#0;      { Terminating #0 }
-     inc(p,firstoff);             { Points to string now }
+     inc(p,AnsiFirstOff);             { Points to string now }
    end;
    end;
   NewAnsiString:=P;
   NewAnsiString:=P;
 end;
 end;
@@ -80,7 +86,7 @@ Procedure DisposeAnsiString(Var S : Pointer); {$IFNDEF VER2_0} Inline; {$ENDIF}
 begin
 begin
   If S=Nil then
   If S=Nil then
     exit;
     exit;
-  Dec (S,FirstOff);
+  Dec (S,AnsiFirstOff);
   FreeMem (S);
   FreeMem (S);
   S:=Nil;
   S:=Nil;
 end;
 end;
@@ -99,7 +105,7 @@ Begin
   { Zero string }
   { Zero string }
   If S=Nil then exit;
   If S=Nil then exit;
   { check for constant strings ...}
   { check for constant strings ...}
-  l:=@PAnsiRec(S-FirstOff)^.Ref;
+  l:=@PAnsiRec(S-AnsiFirstOff)^.Ref;
   If l^<0 then exit;
   If l^<0 then exit;
   { declocked does a MT safe dec and returns true, if the counter is 0 }
   { declocked does a MT safe dec and returns true, if the counter is 0 }
   If declocked(l^) then
   If declocked(l^) then
@@ -117,8 +123,8 @@ Begin
   If S=Nil then
   If S=Nil then
     exit;
     exit;
   { Let's be paranoid : Constant string ??}
   { Let's be paranoid : Constant string ??}
-  If PAnsiRec(S-FirstOff)^.Ref<0 then exit;
-  inclocked(PAnsiRec(S-FirstOff)^.Ref);
+  If PAnsiRec(S-AnsiFirstOff)^.Ref<0 then exit;
+  inclocked(PAnsiRec(S-AnsiFirstOff)^.Ref);
 end;
 end;
 
 
 
 
@@ -133,8 +139,8 @@ begin
   if DestS=S2 then
   if DestS=S2 then
     exit;
     exit;
   If S2<>nil then
   If S2<>nil then
-    If PAnsiRec(S2-FirstOff)^.Ref>0 then
-      inclocked(PAnsiRec(S2-FirstOff)^.ref);
+    If PAnsiRec(S2-AnsiFirstOff)^.Ref>0 then
+      inclocked(PAnsiRec(S2-AnsiFirstOff)^.Ref);
   { Decrease the reference count on the old S1 }
   { Decrease the reference count on the old S1 }
   fpc_ansistr_decr_ref (DestS);
   fpc_ansistr_decr_ref (DestS);
   { And finally, have DestS pointing to S2 (or its copy) }
   { And finally, have DestS pointing to S2 (or its copy) }
@@ -576,17 +582,17 @@ begin
        begin
        begin
          GetMem(Pointer(S),AnsiRecLen+L);
          GetMem(Pointer(S),AnsiRecLen+L);
          PAnsiRec(S)^.Ref:=1;
          PAnsiRec(S)^.Ref:=1;
-         inc(Pointer(S),firstoff);
+         inc(Pointer(S),AnsiFirstOff);
        end
        end
-      else if PAnsiRec(Pointer(S)-FirstOff)^.Ref=1 then
+      else if PAnsiRec(Pointer(S)-AnsiFirstOff)^.Ref=1 then
         begin
         begin
-          Dec(Pointer(S),FirstOff);
+          Dec(Pointer(S),AnsiFirstOff);
           lens:=MemSize(Pointer(s));
           lens:=MemSize(Pointer(s));
           lena:=AnsiRecLen+L;
           lena:=AnsiRecLen+L;
           { allow shrinking string if that saves at least half of current size }
           { allow shrinking string if that saves at least half of current size }
           if (lena>lens) or ((lens>32) and (lena<=(lens div 2))) then
           if (lena>lens) or ((lens>32) and (lena<=(lens div 2))) then
             reallocmem(pointer(S),AnsiRecLen+L);
             reallocmem(pointer(S),AnsiRecLen+L);
-          Inc(Pointer(S),FirstOff);
+          Inc(Pointer(S),AnsiFirstOff);
         end
         end
       else
       else
         begin
         begin
@@ -601,14 +607,14 @@ begin
             movelen := lens;
             movelen := lens;
           Move(Pointer(S)^,Temp^,movelen);
           Move(Pointer(S)^,Temp^,movelen);
           { ref count dropped to zero in the mean time? }
           { ref count dropped to zero in the mean time? }
-          If (PAnsiRec(Pointer(S)-FirstOff)^.Ref > 0) and
-             declocked(PAnsiRec(Pointer(S)-FirstOff)^.Ref) then
-            freemem(PAnsiRec(Pointer(s)-FirstOff));
+          If (PAnsiRec(Pointer(S)-AnsiFirstOff)^.Ref > 0) and
+             declocked(PAnsiRec(Pointer(S)-AnsiFirstOff)^.Ref) then
+            freemem(PAnsiRec(Pointer(s)-AnsiFirstOff));
           Pointer(S):=Temp;
           Pointer(S):=Temp;
        end;
        end;
       { Force nil termination in case it gets shorter }
       { Force nil termination in case it gets shorter }
       PByte(Pointer(S)+l)^:=0;
       PByte(Pointer(S)+l)^:=0;
-      PAnsiRec(Pointer(S)-FirstOff)^.Len:=l;
+      PAnsiRec(Pointer(S)-AnsiFirstOff)^.Len:=l;
     end
     end
   else
   else
     begin
     begin
@@ -655,10 +661,10 @@ Var
   SNew : Pointer;
   SNew : Pointer;
   L    : SizeInt;
   L    : SizeInt;
 begin
 begin
-  L:=PAnsiRec(Pointer(S)-FirstOff)^.len;
+  L:=PAnsiRec(Pointer(S)-AnsiFirstOff)^.len;
   SNew:=NewAnsiString (L);
   SNew:=NewAnsiString (L);
   Move (Pointer(S)^,SNew^,L+1);
   Move (Pointer(S)^,SNew^,L+1);
-  PAnsiRec(SNew-FirstOff)^.len:=L;
+  PAnsiRec(SNew-AnsiFirstOff)^.len:=L;
   fpc_ansistr_decr_ref (Pointer(S));  { Thread safe }
   fpc_ansistr_decr_ref (Pointer(S));  { Thread safe }
   pointer(S):=SNew;
   pointer(S):=SNew;
   pointer(result):=SNew;
   pointer(result):=SNew;
@@ -677,7 +683,7 @@ begin
   pointer(result) := pointer(s);
   pointer(result) := pointer(s);
   If Pointer(S)=Nil then
   If Pointer(S)=Nil then
     exit;
     exit;
-  if PAnsiRec(Pointer(S)-Firstoff)^.Ref<>1 then
+  if PAnsiRec(Pointer(S)-AnsiFirstOff)^.Ref<>1 then
     result:=fpc_truely_ansistr_unique(s);
     result:=fpc_truely_ansistr_unique(s);
 end;
 end;
 {$endif FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
 {$endif FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
@@ -745,7 +751,7 @@ begin
      if ResultAddress<>Nil then
      if ResultAddress<>Nil then
       begin
       begin
         Move (Pointer(Pointer(S)+index)^,ResultAddress^,Size);
         Move (Pointer(Pointer(S)+index)^,ResultAddress^,Size);
-        PAnsiRec(ResultAddress-FirstOff)^.Len:=Size;
+        PAnsiRec(ResultAddress-AnsiFirstOff)^.Len:=Size;
         PByte(ResultAddress+Size)^:=0;
         PByte(ResultAddress+Size)^:=0;
       end;
       end;
    end;
    end;
@@ -1094,3 +1100,31 @@ begin
   for i := 1 to length (s) do
   for i := 1 to length (s) do
     result[i] := lowercase(s[i]);
     result[i] := lowercase(s[i]);
 end;
 end;
+
+
+function StringCodePage(const S: RawByteString): TSystemCodePage; overload;
+  begin
+    if assigned(S) then
+      Result:=PAnsiRec(pointer(S)-AnsiFirstOff)^.CodePage
+    else
+      Result:=SizeOf(AnsiChar);
+  end;
+
+
+function StringElementSize(const S: RawByteString): Word; overload;
+  begin
+    if assigned(S) then
+      Result:=PAnsiRec(pointer(S)-AnsiFirstOff)^.ElementSize
+    else
+      Result:=SizeOf(AnsiChar);
+  end;
+
+
+function StringRefCount(const S: RawByteString): SizeInt; overload;
+  begin
+    if assigned(S) then
+      Result:=PAnsiRec(pointer(S)-AnsiFirstOff)^.Ref
+    else
+      Result:=SizeOf(AnsiChar);
+  end;
+

+ 14 - 1
rtl/inc/systemh.inc

@@ -290,9 +290,11 @@ Type
   PUCS4CharArray      = ^TUCS4CharArray;
   PUCS4CharArray      = ^TUCS4CharArray;
   UCS4String          = array of UCS4Char;
   UCS4String          = array of UCS4Char;
 
 
-  UTF8String          = type ansistring;
+  UTF8String          = String<65001>;
   PUTF8String         = ^UTF8String;
   PUTF8String         = ^UTF8String;
 
 
+  RawByteString       = String<$ffff>;
+
   HRESULT             = type Longint;
   HRESULT             = type Longint;
 {$ifndef FPUNONE}
 {$ifndef FPUNONE}
   TDateTime           = type Double;
   TDateTime           = type Double;
@@ -361,6 +363,8 @@ Type
   PUnicodeChar        = ^UnicodeChar;
   PUnicodeChar        = ^UnicodeChar;
   PUnicodeString      = ^UnicodeString;
   PUnicodeString      = ^UnicodeString;
 
 
+  TSystemCodePage     = Word;
+
   { Needed for fpc_get_output }
   { Needed for fpc_get_output }
   PText               = ^Text;
   PText               = ^Text;
 
 
@@ -483,6 +487,11 @@ var
   { Threading support }
   { Threading support }
   fpc_threadvar_relocate_proc : pointer; public name 'FPC_THREADVAR_RELOCATE';
   fpc_threadvar_relocate_proc : pointer; public name 'FPC_THREADVAR_RELOCATE';
 
 
+  DefaultSystemCodePage,
+  DefaultUnicodeCodePage,
+  UTF8CompareLocale : TSystemCodePage;
+
+
 {$ifndef HAS_CMDLINE}
 {$ifndef HAS_CMDLINE}
 {Value should be changed during system initialization as appropriate.}
 {Value should be changed during system initialization as appropriate.}
 var cmdline:Pchar=nil;
 var cmdline:Pchar=nil;
@@ -871,6 +880,10 @@ Procedure Delete (var S : AnsiString; Index,Size: SizeInt);
 Function  StringOfChar(c : char;l : SizeInt) : AnsiString;
 Function  StringOfChar(c : char;l : SizeInt) : AnsiString;
 function  upcase(const s : ansistring) : ansistring;
 function  upcase(const s : ansistring) : ansistring;
 function  lowercase(const s : ansistring) : ansistring;
 function  lowercase(const s : ansistring) : ansistring;
+
+function StringCodePage(const S : RawByteString): Word; overload;
+function StringElementSize(const S : RawByteString): Word; overload;
+function StringRefCount(const S : RawByteString): SizeInt; overload;
 {$endif FPC_HAS_FEATURE_ANSISTRINGS}
 {$endif FPC_HAS_FEATURE_ANSISTRINGS}
 
 
 
 

+ 3 - 1
rtl/inc/ustringh.inc

@@ -125,4 +125,6 @@ Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
 Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
 Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
 Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
 Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
 
 
-
+function StringElementSize(const S : UnicodeString): Word; overload;
+function StringRefCount(const S : UnicodeString): SizeInt; overload;
+function StringCodePage(const S : UnicodeString): Word; overload;

+ 54 - 21
rtl/inc/ustrings.inc

@@ -37,9 +37,15 @@
 Type
 Type
   PUnicodeRec = ^TUnicodeRec;
   PUnicodeRec = ^TUnicodeRec;
   TUnicodeRec = Packed Record
   TUnicodeRec = Packed Record
-    Ref : SizeInt;
-    Len : SizeInt;
-    First : UnicodeChar;
+    CodePage    : TSystemCodePage;
+	ElementSize : Word;
+{$ifdef CPU64}	
+    { align fields  }
+	Dummy       : DWord;
+{$endif CPU64}
+    Ref         : SizeInt;
+    Len         : SizeInt;
+    First       : UnicodeChar;
   end;
   end;
 
 
 Const
 Const
@@ -139,11 +145,12 @@ Procedure SetWideStringManager (Const New : TUnicodeStringManager);
 begin
 begin
   widestringmanager:=New;
   widestringmanager:=New;
 end;
 end;
-         {****************************************************************************
+
+
+{****************************************************************************
                     Internal functions, not in interface.
                     Internal functions, not in interface.
 ****************************************************************************}
 ****************************************************************************}
 
 
-
 procedure UnicodeStringError;
 procedure UnicodeStringError;
   begin
   begin
     HandleErrorFrame(204,get_frame);
     HandleErrorFrame(204,get_frame);
@@ -178,10 +185,12 @@ begin
   GetMem(P,Len*sizeof(UnicodeChar)+UnicodeRecLen);
   GetMem(P,Len*sizeof(UnicodeChar)+UnicodeRecLen);
   If P<>Nil then
   If P<>Nil then
     begin
     begin
-     PUnicodeRec(P)^.Len:=Len*2;     { Initial length }
-     PUnicodeRec(P)^.Ref:=1;         { Initial Refcount }
-     PUnicodeRec(P)^.First:=#0;      { Terminating #0 }
-     inc(p,UnicodeFirstOff);         { Points to string now }
+      PUnicodeRec(P)^.Len:=Len*2;     { Initial length }
+      PUnicodeRec(P)^.Ref:=1;         { Initial Refcount }
+      PUnicodeRec(P)^.CodePage:=DefaultUnicodeCodePage;
+      PUnicodeRec(P)^.ElementSize:=SizeOf(UnicodeChar);      
+      PUnicodeRec(P)^.First:=#0;      { Terminating #0 }
+      inc(p,UnicodeFirstOff);         { Points to string now }
     end
     end
   else
   else
     UnicodeStringError;
     UnicodeStringError;
@@ -1338,20 +1347,17 @@ begin
    if (l>0) then
    if (l>0) then
     begin
     begin
       if Pointer(S)=nil then
       if Pointer(S)=nil then
-       begin
-         { Need a complete new string...}
-         Pointer(s):=NewUnicodeString(l);
-       end
-      { windows doesn't support reallocing unicodestrings, this code
-        is anyways subject to be removed because unicodestrings shouldn't be
-        ref. counted anymore (FK) }
+        begin
+          { Need a complete new string...}
+          Pointer(s):=NewUnicodeString(l);
+        end
       else
       else
         if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
         if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
-        begin
-          Dec(Pointer(S),UnicodeFirstOff);
-          if SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen)>MemSize(Pointer(S)) then
+          begin
+            Dec(Pointer(S),UnicodeFirstOff);
+            if SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen)>MemSize(Pointer(S)) then
               reallocmem(pointer(S), L*sizeof(UnicodeChar)+UnicodeRecLen);
               reallocmem(pointer(S), L*sizeof(UnicodeChar)+UnicodeRecLen);
-          Inc(Pointer(S), UnicodeFirstOff);
+            Inc(Pointer(S), UnicodeFirstOff);
         end
         end
       else
       else
         begin
         begin
@@ -1612,7 +1618,6 @@ begin
 end;
 end;
 
 
 
 
-
 Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
 Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
 Var
 Var
   LS : SizeInt;
   LS : SizeInt;
@@ -2522,6 +2527,34 @@ procedure unimplementedunicodestring;
     HandleErrorFrame(233,get_frame);
     HandleErrorFrame(233,get_frame);
   end;
   end;
 
 
+  
+function StringElementSize(const S: UnicodeString): Word; overload;
+  begin
+    if assigned(S) then
+      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
+    else
+      Result:=SizeOf(UnicodeChar);
+  end;
+  
+  
+function StringRefCount(const S: UnicodeString): SizeInt; overload;
+  begin
+    if assigned(S) then
+      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
+    else
+      Result:=SizeOf(UnicodeChar);
+  end;
+
+  
+function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
+  begin
+    if assigned(S) then
+      Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
+    else
+      Result:=SizeOf(UnicodeChar);
+  end;
+
+  
 {$warnings off}
 {$warnings off}
 function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
 function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
   begin
   begin

Some files were not shown because too many files changed in this diff