ustringh.inc 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2005 by Florian Klaempfl,
  4. member of the Free Pascal development team.
  5. This file implements support routines for UnicodeStrings with FPC
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************}
  { UnicodeString overload of UniqueString. It is not implemented under this
    name: the declaration is bound to the external symbol
    'FPC_UNICODESTR_UNIQUE'. S is passed by reference (Var). }
  12. Procedure UniqueString (Var S : UnicodeString);external name 'FPC_UNICODESTR_UNIQUE';
  { Pos overloads involving UnicodeString. All of them return SizeInt and take
    an optional Offset that defaults to 1; they differ only in the types of the
    first two parameters (UnicodeString, Char, UnicodeChar, RawByteString,
    ShortString).
    NOTE(review): Offset is presumably the 1-based index at which the search
    starts - confirm against the implementation. }
  13. Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  14. Function Pos (c : Char; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  15. Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  16. Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  17. Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
  18. Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
  { UpCase / LowerCase overloads: one pair takes a whole UnicodeString, the
    other a single UnicodeChar. Each returns a value of its argument's type. }
  19. Function UpCase(const s : UnicodeString) : UnicodeString;
  20. Function UpCase(c:UnicodeChar):UnicodeChar;
  21. Function LowerCase(const s : UnicodeString) : UnicodeString;
  22. Function LowerCase(c:UnicodeChar):UnicodeChar;
  { UnicodeString overloads of Insert and Delete. Both receive the target
    string S by reference (Var); Index and Size are SizeInt. }
  23. Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
  24. Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
  { When FPC_HAS_CPSTRING is defined these two routines are declared as the
    compilerprocs fpc_setstring_unicodestr_pwidechar and
    fpc_setstring_unicodestr_pansichar; otherwise they are declared as plain
    SetString overloads. The two declarations differ only in the buffer type
    (PUnicodeChar versus PChar); S is an Out parameter in both. }
  25. Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pwidechar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  26. Procedure {$ifdef FPC_HAS_CPSTRING}fpc_setstring_unicodestr_pansichar{$else}SetString{$endif}(Out S : UnicodeString; Buf : PChar; Len : SizeInt); {$ifdef FPC_HAS_CPSTRING} compilerproc; {$endif FPC_HAS_CPSTRING}
  { PWideChar-based conversion routines.
    - The ...Len... variants take an explicit Len argument.
    - The ...StrVar variants deliver the result through an out parameter
      (overloaded for UnicodeString and AnsiString) instead of a function result.
    - StringToWideChar takes a destination buffer plus DestSize and returns a
      PWideChar. }
  27. function WideCharToString(S : PWideChar) : UnicodeString;
  28. function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
  29. function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
  30. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
  31. procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
  32. procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
  33. procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
  { PUnicodeChar-based counterparts of the WideChar conversion routines, with
    the same parameter shapes.
    Note: in this section UnicodeCharToStrVar is declared only with an
    AnsiString destination; there is no UnicodeString overload here. }
  34. function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
  35. function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
  36. function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
  37. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
  38. procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
  39. procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
  { Default Unicode<->Ansi move routines. Their parameter lists are identical
    to the Unicode2AnsiMoveProc and Ansi2UnicodeMoveProc fields of
    TUnicodeStringManager, so they can be assigned to those fields.
    cp is the code page argument; len is a SizeInt length. }
  40. procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  41. procedure DefaultAnsi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  42. Type
  43. { please only enable options below after creating a test program for them that
  44. passes on Windows and committing it, so it can be used to verify the
  45. functionality on other platforms }
  // Only coIgnoreCase is currently an active member of the enumeration; every
  // other option name on the following lines sits inside a comment.
  46. TCompareOption = ({coLingIgnoreCase, coLingIgnoreDiacritic, }coIgnoreCase{,
  47. coIgnoreKanaType, coIgnoreNonSpace, coIgnoreSymbols, coIgnoreWidth,
  48. coLingCasing, coDigitAsNumbers, coStringSort});
  // Set type passed as the Options parameter of the Compare*StringProc hooks.
  49. TCompareOptions = set of TCompareOption;
  { Selects which standard code page is being requested; this is the parameter
    type of TUnicodeStringManager.GetStandardCodePageProc. }
  50. TStandardCodePageEnum = (
  51. scpAnsi, // system Ansi code page (GetACP on windows)
  52. scpConsoleInput, // system console input code page (GetConsoleCP on windows)
  53. scpConsoleOutput, // system console output code page (GetConsoleOutputCP on windows)
  54. scpFileSystemSingleByte // file system code page used by single byte OS FileSystem APIs (GetACP on Windows)
  55. );
  56. {$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  57. { hooks for internationalization
  58. please add new procedures at the end, it makes it easier to detect new procedures }
  // The record is declared here only when FPC_HAS_BUILTIN_WIDESTR_MANAGER is
  // not defined. Every field is a procedural variable. The Wide* fields near
  // the top and the Unicode* fields near the bottom are parallel: same shapes,
  // differing only in WideString/pwidechar versus UnicodeString/punicodechar.
  59. TUnicodeStringManager = record
  // Code-page-aware move hooks between wide and ansi buffers.
  60. Wide2AnsiMoveProc : procedure(source:pwidechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  61. Ansi2WideMoveProc : procedure(source:pchar;cp : TSystemCodePage;var dest:widestring;len:SizeInt);
  62. // UpperUTF8 : procedure(p:PUTF8String);
  63. UpperWideStringProc : function(const S: WideString): WideString;
  64. // UpperUCS4 : procedure(p:PUCS4Char);
  65. // LowerUTF8 : procedure(p:PUTF8String);
  66. LowerWideStringProc : function(const S: WideString): WideString;
  67. // LowerUCS4 : procedure(p:PUCS4Char);
  68. {
  69. CompUTF8 : function(p1,p2:PUTF8String) : shortint;
  70. CompUCS2 : function(p1,p2:PUCS2Char) : shortint;
  71. CompUCS4 : function(p1,p2:PUCS4Char) : shortint;
  72. }
  73. CompareWideStringProc : function(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  74. // CompareTextWideStringProc is CompareWideStringProc with coIgnoreCase in options.
  75. // CompareTextWideStringProc : function(const s1, s2 : WideString): PtrInt;
  76. { return value: number of code points in the string. Whenever an invalid
  77. code point is encountered, all characters part of this invalid code point
  78. are considered to form one "character" and the next character is
  79. considered to be the start of a new (possibly also invalid) code point }
  80. CharLengthPCharProc : function(const Str: PChar): PtrInt;
  81. { return value:
  82. -1 if incomplete or invalid code point
  83. 0 if NULL character,
  84. > 0 if that's the length in bytes of the code point }
  // NOTE(review): "MaxLookAead" is presumably a misspelling of "MaxLookAhead";
  // the identifier is left untouched here because it is part of the declaration.
  85. CodePointLengthProc : function(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
  // AnsiString and PChar hooks: case mapping, comparison, and the Str* family.
  86. UpperAnsiStringProc : function(const s : ansistring) : ansistring;
  87. LowerAnsiStringProc : function(const s : ansistring) : ansistring;
  88. CompareStrAnsiStringProc : function(const S1, S2: ansistring): PtrInt;
  89. CompareTextAnsiStringProc : function(const S1, S2: ansistring): PtrInt;
  90. StrCompAnsiStringProc : function(S1, S2: PChar): PtrInt;
  91. StrICompAnsiStringProc : function(S1, S2: PChar): PtrInt;
  92. StrLCompAnsiStringProc : function(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  93. StrLICompAnsiStringProc : function(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  94. StrLowerAnsiStringProc : function(Str: PChar): PChar;
  95. StrUpperAnsiStringProc : function(Str: PChar): PChar;
  // Parameterless hooks; judging by the names they run at thread start and
  // thread end - TODO confirm against the code that invokes them.
  96. ThreadInitProc : procedure;
  97. ThreadFiniProc : procedure;
  98. { this is only different on windows }
  99. Unicode2AnsiMoveProc : procedure(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  100. Ansi2UnicodeMoveProc : procedure(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
  101. UpperUnicodeStringProc : function(const S: UnicodeString): UnicodeString;
  102. LowerUnicodeStringProc : function(const S: UnicodeString): UnicodeString;
  103. CompareUnicodeStringProc : function(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
  104. // CompareTextUnicodeStringProc is CompareUnicodeStringProc with coIgnoreCase in options.
  105. // CompareTextUnicodeStringProc : function(const s1, s2 : UnicodeString): PtrInt;
  106. { codepage retrieve function }
  107. GetStandardCodePageProc: function(const stdcp: TStandardCodePageEnum): TSystemCodePage;
  108. end;
  109. {$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
  110. var
  { The manager record instance. The variable keeps the name
    "widestringmanager" although its type is TUnicodeStringManager. }
  111. widestringmanager : TUnicodeStringManager;
  { Buffer-level conversions between a PUnicodeChar buffer and a UTF-8 PChar
    buffer. Each direction has two overloads:
    - a short form with a single limit (MaxBytes / MaxChars) that works in
      SizeInt and is inline when SYSTEMINLINE is defined;
    - a long form that, going by the parameter names, takes the destination
      capacity and the source length separately, and works in SizeUInt. }
  112. function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  113. function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
  114. function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  115. function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
  { String-level UTF-8 helpers.
    - UTF8Encode: overloaded for RawByteString (inline) and UnicodeString
      input; both return RawByteString.
    - UTF8Decode and all UTF8ToString overloads return a UnicodeString.
    - The two open-array UTF8ToString overloads are compiled out on CPUJVM. }
  116. function UTF8Encode(const s : RawByteString) : RawByteString; inline;
  117. function UTF8Encode(const s : UnicodeString) : RawByteString;
  118. function UTF8Decode(const s : RawByteString): UnicodeString;
  119. function UTF8ToString(const s : RawByteString): UnicodeString;inline;
  120. function UTF8ToString(const S: ShortString): unicodestring;
  121. function UTF8ToString(const S: PAnsiChar): unicodestring;
  122. { byte and ansichar are the same on the JVM, and "array of" and "pointer to"
  123. are as well }
  124. {$ifndef CPUJVM}
  125. function UTF8ToString(const S: array of AnsiChar): unicodestring;
  126. function UTF8ToString(const S: array of Byte): unicodestring;
  127. {$endif not CPUJVM}
  { RawByteString -> RawByteString conversions between the ansi encoding and
    UTF-8 (direction as given by each name). Both are inline when SYSTEMINLINE
    is defined. }
  128. function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  129. function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
  { Conversions to and from UCS4String, declared once for UnicodeString and
    once for WideString with otherwise identical signatures. }
  130. function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
  131. function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
  132. function WideStringToUCS4String(const s : WideString) : UCS4String;
  133. function UCS4StringToWideString(const s : UCS4String) : WideString;
  { Get/Set routines for the string manager. The WideString-named and the
    UnicodeString-named routines have identical signatures and all operate on
    TUnicodeStringManager. Each Set routine has two overloads: one taking only
    New, and one that also takes a Var Old parameter.
    NOTE(review): Old presumably receives the previously installed manager -
    confirm against the implementation. }
  134. Procedure GetWideStringManager (Var Manager : TUnicodeStringManager);
  135. Procedure SetWideStringManager (Const New : TUnicodeStringManager);
  136. Procedure SetWideStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  137. Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
  138. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
  139. Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
  { UnicodeString overloads (explicitly marked "overload") of the string
    introspection helpers. Result types are Word, SizeInt and TSystemCodePage
    respectively. }
  140. function StringElementSize(const S : UnicodeString): Word; overload;
  141. function StringRefCount(const S : UnicodeString): SizeInt; overload;
  142. function StringCodePage(const S : UnicodeString): TSystemCodePage; overload;
  { Overloads accepting a UnicodeString, an open array of widechar, or a
    RawByteString; every overload returns a RawByteString. }
  143. Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
  144. Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
  145. Function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;