FbCharsets.pas 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. {
  2. * PROGRAM: UDR samples.
  3. * MODULE: FbCharsets.pas
  4. * DESCRIPTION: Charset helpers.
  5. *
  6. * The contents of this file are subject to the Initial
  7. * Developer's Public License Version 1.0 (the "License");
  8. * you may not use this file except in compliance with the
  9. * License. You may obtain a copy of the License at
  10. * https://www.ibphoenix.com/about/firebird/idpl.
  11. *
  12. * Software distributed under the License is distributed AS IS,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing rights
  15. * and limitations under the License.
  16. *
  17. * The Original Code was created by Simonov Denis
  18. * for the book Writing UDR Firebird in Pascal.
  19. *
  20. * Copyright (c) 2018 Simonov Denis <[email protected]>
  21. * and all contributors signed below.
  22. *
  23. * All Rights Reserved.
  24. * Contributor(s): ______________________________________. }
  25. unit FbCharsets;
  26. {$IFDEF MSWINDOWS}
  27. {$DEFINE WINDOWS}
  28. {$ENDIF}
  29. {$IFDEF FPC}
  30. {$mode delphi}
  31. {$ENDIF}
  32. interface
  33. uses
  34. Classes, SysUtils {$IFDEF WINDOWS}, windows {$ENDIF};
  type
    { Firebird character set identifiers.

      Each member carries an explicit ordinal; that ordinal is what the helper
      methods in this unit use as the index into CharSetMap. The numbering has
      gaps (20, 24..33 and 41..43 have no named member). }
    TFBCharSet = (
      CS_NONE         = 0,   // no character set
      CS_BINARY       = 1,   // binary bytes
      CS_ASCII        = 2,   // ASCII
      CS_UNICODE_FSS  = 3,   // Unicode in FSS format
      CS_UTF8         = 4,   // UTF-8
      CS_SJIS         = 5,   // SJIS
      CS_EUCJ         = 6,   // EUC-J
      CS_JIS_0208     = 7,   // JIS 0208; 1990
      CS_UNICODE_UCS2 = 8,   // Unicode v 1.10
      CS_DOS_737      = 9,   // DOS CP 737
      CS_DOS_437      = 10,  // DOS CP 437
      CS_DOS_850      = 11,  // DOS CP 850
      CS_DOS_865      = 12,  // DOS CP 865
      CS_DOS_860      = 13,  // DOS CP 860
      CS_DOS_863      = 14,  // DOS CP 863
      CS_DOS_775      = 15,  // DOS CP 775
      CS_DOS_858      = 16,  // DOS CP 858
      CS_DOS_862      = 17,  // DOS CP 862
      CS_DOS_864      = 18,  // DOS CP 864
      CS_NEXT         = 19,  // NeXTSTEP OS native charset
      CS_ISO8859_1    = 21,  // ISO-8859.1
      CS_ISO8859_2    = 22,  // ISO-8859.2
      CS_ISO8859_3    = 23,  // ISO-8859.3
      CS_ISO8859_4    = 34,  // ISO-8859.4
      CS_ISO8859_5    = 35,  // ISO-8859.5
      CS_ISO8859_6    = 36,  // ISO-8859.6
      CS_ISO8859_7    = 37,  // ISO-8859.7
      CS_ISO8859_8    = 38,  // ISO-8859.8
      CS_ISO8859_9    = 39,  // ISO-8859.9
      CS_ISO8859_13   = 40,  // ISO-8859.13
      CS_KSC5601      = 44,  // Korean standard 5601
      CS_DOS_852      = 45,  // DOS CP 852
      CS_DOS_857      = 46,  // DOS CP 857
      CS_DOS_861      = 47,  // DOS CP 861
      CS_DOS_866      = 48,  // DOS CP 866
      CS_DOS_869      = 49,  // DOS CP 869
      CS_CYRL         = 50,  // CYRL
      CS_WIN1250      = 51,  // Windows cp 1250
      CS_WIN1251      = 52,  // Windows cp 1251
      CS_WIN1252      = 53,  // Windows cp 1252
      CS_WIN1253      = 54,  // Windows cp 1253
      CS_WIN1254      = 55,  // Windows cp 1254
      CS_BIG5         = 56,  // Big Five
      CS_GB2312       = 57,  // GB 2312-80
      CS_WIN1255      = 58,  // Windows cp 1255
      CS_WIN1256      = 59,  // Windows cp 1256
      CS_WIN1257      = 60,  // Windows cp 1257
      CS_UTF16        = 61,  // UTF-16
      CS_UTF32        = 62,  // UTF-32
      CS_KOI8R        = 63,  // Russian KOI8R
      CS_KOI8U        = 64,  // Ukrainian KOI8U
      CS_WIN1258      = 65,  // Windows cp 1258
      CS_TIS620       = 66,  // TIS620
      CS_GBK          = 67,  // GBK
      CS_CP943C       = 68,  // CP943C
      CS_GB18030      = 69   // GB18030
    );
  95. // Firebird character set mappig to code pages
  96. TCharsetMap = record
  97. CharsetID: Integer;
  98. CharSetName: AnsiString;
  99. CharSetWidth: Word;
  100. CodePage: Integer;
  101. end;
  102. { TFbCharsetHelper }
  103. TFbCharsetHelper = record helper for TFBCharSet
  104. function GetCharset : TCharsetMap;
  105. function GetCodePage: Integer;
  106. function GetCharWidth: Word;
  107. function GetCharSetName: string;
  108. function GetEncoding : TEncoding;
  109. function GetString(const Bytes: TBytes; ByteIndex, ByteCount: Integer): UnicodeString;
  110. end;
  111. implementation
  const
    { Mapping from Firebird character set id to name, character width and
      code page. The array index equals CharsetID, which is what lets the
      helper methods index the table directly with the enum ordinal. Ids
      that have no usable mapping are kept as 'Unknown' placeholder rows
      (width 0, CP_NONE) so the indexing stays dense.

      Fix: SJIS_0208 now maps to code page 932 (Shift-JIS). It previously
      used 20932, which is the EUC-JP code page and is still used, correctly,
      by EUCJ_0208. }
    CharSetMap: array [0 .. 69] of TCharsetMap = (
      (CharsetID: 0;  CharSetName: 'NONE';        CharSetWidth: 1; CodePage: CP_ACP),
      (CharsetID: 1;  CharSetName: 'OCTETS';      CharSetWidth: 1; CodePage: CP_NONE),
      (CharsetID: 2;  CharSetName: 'ASCII';       CharSetWidth: 1; CodePage: {CP_ASCII} CP_ACP),
      (CharsetID: 3;  CharSetName: 'UNICODE_FSS'; CharSetWidth: 3; CodePage: CP_UTF8),
      (CharsetID: 4;  CharSetName: 'UTF8';        CharSetWidth: 4; CodePage: CP_UTF8),
      (CharsetID: 5;  CharSetName: 'SJIS_0208';   CharSetWidth: 2; CodePage: 932),
      (CharsetID: 6;  CharSetName: 'EUCJ_0208';   CharSetWidth: 2; CodePage: 20932),
      // NOTE(review): ids 7 and 8 are named CS_JIS_0208 and CS_UNICODE_UCS2
      // in TFBCharSet but have no mapping here - confirm this is intended.
      (CharsetID: 7;  CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 8;  CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 9;  CharSetName: 'DOS737';      CharSetWidth: 1; CodePage: 737),
      (CharsetID: 10; CharSetName: 'DOS437';      CharSetWidth: 1; CodePage: 437),
      (CharsetID: 11; CharSetName: 'DOS850';      CharSetWidth: 1; CodePage: 850),
      (CharsetID: 12; CharSetName: 'DOS865';      CharSetWidth: 1; CodePage: 865),
      (CharsetID: 13; CharSetName: 'DOS860';      CharSetWidth: 1; CodePage: 860),
      (CharsetID: 14; CharSetName: 'DOS863';      CharSetWidth: 1; CodePage: 863),
      (CharsetID: 15; CharSetName: 'DOS775';      CharSetWidth: 1; CodePage: 775),
      (CharsetID: 16; CharSetName: 'DOS858';      CharSetWidth: 1; CodePage: 858),
      (CharsetID: 17; CharSetName: 'DOS862';      CharSetWidth: 1; CodePage: 862),
      (CharsetID: 18; CharSetName: 'DOS864';      CharSetWidth: 1; CodePage: 864),
      (CharsetID: 19; CharSetName: 'NEXT';        CharSetWidth: 1; CodePage: CP_NONE),
      (CharsetID: 20; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 21; CharSetName: 'ISO8859_1';   CharSetWidth: 1; CodePage: 28591),
      (CharsetID: 22; CharSetName: 'ISO8859_2';   CharSetWidth: 1; CodePage: 28592),
      (CharsetID: 23; CharSetName: 'ISO8859_3';   CharSetWidth: 1; CodePage: 28593),
      (CharsetID: 24; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 25; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 26; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 27; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 28; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 29; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 30; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 31; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 32; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 33; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 34; CharSetName: 'ISO8859_4';   CharSetWidth: 1; CodePage: 28594),
      (CharsetID: 35; CharSetName: 'ISO8859_5';   CharSetWidth: 1; CodePage: 28595),
      (CharsetID: 36; CharSetName: 'ISO8859_6';   CharSetWidth: 1; CodePage: 28596),
      (CharsetID: 37; CharSetName: 'ISO8859_7';   CharSetWidth: 1; CodePage: 28597),
      (CharsetID: 38; CharSetName: 'ISO8859_8';   CharSetWidth: 1; CodePage: 28598),
      (CharsetID: 39; CharSetName: 'ISO8859_9';   CharSetWidth: 1; CodePage: 28599),
      (CharsetID: 40; CharSetName: 'ISO8859_13';  CharSetWidth: 1; CodePage: 28603),
      (CharsetID: 41; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 42; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 43; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 44; CharSetName: 'KSC_5601';    CharSetWidth: 2; CodePage: 949),
      (CharsetID: 45; CharSetName: 'DOS852';      CharSetWidth: 1; CodePage: 852),
      (CharsetID: 46; CharSetName: 'DOS857';      CharSetWidth: 1; CodePage: 857),
      (CharsetID: 47; CharSetName: 'DOS861';      CharSetWidth: 1; CodePage: 861),
      (CharsetID: 48; CharSetName: 'DOS866';      CharSetWidth: 1; CodePage: 866),
      (CharsetID: 49; CharSetName: 'DOS869';      CharSetWidth: 1; CodePage: 869),
      (CharsetID: 50; CharSetName: 'CYRL';        CharSetWidth: 1; CodePage: 1251),
      (CharsetID: 51; CharSetName: 'WIN1250';     CharSetWidth: 1; CodePage: 1250),
      (CharsetID: 52; CharSetName: 'WIN1251';     CharSetWidth: 1; CodePage: 1251),
      (CharsetID: 53; CharSetName: 'WIN1252';     CharSetWidth: 1; CodePage: 1252),
      (CharsetID: 54; CharSetName: 'WIN1253';     CharSetWidth: 1; CodePage: 1253),
      (CharsetID: 55; CharSetName: 'WIN1254';     CharSetWidth: 1; CodePage: 1254),
      (CharsetID: 56; CharSetName: 'BIG_5';       CharSetWidth: 2; CodePage: 950),
      (CharsetID: 57; CharSetName: 'GB_2312';     CharSetWidth: 2; CodePage: 936),
      (CharsetID: 58; CharSetName: 'WIN1255';     CharSetWidth: 1; CodePage: 1255),
      (CharsetID: 59; CharSetName: 'WIN1256';     CharSetWidth: 1; CodePage: 1256),
      (CharsetID: 60; CharSetName: 'WIN1257';     CharSetWidth: 1; CodePage: 1257),
      // NOTE(review): ids 61 and 62 are named CS_UTF16 and CS_UTF32 in
      // TFBCharSet but have no mapping here - confirm this is intended.
      (CharsetID: 61; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 62; CharSetName: 'Unknown';     CharSetWidth: 0; CodePage: CP_NONE),
      (CharsetID: 63; CharSetName: 'KOI8R';       CharSetWidth: 1; CodePage: 20866),
      (CharsetID: 64; CharSetName: 'KOI8U';       CharSetWidth: 1; CodePage: 21866),
      (CharsetID: 65; CharSetName: 'WIN1258';     CharSetWidth: 1; CodePage: 1258),
      (CharsetID: 66; CharSetName: 'TIS620';      CharSetWidth: 1; CodePage: 874),
      (CharsetID: 67; CharSetName: 'GBK';         CharSetWidth: 2; CodePage: 936),
      (CharsetID: 68; CharSetName: 'CP943C';      CharSetWidth: 2; CodePage: 943),
      (CharsetID: 69; CharSetName: 'GB18030';     CharSetWidth: 4; CodePage: 54936));
  184. { TFbCharset }
  { Returns the CharSetMap row for this character set.

    The enum ordinal is used as the table index. A TFBCharSet value produced
    by casting an arbitrary integer can lie outside the table, so the index
    is checked first: out-of-range values yield a placeholder row shaped like
    the table's own 'Unknown' rows (width 0, CP_NONE) that carries the
    offending id, instead of reading past the end of the array. }
  function TFbCharsetHelper.GetCharset(): TCharsetMap;
  var
    Id: Integer;
  begin
    Id := Ord(Self);
    if (Id >= Low(CharSetMap)) and (Id <= High(CharSetMap)) then
      Result := CharSetMap[Id]
    else
    begin
      Result.CharsetID := Id;
      Result.CharSetName := 'Unknown';
      Result.CharSetWidth := 0;
      Result.CodePage := CP_NONE;
    end;
  end;
  { Returns the code page number recorded for this character set in the
    mapping table. }
  function TFbCharsetHelper.GetCodePage(): Integer;
  var
    Entry: TCharsetMap;
  begin
    // Go through GetCharset so the table lookup lives in one place.
    Entry := GetCharset();
    Result := Entry.CodePage;
  end;
  { Returns the character width recorded for this character set in the
    mapping table (0 on the table's 'Unknown' rows). }
  function TFbCharsetHelper.GetCharWidth(): Word;
  var
    Entry: TCharsetMap;
  begin
    // Go through GetCharset so the table lookup lives in one place.
    Entry := GetCharset();
    Result := Entry.CharSetWidth;
  end;
  { Returns the character set name recorded for this character set in the
    mapping table. }
  function TFbCharsetHelper.GetCharSetName(): string;
  var
    Entry: TCharsetMap;
  begin
    Entry := GetCharset();
    // The table stores the name as AnsiString; spell out the conversion to
    // the default string type that the assignment performed implicitly.
    Result := string(Entry.CharSetName);
  end;
  { Returns a TEncoding obtained from TEncoding.GetEncoding for the code page
    mapped to this character set.

    NOTE(review): GetString in this unit frees the instance it gets from this
    method, so other callers are presumably expected to free it too. }
  function TFbCharsetHelper.GetEncoding(): TEncoding;
  var
    MappedCodePage: Integer;
  begin
    MappedCodePage := GetCodePage();
    Result := TEncoding.GetEncoding(MappedCodePage);
  end;
  { Decodes a slice of Bytes into a UnicodeString using this character set's
    encoding.

    Bytes      - source buffer.
    ByteIndex  - offset of the first byte to decode.
    ByteCount  - number of bytes to decode.

    The encoding object is obtained for this call only and is released before
    returning, including when decoding raises an exception. }
  function TFbCharsetHelper.GetString(
    const Bytes: TBytes; ByteIndex, ByteCount: Integer): UnicodeString;
  var
    Decoder: TEncoding;
  begin
    Decoder := Self.GetEncoding();
    try
      Result := Decoder.GetString(Bytes, ByteIndex, ByteCount);
    finally
      // This method owns the instance it just obtained, so release it here.
      Decoder.Free;
    end;
  end;
  217. end.