2
0

unixcp.pp 25 KB


  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by the Free Pascal development team.
  4. String code page support functions for unix styled systems.
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$mode objfpc}
  12. unit unixcp;
  13. interface
  14. uses baseunix;
  15. { source: http://win-iconv.googlecode.com/svn-history/r6/trunk/win_iconv.c
  16. public domain
  17. }
  type
    { One row of the code page table: a numeric code page identifier
      paired with one textual name for it. }
    TUnixCpData = record
      { numeric code page identifier }
      cp: word;
      { code page name; kept as an ansistring so that it is
        null-terminated }
      name: ansistring;
    end;
  23. (*
  24. * Code Page Identifiers
  25. * http://msdn2.microsoft.com/en-us/library/ms776446.aspx
  26. *)
  const
    { Highest index of UnixCpMap. The value has to match the number of
      entries that are actually compiled in, which depends on the aix and
      ACCEPT_646 defines; adjust it whenever an entry is added or removed. }
    UnixCpMapLimit = 406{$ifndef aix}-83{$endif}{$ifdef ACCEPT_646}+1{$endif};

    { Table mapping code page numbers to code page names.

      * Index -1 is a fake "code page 0" entry that stands for
        invalid/unknown and maps to UTF-8.
      * All other entries MUST stay sorted by ascending cp, with all
        aliases of one cp adjacent: GetCodepageData performs a binary
        search on cp and then walks back to the first alias.
      * Within one cp the first entry should be the most commonly
        supported name.
      * GetCodepageByName compares names case-sensitively and returns the
        first match, which is why some names are listed in several
        spellings. }
    UnixCpMap: array[-1..UnixCpMapLimit] of TUnixCpData =
    ((cp:0; name: 'UTF-8'), { invalid/unknown -> utf-8 }
    (cp:37; name:'IBM037'), (* IBM EBCDIC US-Canada *)
    (cp:37; name:'IBM-037'), (* IBM EBCDIC US-Canada, AIX *)
    (cp:154; name:'CP154'),
    (cp:154; name:'CYRILLIC-ASIAN'),
    (cp:154; name:'PT154'),
    (cp:154; name:'PTCP154'),
    (cp:154; name:'CSPTCP154'),
    (cp:437; name:'437'),
    (cp:437; name:'CP437'),
    (cp:437; name:'IBM-437'),
    (cp:437; name:'CSPC8CODEPAGE437'),
    (cp:437; name:'IBM437'), (* OEM United States *)
    (cp:500; name:'IBM500'), (* IBM EBCDIC International *)
    (cp:500; name:'IBM-500'), (* IBM EBCDIC International, AIX *)
    (cp:708; name:'ASMO-708'), (* Arabic (ASMO 708) *)
    (cp:720; name:'DOS-720'), (* Arabic (Transparent ASMO); Arabic (DOS) *)
    (cp:737; name:'CP737'),
    (cp:737; name:'ibm737'), (* OEM Greek (formerly 437G); Greek (DOS) *)
    (cp:775; name:'CP775'),
    (cp:775; name:'IBM775'),
    (cp:775; name:'CSPC775BALTIC'),
    (cp:775; name:'ibm775'), (* OEM Baltic; Baltic (DOS) *)
    (cp:850; name:'850'),
    {$ifdef aix}
    (cp:850; name:'IBM-850'), (* AIX *)
    {$endif}
    (cp:850; name:'CP850'),
    (cp:850; name:'IBM850'),
    (cp:850; name:'CSPC850MULTILINGUAL'),
    (cp:850; name:'ibm850'), (* OEM Multilingual Latin 1; Western European (DOS) *)
    (cp:852; name:'852'),
    {$ifdef aix}
    (cp:852; name:'IBM-852'), (* AIX *)
    {$endif}
    (cp:852; name:'CP852'),
    (cp:852; name:'IBM852'),
    (cp:852; name:'CSPCP852'),
    (cp:852; name:'ibm852'), (* OEM Latin 2; Central European (DOS) *)
    (cp:853; name:'CP853'),
    (cp:855; name:'855'),
    {$ifdef aix}
    (cp:855; name:'IBM-855'), (* AIX *)
    {$endif}
    (cp:855; name:'CP855'),
    (cp:855; name:'IBM855'),
    (cp:855; name:'CSIBM855'),
    (cp:855; name:'IBM855'), (* OEM Cyrillic (primarily Russian) *)
    (cp:857; name:'857'),
    {$ifdef aix}
    (cp:857; name:'IBM-857'), (* AIX *)
    {$endif}
    (cp:857; name:'CP857'),
    (cp:857; name:'IBM857'),
    (cp:857; name:'CSIBM857'),
    (cp:857; name:'ibm857'), (* OEM Turkish; Turkish (DOS) *)
    (cp:858; name:'CP858'),
    (cp:858; name:'IBM00858'), (* OEM Multilingual Latin 1 + Euro symbol *)
    (cp:860; name:'860'),
    {$ifdef aix}
    (cp:860; name:'IBM-860'), (* AIX *)
    {$endif}
    (cp:860; name:'CP860'),
    (cp:860; name:'IBM860'),
    (cp:860; name:'CSIBM860'),
    (cp:860; name:'IBM860'), (* OEM Portuguese; Portuguese (DOS) *)
    (cp:861; name:'861'),
    {$ifdef aix}
    (cp:861; name:'IBM-861'), (* AIX *)
    {$endif}
    (cp:861; name:'CP-IS'),
    (cp:861; name:'CP861'),
    (cp:861; name:'IBM861'),
    (cp:861; name:'CSIBM861'),
    (cp:861; name:'ibm861'), (* OEM Icelandic; Icelandic (DOS) *)
    (cp:862; name:'862'),
    {$ifdef aix}
    (cp:862; name:'IBM-862'), (* AIX *)
    {$endif}
    (cp:862; name:'CP862'),
    (cp:862; name:'IBM862'),
    (cp:862; name:'CSPC862LATINHEBREW'),
    (cp:862; name:'DOS-862'), (* OEM Hebrew; Hebrew (DOS) *)
    (cp:863; name:'863'),
    (cp:863; name:'CP863'),
    {$ifdef aix}
    (cp:863; name:'IBM-863'), (* AIX *)
    {$endif}
    (cp:863; name:'CSIBM863'),
    (cp:863; name:'IBM863'), (* OEM French Canadian; French Canadian (DOS) *)
    (cp:864; name:'CP864'),
    {$ifdef aix}
    (cp:864; name:'IBM-864'), (* AIX *)
    {$endif}
    (cp:864; name:'CSIBM864'),
    (cp:864; name:'IBM864'), (* OEM Arabic; Arabic (864) *)
    (cp:865; name:'865'),
    (cp:865; name:'IBM-865'), (*AIX *)
    (cp:865; name:'CP865'),
    (cp:865; name:'CSIBM865'),
    (cp:865; name:'IBM865'), (* OEM Nordic; Nordic (DOS) *)
    (cp:866; name:'866'),
    {$ifdef aix}
    (cp:866; name:'IBM-866'), (* AIX *)
    {$endif}
    (cp:866; name:'CP866'),
    (cp:866; name:'IBM866'),
    (cp:866; name:'CSIBM866'),
    (cp:866; name:'cp866'), (* OEM Russian; Cyrillic (DOS) *)
    (cp:869; name:'869'),
    (cp:869; name:'IBM-869'),
    (cp:869; name:'CP-GR'),
    (cp:869; name:'CP869'),
    (cp:869; name:'IBM869'),
    (cp:869; name:'CSIBM869'),
    (cp:869; name:'ibm869'), (* OEM Modern Greek; Greek, Modern (DOS) *)
    (cp:870; name:'IBM870'), (* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 *)
    {$ifdef aix}
    (cp:870; name:'IBM-870'), (* AIX *)
    {$endif}
    (cp:874; name:'CP874'),
    (cp:874; name:'WINDOWS-874'),
    (cp:874; name:'windows-874'), (* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) *)
    (cp:875; name:'cp875'), (* IBM EBCDIC Greek Modern *)
    (cp:932; name:'CP932'),
    (cp:932; name:'IBM-943'), (* AIX -- note: IBM-943 = MS 932; IBM-932 is something different*)
    (cp:932; name:'MS932'),
    (cp:932; name:'SHIFT_JIS'),
    (cp:932; name:'SHIFT_JIS-MS'),
    (cp:932; name:'SJIS'),
    (cp:932; name:'SJIS-MS'),
    (cp:932; name:'SJIS-OPEN'),
    (cp:932; name:'SJIS-WIN'),
    (cp:932; name:'WINDOWS-31J'),
    (cp:932; name:'WINDOWS-932'),
    (cp:932; name:'CSWINDOWS31J'),
    (cp:932; name:'shift_jis'), (* ANSI/OEM Japanese; Japanese (Shift-JIS) *)
    (cp:932; name:'shift-jis'), (* alternative name for it *)
    (cp:936; name:'CP936'),
    {$ifdef aix}
    (cp:936; name:'IBM-eucCN'), (* AIX *)
    {$endif}
    (cp:936; name:'GBK'),
    (cp:936; name:'MS936'),
    (cp:936; name:'WINDOWS-936'),
    (cp:936; name:'gb2312'), (* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) *)
    (cp:949; name:'CP949'),
    {$ifdef aix}
    (cp:949; name:'IBM-eucKR'), (* AIX *)
    {$endif}
    (cp:949; name:'UHC'),
    (cp:949; name:'EUC-KR'),
    (cp:949; name:'ks_c_5601-1987'), (* ANSI/OEM Korean (Unified Hangul Code) *)
    (cp:950; name:'CP950'),
    (cp:950; name:'BIG5'),
    (cp:950; name:'big5'), (* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) *)
    (cp:1026; name:'IBM1026'), (* IBM EBCDIC Turkish (Latin 5) *)
    {$ifdef aix}
    (cp:1026; name:'IBM-1026'), (* AIX *)
    {$endif}
    (cp:1047; name:'IBM01047'), (* IBM EBCDIC Latin 1/Open System *)
    {$ifdef aix}
    (cp:1047; name:'IBM-1047'), (* AIX *)
    {$endif}
    (cp:1125; name:'CP1125'),
    (cp:1125; name:'IBM-1125'),
    (cp:1133; name:'CP1133'),
    (cp:1133; name:'IBM-1133'),
    (cp:1133; name:'IBM-CP1133'),
    (cp:1140; name:'IBM01140'), (* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) *)
    {$ifdef aix}
    (cp:1140; name:'IBM-1140'), (* AIX *)
    {$endif}
    (cp:1141; name:'IBM01141'), (* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) *)
    {$ifdef aix}
    (cp:1141; name:'IBM-1141'), (* AIX *)
    {$endif}
    (cp:1142; name:'IBM01142'), (* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) *)
    {$ifdef aix}
    (cp:1142; name:'IBM-1142'), (* AIX *)
    {$endif}
    (cp:1143; name:'IBM01143'), (* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) *)
    {$ifdef aix}
    (cp:1143; name:'IBM-1143'), (* AIX *)
    {$endif}
    (cp:1144; name:'IBM01144'), (* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) *)
    {$ifdef aix}
    (cp:1144; name:'IBM-1144'), (* AIX *)
    {$endif}
    (cp:1145; name:'IBM01145'), (* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) *)
    {$ifdef aix}
    (cp:1145; name:'IBM-1145'), (* AIX *)
    {$endif}
    (cp:1146; name:'IBM01146'), (* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) *)
    {$ifdef aix}
    (cp:1146; name:'IBM-1146'), (* AIX *)
    {$endif}
    (cp:1147; name:'IBM01147'), (* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) *)
    {$ifdef aix}
    (cp:1147; name:'IBM-1147'), (* AIX *)
    {$endif}
    (cp:1148; name:'IBM01148'), (* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) *)
    {$ifdef aix}
    (cp:1148; name:'IBM-1148'), (* AIX *)
    {$endif}
    (cp:1149; name:'IBM01149'), (* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) *)
    {$ifdef aix}
    (cp:1149; name:'IBM-1149'), (* AIX *)
    {$endif}
    (cp:1200; name:'UTF-16LE'),
    {$ifdef aix}
    (cp:1200; name:'UTF-16le'), (* AIX *)
    {$endif}
    (cp:1200; name:'UTF16LE'),
    (cp:1200; name:'UCS-2LE'),
    (cp:1200; name:'CP1200'),
    {$ifdef FPC_LITTLE_ENDIAN}
    (* Default is little endian, because the platform is *)
    (cp:1200; name:'UTF16'),
    (cp:1200; name:'UTF-16'),
    (cp:1200; name:'UCS-2'),
    {$endif}
    (cp:1201; name:'UTF-16BE'),
    (cp:1201; name:'UTF16BE'),
    (cp:1201; name:'UCS-2BE'),
    (cp:1201; name:'unicodeFFFE'),
    (cp:1201; name:'CP1201'),
    {$ifdef FPC_BIG_ENDIAN}
    (*
    * Default is big endian.
    * See rfc2781 4.3 Interpreting text labelled as UTF-16.
    *)
    (cp:1201; name:'UTF16'),
    (cp:1201; name:'UTF-16'),
    (cp:1201; name:'UCS-2'),
    {$endif}
    (cp:1250; name:'CP1250'),
    (cp:1250; name:'MS-EE'),
    (cp:1250; name:'WINDOWS-1250'),
    (cp:1250; name:'windows-1250'), (* ANSI Central European; Central European (Windows) *)
    (cp:1251; name:'CP1251'),
    (cp:1251; name:'MS-CYRL'),
    (cp:1251; name:'WINDOWS-1251'),
    (cp:1251; name:'windows-1251'), (* ANSI Cyrillic; Cyrillic (Windows) *)
    (cp:1252; name:'CP1252'),
    (cp:1252; name:'MS-ANSI'),
    (cp:1252; name:'WINDOWS-1252'),
    (cp:1252; name:'windows-1252'), (* ANSI Latin 1; Western European (Windows) *)
    (cp:1253; name:'CP1253'),
    (cp:1253; name:'MS-GREEK'),
    (cp:1253; name:'WINDOWS-1253'),
    (cp:1253; name:'windows-1253'), (* ANSI Greek; Greek (Windows) *)
    (cp:1254; name:'CP1254'),
    (cp:1254; name:'MS-TURK'),
    (cp:1254; name:'WINDOWS-1254'),
    (cp:1254; name:'windows-1254'), (* ANSI Turkish; Turkish (Windows) *)
    (cp:1255; name:'CP1255'),
    (cp:1255; name:'MS-HEBR'),
    (cp:1255; name:'WINDOWS-1255'),
    (cp:1255; name:'windows-1255'), (* ANSI Hebrew; Hebrew (Windows) *)
    (cp:1256; name:'CP1256'),
    (cp:1256; name:'MS-ARAB'),
    (cp:1256; name:'WINDOWS-1256'),
    (cp:1256; name:'windows-1256'), (* ANSI Arabic; Arabic (Windows) *)
    (cp:1257; name:'CP1257'),
    (cp:1257; name:'WINBALTRIM'),
    (cp:1257; name:'WINDOWS-1257'),
    (cp:1257; name:'windows-1257'), (* ANSI Baltic; Baltic (Windows) *)
    (cp:1258; name:'CP1258'),
    (cp:1258; name:'WINDOWS-1258'),
    (cp:1258; name:'windows-1258'), (* ANSI/OEM Vietnamese; Vietnamese (Windows) *)
    (cp:1361; name:'CP1361'),
    (cp:1361; name:'JOHAB'),
    (cp:1361; name:'Johab'), (* Korean (Johab) *)
    (cp:10000; name:'macintosh'), (* MAC Roman; Western European (Mac) *)
    (cp:10001; name:'x-mac-japanese'), (* Japanese (Mac) *)
    (cp:10002; name:'x-mac-chinesetrad'), (* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) *)
    (cp:10003; name:'x-mac-korean'), (* Korean (Mac) *)
    (cp:10004; name:'x-mac-arabic'), (* Arabic (Mac) *)
    (cp:10005; name:'x-mac-hebrew'), (* Hebrew (Mac) *)
    (cp:10006; name:'x-mac-greek'), (* Greek (Mac) *)
    (cp:10007; name:'x-mac-cyrillic'), (* Cyrillic (Mac) *)
    (cp:10008; name:'x-mac-chinesesimp'), (* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) *)
    (cp:10010; name:'x-mac-romanian'), (* Romanian (Mac) *)
    (cp:10017; name:'x-mac-ukrainian'), (* Ukrainian (Mac) *)
    (cp:10021; name:'x-mac-thai'), (* Thai (Mac) *)
    (cp:10029; name:'x-mac-ce'), (* MAC Latin 2; Central European (Mac) *)
    (cp:10079; name:'x-mac-icelandic'), (* Icelandic (Mac) *)
    (cp:10081; name:'x-mac-turkish'), (* Turkish (Mac) *)
    (cp:10082; name:'x-mac-croatian'), (* Croatian (Mac) *)
    (cp:12000; name:'UTF-32LE'),
    (cp:12000; name:'CP12000'),
    (cp:12000; name:'UTF32LE'),
    {$ifdef FPC_LITTLE_ENDIAN}
    (cp:12000; name:'UTF32'),
    (cp:12000; name:'UTF-32'),
    {$endif}
    (cp:12001; name:'UTF-32BE'),
    (cp:12001; name:'CP12001'),
    (cp:12001; name:'UTF32BE'),
    {$ifdef FPC_BIG_ENDIAN}
    (cp:12001; name:'UTF32'),
    (cp:12001; name:'UTF-32'),
    {$endif}
    (cp:20000; name:'x-Chinese_CNS'), (* CNS Taiwan; Chinese Traditional (CNS) *)
    (cp:20001; name:'x-cp20001'), (* TCA Taiwan *)
    (cp:20002; name:'x_Chinese-Eten'), (* Eten Taiwan; Chinese Traditional (Eten) *)
    (cp:20003; name:'x-cp20003'), (* IBM5550 Taiwan *)
    (cp:20004; name:'x-cp20004'), (* TeleText Taiwan *)
    (cp:20005; name:'x-cp20005'), (* Wang Taiwan *)
    (cp:20105; name:'x-IA5'), (* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) *)
    (cp:20106; name:'x-IA5-German'), (* IA5 German (7-bit) *)
    (cp:20107; name:'x-IA5-Swedish'), (* IA5 Swedish (7-bit) *)
    (cp:20108; name:'x-IA5-Norwegian'), (* IA5 Norwegian (7-bit) *)
    (cp:20127; name:'US-ASCII'),
    (cp:20127; name:'ASCII'),
    {$ifdef aix}
    (cp:20127; name:'ASCII-GR'), (* AIX *)
    {$endif}
    (cp:20127; name:'ANSI_X3.4-1968'),
    (cp:20127; name:'ANSI_X3.4-1986'),
    (cp:20127; name:'CP367'),
    {$ifdef aix}
    (cp:20127; name:'IBM-367'), (* AIX *)
    {$endif}
    (cp:20127; name:'IBM367'),
    (cp:20127; name:'ISO-IR-6'),
    {$ifdef ACCEPT_646}
    (cp:20127; name:'646'),
    {$endif ACCEPT_646}
    (cp:20127; name:'ISO646-US'),
    (cp:20127; name:'ISO_646.IRV:1991'),
    (cp:20127; name:'US'),
    (cp:20127; name:'CSASCII'),
    (cp:20127; name:'us-ascii'), (* US-ASCII (7-bit) *)
    (cp:20261; name:'x-cp20261'), (* T.61 *)
    (cp:20269; name:'x-cp20269'), (* ISO 6937 Non-Spacing Accent *)
    (cp:20273; name:'IBM273'), (* IBM EBCDIC Germany *)
    {$ifdef aix}
    (cp:20273; name:'IBM-273'), (* AIX *)
    {$endif}
    (cp:20277; name:'IBM277'), (* IBM EBCDIC Denmark-Norway *)
    {$ifdef aix}
    (cp:20277; name:'IBM-277'), (* AIX *)
    {$endif}
    (cp:20278; name:'IBM278'), (* IBM EBCDIC Finland-Sweden *)
    {$ifdef aix}
    (cp:20278; name:'IBM-278'), (* AIX *)
    {$endif}
    (cp:20280; name:'IBM280'), (* IBM EBCDIC Italy *)
    {$ifdef aix}
    (cp:20280; name:'IBM-280'), (* AIX *)
    {$endif}
    (cp:20284; name:'IBM284'), (* IBM EBCDIC Latin America-Spain *)
    {$ifdef aix}
    (cp:20284; name:'IBM-284'), (* AIX *)
    {$endif}
    (cp:20285; name:'IBM285'), (* IBM EBCDIC United Kingdom *)
    {$ifdef aix}
    (cp:20285; name:'IBM-285'), (* AIX *)
    {$endif}
    (cp:20290; name:'IBM290'), (* IBM EBCDIC Japanese Katakana Extended *)
    {$ifdef aix}
    (cp:20290; name:'IBM-290'), (* AIX *)
    {$endif}
    (cp:20297; name:'IBM297'), (* IBM EBCDIC France *)
    {$ifdef aix}
    (cp:20297; name:'IBM-297'), (* AIX *)
    {$endif}
    (cp:20420; name:'IBM420'), (* IBM EBCDIC Arabic *)
    {$ifdef aix}
    (cp:20420; name:'IBM-420'), (* AIX *)
    {$endif}
    (cp:20423; name:'IBM423'), (* IBM EBCDIC Greek *)
    {$ifdef aix}
    (cp:20423; name:'IBM-423'), (* AIX *)
    {$endif}
    (cp:20424; name:'IBM424'), (* IBM EBCDIC Hebrew *)
    {$ifdef aix}
    (cp:20424; name:'IBM-424'), (* AIX *)
    {$endif}
    (cp:20833; name:'x-EBCDIC-KoreanExtended'), (* IBM EBCDIC Korean Extended *)
    (cp:20838; name:'IBM-Thai'), (* IBM EBCDIC Thai *)
    {$ifdef aix}
    (cp:20838; name:'TIS-620'), (* AIX *)
    {$endif}
    (cp:20866; name:'koi8-r'), (* Russian (KOI8-R); Cyrillic (KOI8-R) *)
    (cp:20871; name:'IBM871'), (* IBM EBCDIC Icelandic *)
    {$ifdef aix}
    (cp:20871; name:'IBM-871'), (* AIX *)
    {$endif}
    (cp:20880; name:'IBM880'), (* IBM EBCDIC Cyrillic Russian *)
    {$ifdef aix}
    (cp:20880; name:'IBM-880'), (* AIX *)
    {$endif}
    (cp:20905; name:'IBM905'), (* IBM EBCDIC Turkish *)
    {$ifdef aix}
    (cp:20905; name:'IBM-905'), (* AIX *)
    {$endif}
    (cp:20924; name:'IBM00924'), (* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) *)
    {$ifdef aix}
    (cp:20924; name:'IBM-924'), (* AIX *)
    {$endif}
    (cp:20932; name:'EUC-JP'), (* Japanese (JIS 0208-1990 and 0121-1990) *)
    {$ifdef aix}
    (cp:20932; name:'IBM-eucJP'), (* AIX *)
    {$endif}
    (cp:20936; name:'x-cp20936'), (* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) *)
    {$ifdef aix}
    (cp:20936; name:'GB2312.1980-0'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:20936; name:'GB2312.1980-0-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:20936; name:'GB2312.1980-0-GR'), (* AIX *)
    {$endif}
    (cp:20949; name:'x-cp20949'), (* Korean Wansung *)
    (cp:21025; name:'cp1025'), (* IBM EBCDIC Cyrillic Serbian-Bulgarian *)
    {$ifdef aix}
    (cp:21025; name:'IBM-1025'), (* AIX *)
    {$endif}
    (cp:21866; name:'koi8-u'), (* Ukrainian (KOI8-U); Cyrillic (KOI8-U) *)
    {$ifdef aix}
    (cp:21866; name:'IBM-1124'), (* AIX *)
    {$endif}
    (cp:28591; name:'CP819'),
    (cp:28591; name:'IBM819'),
    (cp:28591; name:'ISO-8859-1'),
    (cp:28591; name:'ISO-IR-100'),
    (cp:28591; name:'ISO8859-1'),
    (cp:28591; name:'ISO_8859-1'),
    (cp:28591; name:'ISO_8859-1:1987'),
    (cp:28591; name:'L1'),
    (cp:28591; name:'LATIN1'),
    (cp:28591; name:'CSISOLATIN1'),
    (cp:28591; name:'iso-8859-1'), (* ISO 8859-1 Latin 1; Western European (ISO) *)
    {$ifdef aix}
    (cp:28591; name:'ISO8859-1'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28591; name:'ISO8859-1-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28591; name:'ISO8859-1-GT'), (* AIX *)
    {$endif}
    (cp:28591; name:'iso8859-1'), (* ISO 8859-1 Latin 1; Western European (ISO) *)
    (cp:28592; name:'iso-8859-2'), (* ISO 8859-2 Central European; Central European (ISO) *)
    {$ifdef aix}
    (cp:28592; name:'ISO8859-2'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28592; name:'ISO8859-2-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28592; name:'ISO8859-2-GT'), (* AIX *)
    {$endif}
    (cp:28592; name:'iso8859-2'), (* ISO 8859-2 Central European; Central European (ISO) *)
    (cp:28593; name:'iso-8859-3'), (* ISO 8859-3 Latin 3 *)
    {$ifdef aix}
    (cp:28593; name:'ISO8859-3'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28593; name:'ISO8859-3-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28593; name:'ISO8859-3-GT'), (* AIX *)
    {$endif}
    (cp:28593; name:'iso8859-3'), (* ISO 8859-3 Latin 3 *)
    (cp:28594; name:'iso-8859-4'), (* ISO 8859-4 Baltic *)
    {$ifdef aix}
    (cp:28594; name:'ISO8859-4'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28594; name:'ISO8859-4-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28594; name:'ISO8859-4-GT'), (* AIX *)
    {$endif}
    (cp:28594; name:'iso8859-4'), (* ISO 8859-4 Baltic *)
    (cp:28595; name:'iso-8859-5'), (* ISO 8859-5 Cyrillic *)
    {$ifdef aix}
    (cp:28595; name:'ISO8859-5'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28595; name:'ISO8859-5-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28595; name:'ISO-8859-5-GT'), (* AIX *)
    {$endif}
    (cp:28595; name:'iso8859-5'), (* ISO 8859-5 Cyrillic *)
    (cp:28596; name:'iso-8859-6'), (* ISO 8859-6 Arabic *)
    {$ifdef aix}
    (cp:28596; name:'ISO8859-6'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28596; name:'ISO-8859-6-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28596; name:'ISO-8859-6-GT'), (* AIX *)
    {$endif}
    (cp:28596; name:'iso8859-6'), (* ISO 8859-6 Arabic *)
    (cp:28597; name:'iso-8859-7'), (* ISO 8859-7 Greek *)
    {$ifdef aix}
    (cp:28597; name:'ISO8859-7'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28597; name:'ISO-8859-7-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28597; name:'ISO8859-7-GT'), (* AIX *)
    {$endif}
    (cp:28597; name:'iso8859-7'), (* ISO 8859-7 Greek *)
    (cp:28598; name:'iso-8859-8'), (* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) *)
    {$ifdef aix}
    (cp:28598; name:'ISO8859-8'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28598; name:'ISO8859-8-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28598; name:'ISO8859-8-GT'), (* AIX *)
    {$endif}
    (cp:28598; name:'iso8859-8'), (* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) *)
    (cp:28599; name:'iso-8859-9'), (* ISO 8859-9 Turkish *)
    {$ifdef aix}
    (cp:28599; name:'ISO8859-9'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28599; name:'ISO8859-9-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28599; name:'ISO8859-9-GT'), (* AIX *)
    {$endif}
    (cp:28599; name:'iso8859-9'), (* ISO 8859-9 Turkish *)
    (cp:28603; name:'iso-8859-13'), (* ISO 8859-13 Estonian *)
    {$ifdef aix}
    (cp:28603; name:'ISO8859-13'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28603; name:'ISO8859-13-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28603; name:'ISO8859-13-GT'), (* AIX *)
    {$endif}
    (cp:28603; name:'iso8859-13'), (* ISO 8859-13 Estonian *)
    (cp:28605; name:'iso-8859-15'), (* ISO 8859-15 Latin 9 *)
    {$ifdef aix}
    (cp:28605; name:'ISO8859-15'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28605; name:'ISO8859-15-GL'), (* AIX *)
    {$endif}
    {$ifdef aix}
    (cp:28605; name:'ISO8859-15-GT'), (* AIX *)
    {$endif}
    (cp:28605; name:'iso8859-15'), (* ISO 8859-15 Latin 9 *)
    (cp:29001; name:'x-Europa'), (* Europa 3 *)
    (cp:38598; name:'iso-8859-8-i'), (* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) *)
    (cp:38598; name:'iso8859-8-i'), (* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) *)
    (cp:50220; name:'iso-2022-jp'), (* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) *)
    (cp:50221; name:'ISO-2022-JP'),
    (cp:50221; name:'CP50221'),
    (cp:50221; name:'ISO-2022-JP-MS'),
    (cp:50221; name:'ISO2022-JP'),
    (cp:50221; name:'ISO2022-JP-MS'),
    (cp:50221; name:'MS50221'),
    (cp:50221; name:'WINDOWS-50221'),
    (cp:50221; name:'csISO2022JP'), (* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) *)
    (cp:50222; name:'iso-2022-jp'), (* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) *)
    (cp:50225; name:'iso-2022-kr'), (* ISO 2022 Korean *)
    (cp:50225; name:'iso2022-kr'), (* ISO 2022 Korean *)
    (cp:50227; name:'x-cp50227'), (* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) *)
    (cp:51932; name:'EUC-JP'),
    (cp:51932; name:'CP51932'),
    (cp:51932; name:'MS51932'),
    (cp:51932; name:'WINDOWS-51932'),
    (cp:51932; name:'euc-jp'), (* EUC Japanese *)
    (cp:51936; name:'EUC-CN'), (* EUC Simplified Chinese; Chinese Simplified (EUC) *)
    (cp:51949; name:'euc-kr'), (* EUC Korean *)
    (cp:52936; name:'hz-gb-2312'), (* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) *)
    (cp:54936; name:'GB18030'), (* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) *)
    (cp:57002; name:'x-iscii-de'), (* ISCII Devanagari *)
    (cp:57003; name:'x-iscii-be'), (* ISCII Bengali *)
    (cp:57004; name:'x-iscii-ta'), (* ISCII Tamil *)
    (cp:57005; name:'x-iscii-te'), (* ISCII Telugu *)
    (cp:57006; name:'x-iscii-as'), (* ISCII Assamese *)
    (cp:57007; name:'x-iscii-or'), (* ISCII Oriya *)
    (cp:57008; name:'x-iscii-ka'), (* ISCII Kannada *)
    (cp:57009; name:'x-iscii-ma'), (* ISCII Malayalam *)
    (cp:57010; name:'x-iscii-gu'), (* ISCII Gujarati *)
    (cp:57011; name:'x-iscii-pa'), (* ISCII Punjabi *)
    (cp:65001; name:'UTF-8'),
    (cp:65001; name:'CP65001'),
    (cp:65001; name:'UTF8'));
  { Returns the index in UnixCpMap of the first entry whose code page
    number equals cp, so that callers can try the following alias names
    of the same code page if necessary. Returns -1, the index of the
    fallback UTF-8 entry, when cp is not in the table. }
  function GetCodepageData(cp: TSystemCodePage): longint;

  { Returns the code page number registered in UnixCpMap under the name
    cpname (exact, case-sensitive match, except that a leading "cp"
    followed by a digit is treated as "CP"), or CP_NONE when the name is
    not in the table. }
  function GetCodepageByName(cpname: rawbytestring): TSystemCodePage;

  { Returns the code page named by the first non-empty environment
    variable among LC_ALL, LC_CTYPE and LANG, or a platform default when
    none of them names a known code page. }
  function GetSystemCodepage: TSystemCodePage;
  630. implementation
  { Finds cp in UnixCpMap and returns the index of the first entry that
    carries this code page number (several alias names may follow it, so
    that callers can try them in turn). Returns -1 when cp is not listed;
    -1 is also the index of the fake entry that maps to UTF-8. }
  function GetCodepageData(cp: TSystemCodePage): longint;
    var
      lo, hi, mid, probe: longint;
    begin
      lo:=low(UnixCpMap);
      hi:=high(UnixCpMap);
      { binary search on the cp field; relies on UnixCpMap being sorted
        by ascending cp }
      repeat
        { round up, so that "lo:=mid" always makes progress }
        mid:=(lo+hi+1) shr 1;
        probe:=UnixCpMap[mid].cp;
        if probe=cp then
          break
        else if probe>cp then
          hi:=mid-1
        else
          lo:=mid;
      until lo>=hi;

      { not found (or better raise an error?) }
      if UnixCpMap[mid].cp<>cp then
        begin
          result:=-1;
          exit;
        end;

      { the search may have hit any of the aliases of this cp; the table
        is ordered so that the first alias is the most commonly
        supported one, so step back to it }
      while (mid>low(UnixCpMap)) and
            (UnixCpMap[mid-1].cp=cp) do
        dec(mid);
      result:=mid;
    end;
  { Translates a code page name into its code page number by looking it
    up in UnixCpMap. The comparison is exact (case-sensitive); the only
    normalisation is that a lower case "cp" prefix directly followed by
    a digit is turned into "CP". Returns CP_NONE if the name is not
    found (or better raise an error?). }
  function GetCodepageByName(cpname: rawbytestring): TSystemCodePage;
    var
      idx: longint;
    begin
      { encoding names are always ascii: reset the string's code page
        without converting its contents, so that the comparisons below
        cannot trigger a nonsense code page conversion }
      SetCodePage(cpname,CP_ACP,false);

      { Linux uses cpXXXX instead of CPXXXX }
      if (length(cpname)>=3) and
         (cpname[1]='c') and
         (cpname[2]='p') and
         (cpname[3] in ['0'..'9']) then
        begin
          cpname[1]:='C';
          cpname[2]:='P';
        end;

      result:=CP_NONE;
      { plain linear scan: this is not a common operation, so a separate
        name-sorted array is not worth it. The very first array element
        (the fake "code page 0" that maps to UTF-8 as default) is
        skipped. }
      idx:=low(UnixCpMap)+1;
      while idx<=high(UnixCpMap) do
        begin
          if UnixCpMap[idx].name=cpname then
            begin
              result:=UnixCpMap[idx].cp;
              break;
            end;
          inc(idx);
        end;
    end;
  { Determines the system code page from the environment. The first
    non-empty variable among LC_ALL, LC_CTYPE and LANG is used; its
    codeset part is looked up with GetCodepageByName. If no variable is
    set, or the name is unknown, the platform default is returned:
    CP_UTF8 on linux, darwin and haiku, CP_ASCII elsewhere. }
  function GetSystemCodepage: TSystemCodePage;
    var
      locale: ansistring;
      cut: SizeInt;
      found: TSystemCodePage;
    begin
      { platform default, used whenever the environment gives no answer }
    {$if defined(linux) or defined(darwin) or defined(haiku)}
      Result:=CP_UTF8;
    {$else}
      Result:=CP_ASCII;
    {$endif}

      locale:=FpGetEnv('LC_ALL');
      if locale='' then
        locale:=FpGetEnv('LC_CTYPE');
      if locale='' then
        locale:=FpGetEnv('LANG');
      if locale='' then
        exit;

      { reduce the locale to its codeset, e.g. en_US.UTF-8 => UTF-8:
        drop everything up to and including the first '.' ... }
      cut:=Pos('.',locale);
      if cut>0 then
        Delete(locale,1,cut);
      { ... and everything from the '@' of a modifier onwards }
      cut:=Pos('@',locale);
      if cut>0 then
        SetLength(locale,cut-1);

      found:=GetCodepageByName(locale);
      if found<>CP_NONE then
        Result:=found;
    end;
  726. end.