  { jwin2javacharset.inc

    Mapping between Windows code page identifiers and Java charset names.

    Source of the Java charset names:
    http://download.java.net/jdk8/docs/technotes/guides/intl/encoding.doc.html
  }
  type
    { One row of the code page table: pairs a numeric code page identifier
      with a charset name. }
    twin2javacs = record
      { numeric code page identifier }
      cp: word;
      { charset name associated with that code page }
      name: unicodestring;
    end;
  (*
   * Reference list of Windows code page identifiers:
   * http://msdn.microsoft.com/en-us/library/dd317756.aspx
   *)
  const
    { Code page -> charset name table.

      Invariants relied upon by the lookup code in this file:
        - entries are sorted by ascending cp, because win2javacs performs a
          binary search on the cp field;
        - when several names share one cp, the first entry of that group is
          the one win2javacs returns, so the most commonly supported name
          must come first.

      When adding or removing entries, keep the array bounds in sync. }
    win2javacs_arr: array[0..154] of twin2javacs =
      (
        (* (cp:; name:'x-MacDingbat'), not supported ? *)
        (* (cp:; name:'x-MacSymbol'), not supported ? *)
        (cp:37; name:'IBM037'),
        (cp:437; name:'IBM437'),
        (cp:500; name:'IBM500'),
        (cp:737; name:'x-IBM737'),
        (cp:775; name:'IBM775'),
        (cp:850; name:'IBM850'),
        (cp:852; name:'IBM852'),
        (cp:855; name:'IBM855'),
        (cp:857; name:'IBM857'),
        (cp:858; name:'IBM00858'),
        (cp:860; name:'IBM860'),
        (cp:861; name:'IBM861'),
        (cp:862; name:'IBM862'),
        (cp:863; name:'IBM863'),
        (cp:864; name:'IBM864'),
        (cp:865; name:'IBM865'),
        (cp:866; name:'IBM866'),
        (cp:868; name:'IBM868'),
        (cp:869; name:'IBM869'),
        (cp:870; name:'IBM870'),
        (cp:874; name:'x-windows-874'),
        (cp:918; name:'IBM918'),
        (cp:932; name:'Shift_JIS'),
        (cp:932; name:'x-PCK'),
        (cp:936; name:'GB2312'),
        (cp:936; name:'x-mswin-936'),
        (cp:942; name:'x-IBM942'),
        (cp:943; name:'x-IBM943'),
        (cp:948; name:'x-IBM948'),
        (cp:949; name:'x-IBM949'),
        (cp:949; name:'x-windows-949'),
        (cp:950; name:'Big5'),
        (cp:950; name:'x-IBM950'),
        (cp:950; name:'x-windows-950'),
        (cp:951; name:'x-Big5-HKSCS-2001'),
        (cp:951; name:'Big5-HKSCS'),
        (cp:951; name:'x-MS950-HKSCS'),
        (cp:951; name:'x-MS950-HKSCS-XP'),
        (cp:964; name:'x-IBM964'),
        (cp:970; name:'x-IBM970'),
        (cp:1006; name:'x-IBM1006'),
        (cp:1025; name:'x-IBM1025'),
        (cp:1026; name:'IBM1026'),
        (cp:1046; name:'x-IBM1046'),
        (cp:1047; name:'IBM1047'),
        (cp:1097; name:'x-IBM1097'),
        (cp:1098; name:'x-IBM1098'),
        (cp:1112; name:'x-IBM1112'),
        (cp:1122; name:'x-IBM1122'),
        (cp:1123; name:'x-IBM1123'),
        (cp:1124; name:'x-IBM1124'),
        (cp:1140; name:'IBM01140'),
        (cp:1141; name:'IBM01141'),
        (cp:1142; name:'IBM01142'),
        (cp:1143; name:'IBM01143'),
        (cp:1144; name:'IBM01144'),
        (cp:1145; name:'IBM01145'),
        (cp:1146; name:'IBM01146'),
        (cp:1147; name:'IBM01147'),
        (cp:1148; name:'IBM01148'),
        (cp:1149; name:'IBM01149'),
        (cp:1200; name:'UTF-16LE'),
        (cp:1200; name:'x-UTF-16LE-BOM'),
        (cp:1201; name:'UTF-16'),
        (cp:1201; name:'UTF-16BE'),
        (cp:1250; name:'windows-1250'),
        (cp:1251; name:'windows-1251'),
        (cp:1252; name:'windows-1252'),
        (cp:1253; name:'windows-1253'),
        (cp:1254; name:'windows-1254'),
        (cp:1255; name:'windows-1255'),
        (cp:1256; name:'windows-1256'),
        (cp:1257; name:'windows-1257'),
        (cp:1258; name:'windows-1258'),
        (* NOTE(review): 1259 does not look like a documented Windows code
           page identifier, and 'windows-31j' is normally associated with
           code page 932 - confirm against the Microsoft list. Kept as-is. *)
        (cp:1259; name:'windows-31j'),
        (cp:1361; name:'x-Johab'),
        (cp:1381; name:'x-IBM1381'),
        (cp:1383; name:'x-IBM1383'),
        (cp:10000; name:'MacRoman'),
        (cp:10004; name:'x-MacArabic'),
        (cp:10005; name:'x-MacHebrew'),
        (cp:10006; name:'x-MacGreek'),
        (cp:10007; name:'x-MacCyrillic'),
        (cp:10010; name:'x-MacRomania'),
        (cp:10017; name:'x-MacUkraine'),
        (cp:10021; name:'x-MacThai'),
        (cp:10029; name:'x-MacCentralEurope'),
        (cp:10079; name:'x-MacIceland'),
        (cp:10081; name:'x-MacTurkish'),
        (cp:10082; name:'x-MacCroatian'),
        (cp:12000; name:'UTF-32LE'),
        (cp:12000; name:'X-UTF-32LE-BOM'),
        (cp:12001; name:'UTF-32'),
        (cp:12001; name:'UTF-32BE'),
        (cp:12001; name:'X-UTF-32BE-BOM'),
        (cp:20127; name:'US-ASCII'),
        (cp:20273; name:'IBM273'),
        (cp:20277; name:'IBM277'),
        (cp:20278; name:'IBM278'),
        (cp:20280; name:'IBM280'),
        (cp:20284; name:'IBM284'),
        (cp:20285; name:'IBM285'),
        (cp:20297; name:'IBM297'),
        (cp:20420; name:'IBM420'),
        (cp:20424; name:'IBM424'),
        (cp:20833; name:'x-IBM833'),
        (cp:20834; name:'x-IBM834'),
        (cp:20838; name:'IBM-Thai'),
        (cp:20856; name:'x-IBM856'),
        (cp:20866; name:'KOI8-R'),
        (cp:20871; name:'IBM871'),
        (cp:20874; name:'x-IBM874'),
        (cp:20875; name:'x-IBM875'),
        (cp:20921; name:'x-IBM921'),
        (cp:20922; name:'x-IBM922'),
        (cp:20932; name:'EUC-JP'),
        (cp:20932; name:'x-JIS0208'),
        (cp:20932; name:'x-JISAutoDetect'),
        (cp:21866; name:'KOI8-U'),
        (cp:28591; name:'ISO-8859-1'),
        (cp:28592; name:'ISO-8859-2'),
        (cp:28593; name:'ISO-8859-3'),
        (cp:28594; name:'ISO-8859-4'),
        (cp:28595; name:'ISO-8859-5'),
        (cp:28596; name:'ISO-8859-6'),
        (cp:28597; name:'ISO-8859-7'),
        (cp:28598; name:'ISO-8859-8'),
        (cp:28599; name:'ISO-8859-9'),
        (cp:28601; name:'TIS-620'),
        (cp:28601; name:'x-iso-8859-11'),
        (cp:28603; name:'ISO-8859-13'),
        (cp:28605; name:'ISO-8859-15'),
        (cp:33722; name:'x-IBM33722'),
        (cp:50220; name:'x-windows-50220'),
        (cp:50220; name:'x-windows-iso2022jp'),
        (cp:50221; name:'JIS_X0201'),
        (cp:50221; name:'x-windows-50221'),
        (cp:50222; name:'ISO-2022-JP'),
        (cp:50222; name:'ISO-2022-JP-2'), (* not exact, Windows does not support ISO-2022-JP-2 *)
        (cp:50222; name:'JIS_X0212-1990'),
        (cp:50225; name:'ISO-2022-KR'),
        (cp:50227; name:'ISO-2022-CN'),
        (cp:50227; name:'x-ISO-2022-CN-CNS'),
        (cp:50229; name:'x-ISO-2022-CN-GB'),
        (cp:50930; name:'x-IBM930'),
        (cp:50933; name:'x-IBM933'),
        (cp:50935; name:'x-IBM935'),
        (cp:50937; name:'x-IBM937'),
        (cp:50939; name:'x-IBM939'),
        (cp:51932; name:'x-MS932_0213'),
        (cp:51932; name:'x-SJIS_0213'),
        (cp:51949; name:'EUC-KR'),
        (cp:54936; name:'GB18030'),
        (cp:57002; name:'x-ISCII91'),
        (cp:65001; name:'UTF-8')
      );
  { Returns the charset name registered in win2javacs_arr for code page cp.

    cp      numeric code page identifier to look up
    result  the name of the first table entry whose cp field equals cp, or
            the literal '<unsupported>' when the table has no such entry

    The table is sorted by ascending cp and, for code pages with several
    alias names, lists the most commonly supported name first; returning the
    first entry of the group therefore yields the preferred name. }
  function win2javacs(cp: word): unicodestring;
    var
      lo, hi, mid: longint;
    begin
      { Lower-bound binary search over the half-open range [lo, hi): on exit
        lo is the first index whose cp is >= the requested one (or one past
        the end of the table). Unlike a search that only inspects midpoints
        rounded upwards, this also considers index 0, so the very first table
        entry can be found, and it lands directly on the first entry of an
        alias group without a backwards walk. }
      lo:=low(win2javacs_arr);
      hi:=high(win2javacs_arr)+1;
      while lo<hi do
        begin
          mid:=(lo+hi) shr 1;
          if win2javacs_arr[mid].cp<cp then
            lo:=mid+1
          else
            hi:=mid;
        end;
      { or better raise an error? }
      result:='<unsupported>';
      { nested tests so the array is never indexed out of range, regardless
        of the boolean evaluation mode in effect }
      if lo<=high(win2javacs_arr) then
        if win2javacs_arr[lo].cp=cp then
          result:=win2javacs_arr[lo].name;
    end;
  { Returns the code page of the first win2javacs_arr entry whose name is
    exactly equal to cpname.

    cpname  charset name to look up (compared with plain string equality)
    result  the cp field of the first matching entry, or 65535 when no entry
            matches }
  function javacs2win(cpname: unicodestring): word;
    var
      idx: longint;
    begin
      { fallback when nothing matches: rawbytestring (or better raise an
        error?) }
      result:=65535;
      { plain front-to-back scan; this is not a common operation, so a
        separate name-ordered lookup table is not worth building }
      idx:=low(win2javacs_arr);
      while idx<=high(win2javacs_arr) do
        begin
          if win2javacs_arr[idx].name=cpname then
            begin
              result:=win2javacs_arr[idx].cp;
              break;
            end;
          inc(idx);
        end;
    end;