XmlUtil.cs 61 KB


  1. // -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
  2. //
  3. // internal System.Xml.XmlUtil
  4. //
  5. // Author:
  6. // Daniel Weber ([email protected])
  7. // Code ported from Open XML 2.3.17 (Delphi/Kylix)
  8. //
  9. // (C) 2001 Daniel Weber
  10. //
  11. using System;
  12. using System.IO;
  13. namespace System.Xml
  14. {
  15. /// <summary>
  16. /// Helper class with static utility functions that are not Xml version specific
  17. /// Such as encoding changes
  18. /// </summary>
  19. internal class XmlUtil
  20. {
  21. public static char Iso8859_1ToUTF16Char(byte P)
  22. {
  23. return (char) P;
  24. }
  25. public static char Iso8859_2ToUTF16Char(byte P)
  26. {
  27. switch (P)
  28. {
  29. case 0xa1: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
  30. case 0xa2: return (char) 0x02d8; // BREVE
  31. case 0xa3: return (char) 0x0141; // LATIN CAPITAL LETTER L WITH STROKE
  32. case 0xa5: return (char) 0x0132; // LATIN CAPITAL LETTER L WITH CARON
  33. case 0xa6: return (char) 0x015a; // LATIN CAPITAL LETTER S WITH ACUTE
  34. case 0xa9: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
  35. case 0xaa: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA
  36. case 0xab: return (char) 0x0164; // LATIN CAPITAL LETTER T WITH CARON
  37. case 0xac: return (char) 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE
  38. case 0xae: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
  39. case 0xaf: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
  40. case 0xb1: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK
  41. case 0xb2: return (char) 0x02db; // OGONEK
  42. case 0xb3: return (char) 0x0142; // LATIN SMALL LETTER L WITH STROKE
  43. case 0xb5: return (char) 0x013e; // LATIN SMALL LETTER L WITH CARON
  44. case 0xb6: return (char) 0x015b; // LATIN SMALL LETTER S WITH ACUTE
  45. case 0xb7: return (char) 0x02c7; // CARON
  46. case 0xb9: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
  47. case 0xba: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA
  48. case 0xbb: return (char) 0x0165; // LATIN SMALL LETTER T WITH CARON
  49. case 0xbc: return (char) 0x017a; // LATIN SMALL LETTER Z WITH ACUTE
  50. case 0xbd: return (char) 0x02dd; // DOUBLE ACUTE ACCENT
  51. case 0xbe: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
  52. case 0xbf: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT ABOVE
  53. case 0xc0: return (char) 0x0154; // LATIN CAPITAL LETTER R WITH ACUTE
  54. case 0xc3: return (char) 0x0102; // LATIN CAPITAL LETTER A WITH BREVE
  55. case 0xc5: return (char) 0x0139; // LATIN CAPITAL LETTER L WITH ACUTE
  56. case 0xc6: return (char) 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE
  57. case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON
  58. case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
  59. case 0xcc: return (char) 0x011a; // LATIN CAPITAL LETTER E WITH CARON
  60. case 0xcf: return (char) 0x010e; // LATIN CAPITAL LETTER D WITH CARON
  61. case 0xd0: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE
  62. case 0xd1: return (char) 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE
  63. case 0xd2: return (char) 0x0147; // LATIN CAPITAL LETTER N WITH CARON
  64. case 0xd5: return (char) 0x0150; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
  65. case 0xd8: return (char) 0x0158; // LATIN CAPITAL LETTER R WITH CARON
  66. case 0xd9: return (char) 0x016e; // LATIN CAPITAL LETTER U WITH RING ABOVE
  67. case 0xdb: return (char) 0x0170; // LATIN CAPITAL LETTER U WITH WITH DOUBLE ACUTE
  68. case 0xde: return (char) 0x0162; // LATIN CAPITAL LETTER T WITH CEDILLA
  69. case 0xe0: return (char) 0x0155; // LATIN SMALL LETTER R WITH ACUTE
  70. case 0xe3: return (char) 0x0103; // LATIN SMALL LETTER A WITH BREVE
  71. case 0xe5: return (char) 0x013a; // LATIN SMALL LETTER L WITH ACUTE
  72. case 0xe6: return (char) 0x0107; // LATIN SMALL LETTER C WITH ACUTE
  73. case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON
  74. case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK
  75. case 0xec: return (char) 0x011b; // LATIN SMALL LETTER E WITH CARON
  76. case 0xef: return (char) 0x010f; // LATIN SMALL LETTER D WITH CARON
  77. case 0xf0: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE
  78. case 0xf1: return (char) 0x0144; // LATIN SMALL LETTER N WITH ACUTE
  79. case 0xf2: return (char) 0x0148; // LATIN SMALL LETTER N WITH CARON
  80. case 0xf5: return (char) 0x0151; // LATIN SMALL LETTER O WITH DOUBLE ACUTE
  81. case 0xf8: return (char) 0x0159; // LATIN SMALL LETTER R WITH CARON
  82. case 0xf9: return (char) 0x016f; // LATIN SMALL LETTER U WITH RING ABOVE
  83. case 0xfb: return (char) 0x0171; // LATIN SMALL LETTER U WITH WITH DOUBLE ACUTE
  84. case 0xfe: return (char) 0x0163; // LATIN SMALL LETTER T WITH CEDILLA
  85. case 0xff: return (char) 0x02d9; // DOT ABOVE
  86. default:
  87. return (char) P;
  88. }
  89. }
  90. public static char Iso8859_3ToUTF16Char( byte P)
  91. {
  92. switch (P)
  93. {
  94. case 0xa1: return (char) 0x0126; // LATIN CAPITAL LETTER H WITH STROKE
  95. case 0xa2: return (char) 0x02d8; // BREVE
  96. case 0xa5: throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");
  97. case 0xa6: return (char) 0x0124; // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
  98. case 0xa9: return (char) 0x0130; // LATIN CAPITAL LETTER I WITH DOT ABOVE
  99. case 0xaa: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA
  100. case 0xab: return (char) 0x011e; // LATIN CAPITAL LETTER G WITH BREVE
  101. case 0xac: return (char) 0x0134; // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
  102. case 0xae: throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");
  103. case 0xaf: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT
  104. case 0xb1: return (char) 0x0127; // LATIN SMALL LETTER H WITH STROKE
  105. case 0xb6: return (char) 0x0125; // LATIN SMALL LETTER H WITH CIRCUMFLEX
  106. case 0xb9: return (char) 0x0131; // LATIN SMALL LETTER DOTLESS I
  107. case 0xba: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA
  108. case 0xbb: return (char) 0x011f; // LATIN SMALL LETTER G WITH BREVE
  109. case 0xbc: return (char) 0x0135; // LATIN SMALL LETTER J WITH CIRCUMFLEX
  110. case 0xbe: throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");
  111. case 0xbf: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT
  112. case 0xc3: throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");
  113. case 0xc5: return (char) 0x010a; // LATIN CAPITAL LETTER C WITH DOT ABOVE
  114. case 0xc6: return (char) 0x0108; // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
  115. case 0xd0: throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");
  116. case 0xd5: return (char) 0x0120; // LATIN CAPITAL LETTER G WITH DOT ABOVE
  117. case 0xd8: return (char) 0x011c; // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
  118. case 0xdd: return (char) 0x016c; // LATIN CAPITAL LETTER U WITH BREVE
  119. case 0xde: return (char) 0x015c; // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
  120. case 0xe3: throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");
  121. case 0xe5: return (char) 0x010b; // LATIN SMALL LETTER C WITH DOT ABOVE
  122. case 0xe6: return (char) 0x0109; // LATIN SMALL LETTER C WITH CIRCUMFLEX
  123. case 0xf0: throw new InvalidOperationException("Invalid ISO-8859-3 sequence [" + P.ToString() + "]");
  124. case 0xf5: return (char) 0x0121; // LATIN SMALL LETTER G WITH DOT ABOVE
  125. case 0xf8: return (char) 0x011d; // LATIN SMALL LETTER G WITH CIRCUMFLEX
  126. case 0xfd: return (char) 0x016d; // LATIN SMALL LETTER U WITH BREVE
  127. case 0xfe: return (char) 0x015d; // LATIN SMALL LETTER S WITH CIRCUMFLEX
  128. case 0xff: return (char) 0x02d9; // DOT ABOVE
  129. default:
  130. return (char) P;
  131. }
  132. }
  133. public static char Iso8859_4ToUTF16Char( byte P)
  134. {
  135. switch (P)
  136. {
  137. case 0xa1: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
  138. case 0xa2: return (char) 0x0138; // LATIN SMALL LETTER KRA
  139. case 0xa3: return (char) 0x0156; // LATIN CAPITAL LETTER R WITH CEDILLA
  140. case 0xa5: return (char) 0x0128; // LATIN CAPITAL LETTER I WITH TILDE
  141. case 0xa6: return (char) 0x013b; // LATIN CAPITAL LETTER L WITH CEDILLA
  142. case 0xa9: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
  143. case 0xaa: return (char) 0x0112; // LATIN CAPITAL LETTER E WITH MACRON
  144. case 0xab: return (char) 0x0122; // LATIN CAPITAL LETTER G WITH CEDILLA
  145. case 0xac: return (char) 0x0166; // LATIN CAPITAL LETTER T WITH STROKE
  146. case 0xae: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
  147. case 0xb1: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK
  148. case 0xb2: return (char) 0x02db; // OGONEK
  149. case 0xb3: return (char) 0x0157; // LATIN SMALL LETTER R WITH CEDILLA
  150. case 0xb5: return (char) 0x0129; // LATIN SMALL LETTER I WITH TILDE
  151. case 0xb6: return (char) 0x013c; // LATIN SMALL LETTER L WITH CEDILLA
  152. case 0xb7: return (char) 0x02c7; // CARON
  153. case 0xb9: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
  154. case 0xba: return (char) 0x0113; // LATIN SMALL LETTER E WITH MACRON
  155. case 0xbb: return (char) 0x0123; // LATIN SMALL LETTER G WITH CEDILLA
  156. case 0xbc: return (char) 0x0167; // LATIN SMALL LETTER T WITH STROKE
  157. case 0xbd: return (char) 0x014a; // LATIN CAPITAL LETTER ENG
  158. case 0xbe: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
  159. case 0xbf: return (char) 0x014b; // LATIN SMALL LETTER ENG
  160. case 0xc0: return (char) 0x0100; // LATIN CAPITAL LETTER A WITH MACRON
  161. case 0xc7: return (char) 0x012e; // LATIN CAPITAL LETTER I WITH OGONEK
  162. case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON
  163. case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
  164. case 0xcc: return (char) 0x0116; // LATIN CAPITAL LETTER E WITH DOT ABOVE
  165. case 0xcf: return (char) 0x012a; // LATIN CAPITAL LETTER I WITH MACRON
  166. case 0xd0: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE
  167. case 0xd1: return (char) 0x0145; // LATIN CAPITAL LETTER N WITH CEDILLA
  168. case 0xd2: return (char) 0x014c; // LATIN CAPITAL LETTER O WITH MACRON
  169. case 0xd3: return (char) 0x0136; // LATIN CAPITAL LETTER K WITH CEDILLA
  170. case 0xd9: return (char) 0x0172; // LATIN CAPITAL LETTER U WITH OGONEK
  171. case 0xdd: return (char) 0x0168; // LATIN CAPITAL LETTER U WITH TILDE
  172. case 0xde: return (char) 0x016a; // LATIN CAPITAL LETTER U WITH MACRON
  173. case 0xe0: return (char) 0x0101; // LATIN SMALL LETTER A WITH MACRON
  174. case 0xe7: return (char) 0x012f; // LATIN SMALL LETTER I WITH OGONEK
  175. case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON
  176. case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK
  177. case 0xec: return (char) 0x0117; // LATIN SMALL LETTER E WITH DOT ABOVE
  178. case 0xef: return (char) 0x012b; // LATIN SMALL LETTER I WITH MACRON
  179. case 0xf0: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE
  180. case 0xf1: return (char) 0x0146; // LATIN SMALL LETTER N WITH CEDILLA
  181. case 0xf2: return (char) 0x014d; // LATIN SMALL LETTER O WITH MACRON
  182. case 0xf3: return (char) 0x0137; // LATIN SMALL LETTER K WITH CEDILLA
  183. case 0xf9: return (char) 0x0173; // LATIN SMALL LETTER U WITH OGONEK
  184. case 0xfd: return (char) 0x0169; // LATIN SMALL LETTER U WITH TILDE
  185. case 0xfe: return (char) 0x016b; // LATIN SMALL LETTER U WITH MACRON
  186. case 0xff: return (char) 0x02d9; // DOT ABOVE
  187. default:
  188. return (char) P;
  189. }
  190. }
  191. public static char Iso8859_5ToUTF16Char(byte P)
  192. {
  193. if ( (P >= 0x00) & (P <= 0xa0) )
  194. return (char) P;
  195. else if ( P == 0xad )
  196. return (char) P;
  197. else if ( P == 0xf0 )
  198. return (char) 0x2116; // NUMERO SIGN
  199. else if ( P == 0xfd )
  200. return (char) 0x00a7; // SECTION SIGN
  201. else
  202. return System.Convert.ToChar( 0x0360 + P );
  203. }
  204. public static char Iso8859_6ToUTF16Char(byte P)
  205. {
  206. if ( (P >= 0x00) & ( P <= 0xa0) )
  207. return (char) P;
  208. else if ( P == 0xa4)
  209. return (char) P;
  210. else if ( ( P == 0xac ) | (P==0xbb) | (P==0xbf) )
  211. return System.Convert.ToChar(P + 0x0580);
  212. else if ( (P >= 0xc1) & ( P <= 0xda) )
  213. return System.Convert.ToChar(P + 0x0580);
  214. else if ( (P >= 0xe0) & ( P <= 0xf2) )
  215. return System.Convert.ToChar(P + 0x0580);
  216. else
  217. throw new InvalidOperationException("Invalid ISO-8859-6 sequence [" + P.ToString() + "]");
  218. }
  219. public static char Iso8859_7ToUTF16Char(byte P)
  220. {
  221. if ( (P >= 0x00) & ( P <= 0xa0) )
  222. return (char) P;
  223. else if ( (P >= 0xa6) & ( P <= 0xa9) )
  224. return (char) P;
  225. else if ( (P >= 0xab) & ( P <= 0xad) )
  226. return (char) P;
  227. else if ( (P >= 0xb0) & ( P <= 0xb3) )
  228. return (char) P;
  229. else if ( (P == 0xb7) | (P==0xbb) | (P==0xbd) )
  230. return (char) P;
  231. else if ( P ==0xa1 ) // LEFT SINGLE QUOTATION MARK
  232. return (char) 0x2018;
  233. else if ( P==0xa2 ) // RIGHT SINGLE QUOTATION MARK
  234. return (char) 0x2019;
  235. else if ( P==0xaf ) // HORIZONTAL BAR
  236. return (char) 0x2015;
  237. else if ( (P==0xd2) | (P==0xff) )
  238. throw new InvalidOperationException("Invalid ISO-8859-7 sequence [" + P.ToString() + "]");
  239. else
  240. return System.Convert.ToChar(P + 0x02d0);
  241. }
  242. public static char Iso8859_8ToUTF16Char(byte P)
  243. {
  244. if ( (P >= 0x00) & ( P <= 0xa0) )
  245. return (char) P;
  246. else if ( (P >= 0xa2) & ( P <= 0xa9) )
  247. return (char) P;
  248. else if ( (P >= 0xab) & ( P <= 0xae) )
  249. return (char) P;
  250. else if ( (P >= 0xb0) & ( P <= 0xb9) )
  251. return (char) P;
  252. else if ( (P >= 0xbb) & ( P <= 0xbe) )
  253. return (char) P;
  254. else if ( P==0xaa ) // MULTIPLICATION SIGN
  255. return (char) 0x00d7;
  256. else if ( P==0xaf ) // OVERLINE
  257. return (char) 0x203e;
  258. else if ( P==0xba ) // DIVISION SIGN
  259. return (char) 0x00f7;
  260. else if ( P==0xdf ) // DOUBLE LOW LINE
  261. return (char) 0x2017;
  262. else if ( (P >= 0xe0) & ( P <= 0xfa) )
  263. return System.Convert.ToChar(P + 0x04e0);
  264. else
  265. throw new InvalidOperationException("Invalid ISO-8859-8 sequence [" + P.ToString() + "]");
  266. }
  267. public static char Iso8859_9ToUTF16Char(byte P)
  268. {
  269. switch (P)
  270. {
  271. case 0xd0: return (char) 0x011e; // LATIN CAPITAL LETTER G WITH BREVE
  272. case 0xdd: return (char) 0x0130; // LATIN CAPITAL LETTER I WITH DOT ABOVE
  273. case 0xde: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA
  274. case 0xf0: return (char) 0x011f; // LATIN SMALL LETTER G WITH BREVE
  275. case 0xfd: return (char) 0x0131; // LATIN SMALL LETTER I WITH DOT ABOVE
  276. case 0xfe: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA
  277. default:
  278. return (char) P;
  279. }
  280. }
  281. public static char Iso8859_10ToUTF16Char(byte P)
  282. {
  283. switch (P)
  284. {
  285. case 0xa1: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
  286. case 0xa2: return (char) 0x0112; // LATIN CAPITAL LETTER E WITH MACRON
  287. case 0xa3: return (char) 0x0122; // LATIN CAPITAL LETTER G WITH CEDILLA
  288. case 0xa4: return (char) 0x012a; // LATIN CAPITAL LETTER I WITH MACRON
  289. case 0xa5: return (char) 0x0128; // LATIN CAPITAL LETTER I WITH TILDE
  290. case 0xa6: return (char) 0x0136; // LATIN CAPITAL LETTER K WITH CEDILLA
  291. case 0xa8: return (char) 0x013b; // LATIN CAPITAL LETTER L WITH CEDILLA
  292. case 0xa9: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE
  293. case 0xaa: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
  294. case 0xab: return (char) 0x0166; // LATIN CAPITAL LETTER T WITH STROKE
  295. case 0xac: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
  296. case 0xae: return (char) 0x016a; // LATIN CAPITAL LETTER U WITH MACRON
  297. case 0xaf: return (char) 0x014a; // LATIN CAPITAL LETTER ENG
  298. case 0xb1: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK
  299. case 0xb2: return (char) 0x0113; // LATIN SMALL LETTER E WITH MACRON
  300. case 0xb3: return (char) 0x0123; // LATIN SMALL LETTER G WITH CEDILLA
  301. case 0xb4: return (char) 0x012b; // LATIN SMALL LETTER I WITH MACRON
  302. case 0xb5: return (char) 0x0129; // LATIN SMALL LETTER I WITH TILDE
  303. case 0xb6: return (char) 0x0137; // LATIN SMALL LETTER K WITH CEDILLA
  304. case 0xb8: return (char) 0x013c; // LATIN SMALL LETTER L WITH CEDILLA
  305. case 0xb9: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE
  306. case 0xba: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
  307. case 0xbb: return (char) 0x0167; // LATIN SMALL LETTER T WITH STROKE
  308. case 0xbc: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
  309. case 0xbd: return (char) 0x2015; // HORIZONTAL BAR
  310. case 0xbe: return (char) 0x016b; // LATIN SMALL LETTER U WITH MACRON
  311. case 0xbf: return (char) 0x014b; // LATIN SMALL LETTER ENG
  312. case 0xc0: return (char) 0x0100; // LATIN CAPITAL LETTER A WITH MACRON
  313. case 0xc7: return (char) 0x012e; // LATIN CAPITAL LETTER I WITH OGONEK
  314. case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON
  315. case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
  316. case 0xcc: return (char) 0x0116; // LATIN CAPITAL LETTER E WITH DOT ABOVE
  317. case 0xd1: return (char) 0x0145; // LATIN CAPITAL LETTER N WITH CEDILLA
  318. case 0xd2: return (char) 0x014c; // LATIN CAPITAL LETTER O WITH MACRON
  319. case 0xd7: return (char) 0x0168; // LATIN CAPITAL LETTER U WITH TILDE
  320. case 0xd9: return (char) 0x0172; // LATIN CAPITAL LETTER U WITH OGONEK
  321. case 0xe0: return (char) 0x0101; // LATIN SMALL LETTER A WITH MACRON
  322. case 0xe7: return (char) 0x012f; // LATIN SMALL LETTER I WITH OGONEK
  323. case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON
  324. case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK
  325. case 0xec: return (char) 0x0117; // LATIN SMALL LETTER E WITH DOT ABOVE
  326. case 0xf1: return (char) 0x0146; // LATIN SMALL LETTER N WITH CEDILLA
  327. case 0xf2: return (char) 0x014d; // LATIN SMALL LETTER O WITH MACRON
  328. case 0xf7: return (char) 0x0169; // LATIN SMALL LETTER U WITH TILDE
  329. case 0xf9: return (char) 0x0173; // LATIN SMALL LETTER U WITH OGONEK
  330. case 0xff: return (char) 0x0138; // LATIN SMALL LETTER KRA
  331. default:
  332. return (char) P;
  333. }
  334. }
  335. public static char Iso8859_13ToUTF16Char(byte P)
  336. {
  337. switch(P)
  338. {
  339. case 0xa1: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK
  340. case 0xa5: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK
  341. case 0xa8: return (char) 0x00d8; // LATIN CAPITAL LETTER O WITH STROKE
  342. case 0xaa: return (char) 0x0156; // LATIN CAPITAL LETTER R WITH CEDILLA
  343. case 0xaf: return (char) 0x00c6; // LATIN CAPITAL LETTER AE
  344. case 0xb4: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK
  345. case 0xb8: return (char) 0x00f8; // LATIN SMALL LETTER O WITH STROKE
  346. case 0xba: return (char) 0x0157; // LATIN SMALL LETTER R WITH CEDILLA
  347. case 0xbf: return (char) 0x00e6; // LATIN SMALL LETTER AE
  348. case 0xc0: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
  349. case 0xc1: return (char) 0x012e; // LATIN CAPITAL LETTER I WITH OGONEK
  350. case 0xc2: return (char) 0x0100; // LATIN CAPITAL LETTER A WITH MACRON
  351. case 0xc3: return (char) 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE
  352. case 0xc6: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
  353. case 0xc7: return (char) 0x0112; // LATIN CAPITAL LETTER E WITH MACRON
  354. case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON
  355. case 0xca: return (char) 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE
  356. case 0xcb: return (char) 0x0116; // LATIN CAPITAL LETTER E WITH DOT ABOVE
  357. case 0xcc: return (char) 0x0122; // LATIN CAPITAL LETTER G WITH CEDILLA
  358. case 0xcd: return (char) 0x0136; // LATIN CAPITAL LETTER K WITH CEDILLA
  359. case 0xce: return (char) 0x012a; // LATIN CAPITAL LETTER I WITH MACRON
  360. case 0xcf: return (char) 0x013b; // LATIN CAPITAL LETTER L WITH CEDILLA
  361. case 0xd0: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
  362. case 0xd1: return (char) 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE
  363. case 0xd2: return (char) 0x0145; // LATIN CAPITAL LETTER N WITH CEDILLA
  364. case 0xd4: return (char) 0x014c; // LATIN CAPITAL LETTER O WITH MACRON
  365. case 0xd8: return (char) 0x0172; // LATIN CAPITAL LETTER U WITH OGONEK
  366. case 0xd9: return (char) 0x0141; // LATIN CAPITAL LETTER L WITH STROKE
  367. case 0xda: return (char) 0x015a; // LATIN CAPITAL LETTER S WITH ACUTE
  368. case 0xdb: return (char) 0x016a; // LATIN CAPITAL LETTER U WITH MACRON
  369. case 0xdd: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
  370. case 0xde: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
  371. case 0xe0: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK
  372. case 0xe1: return (char) 0x012f; // LATIN SMALL LETTER I WITH OGONEK
  373. case 0xe2: return (char) 0x0101; // LATIN SMALL LETTER A WITH MACRON
  374. case 0xe3: return (char) 0x0107; // LATIN SMALL LETTER C WITH ACUTE
  375. case 0xe6: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK
  376. case 0xe7: return (char) 0x0113; // LATIN SMALL LETTER E WITH MACRON
  377. case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON
  378. case 0xea: return (char) 0x017a; // LATIN SMALL LETTER Z WITH ACUTE
  379. case 0xeb: return (char) 0x0117; // LATIN SMALL LETTER E WITH DOT ABOVE
  380. case 0xec: return (char) 0x0123; // LATIN SMALL LETTER G WITH CEDILLA
  381. case 0xed: return (char) 0x0137; // LATIN SMALL LETTER K WITH CEDILLA
  382. case 0xee: return (char) 0x012b; // LATIN SMALL LETTER I WITH MACRON
  383. case 0xef: return (char) 0x013c; // LATIN SMALL LETTER L WITH CEDILLA
  384. case 0xf0: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
  385. case 0xf1: return (char) 0x0144; // LATIN SMALL LETTER N WITH ACUTE
  386. case 0xf2: return (char) 0x0146; // LATIN SMALL LETTER N WITH CEDILLA
  387. case 0xf4: return (char) 0x014d; // LATIN SMALL LETTER O WITH MACRON
  388. case 0xf8: return (char) 0x0173; // LATIN SMALL LETTER U WITH OGONEK
  389. case 0xf9: return (char) 0x0142; // LATIN SMALL LETTER L WITH STROKE
  390. case 0xfa: return (char) 0x015b; // LATIN SMALL LETTER S WITH ACUTE
  391. case 0xfb: return (char) 0x016b; // LATIN SMALL LETTER U WITH MACRON
  392. case 0xfd: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT ABOVE
  393. case 0xfe: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
  394. case 0xff: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK
  395. default:
  396. return (char) P;
  397. }
  398. }
  399. public static char Iso8859_14ToUTF16Char(byte P)
  400. {
  401. switch (P)
  402. {
  403. case 0xa1: return (char) 0x1e02; // LATIN CAPITAL LETTER B WITH DOT ABOVE
  404. case 0xa2: return (char) 0x1e03; // LATIN SMALL LETTER B WITH DOT ABOVE
  405. case 0xa4: return (char) 0x010a; // LATIN CAPITAL LETTER C WITH DOT ABOVE
  406. case 0xa5: return (char) 0x010b; // LATIN SMALL LETTER C WITH DOT ABOVE
  407. case 0xa6: return (char) 0x1e0a; // LATIN CAPITAL LETTER D WITH DOT ABOVE
  408. case 0xa8: return (char) 0x1e80; // LATIN CAPITAL LETTER W WITH GRAVE
  409. case 0xaa: return (char) 0x1e82; // LATIN CAPITAL LETTER W WITH ACUTE
  410. case 0xab: return (char) 0x1e0b; // LATIN SMALL LETTER D WITH DOT ABOVE
  411. case 0xac: return (char) 0x1ef2; // LATIN CAPITAL LETTER Y WITH GRAVE
  412. case 0xaf: return (char) 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
  413. case 0xb0: return (char) 0x1e1e; // LATIN CAPITAL LETTER F WITH DOT ABOVE
  414. case 0xb1: return (char) 0x1e1f; // LATIN SMALL LETTER F WITH DOT ABOVE
  415. case 0xb2: return (char) 0x0120; // LATIN CAPITAL LETTER G WITH DOT ABOVE
  416. case 0xb3: return (char) 0x0121; // LATIN SMALL LETTER G WITH DOT ABOVE
  417. case 0xb4: return (char) 0x1e40; // LATIN CAPITAL LETTER M WITH DOT ABOVE
  418. case 0xb5: return (char) 0x1e41; // LATIN SMALL LETTER M WITH DOT ABOVE
  419. case 0xb7: return (char) 0x1e56; // LATIN CAPITAL LETTER P WITH DOT ABOVE
  420. case 0xb8: return (char) 0x1e81; // LATIN SMALL LETTER W WITH GRAVE
  421. case 0xb9: return (char) 0x1e57; // LATIN SMALL LETTER P WITH DOT ABOVE
  422. case 0xba: return (char) 0x1e83; // LATIN SMALL LETTER W WITH ACUTE
  423. case 0xbb: return (char) 0x1e60; // LATIN CAPITAL LETTER S WITH DOT ABOVE
  424. case 0xbc: return (char) 0x1ef3; // LATIN SMALL LETTER Y WITH GRAVE
  425. case 0xbd: return (char) 0x1e84; // LATIN CAPITAL LETTER W WITH DIAERESIS
  426. case 0xbe: return (char) 0x1e85; // LATIN SMALL LETTER W WITH DIAERESIS
  427. case 0xbf: return (char) 0x1e61; // LATIN SMALL LETTER S WITH DOT ABOVE
  428. case 0xd0: return (char) 0x0174; // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
  429. case 0xd7: return (char) 0x1e6a; // LATIN CAPITAL LETTER T WITH DOT ABOVE
  430. case 0xde: return (char) 0x0176; // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
  431. case 0xf0: return (char) 0x0175; // LATIN SMALL LETTER W WITH CIRCUMFLEX
  432. case 0xf7: return (char) 0x1e6b; // LATIN SMALL LETTER T WITH DOT ABOVE
  433. case 0xfe: return (char) 0x0177; // LATIN SMALL LETTER Y WITH CIRCUMFLEX
  434. default:
  435. return (char) P;
  436. }
  437. }
  438. public static char Iso8859_15ToUTF16Char(byte P)
  439. {
  440. switch (P)
  441. {
  442. case 0xa4: return (char) 0x20ac; // EURO SIGN
  443. case 0xa6: return (char) 0x00a6; // LATIN CAPITAL LETTER S WITH CARON
  444. case 0xa8: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
  445. case 0xb4: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
  446. case 0xb8: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
  447. case 0xbc: return (char) 0x0152; // LATIN CAPITAL LIGATURE OE
  448. case 0xbd: return (char) 0x0153; // LATIN SMALL LIGATURE OE
  449. case 0xbe: return (char) 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
  450. default:
  451. return (char) P;
  452. }
  453. }
  454. public static char KOI8_RToUTF16Char(byte P)
  455. {
  456. switch (P)
  457. {
  458. case 0x80: return (char) 0x2500; // BOX DRAWINGS LIGHT HORIZONTAL
  459. case 0x81: return (char) 0x2502; // BOX DRAWINGS LIGHT VERTICAL
  460. case 0x82: return (char) 0x250c; // BOX DRAWINGS LIGHT DOWN AND RIGHT
  461. case 0x83: return (char) 0x2510; // BOX DRAWINGS LIGHT DOWN AND LEFT
  462. case 0x84: return (char) 0x2514; // BOX DRAWINGS LIGHT UP AND RIGHT
  463. case 0x85: return (char) 0x2518; // BOX DRAWINGS LIGHT UP AND LEFT
  464. case 0x86: return (char) 0x251c; // BOX DRAWINGS LIGHT VERTICAL AND RIGHT
  465. case 0x87: return (char) 0x2524; // BOX DRAWINGS LIGHT VERTICAL AND LEFT
  466. case 0x88: return (char) 0x252c; // BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
  467. case 0x89: return (char) 0x2534; // BOX DRAWINGS LIGHT UP AND HORIZONTAL
  468. case 0x8a: return (char) 0x253c; // BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
  469. case 0x8b: return (char) 0x2580; // UPPER HALF BLOCK
  470. case 0x8c: return (char) 0x2584; // LOWER HALF BLOCK
  471. case 0x8d: return (char) 0x2588; // FULL BLOCK
  472. case 0x8e: return (char) 0x258c; // LEFT HALF BLOCK
  473. case 0x8f: return (char) 0x2590; // RIGHT HALF BLOCK
  474. case 0x90: return (char) 0x2591; // LIGHT SHADE
  475. case 0x91: return (char) 0x2592; // MEDIUM SHADE
  476. case 0x92: return (char) 0x2593; // DARK SHADE
  477. case 0x93: return (char) 0x2320; // TOP HALF INTEGRAL
  478. case 0x94: return (char) 0x25a0; // BLACK SQUARE
  479. case 0x95: return (char) 0x2219; // BULLET OPERATOR
  480. case 0x96: return (char) 0x221a; // SQUARE ROOT
  481. case 0x97: return (char) 0x2248; // ALMOST EQUAL TO
  482. case 0x98: return (char) 0x2264; // LESS-THAN OR EQUAL TO
  483. case 0x99: return (char) 0x2265; // GREATER-THAN OR EQUAL TO
  484. case 0x9a: return (char) 0x00a0; // NO-BREAK SPACE
  485. case 0x9b: return (char) 0x2321; // BOTTOM HALF INTEGRAL
  486. case 0x9c: return (char) 0x00b0; // DEGREE SIGN
  487. case 0x9d: return (char) 0x00b2; // SUPERSCRIPT TWO
  488. case 0x9e: return (char) 0x00b7; // MIDDLE DOT
  489. case 0x9f: return (char) 0x00f7; // DIVISION SIGN
  490. case 0xa0: return (char) 0x2550; // BOX DRAWINGS DOUBLE HORIZONTAL
  491. case 0xa1: return (char) 0x2551; // BOX DRAWINGS DOUBLE VERTICAL
  492. case 0xa2: return (char) 0x2552; // BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
  493. case 0xa3: return (char) 0x0451; // CYRILLIC SMALL LETTER IO
  494. case 0xa4: return (char) 0x2553; // BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
  495. case 0xa5: return (char) 0x2554; // BOX DRAWINGS DOUBLE DOWN AND RIGHT
  496. case 0xa6: return (char) 0x2555; // BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
  497. case 0xa7: return (char) 0x2556; // BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
  498. case 0xa8: return (char) 0x2557; // BOX DRAWINGS DOUBLE DOWN AND LEFT
  499. case 0xa9: return (char) 0x2558; // BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
  500. case 0xaa: return (char) 0x2559; // BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
  501. case 0xab: return (char) 0x255a; // BOX DRAWINGS DOUBLE UP AND RIGHT
  502. case 0xac: return (char) 0x255b; // BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
  503. case 0xad: return (char) 0x255c; // BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
  504. case 0xae: return (char) 0x255d; // BOX DRAWINGS DOUBLE UP AND LEFT
  505. case 0xaf: return (char) 0x255e; // BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
  506. case 0xb0: return (char) 0x255f; // BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
  507. case 0xb1: return (char) 0x2560; // BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
  508. case 0xb2: return (char) 0x2561; // BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
  509. case 0xb3: return (char) 0x0401; // CYRILLIC CAPITAL LETTER IO
  510. case 0xb4: return (char) 0x2562; // BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
  511. case 0xb5: return (char) 0x2563; // BOX DRAWINGS DOUBLE VERTICAL AND LEFT
  512. case 0xb6: return (char) 0x2564; // BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
  513. case 0xb7: return (char) 0x2565; // BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
  514. case 0xb8: return (char) 0x2566; // BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
  515. case 0xb9: return (char) 0x2567; // BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
  516. case 0xba: return (char) 0x2568; // BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
  517. case 0xbb: return (char) 0x2569; // BOX DRAWINGS DOUBLE UP AND HORIZONTAL
  518. case 0xbc: return (char) 0x256a; // BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
  519. case 0xbd: return (char) 0x256b; // BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
  520. case 0xbe: return (char) 0x256c; // BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
  521. case 0xbf: return (char) 0x00a9; // COPYRIGHT SIGN
  522. case 0xc0: return (char) 0x044e; // CYRILLIC SMALL LETTER YU
  523. case 0xc1: return (char) 0x0430; // CYRILLIC SMALL LETTER A
  524. case 0xc2: return (char) 0x0431; // CYRILLIC SMALL LETTER BE
  525. case 0xc3: return (char) 0x0446; // CYRILLIC SMALL LETTER TSE
  526. case 0xc4: return (char) 0x0434; // CYRILLIC SMALL LETTER DE
  527. case 0xc5: return (char) 0x0435; // CYRILLIC SMALL LETTER IE
  528. case 0xc6: return (char) 0x0444; // CYRILLIC SMALL LETTER EF
  529. case 0xc7: return (char) 0x0433; // CYRILLIC SMALL LETTER GHE
  530. case 0xc8: return (char) 0x0445; // CYRILLIC SMALL LETTER HA
  531. case 0xc9: return (char) 0x0438; // CYRILLIC SMALL LETTER I
  532. case 0xca: return (char) 0x0439; // CYRILLIC SMALL LETTER SHORT I
  533. case 0xcb: return (char) 0x043a; // CYRILLIC SMALL LETTER KA
  534. case 0xcc: return (char) 0x043b; // CYRILLIC SMALL LETTER EL
  535. case 0xcd: return (char) 0x043c; // CYRILLIC SMALL LETTER EM
  536. case 0xce: return (char) 0x043d; // CYRILLIC SMALL LETTER EN
  537. case 0xcf: return (char) 0x043e; // CYRILLIC SMALL LETTER O
  538. case 0xd0: return (char) 0x043f; // CYRILLIC SMALL LETTER PE
  539. case 0xd1: return (char) 0x044f; // CYRILLIC SMALL LETTER YA
  540. case 0xd2: return (char) 0x0440; // CYRILLIC SMALL LETTER ER
  541. case 0xd3: return (char) 0x0441; // CYRILLIC SMALL LETTER ES
  542. case 0xd4: return (char) 0x0442; // CYRILLIC SMALL LETTER TE
  543. case 0xd5: return (char) 0x0443; // CYRILLIC SMALL LETTER U
  544. case 0xd6: return (char) 0x0436; // CYRILLIC SMALL LETTER ZHE
  545. case 0xd7: return (char) 0x0432; // CYRILLIC SMALL LETTER VE
  546. case 0xd8: return (char) 0x044c; // CYRILLIC SMALL LETTER SOFT SIGN
  547. case 0xd9: return (char) 0x044b; // CYRILLIC SMALL LETTER YERU
  548. case 0xda: return (char) 0x0437; // CYRILLIC SMALL LETTER ZE
  549. case 0xdb: return (char) 0x0448; // CYRILLIC SMALL LETTER SHA
  550. case 0xdc: return (char) 0x044d; // CYRILLIC SMALL LETTER E
  551. case 0xdd: return (char) 0x0449; // CYRILLIC SMALL LETTER SHCHA
  552. case 0xde: return (char) 0x0447; // CYRILLIC SMALL LETTER CHE
  553. case 0xdf: return (char) 0x044a; // CYRILLIC SMALL LETTER HARD SIGN
  554. case 0xe0: return (char) 0x042e; // CYRILLIC CAPITAL LETTER YU
  555. case 0xe1: return (char) 0x0410; // CYRILLIC CAPITAL LETTER A
  556. case 0xe2: return (char) 0x0411; // CYRILLIC CAPITAL LETTER BE
  557. case 0xe3: return (char) 0x0426; // CYRILLIC CAPITAL LETTER TSE
  558. case 0xe4: return (char) 0x0414; // CYRILLIC CAPITAL LETTER DE
  559. case 0xe5: return (char) 0x0415; // CYRILLIC CAPITAL LETTER IE
  560. case 0xe6: return (char) 0x0424; // CYRILLIC CAPITAL LETTER EF
  561. case 0xe7: return (char) 0x0413; // CYRILLIC CAPITAL LETTER GHE
  562. case 0xe8: return (char) 0x0425; // CYRILLIC CAPITAL LETTER HA
  563. case 0xe9: return (char) 0x0418; // CYRILLIC CAPITAL LETTER I
  564. case 0xea: return (char) 0x0419; // CYRILLIC CAPITAL LETTER SHORT I
  565. case 0xeb: return (char) 0x041a; // CYRILLIC CAPITAL LETTER KA
  566. case 0xec: return (char) 0x041b; // CYRILLIC CAPITAL LETTER EL
  567. case 0xed: return (char) 0x041c; // CYRILLIC CAPITAL LETTER EM
  568. case 0xee: return (char) 0x041d; // CYRILLIC CAPITAL LETTER EN
  569. case 0xef: return (char) 0x041e; // CYRILLIC CAPITAL LETTER O
  570. case 0xf0: return (char) 0x041f; // CYRILLIC CAPITAL LETTER PE
  571. case 0xf1: return (char) 0x042f; // CYRILLIC CAPITAL LETTER YA
  572. case 0xf2: return (char) 0x0420; // CYRILLIC CAPITAL LETTER ER
  573. case 0xf3: return (char) 0x0421; // CYRILLIC CAPITAL LETTER ES
  574. case 0xf4: return (char) 0x0422; // CYRILLIC CAPITAL LETTER TE
  575. case 0xf5: return (char) 0x0423; // CYRILLIC CAPITAL LETTER U
  576. case 0xf6: return (char) 0x0416; // CYRILLIC CAPITAL LETTER ZHE
  577. case 0xf7: return (char) 0x0412; // CYRILLIC CAPITAL LETTER VE
  578. case 0xf8: return (char) 0x042c; // CYRILLIC CAPITAL LETTER SOFT SIGN
  579. case 0xf9: return (char) 0x042b; // CYRILLIC CAPITAL LETTER YERU
  580. case 0xfa: return (char) 0x0417; // CYRILLIC CAPITAL LETTER ZE
  581. case 0xfb: return (char) 0x0428; // CYRILLIC CAPITAL LETTER SHA
  582. case 0xfc: return (char) 0x042d; // CYRILLIC CAPITAL LETTER E
  583. case 0xfd: return (char) 0x0429; // CYRILLIC CAPITAL LETTER SHCHA
  584. case 0xfe: return (char) 0x0427; // CYRILLIC CAPITAL LETTER CHE
  585. case 0xff: return (char) 0x042a; // CYRILLIC CAPITAL LETTER HARD SIGN
  586. default:
  587. return (char) P;
  588. }
  589. }
  /// <summary>
  /// Converts a single cp10000 (MacRoman) encoded byte to its UTF-16 character.
  /// </summary>
  /// <param name="P">Byte to convert</param>
  /// <returns>The byte itself for values below 0x80, otherwise the character
  /// found in the lookup table for the upper half of the code page.</returns>
  /// <exception cref="InvalidOperationException">Thrown for 0xF0, for which
  /// this converter provides no mapping.</exception>
  public static char cp10000_MacRomanToUTF16Char(byte P)
  {
      // The lower half of the code page needs no translation.
      if (P < 0x80)
          return (char) P;

      // The only byte of the upper half that is rejected.
      if (P == 0xf0)
          throw new InvalidOperationException("Invalid cp10000_MacRoman sequence [" + P.ToString() + "]");

      // One row per 16 code points, indexed by (P - 0x80).
      // The slot for 0xF0 is a placeholder that is never read (handled above).
      const string upperHalf =
          /* 0x80 */ "\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8" +
          /* 0x90 */ "\u00ea\u00eb\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5\u00fa\u00f9\u00fb\u00fc" +
          /* 0xa0 */ "\u2020\u00b0\u00a2\u00a3\u00a7\u2022\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8" +
          /* 0xb0 */ "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0\u222b\u00aa\u00ba\u2126\u00e6\u00f8" +
          /* 0xc0 */ "\u00bf\u00a1\u00ac\u221a\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5\u0152\u0153" +
          /* 0xd0 */ "\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca\u00ff\u0178\u2044\u00a4\u2039\u203a\ufb01\ufb02" +
          /* 0xe0 */ "\u2021\u00b7\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce\u00cf\u00cc\u00d3\u00d4" +
          /* 0xf0 */ "\u0000\u00d2\u00da\u00db\u00d9\u0131\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7";

      return upperHalf[P - 0x80];
  }
  /// <summary>
  /// Converts a single Windows-1250 (Central European) encoded byte to its
  /// UTF-16 character.
  /// </summary>
  /// <param name="P">Byte to convert</param>
  /// <returns>The mapped character; bytes below 0x80 are returned unchanged.</returns>
  /// <exception cref="InvalidOperationException">Thrown for the bytes that are
  /// undefined in Windows-1250 (0x81, 0x83, 0x88, 0x90, 0x98).</exception>
  public static char cp1250ToUTF16Char(byte P)
  {
      // This function was provided by Miloslav Skácel (ported by DrW)
      switch (P)
      {
          // Code points that Windows-1250 leaves undefined.
          case 0x81:
          case 0x83:
          case 0x88:
          case 0x90:
          case 0x98:
              throw new InvalidOperationException("Invalid Windows-1250 sequence [" + P.ToString() + "]");

          // 0x80 used to be rejected, but Windows-1250 assigns it the euro sign,
          // just like the cp1251/cp1252 converters in this class map their euro byte.
          case 0x80: return (char) 0x20ac; // EURO SIGN
          case 0x82: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK
          case 0x84: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK
          case 0x85: return (char) 0x2026; // HORIZONTAL ELLIPSIS
          case 0x86: return (char) 0x2020; // DAGGER
          case 0x87: return (char) 0x2021; // DOUBLE DAGGER
          case 0x89: return (char) 0x2030; // PER MILLE SIGN
          case 0x8a: return (char) 0x0160; // LATIN CAPITAL LETTER S WITH CARON
          case 0x8b: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
          case 0x8c: return (char) 0x015a; // LATIN CAPITAL LETTER S WITH ACUTE
          case 0x8d: return (char) 0x0164; // LATIN CAPITAL LETTER T WITH CARON
          case 0x8e: return (char) 0x017d; // LATIN CAPITAL LETTER Z WITH CARON
          case 0x8f: return (char) 0x0179; // LATIN CAPITAL LETTER Z WITH ACUTE
          case 0x91: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK
          case 0x92: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK
          case 0x93: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK
          case 0x94: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK
          case 0x95: return (char) 0x2022; // BULLET
          case 0x96: return (char) 0x2013; // EN DASH
          case 0x97: return (char) 0x2014; // EM DASH
          case 0x99: return (char) 0x2122; // TRADE MARK SIGN
          case 0x9a: return (char) 0x0161; // LATIN SMALL LETTER S WITH CARON
          case 0x9b: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
          case 0x9c: return (char) 0x015b; // LATIN SMALL LETTER S WITH ACUTE
          case 0x9d: return (char) 0x0165; // LATIN SMALL LETTER T WITH CARON
          case 0x9e: return (char) 0x017e; // LATIN SMALL LETTER Z WITH CARON
          case 0x9f: return (char) 0x017a; // LATIN SMALL LETTER Z WITH ACUTE
          case 0xa0: return (char) 0x00a0; // NO-BREAK SPACE
          case 0xa1: return (char) 0x02c7; // CARON
          case 0xa2: return (char) 0x02d8; // BREVE
          case 0xa3: return (char) 0x0141; // LATIN CAPITAL LETTER L WITH STROKE
          case 0xa4: return (char) 0x00a4; // CURRENCY SIGN
          case 0xa5: return (char) 0x0104; // LATIN CAPITAL LETTER A WITH OGONEK
          case 0xa6: return (char) 0x00a6; // BROKEN BAR
          case 0xa7: return (char) 0x00a7; // SECTION SIGN
          case 0xa8: return (char) 0x00a8; // DIAERESIS
          case 0xa9: return (char) 0x00a9; // COPYRIGHT SIGN
          case 0xaa: return (char) 0x015e; // LATIN CAPITAL LETTER S WITH CEDILLA
          case 0xab: return (char) 0x00ab; // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
          case 0xac: return (char) 0x00ac; // NOT SIGN
          case 0xad: return (char) 0x00ad; // SOFT HYPHEN
          case 0xae: return (char) 0x00ae; // REGISTERED SIGN
          case 0xaf: return (char) 0x017b; // LATIN CAPITAL LETTER Z WITH DOT ABOVE
          case 0xb0: return (char) 0x00b0; // DEGREE SIGN
          case 0xb1: return (char) 0x00b1; // PLUS-MINUS SIGN
          case 0xb2: return (char) 0x02db; // OGONEK
          case 0xb3: return (char) 0x0142; // LATIN SMALL LETTER L WITH STROKE
          case 0xb4: return (char) 0x00b4; // ACUTE ACCENT
          case 0xb5: return (char) 0x00b5; // MICRO SIGN
          case 0xb6: return (char) 0x00b6; // PILCROW SIGN
          case 0xb7: return (char) 0x00b7; // MIDDLE DOT
          case 0xb8: return (char) 0x00b8; // CEDILLA
          case 0xb9: return (char) 0x0105; // LATIN SMALL LETTER A WITH OGONEK
          case 0xba: return (char) 0x015f; // LATIN SMALL LETTER S WITH CEDILLA
          case 0xbb: return (char) 0x00bb; // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
          case 0xbc: return (char) 0x013d; // LATIN CAPITAL LETTER L WITH CARON
          case 0xbd: return (char) 0x02dd; // DOUBLE ACUTE ACCENT
          case 0xbe: return (char) 0x013e; // LATIN SMALL LETTER L WITH CARON
          case 0xbf: return (char) 0x017c; // LATIN SMALL LETTER Z WITH DOT ABOVE
          case 0xc0: return (char) 0x0154; // LATIN CAPITAL LETTER R WITH ACUTE
          case 0xc1: return (char) 0x00c1; // LATIN CAPITAL LETTER A WITH ACUTE
          case 0xc2: return (char) 0x00c2; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
          case 0xc3: return (char) 0x0102; // LATIN CAPITAL LETTER A WITH BREVE
          case 0xc4: return (char) 0x00c4; // LATIN CAPITAL LETTER A WITH DIAERESIS
          case 0xc5: return (char) 0x0139; // LATIN CAPITAL LETTER L WITH ACUTE
          case 0xc6: return (char) 0x0106; // LATIN CAPITAL LETTER C WITH ACUTE
          case 0xc7: return (char) 0x00c7; // LATIN CAPITAL LETTER C WITH CEDILLA
          case 0xc8: return (char) 0x010c; // LATIN CAPITAL LETTER C WITH CARON
          case 0xc9: return (char) 0x00c9; // LATIN CAPITAL LETTER E WITH ACUTE
          case 0xca: return (char) 0x0118; // LATIN CAPITAL LETTER E WITH OGONEK
          case 0xcb: return (char) 0x00cb; // LATIN CAPITAL LETTER E WITH DIAERESIS
          case 0xcc: return (char) 0x011a; // LATIN CAPITAL LETTER E WITH CARON
          case 0xcd: return (char) 0x00cd; // LATIN CAPITAL LETTER I WITH ACUTE
          case 0xce: return (char) 0x00ce; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
          case 0xcf: return (char) 0x010e; // LATIN CAPITAL LETTER D WITH CARON
          case 0xd0: return (char) 0x0110; // LATIN CAPITAL LETTER D WITH STROKE
          case 0xd1: return (char) 0x0143; // LATIN CAPITAL LETTER N WITH ACUTE
          case 0xd2: return (char) 0x0147; // LATIN CAPITAL LETTER N WITH CARON
          case 0xd3: return (char) 0x00d3; // LATIN CAPITAL LETTER O WITH ACUTE
          case 0xd4: return (char) 0x00d4; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
          case 0xd5: return (char) 0x0150; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
          case 0xd6: return (char) 0x00d6; // LATIN CAPITAL LETTER O WITH DIAERESIS
          case 0xd7: return (char) 0x00d7; // MULTIPLICATION SIGN
          case 0xd8: return (char) 0x0158; // LATIN CAPITAL LETTER R WITH CARON
          case 0xd9: return (char) 0x016e; // LATIN CAPITAL LETTER U WITH RING ABOVE
          case 0xda: return (char) 0x00da; // LATIN CAPITAL LETTER U WITH ACUTE
          case 0xdb: return (char) 0x0170; // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
          case 0xdc: return (char) 0x00dc; // LATIN CAPITAL LETTER U WITH DIAERESIS
          case 0xdd: return (char) 0x00dd; // LATIN CAPITAL LETTER Y WITH ACUTE
          case 0xde: return (char) 0x0162; // LATIN CAPITAL LETTER T WITH CEDILLA
          case 0xdf: return (char) 0x00df; // LATIN SMALL LETTER SHARP S
          case 0xe0: return (char) 0x0155; // LATIN SMALL LETTER R WITH ACUTE
          case 0xe1: return (char) 0x00e1; // LATIN SMALL LETTER A WITH ACUTE
          case 0xe2: return (char) 0x00e2; // LATIN SMALL LETTER A WITH CIRCUMFLEX
          case 0xe3: return (char) 0x0103; // LATIN SMALL LETTER A WITH BREVE
          case 0xe4: return (char) 0x00e4; // LATIN SMALL LETTER A WITH DIAERESIS
          case 0xe5: return (char) 0x013a; // LATIN SMALL LETTER L WITH ACUTE
          case 0xe6: return (char) 0x0107; // LATIN SMALL LETTER C WITH ACUTE
          case 0xe7: return (char) 0x00e7; // LATIN SMALL LETTER C WITH CEDILLA
          case 0xe8: return (char) 0x010d; // LATIN SMALL LETTER C WITH CARON
          case 0xe9: return (char) 0x00e9; // LATIN SMALL LETTER E WITH ACUTE
          case 0xea: return (char) 0x0119; // LATIN SMALL LETTER E WITH OGONEK
          case 0xeb: return (char) 0x00eb; // LATIN SMALL LETTER E WITH DIAERESIS
          case 0xec: return (char) 0x011b; // LATIN SMALL LETTER E WITH CARON
          case 0xed: return (char) 0x00ed; // LATIN SMALL LETTER I WITH ACUTE
          case 0xee: return (char) 0x00ee; // LATIN SMALL LETTER I WITH CIRCUMFLEX
          case 0xef: return (char) 0x010f; // LATIN SMALL LETTER D WITH CARON
          case 0xf0: return (char) 0x0111; // LATIN SMALL LETTER D WITH STROKE
          case 0xf1: return (char) 0x0144; // LATIN SMALL LETTER N WITH ACUTE
          case 0xf2: return (char) 0x0148; // LATIN SMALL LETTER N WITH CARON
          case 0xf3: return (char) 0x00f3; // LATIN SMALL LETTER O WITH ACUTE
          case 0xf4: return (char) 0x00f4; // LATIN SMALL LETTER O WITH CIRCUMFLEX
          case 0xf5: return (char) 0x0151; // LATIN SMALL LETTER O WITH DOUBLE ACUTE
          case 0xf6: return (char) 0x00f6; // LATIN SMALL LETTER O WITH DIAERESIS
          case 0xf7: return (char) 0x00f7; // DIVISION SIGN
          case 0xf8: return (char) 0x0159; // LATIN SMALL LETTER R WITH CARON
          case 0xf9: return (char) 0x016f; // LATIN SMALL LETTER U WITH RING ABOVE
          case 0xfa: return (char) 0x00fa; // LATIN SMALL LETTER U WITH ACUTE
          case 0xfb: return (char) 0x0171; // LATIN SMALL LETTER U WITH DOUBLE ACUTE
          case 0xfc: return (char) 0x00fc; // LATIN SMALL LETTER U WITH DIAERESIS
          case 0xfd: return (char) 0x00fd; // LATIN SMALL LETTER Y WITH ACUTE
          case 0xfe: return (char) 0x0163; // LATIN SMALL LETTER T WITH CEDILLA
          case 0xff: return (char) 0x02d9; // DOT ABOVE
          default:
              // Everything below 0x80 maps to itself.
              return (char) P;
      }
  }
  /// <summary>
  /// Converts a single Windows-1251 (Cyrillic) encoded byte to its UTF-16
  /// character.
  /// </summary>
  /// <param name="P">Byte to convert</param>
  /// <returns>The mapped character. Bytes 0xC0..0xFF map to U+0410..U+044F;
  /// bytes without an explicit entry are returned unchanged.</returns>
  /// <exception cref="InvalidOperationException">Thrown for 0x98, which has
  /// no mapping here.</exception>
  public static char cp1251ToUTF16Char(byte P)
  {
      switch (P)
      {
          case 0x80: return (char) 0x0402; // CYRILLIC CAPITAL LETTER DJE
          case 0x81: return (char) 0x0403; // CYRILLIC CAPITAL LETTER GJE
          case 0x82: return (char) 0x201a; // SINGLE LOW-9 QUOTATION MARK
          case 0x83: return (char) 0x0453; // CYRILLIC SMALL LETTER GJE
          case 0x84: return (char) 0x201e; // DOUBLE LOW-9 QUOTATION MARK
          case 0x85: return (char) 0x2026; // HORIZONTAL ELLIPSIS
          case 0x86: return (char) 0x2020; // DAGGER
          case 0x87: return (char) 0x2021; // DOUBLE DAGGER
          case 0x88: return (char) 0x20ac; // EURO SIGN
          case 0x89: return (char) 0x2030; // PER MILLE SIGN
          case 0x8a: return (char) 0x0409; // CYRILLIC CAPITAL LETTER LJE
          case 0x8b: return (char) 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
          case 0x8c: return (char) 0x040a; // CYRILLIC CAPITAL LETTER NJE
          case 0x8d: return (char) 0x040c; // CYRILLIC CAPITAL LETTER KJE
          case 0x8e: return (char) 0x040b; // CYRILLIC CAPITAL LETTER TSHE
          case 0x8f: return (char) 0x040f; // CYRILLIC CAPITAL LETTER DZHE
          case 0x90: return (char) 0x0452; // CYRILLIC SMALL LETTER DJE
          case 0x91: return (char) 0x2018; // LEFT SINGLE QUOTATION MARK
          case 0x92: return (char) 0x2019; // RIGHT SINGLE QUOTATION MARK
          case 0x93: return (char) 0x201c; // LEFT DOUBLE QUOTATION MARK
          case 0x94: return (char) 0x201d; // RIGHT DOUBLE QUOTATION MARK
          case 0x95: return (char) 0x2022; // BULLET
          case 0x96: return (char) 0x2013; // EN DASH
          case 0x97: return (char) 0x2014; // EM DASH
          case 0x98: throw new InvalidOperationException("Invalid cp1251 sequence [" + P.ToString() + "]");
          case 0x99: return (char) 0x2122; // TRADE MARK SIGN
          case 0x9a: return (char) 0x0459; // CYRILLIC SMALL LETTER LJE
          case 0x9b: return (char) 0x203a; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
          case 0x9c: return (char) 0x045a; // CYRILLIC SMALL LETTER NJE
          case 0x9d: return (char) 0x045c; // CYRILLIC SMALL LETTER KJE
          case 0x9e: return (char) 0x045b; // CYRILLIC SMALL LETTER TSHE
          case 0x9f: return (char) 0x045f; // CYRILLIC SMALL LETTER DZHE
          case 0xa0: return (char) 0x00a0; // NO-BREAK SPACE
          case 0xa1: return (char) 0x040e; // CYRILLIC CAPITAL LETTER SHORT U
          case 0xa2: return (char) 0x045e; // CYRILLIC SMALL LETTER SHORT U
          case 0xa3: return (char) 0x0408; // CYRILLIC CAPITAL LETTER JE
          case 0xa4: return (char) 0x00a4; // CURRENCY SIGN
          case 0xa5: return (char) 0x0490; // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
          case 0xa8: return (char) 0x0401; // CYRILLIC CAPITAL LETTER IO
          case 0xaa: return (char) 0x0404; // CYRILLIC CAPITAL LETTER UKRAINIAN IE
          case 0xaf: return (char) 0x0407; // CYRILLIC CAPITAL LETTER YI
          case 0xb2: return (char) 0x0406; // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
          case 0xb3: return (char) 0x0456; // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
          case 0xb4: return (char) 0x0491; // CYRILLIC SMALL LETTER GHE WITH UPTURN
          case 0xb8: return (char) 0x0451; // CYRILLIC SMALL LETTER IO
          case 0xb9: return (char) 0x2116; // NUMERO SIGN
          case 0xba: return (char) 0x0454; // CYRILLIC SMALL LETTER UKRAINIAN IE
          case 0xbc: return (char) 0x0458; // CYRILLIC SMALL LETTER JE
          case 0xbd: return (char) 0x0405; // CYRILLIC CAPITAL LETTER DZE
          case 0xbe: return (char) 0x0455; // CYRILLIC SMALL LETTER DZE
          case 0xbf: return (char) 0x0457; // CYRILLIC SMALL LETTER YI
      }

      // 0xC0..0xFF hold the basic Cyrillic alphabet, a fixed 0x0350 away from
      // U+0410..U+044F. A byte can never exceed 0xFF, so only the lower bound
      // is tested. (The former "(P >= 0xc0) | (P <= 0xff)" was always true and
      // shifted every unlisted byte, ASCII included.)
      if (P >= 0xc0)
          return (char) (P + 0x0350);

      // ASCII and the remaining 0xA0..0xBF symbols map to themselves.
      return (char) P;
  }
  /// <summary>
  /// Converts a single Windows-1252 encoded byte to its UTF-16 character.
  /// </summary>
  /// <remarks>
  /// Provided by Olaf Lösken (ported by DrW). Mapping taken from
  /// ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
  /// </remarks>
  /// <param name="P">Byte to convert</param>
  /// <returns>The mapped character; bytes outside 0x80..0x9F are returned
  /// unchanged.</returns>
  /// <exception cref="InvalidOperationException">Thrown for the bytes without
  /// a mapping (0x81, 0x8D, 0x8F, 0x90, 0x9D).</exception>
  public static char cp1252ToUTF16Char(byte P)
  {
      // Only 0x80..0x9F are translated; everything else maps to itself.
      if (P < 0x80 || P > 0x9F)
          return (char) P;

      // Indexed by (P - 0x80); U+0000 marks a byte without a mapping.
      const string remapped =
          /* 0x80 */ "\u20AC\u0000\u201A\u0192\u201E\u2026\u2020\u2021" +
          /* 0x88 */ "\u02C6\u2030\u0160\u2039\u0152\u0000\u017D\u0000" +
          /* 0x90 */ "\u0000\u2018\u2019\u201C\u201D\u2022\u2013\u2014" +
          /* 0x98 */ "\u02DC\u2122\u0161\u203A\u0153\u0000\u017E\u0178";

      char mapped = remapped[P - 0x80];
      if (mapped == '\u0000')
          throw new InvalidOperationException("Invalid Windows-1252 sequence [" + P.ToString() + "]");
      return mapped;
  }
  /// <summary>
  /// Reads one UTF-8 encoded character (1 to 6 octets, as described by
  /// RFC 2279) from the current position of a stream and returns its code point.
  /// </summary>
  /// <remarks>
  /// See http://www.ietf.org/rfc/rfc2279.txt. Works on seekable and
  /// non-seekable streams alike; the stream position is only consulted when
  /// the stream reports that it can seek.
  /// </remarks>
  /// <exception cref="InvalidOperationException">Thrown if 1) called at EOF,
  /// 2) the stream ends inside a multi-octet sequence, or 3) an invalid UTF-8
  /// encoding is found (bad lead or continuation octet, more than 6 octets,
  /// or a value encoded with more octets than necessary).</exception>
  /// <param name="stream">Stream to read from</param>
  /// <returns>The decoded code point. Values above 0xFFFF do not fit into a
  /// single UTF-16 char and need a high/low surrogate pair.</returns>
  public static int ReadUTF8Char(Stream stream)
  {
      byte[] buf = new byte[1];
      if (stream.Read(buf, 0, 1) != 1)
          throw new InvalidOperationException("Unexptected EOF reading stream");

      // 1-byte value, return it
      if (buf[0] < 0x80)
          return buf[0];

      // UTF-8 multi-octet sequence
      int numOctets = 1;
      byte first = buf[0];
      int mask = 0x40;
      int ucs4 = first;

      // first octet must be 110x xxxx to 1111 110x if high order bit set
      if ((first & 0xc0) != 0xc0)
          throw new InvalidOperationException("Invalid UTF-8 sequence at position " + Utf8PositionText(stream));

      // Every additional leading 1-bit of the first octet announces one more
      // continuation octet, so walk the header bits from bit 6 downwards.
      while ((mask & first) != 0)
      {
          // Length/Position throw NotSupportedException on non-seekable
          // streams; the Read check below catches EOF for those.
          if (stream.CanSeek && stream.Length == stream.Position)
              throw new InvalidOperationException("Aborted UTF-8 (unexpected EOF) sequence at position " + Utf8PositionText(stream));
          if (stream.Read(buf, 0, 1) != 1)
              throw new InvalidOperationException("Aborted UTF-8 sequence (missing characters) at position " + Utf8PositionText(stream));

          // all continuation octets start with 10nn nnnn, or they are invalid
          if ((buf[0] & 0xc0) != 0x80)
              throw new InvalidOperationException("Invalid UTF-8 sequence at position " + Utf8PositionText(stream));

          // the low order 6 bits carry payload; append them to the result
          ucs4 = (ucs4 << 6) | (buf[0] & 0x3F);
          numOctets++;
          mask = mask >> 1;
      }

      // Max 6 octets in sequence
      if (numOctets > 6)
          throw new InvalidOperationException("Invalid UTF-8 sequence (no 0-bit in hdr) at position " + Utf8PositionText(stream));

      // UTF-8 can encode up to the following values, per octet size
      int[] MaxCode = {0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF, 0x7FFFFFFF};

      // mask off the header bits of the first octet (array is zero-based)
      ucs4 = ucs4 & MaxCode[numOctets - 1];

      // check for invalid sequence as suggested by RFC2279: the value must
      // not fit into a shorter sequence (e.g. 0x7F encoded with 2 octets).
      // numOctets is always at least 2 here.
      if (ucs4 <= MaxCode[numOctets - 2])
          throw new InvalidOperationException("Invalid UTF-8 sequence (invalid sequence) at position " + Utf8PositionText(stream));

      return ucs4;
  }

  // Position text for error messages; non-seekable streams cannot report one.
  private static string Utf8PositionText(Stream stream)
  {
      return stream.CanSeek ? stream.Position.ToString() : "unknown";
  }
  /// <summary>
  /// Builds the low (trailing) UTF-16 surrogate for a code point: the low
  /// order 10 bits of the value placed on the 0xDC00 base.
  /// </summary>
  /// <param name="val">Code point to split</param>
  /// <returns>Low surrogate character</returns>
  public static char Utf16LowSurrogate(int val)
  {
      // 0xDC00 has no bits set in the low 10 positions, so OR-ing the payload
      // in yields the same value as XOR-ing it.
      return (char) (0xDC00 | (val & 0x03FF));
  }
  /// <summary>
  /// Builds the high (leading) UTF-16 surrogate for a code point: the value
  /// shifted right by 10 bits, added to 0xD7C0.
  /// </summary>
  /// <param name="val">Code point to split</param>
  /// <returns>High surrogate character</returns>
  public static char Utf16HighSurrogate(int val)
  {
      // 0xD7C0 is 0xD800 - (0x10000 >> 10), so the 0x10000 offset of the
      // supplementary planes is already folded into the base.
      int topBits = val >> 10;
      return (char) (0xD7C0 + topBits);
  }
  1029. }
  1030. }