UnicodeEncoding.cs 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining
  8. * a copy of this software and associated documentation files (the "Software"),
  9. * to deal in the Software without restriction, including without limitation
  10. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11. * and/or sell copies of the Software, and to permit persons to whom the
  12. * Software is furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included
  15. * in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  21. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  22. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  23. * OTHER DEALINGS IN THE SOFTWARE.
  24. */
  25. namespace System.Text
  26. {
  27. using System;
  28. [Serializable]
  29. public class UnicodeEncoding : Encoding
  30. {
  31. // Magic numbers used by Windows for Unicode.
  32. internal const int UNICODE_CODE_PAGE = 1200;
  33. internal const int BIG_UNICODE_CODE_PAGE = 1201;
  34. #if !ECMA_COMPAT
  35. // Size of characters in this encoding.
  36. public const int CharSize = 2;
  37. #endif
  38. // Internal state.
  39. private bool bigEndian;
  40. private bool byteOrderMark;
  41. // Constructors.
  // Create the default encoding: little-endian byte order, with a
  // byte order mark returned from "GetPreamble".
  public UnicodeEncoding () : this (false, true)
  {
      // Nothing further to do: the two-argument constructor picks
      // UNICODE_CODE_PAGE when "bigEndian" is false and stores
      // both flags.
  }
  // Create an encoding with an explicit byte order and preamble
  // setting.  The code page handed to the base class is
  // BIG_UNICODE_CODE_PAGE for big-endian and UNICODE_CODE_PAGE
  // otherwise.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
      this.byteOrderMark = byteOrderMark;
      this.bigEndian = bigEndian;
  }
  53. // Get the number of bytes needed to encode a character buffer.
  // Report the number of bytes produced by encoding "count"
  // characters of "chars" starting at "index": two per character.
  // Throws ArgumentNullException if "chars" is null, and
  // ArgumentOutOfRangeException if "index" or "count" do not
  // describe a range inside the array.
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (null == chars) {
          throw new ArgumentNullException ("chars");
      }
      int total = chars.Length;
      if (index > total || index < 0) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      int available = total - index;
      if (count > available || count < 0) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }
      return 2 * count;
  }
  67. // Convenience wrappers for "GetByteCount".
  // Report the number of bytes produced by encoding the whole of
  // "s": two per character.  Throws ArgumentNullException if "s"
  // is null.
  public override int GetByteCount (String s)
  {
      if (s != null) {
          return 2 * s.Length;
      }
      throw new ArgumentNullException ("s");
  }
  75. // Get the bytes that result from encoding a character buffer.
  // Encode "charCount" characters of "chars", starting at
  // "charIndex", into "bytes" starting at "byteIndex".  Each
  // character becomes two bytes, ordered according to this
  // encoding's endianness.  Returns the number of bytes written.
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for an index or count outside its
  // array, and ArgumentException if "bytes" is too small.
  public override int GetBytes (char[] chars, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (null == chars) {
          throw new ArgumentNullException ("chars");
      }
      if (null == bytes) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex > chars.Length || charIndex < 0) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }
      if (charCount > (chars.Length - charIndex) || charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      }
      if (byteIndex > bytes.Length || byteIndex < 0) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if ((charCount * 2) > (bytes.Length - byteIndex)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      bool highByteFirst = bigEndian;
      int stop = charIndex + charCount;
      int written = byteIndex;
      for (int i = charIndex; i < stop; ++i) {
          char current = chars[i];
          if (highByteFirst) {
              bytes[written] = (byte)(current >> 8);
              bytes[written + 1] = (byte)current;
          } else {
              bytes[written] = (byte)current;
              bytes[written + 1] = (byte)(current >> 8);
          }
          written += 2;
      }
      return written - byteIndex;
  }
  114. // Convenience wrappers for "GetBytes".
  // Encode "charCount" characters of the string "s", starting at
  // "charIndex", into "bytes" starting at "byteIndex".  Each
  // character becomes two bytes, ordered according to this
  // encoding's endianness.  Returns the number of bytes written.
  // Throws ArgumentNullException for a null argument,
  // ArgumentOutOfRangeException for an index or count outside the
  // string or array, and ArgumentException if "bytes" is too small.
  public override int GetBytes (String s, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (null == s) {
          throw new ArgumentNullException ("s");
      }
      if (null == bytes) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex > s.Length || charIndex < 0) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      }
      if (charCount > (s.Length - charIndex) || charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      }
      if (byteIndex > bytes.Length || byteIndex < 0) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if ((charCount * 2) > (bytes.Length - byteIndex)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      bool highByteFirst = bigEndian;
      int stop = charIndex + charCount;
      int written = byteIndex;
      for (int i = charIndex; i < stop; ++i) {
          char current = s[i];
          if (highByteFirst) {
              bytes[written] = (byte)(current >> 8);
              bytes[written + 1] = (byte)current;
          } else {
              bytes[written] = (byte)current;
              bytes[written + 1] = (byte)(current >> 8);
          }
          written += 2;
      }
      return written - byteIndex;
  }
  153. // Get the number of characters needed to decode a byte buffer.
  // Report how many characters "GetChars" produces for "count"
  // bytes of "bytes" starting at "index".  Every two bytes yield
  // one character; a trailing odd byte yields nothing.  If the
  // range begins with a byte order mark (FE FF or FF FE), those
  // two bytes are not counted because they produce no character.
  // Throws ArgumentNullException if "bytes" is null, and
  // ArgumentOutOfRangeException if "index" or "count" do not
  // describe a range inside the array.
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (index < 0 || index > bytes.Length) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      if (count < 0 || count > (bytes.Length - index)) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }
      if (count >= 2) {
          // Look for the mark at the start of the requested range,
          // not at the start of the array.
          byte first = bytes[index];
          byte second = bytes[index + 1];
          if ((first == 0xFE && second == 0xFF) ||
              (first == 0xFF && second == 0xFE)) {
              // Both mark bytes are dropped before pairing up the
              // remainder into characters.
              return (count - 2) / 2;
          }
      }
      return count / 2;
  }
  173. // Get the characters that result from decoding a byte buffer.
  // Decode "byteCount" bytes of "bytes", starting at "byteIndex",
  // into "chars" starting at "charIndex".  If the range begins
  // with a byte order mark (FE FF for big-endian, FF FE for
  // little-endian) the mark is skipped and selects the byte order
  // for this call; otherwise this encoding's own byte order is
  // used.  A trailing odd byte is ignored.  Returns the number of
  // characters written.
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for an index or count outside its
  // array, and ArgumentException if "chars" is too small.
  public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                char[] chars, int charIndex)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (byteIndex < 0 || byteIndex > bytes.Length) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      }
      if (charIndex < 0 || charIndex > chars.Length) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }

      // Determine the byte order in the incoming buffer.  The mark
      // must be read from the start of the requested range, which
      // is "byteIndex", not from the start of the array.
      bool isBigEndian = bigEndian;
      if (byteCount >= 2) {
          byte first = bytes[byteIndex];
          byte second = bytes[byteIndex + 1];
          if (first == 0xFE && second == 0xFF) {
              isBigEndian = true;
              byteIndex += 2;
              byteCount -= 2;
          } else if (first == 0xFF && second == 0xFE) {
              isBigEndian = false;
              byteIndex += 2;
              byteCount -= 2;
          }
      }

      // Validate that we have sufficient space in "chars".
      if ((chars.Length - charIndex) < (byteCount / 2)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      // Convert the characters, two bytes at a time.
      int posn = charIndex;
      if (isBigEndian) {
          while (byteCount >= 2) {
              chars[posn++] = (char)((bytes[byteIndex] << 8) |
                                     bytes[byteIndex + 1]);
              byteIndex += 2;
              byteCount -= 2;
          }
      } else {
          while (byteCount >= 2) {
              chars[posn++] = (char)((bytes[byteIndex + 1] << 8) |
                                     bytes[byteIndex]);
              byteIndex += 2;
              byteCount -= 2;
          }
      }
      return posn - charIndex;
  }
  234. // Get the maximum number of bytes needed to encode a
  235. // specified number of characters.
  // Upper bound on the bytes needed to encode "charCount"
  // characters: two per character.  Throws
  // ArgumentOutOfRangeException if "charCount" is negative.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount >= 0) {
          return 2 * charCount;
      }
      throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
  }
  243. // Get the maximum number of characters needed to decode a
  244. // specified number of bytes.
  // Upper bound on the characters produced by decoding
  // "byteCount" bytes: one per complete pair of bytes.  Throws
  // ArgumentOutOfRangeException if "byteCount" is negative.
  public override int GetMaxCharCount (int byteCount)
  {
      if (byteCount >= 0) {
          return byteCount / 2;
      }
      throw new ArgumentOutOfRangeException
          ("byteCount", _("ArgRange_NonNegative"));
  }
  253. // Get a Unicode-specific decoder that is attached to this instance.
  // Create a new decoder that starts out with this encoding's
  // byte order.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  258. // Get the Unicode preamble.
  // Return the byte order mark for this encoding: FE FF for
  // big-endian, FF FE for little-endian, or an empty array when
  // the encoding was created without a byte order mark.  A fresh
  // array is returned on every call.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark) {
          return new byte [0];
      }
      byte[] mark = new byte [2];
      mark[0] = (byte)(bigEndian ? 0xFE : 0xFF);
      mark[1] = (byte)(bigEndian ? 0xFF : 0xFE);
      return mark;
  }
  275. // Determine if this object is equal to another.
  // Two Unicode encodings are equal when they share the same code
  // page, byte order and byte order mark setting.  Anything that
  // is not a UnicodeEncoding (including null) is never equal.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = (value as UnicodeEncoding);
      if (other == null) {
          return false;
      }
      if (codePage != other.codePage) {
          return false;
      }
      if (bigEndian != other.bigEndian) {
          return false;
      }
      return (byteOrderMark == other.byteOrderMark);
  }
  287. // Get the hash code for this object.
  // Overridden alongside "Equals"; the value itself comes
  // unchanged from the base class implementation.
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  292. #if !ECMA_COMPAT
  293. // Get the mail body name for this encoding.
  // Name of this encoding for use in mail bodies:
  // "unicodeFFFE" for big-endian, "utf-16" for little-endian.
  public override String BodyName
  {
      get {
          return (bigEndian ? "unicodeFFFE" : "utf-16");
      }
  }
  304. // Get the human-readable name for this encoding.
  // Human-readable name of this encoding:
  // "Unicode (Big-Endian)" or plain "Unicode" for little-endian.
  public override String EncodingName
  {
      get {
          return (bigEndian ? "Unicode (Big-Endian)" : "Unicode");
      }
  }
  315. // Get the mail agent header name for this encoding.
  // Name of this encoding for use in mail agent headers:
  // "unicodeFFFE" for big-endian, "utf-16" for little-endian.
  public override String HeaderName
  {
      get {
          return (bigEndian ? "unicodeFFFE" : "utf-16");
      }
  }
  326. // Determine if this encoding can be saved from a Web browser.
  // Whether this encoding can be saved from a Web browser: true
  // only for the little-endian form.
  public override bool IsBrowserSave
  {
      get {
          if (bigEndian) {
              return false;
          }
          return true;
      }
  }
  333. // Get the IANA-preferred Web name for this encoding.
  // Web name reported for this encoding:
  // "unicodeFFFE" for big-endian, "utf-16" for little-endian.
  public override String WebName
  {
      get {
          return (bigEndian ? "unicodeFFFE" : "utf-16");
      }
  }
  344. // Get the Windows code page represented by this object.
  // Windows code page for this encoding.  The byte order is
  // deliberately ignored: the same number is reported for both
  // the little-endian and big-endian forms.
  public override int WindowsCodePage
  {
      get {
          const int reported = UNICODE_CODE_PAGE;
          return reported;
      }
  }
  353. #endif // !ECMA_COMPAT
  354. // Unicode decoder implementation.
  // Stateful decoder for two-byte Unicode data.  Between calls it
  // remembers the byte order currently in effect and any single
  // byte left over from an odd-length buffer, so a character may
  // be split across two calls to "GetChars".
  private sealed class UnicodeDecoder : Decoder
  {
      // Byte order assumed for the next call to "GetChars".
      private bool bigEndian;

      // First byte of a character whose second byte has not been
      // seen yet, or -1 if there is no such byte.
      private int leftOverByte;

      // Constructor.
      public UnicodeDecoder (bool bigEndian)
      {
          this.bigEndian = bigEndian;
          leftOverByte = -1;
      }

      // Report how many characters "count" bytes can decode to,
      // counting a pending left-over byte as one extra input byte.
      // Byte order marks are not subtracted, so the result is an
      // upper bound on what "GetChars" writes.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (index < 0 || index > bytes.Length) {
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          }
          if (count < 0 || count > (bytes.Length - index)) {
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
          }
          if (leftOverByte != -1) {
              return (count + 1) / 2;
          } else {
              return count / 2;
          }
      }

      // Decode "byteCount" bytes of "bytes", starting at
      // "byteIndex", into "chars" starting at "charIndex", and
      // return the number of characters written.  The character
      // U+FEFF is dropped from the output; U+FFFE is dropped and
      // flips the byte order for everything that follows.  A final
      // unpaired byte is saved for the next call.
      // Throws ArgumentNullException for a null array,
      // ArgumentOutOfRangeException for an index or count outside
      // its array, and ArgumentException if "chars" fills up.
      public override int GetChars (byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (chars == null) {
              throw new ArgumentNullException ("chars");
          }
          if (byteIndex < 0 || byteIndex > bytes.Length) {
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          }
          if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          }
          if (charIndex < 0 || charIndex > chars.Length) {
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
          }

          // Work on local copies of the state; they are written
          // back only when the whole buffer has been processed.
          int posn = charIndex;
          bool isBigEndian = bigEndian;
          int leftOver = leftOverByte;
          int length = chars.Length;
          char ch;
          while (byteCount > 0) {
              if (leftOver != -1) {
                  // Complete the character begun by the saved byte.
                  if (isBigEndian) {
                      ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex]))));
                  } else {
                      ch = ((char)(leftOver |
                                   (((int)(bytes[byteIndex])) << 8)));
                  }
                  leftOver = -1;
                  ++byteIndex;
                  --byteCount;
              } else if (byteCount > 1) {
                  if (isBigEndian) {
                      ch = ((char)((((int)(bytes[byteIndex])) << 8) |
                                   ((int)(bytes[byteIndex + 1]))));
                  } else {
                      ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) |
                                   ((int)(bytes[byteIndex]))));
                  }
                  byteIndex += 2;
                  byteCount -= 2;
              } else {
                  // Only one byte remains: keep it for the next call.
                  leftOver = (int)(bytes[byteIndex]);
                  break;
              }
              if (ch == '\uFFFE') {
                  // A byte-swapped mark: switch byte orders.  The
                  // local must be flipped, because the loop reads
                  // it and it is what gets stored back below.
                  isBigEndian = !isBigEndian;
              } else if (ch != '\uFEFF') {
                  // Ordinary character.
                  if (posn < length) {
                      chars[posn++] = ch;
                  } else {
                      throw new ArgumentException (_("Arg_InsufficientSpace"));
                  }
              }
          }
          leftOverByte = leftOver;
          bigEndian = isBigEndian;

          // Finished - return the converted length.
          return posn - charIndex;
      }
  } // class UnicodeDecoder
  451. }; // class UnicodeEncoding
  452. }; // namespace System.Text