UnicodeEncoding.cs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003, 2004 Novell, Inc.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the "Software"),
  10. * to deal in the Software without restriction, including without limitation
  11. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12. * and/or sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included
  16. * in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. namespace System.Text
  27. {
  28. using System;
  29. [Serializable]
  30. [MonoTODO ("Fix serialization compatibility with MS.NET")]
  31. public class UnicodeEncoding : Encoding
  32. {
  33. // Magic numbers used by Windows for Unicode.
  34. internal const int UNICODE_CODE_PAGE = 1200;
  35. internal const int BIG_UNICODE_CODE_PAGE = 1201;
  36. #if !ECMA_COMPAT
  37. // Size of characters in this encoding.
  38. public const int CharSize = 2;
  39. #endif
  40. // Internal state.
  41. private bool bigEndian;
  42. private bool byteOrderMark;
  43. // Constructors.
  // Create a little-endian encoding whose GetPreamble returns a
  // byte order mark.  All state is initialised by the two-argument
  // constructor that this one chains to, so nothing is left to do
  // in the body.
  public UnicodeEncoding () : this (false, true)
  {
  }
  // Create an encoding with an explicit byte order.
  //   bigEndian     - true to use the big-endian code page and names,
  //                   false for the little-endian ones.
  //   byteOrderMark - stored for later use by this instance.
  // The name fields assigned below are declared outside this class.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
      this.bigEndian = bigEndian;
      this.byteOrderMark = byteOrderMark;

      // Pick the big-endian or little-endian descriptive names.
      body_name = bigEndian ? "unicodeFFFE" : "utf-16";
      encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
      header_name = bigEndian ? "unicodeFFFE" : "utf-16";
      is_browser_save = !bigEndian;
      web_name = bigEndian ? "utf-16be" : "utf-16";

      // Windows reports the same code page number for
      // both the little-endian and big-endian forms.
      windows_code_page = UNICODE_CODE_PAGE;
  }
  // Report how many bytes encoding "count" characters of "chars",
  // starting at "index", will take.  Each character occupies two
  // bytes, so the result is simply twice the character count.
  //
  // Throws ArgumentNullException when "chars" is null and
  // ArgumentOutOfRangeException when "index" or "count" fall
  // outside the array.
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }

      int total = chars.Length;
      if (index > total || index < 0) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }

      int remaining = total - index;
      if (count > remaining || count < 0) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }

      return 2 * count;
  }
  // String overload of "GetByteCount": two bytes for every
  // character in "s".  Throws ArgumentNullException for a null string.
  public override int GetByteCount (String s)
  {
      if (s != null) {
          return 2 * s.Length;
      }
      throw new ArgumentNullException ("s");
  }
  // Encode "charCount" characters of "chars", starting at
  // "charIndex", into "bytes" starting at "byteIndex".  Each
  // character becomes two bytes, ordered according to this
  // instance's endianness.  Returns the number of bytes written.
  //
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for an index or count outside its
  // array, and ArgumentException when "bytes" is too small.
  public override int GetBytes (char[] chars, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex > chars.Length || charIndex < 0) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }
      if (charCount > (chars.Length - charIndex) || charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      }
      if (byteIndex > bytes.Length || byteIndex < 0) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if ((charCount * 2) > (bytes.Length - byteIndex)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      int stop = charIndex + charCount;
      int outPos = byteIndex;
      if (bigEndian) {
          // Most significant byte first.
          for (int i = charIndex; i < stop; ++i) {
              char unit = chars[i];
              bytes[outPos] = (byte)(unit >> 8);
              bytes[outPos + 1] = (byte)unit;
              outPos += 2;
          }
      } else {
          // Least significant byte first.
          for (int i = charIndex; i < stop; ++i) {
              char unit = chars[i];
              bytes[outPos] = (byte)unit;
              bytes[outPos + 1] = (byte)(unit >> 8);
              outPos += 2;
          }
      }
      return outPos - byteIndex;
  }
  // String overload of "GetBytes": encode "charCount" characters of
  // "s", starting at "charIndex", into "bytes" starting at
  // "byteIndex".  Returns the number of bytes written.
  //
  // Throws ArgumentNullException for a null argument,
  // ArgumentOutOfRangeException for an index or count outside the
  // string or array, and ArgumentException when "bytes" is too small.
  public override int GetBytes (String s, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex > s.Length || charIndex < 0) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      }
      if (charCount > (s.Length - charIndex) || charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      }
      if (byteIndex > bytes.Length || byteIndex < 0) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if ((charCount * 2) > (bytes.Length - byteIndex)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      int stop = charIndex + charCount;
      int outPos = byteIndex;
      if (bigEndian) {
          // Most significant byte first.
          for (int i = charIndex; i < stop; ++i) {
              char unit = s[i];
              bytes[outPos] = (byte)(unit >> 8);
              bytes[outPos + 1] = (byte)unit;
              outPos += 2;
          }
      } else {
          // Least significant byte first.
          for (int i = charIndex; i < stop; ++i) {
              char unit = s[i];
              bytes[outPos] = (byte)unit;
              bytes[outPos + 1] = (byte)(unit >> 8);
              outPos += 2;
          }
      }
      return outPos - byteIndex;
  }
  // Encode a whole string; the work is delegated unchanged to the
  // base class implementation.
  public override byte [] GetBytes (String s)
  {
      byte [] encoded = base.GetBytes (s);
      return encoded;
  }
  // Get the number of characters that GetChars produces when decoding
  // "count" bytes of "bytes" starting at "index".
  //
  // Throws ArgumentNullException when "bytes" is null and
  // ArgumentOutOfRangeException when "index" or "count" fall outside
  // the array.
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (index < 0 || index > bytes.Length) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      if (count < 0 || count > (bytes.Length - index)) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }
      if (count >= 2) {
          byte first = bytes[index];
          byte second = bytes[index + 1];
          if ((first == (byte)0xFE && second == (byte)0xFF) ||
              (first == (byte)0xFF && second == (byte)0xFE)) {
              // GetChars skips both bytes of a leading byte order mark
              // and then decodes only whole byte pairs, so both mark
              // bytes are excluded here.  Subtracting just one byte
              // over-reported odd counts by one character.
              return (count - 2) / 2;
          }
      }
      // A trailing odd byte cannot form a character and is not counted.
      return count / 2;
  }
  // Decode "byteCount" bytes of "bytes", starting at "byteIndex",
  // into "chars" starting at "charIndex".  A leading byte order mark
  // (FE FF or FF FE) is consumed and selects the byte order for this
  // call; without one the instance's own byte order is used.  A
  // trailing odd byte is ignored.  Returns the number of characters
  // written.
  //
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for an index or count outside its
  // array, and ArgumentException when "chars" is too small.
  public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                char[] chars, int charIndex)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (byteIndex > bytes.Length || byteIndex < 0) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if (byteCount > (bytes.Length - byteIndex) || byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      }
      if (charIndex > chars.Length || charIndex < 0) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }

      // Start from the configured byte order and let a leading
      // byte order mark override it.
      bool useBigEndian = bigEndian;
      if (byteCount >= 2) {
          byte first = bytes[byteIndex];
          byte second = bytes[byteIndex + 1];
          bool bigMark = (first == (byte)0xFE && second == (byte)0xFF);
          bool littleMark = (first == (byte)0xFF && second == (byte)0xFE);
          if (bigMark || littleMark) {
              useBigEndian = bigMark;
              byteIndex += 2;
              byteCount -= 2;
          }
      }

      // Make sure every complete byte pair has a slot in "chars".
      int pairs = byteCount / 2;
      if (pairs > (chars.Length - charIndex)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      // Combine each byte pair into one character.
      int src = byteIndex;
      int dest = charIndex;
      int destEnd = charIndex + pairs;
      if (useBigEndian) {
          while (dest < destEnd) {
              chars[dest++] = (char)((bytes[src] << 8) | bytes[src + 1]);
              src += 2;
          }
      } else {
          while (dest < destEnd) {
              chars[dest++] = (char)((bytes[src + 1] << 8) | bytes[src]);
              src += 2;
          }
      }
      return dest - charIndex;
  }
  // Upper bound on the bytes needed to encode "charCount"
  // characters: two per character.  A negative count is rejected
  // with ArgumentOutOfRangeException.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount >= 0) {
          return 2 * charCount;
      }
      throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
  }
  // Upper bound on the characters produced by decoding "byteCount"
  // bytes: one per complete byte pair.  A negative count is rejected
  // with ArgumentOutOfRangeException.
  public override int GetMaxCharCount (int byteCount)
  {
      if (byteCount >= 0) {
          return byteCount / 2;
      }
      throw new ArgumentOutOfRangeException
          ("byteCount", _("ArgRange_NonNegative"));
  }
  // Create a new UnicodeDecoder that starts out with this
  // instance's byte order.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  // Return the byte order mark for this encoding: FE FF for
  // big-endian, FF FE for little-endian, or an empty array when the
  // instance was created without a byte order mark.  A fresh array
  // is returned on every call.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark) {
          return new byte [0];
      }

      byte[] mark = new byte[2];
      mark[0] = bigEndian ? (byte)0xFE : (byte)0xFF;
      mark[1] = bigEndian ? (byte)0xFF : (byte)0xFE;
      return mark;
  }
  // Two UnicodeEncoding objects are equal when they have the same
  // code page, byte order and byte order mark setting.  Anything
  // that is not a UnicodeEncoding (including null) is unequal.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = value as UnicodeEncoding;
      if (other == null) {
          return false;
      }
      return codePage == other.codePage
          && bigEndian == other.bigEndian
          && byteOrderMark == other.byteOrderMark;
  }
  // Hash code for this object; the value comes straight from the
  // base class implementation.
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  // Unicode decoder implementation.  The decoder keeps state between
  // GetChars calls: the byte order currently in effect, and the first
  // byte of a character that was split across two calls.
  private sealed class UnicodeDecoder : Decoder
  {
      // Byte order assumed for incoming data.
      private bool bigEndian;

      // First byte of a split character carried over from the
      // previous GetChars call, or -1 when there is none.
      private int leftOverByte;

      // Constructor.  "bigEndian" is the initial byte order.
      public UnicodeDecoder (bool bigEndian)
      {
          this.bigEndian = bigEndian;
          leftOverByte = -1;
      }

      // Report how many character slots decoding "count" bytes of
      // "bytes" from "index" may need, counting a byte carried over
      // from the previous GetChars call.  Byte order marks, which
      // GetChars does not store, are not subtracted, so the result
      // can exceed the number of characters actually written.
      //
      // Throws ArgumentNullException when "bytes" is null and
      // ArgumentOutOfRangeException when "index" or "count" fall
      // outside the array.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (index < 0 || index > bytes.Length) {
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          }
          if (count < 0 || count > (bytes.Length - index)) {
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
          }
          if (leftOverByte != -1) {
              // The carried-over byte pairs with the first new byte.
              return (count + 1) / 2;
          } else {
              return count / 2;
          }
      }

      // Decode "byteCount" bytes of "bytes", starting at "byteIndex",
      // into "chars" starting at "charIndex", and return the number
      // of characters written.  U+FEFF is dropped; U+FFFE is dropped
      // and flips the byte order for everything that follows.  A
      // trailing odd byte is kept for the next call.
      //
      // Throws ArgumentNullException for a null array,
      // ArgumentOutOfRangeException for an index or count outside
      // its array, and ArgumentException when "chars" fills up.  The
      // saved state is not updated when an exception is thrown.
      public override int GetChars (byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (chars == null) {
              throw new ArgumentNullException ("chars");
          }
          if (byteIndex < 0 || byteIndex > bytes.Length) {
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          }
          if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          }
          if (charIndex < 0 || charIndex > chars.Length) {
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
          }

          // Work on local copies of the state; they are written back
          // only once the whole buffer has been processed.
          int posn = charIndex;
          bool isBigEndian = bigEndian;
          int leftOver = leftOverByte;
          int length = chars.Length;
          char ch;
          while (byteCount > 0) {
              if (leftOver != -1) {
                  // Complete the character started in the last call.
                  if (isBigEndian) {
                      ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex]))));
                  } else {
                      ch = ((char)(leftOver |
                                   (((int)(bytes[byteIndex])) << 8)));
                  }
                  leftOver = -1;
                  ++byteIndex;
                  --byteCount;
              } else if (byteCount > 1) {
                  if (isBigEndian) {
                      ch = ((char)((((int)(bytes[byteIndex])) << 8) |
                                   ((int)(bytes[byteIndex + 1]))));
                  } else {
                      ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) |
                                   ((int)(bytes[byteIndex]))));
                  }
                  byteIndex += 2;
                  byteCount -= 2;
              } else {
                  // Single trailing byte: keep it for the next call.
                  leftOver = (int)(bytes[byteIndex]);
                  break;
              }
              if (ch == '\uFFFE') {
                  // A byte order mark read with the wrong byte order:
                  // switch byte orders.  The switch is applied to the
                  // local copy because the loop reads the local and
                  // the field is overwritten from it below.
                  isBigEndian = !isBigEndian;
              } else if (ch != '\uFEFF') {
                  // Ordinary character.
                  if (posn < length) {
                      chars[posn++] = ch;
                  } else {
                      throw new ArgumentException (_("Arg_InsufficientSpace"));
                  }
              }
          }
          leftOverByte = leftOver;
          bigEndian = isBigEndian;

          // Finished - return the converted length.
          return posn - charIndex;
      }
  } // class UnicodeDecoder
  411. }; // class UnicodeEncoding
  412. }; // namespace System.Text