// UnicodeEncoding.cs (12 KB)
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003, 2004 Novell, Inc.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the "Software"),
  10. * to deal in the Software without restriction, including without limitation
  11. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12. * and/or sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included
  16. * in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. namespace System.Text
  27. {
  28. using System;
  29. [Serializable]
  30. [MonoTODO ("Fix serialization compatibility with MS.NET")]
  31. public class UnicodeEncoding : Encoding
  32. {
  // Magic numbers used by Windows for Unicode.  The constructor passes
  // UNICODE_CODE_PAGE to the base class for little-endian instances and
  // BIG_UNICODE_CODE_PAGE for big-endian ones.
  internal const int UNICODE_CODE_PAGE = 1200;
  internal const int BIG_UNICODE_CODE_PAGE = 1201;
  #if !ECMA_COMPAT
  // Size of characters in this encoding (the byte-count methods of this
  // class multiply character counts by the same factor of two).
  public const int CharSize = 2;
  #endif
  // Internal state.
  // True when the high-order byte of each character is written first.
  private bool bigEndian;
  // True when GetPreamble returns a byte order mark instead of an empty array.
  private bool byteOrderMark;
  // Constructors.
  // Default instance: little-endian byte order, with a byte order mark.
  public UnicodeEncoding () : this (false, true)
  {
      // Nothing to do here: the chained constructor has already stored
      // bigEndian = false and byteOrderMark = true.
  }
  // Create an encoding with an explicit byte order and preamble policy.
  //   bigEndian     - true for big-endian byte order (code page 1201),
  //                   false for little-endian (code page 1200).
  //   byteOrderMark - true if GetPreamble should return a byte order mark.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
      this.byteOrderMark = byteOrderMark;
      this.bigEndian = bigEndian;

      // The body, header and web names are identical for a given byte order.
      String protocolName = bigEndian ? "unicodeFFFE" : "utf-16";
      body_name = protocolName;
      header_name = protocolName;
      web_name = protocolName;

      encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
      is_browser_save = !bigEndian;

      // Windows reports the same code page number for
      // both the little-endian and big-endian forms.
      windows_code_page = UNICODE_CODE_PAGE;
  }
  // Report how many bytes are needed to encode "count" characters of
  // "chars" starting at "index".  Every character takes exactly two
  // bytes, so the buffer contents are never inspected.
  // Throws ArgumentNullException for a null buffer and
  // ArgumentOutOfRangeException when the range lies outside the buffer.
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }

      int length = chars.Length;
      if (index < 0 || length < index) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }

      int remaining = length - index;
      if (count < 0 || remaining < count) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }

      return 2 * count;
  }
  // String overload of "GetByteCount": two bytes for every character
  // of "s".  Throws ArgumentNullException when "s" is null.
  public override int GetByteCount (String s)
  {
      if (s != null) {
          return 2 * s.Length;
      }
      throw new ArgumentNullException ("s");
  }
  // Encode "charCount" characters of "chars", starting at "charIndex",
  // into "bytes" starting at "byteIndex".  Each character becomes two
  // bytes in the byte order selected at construction time.
  // Returns the number of bytes written.
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for a range outside its array, and
  // ArgumentException when "bytes" cannot hold the result.
  public override int GetBytes (char[] chars, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex < 0 || chars.Length < charIndex) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }
      if (charCount < 0 || (chars.Length - charIndex) < charCount) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      }
      if (byteIndex < 0 || bytes.Length < byteIndex) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if ((charCount * 2) > (bytes.Length - byteIndex)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      // Position of the high-order byte inside each two-byte unit;
      // the low-order byte takes the other slot.
      int highSlot = bigEndian ? 0 : 1;
      int lowSlot = 1 - highSlot;

      int outPos = byteIndex;
      int stop = charIndex + charCount;
      for (int i = charIndex; i < stop; i++) {
          char current = chars[i];
          bytes[outPos + lowSlot] = (byte)current;
          bytes[outPos + highSlot] = (byte)(current >> 8);
          outPos += 2;
      }
      return outPos - byteIndex;
  }
  // String overload of "GetBytes": encode "charCount" characters of
  // "s", starting at "charIndex", into "bytes" starting at "byteIndex",
  // using the byte order selected at construction time.
  // Returns the number of bytes written.
  // Throws ArgumentNullException for a null argument,
  // ArgumentOutOfRangeException for a range outside the string or the
  // array, and ArgumentException when "bytes" cannot hold the result.
  public override int GetBytes (String s, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex < 0 || s.Length < charIndex) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      }
      if (charCount < 0 || (s.Length - charIndex) < charCount) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      }
      if (byteIndex < 0 || bytes.Length < byteIndex) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if ((charCount * 2) > (bytes.Length - byteIndex)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      // Where the high-order byte goes within each two-byte unit.
      int highSlot = bigEndian ? 0 : 1;
      int lowSlot = 1 - highSlot;

      int outPos = byteIndex;
      for (int taken = 0; taken < charCount; taken++) {
          char current = s[charIndex + taken];
          bytes[outPos + lowSlot] = (byte)current;
          bytes[outPos + highSlot] = (byte)(current >> 8);
          outPos += 2;
      }
      return outPos - byteIndex;
  }
  // Encode a whole string into a new byte array.  The work is
  // delegated unchanged to the base class implementation.
  public override byte [] GetBytes (String s)
  {
      byte [] encoded = base.GetBytes (s);
      return encoded;
  }
  // Report how many characters "count" bytes of "bytes", starting at
  // "index", decode to.  Two bytes make one character; a trailing odd
  // byte does not count.
  // Throws ArgumentNullException for a null buffer and
  // ArgumentOutOfRangeException when the range lies outside the buffer.
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }

      int length = bytes.Length;
      if (index < 0 || length < index) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }

      int remaining = length - index;
      if (count < 0 || remaining < count) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }

      int wholeCharacters = count / 2;
      return wholeCharacters;
  }
  // Decode "byteCount" bytes of "bytes", starting at "byteIndex", into
  // "chars" starting at "charIndex", and return the number of
  // characters written.
  //
  // The byte order is always the one this encoding was constructed
  // with.  No byte-order-mark sniffing is done here: a leading FE FF or
  // FF FE pair is decoded like any other character, which keeps this
  // method consistent with the decoder returned by GetDecoder.  Callers
  // that want to honour a byte order mark must detect and skip it
  // themselves (see GetPreamble).
  //
  // A trailing odd byte is ignored.
  //
  // Throws ArgumentNullException if either array is null,
  // ArgumentOutOfRangeException if a range lies outside its array, and
  // ArgumentException if "chars" is too small for the result.
  public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                char[] chars, int charIndex)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (byteIndex < 0 || byteIndex > bytes.Length) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      }
      if (charIndex < 0 || charIndex > chars.Length) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }

      // Validate that we have sufficient space in "chars".
      if ((chars.Length - charIndex) < (byteCount / 2)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      // Convert the characters, two bytes at a time.
      int posn = charIndex;
      if (bigEndian) {
          while (byteCount >= 2) {
              chars[posn++] = (char)((bytes[byteIndex] << 8) | bytes[byteIndex + 1]);
              byteIndex += 2;
              byteCount -= 2;
          }
      } else {
          while (byteCount >= 2) {
              chars[posn++] = (char)((bytes[byteIndex + 1] << 8) | bytes[byteIndex]);
              byteIndex += 2;
              byteCount -= 2;
          }
      }
      return posn - charIndex;
  }
  // Get the maximum number of bytes needed to encode a
  // specified number of characters (two bytes per character).
  // Throws ArgumentOutOfRangeException if "charCount" is negative or
  // so large that the byte count cannot be represented as an int.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
      }
      // Doubling anything above Int32.MaxValue / 2 would silently wrap
      // around to a negative value, so reject it explicitly.
      if (charCount > Int32.MaxValue / 2) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      return charCount * 2;
  }
  // Report the largest number of characters that "byteCount" bytes can
  // decode to: one character for every complete pair of bytes.
  // Throws ArgumentOutOfRangeException when "byteCount" is negative.
  public override int GetMaxCharCount (int byteCount)
  {
      if (byteCount >= 0) {
          return byteCount / 2;
      }
      throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
  }
  // Create a new Unicode decoder that uses this encoding's byte order.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  // Return the byte order mark for this encoding: FE FF for big-endian,
  // FF FE for little-endian, or an empty array when the instance was
  // created without a byte order mark.  A new array is returned on
  // every call.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark) {
          return new byte [0];
      }

      return bigEndian
          ? new byte[] { (byte)0xFE, (byte)0xFF }
          : new byte[] { (byte)0xFF, (byte)0xFE };
  }
  // Two Unicode encodings are equal when they have the same code page,
  // the same byte order and the same byte-order-mark setting.  Anything
  // that is not a UnicodeEncoding (including null) is never equal.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = value as UnicodeEncoding;
      if (other == null) {
          return false;
      }

      if (codePage != other.codePage) {
          return false;
      }
      if (bigEndian != other.bigEndian) {
          return false;
      }
      return byteOrderMark == other.byteOrderMark;
  }
  // Hash code for this object; the value comes straight from the base
  // class implementation.
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  // Unicode decoder implementation.
  //
  // Stateful UTF-16 decoder: when a call ends in the middle of a
  // two-byte character, the dangling byte is remembered and combined
  // with the first byte supplied to the next call.
  private sealed class UnicodeDecoder : Decoder
  {
      // Byte order applied to every character this decoder produces.
      private readonly bool bigEndian;

      // First byte of a character split across two calls, or -1 if none.
      private int leftOverByte;

      // Constructor.
      public UnicodeDecoder (bool bigEndian)
      {
          this.bigEndian = bigEndian;
          leftOverByte = -1;
      }

      // Number of characters that "count" further bytes would produce,
      // taking a pending left-over byte into account.
      // Throws ArgumentNullException for a null buffer and
      // ArgumentOutOfRangeException when the range lies outside it.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (index < 0 || index > bytes.Length) {
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          }
          if (count < 0 || count > (bytes.Length - index)) {
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
          }
          return CountChars (count);
      }

      // Decode "byteCount" bytes of "bytes", starting at "byteIndex",
      // into "chars" starting at "charIndex"; returns the number of
      // characters written.  A dangling final byte is kept for the
      // next call.
      // Throws ArgumentNullException for a null array,
      // ArgumentOutOfRangeException for a range outside its array, and
      // ArgumentException when "chars" is too small.  The capacity
      // check runs before anything is decoded, so a failed call leaves
      // both "chars" and the decoder state untouched.
      public override int GetChars (byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (chars == null) {
              throw new ArgumentNullException ("chars");
          }
          if (byteIndex < 0 || byteIndex > bytes.Length) {
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          }
          if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          }
          if (charIndex < 0 || charIndex > chars.Length) {
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
          }

          // Reject an undersized output buffer before writing anything.
          if ((chars.Length - charIndex) < CountChars (byteCount)) {
              throw new ArgumentException (_("Arg_InsufficientSpace"));
          }

          int posn = charIndex;
          int leftOver = leftOverByte;

          // Finish the character that was split by the previous call.
          if (leftOver != -1 && byteCount > 0) {
              chars[posn++] = MakeChar (leftOver, bytes[byteIndex]);
              leftOver = -1;
              ++byteIndex;
              --byteCount;
          }

          // Convert every complete pair of bytes.
          while (byteCount > 1) {
              chars[posn++] = MakeChar (bytes[byteIndex], bytes[byteIndex + 1]);
              byteIndex += 2;
              byteCount -= 2;
          }

          // Keep a dangling final byte for the next call.
          if (byteCount == 1) {
              leftOver = (int)(bytes[byteIndex]);
          }
          leftOverByte = leftOver;

          // Finished - return the converted length.
          return posn - charIndex;
      }

      // Characters produced by "byteCount" new bytes given the current
      // left-over state.  Written without "byteCount + 1" so that it
      // cannot overflow.
      private int CountChars (int byteCount)
      {
          int result = byteCount / 2;
          if (leftOverByte != -1 && (byteCount % 2) != 0) {
              // The pending byte pairs up with the odd byte.
              ++result;
          }
          return result;
      }

      // Combine two consecutive bytes of the stream into one character
      // according to the decoder's byte order.
      private char MakeChar (int first, int second)
      {
          if (bigEndian) {
              return (char)((first << 8) | second);
          }
          return (char)((second << 8) | first);
      }

  } // class UnicodeDecoder
  395. }; // class UnicodeEncoding
  396. }; // namespace System.Text