UnicodeEncoding.cs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003, 2004 Novell, Inc.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the "Software"),
  10. * to deal in the Software without restriction, including without limitation
  11. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12. * and/or sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included
  16. * in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. namespace System.Text
  27. {
  28. using System;
  29. [Serializable]
  30. [MonoTODO ("Fix serialization compatibility with MS.NET")]
  31. public class UnicodeEncoding : Encoding
  32. {
  // Windows code page numbers for Unicode: 1200 is passed to the base
  // class for the little-endian form and 1201 for the big-endian form.
  internal const int UNICODE_CODE_PAGE = 1200;
  internal const int BIG_UNICODE_CODE_PAGE = 1201;

#if !ECMA_COMPAT
  // Number of bytes occupied by one encoded character.
  public const int CharSize = 2;
#endif

  // True when the high-order byte of each character is written first.
  private bool bigEndian;

  // True when GetPreamble() should return a byte order mark.
  private bool byteOrderMark;
  // Default constructor: little-endian byte order with a byte order mark.
  // The chained two-argument constructor already stores both settings,
  // so there is nothing left to initialise in this body.
  public UnicodeEncoding () : this (false, true)
  {
  }
  // Create an encoding with an explicit byte order and preamble policy.
  //   bigEndian     - true for big-endian byte order (code page 1201),
  //                   false for little-endian (code page 1200).
  //   byteOrderMark - true if GetPreamble() should return a byte order mark.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
  {
      this.bigEndian = bigEndian;
      this.byteOrderMark = byteOrderMark;

      // The body, header and web names always share one value; only the
      // display name and the browser-save flag differ in kind.
      string shortName = bigEndian ? "unicodeFFFE" : "utf-16";
      body_name = shortName;
      encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
      header_name = shortName;
      is_browser_save = !bigEndian;
      web_name = shortName;

      // Windows reports the same code page number for both the
      // little-endian and big-endian forms.
      windows_code_page = UNICODE_CODE_PAGE;
  }
  // Report how many bytes are produced by encoding "count" characters of
  // "chars" starting at "index".  Every character takes exactly two
  // bytes, so only the arguments are examined, never the array contents.
  // Throws ArgumentNullException when "chars" is null and
  // ArgumentOutOfRangeException when "index"/"count" do not describe a
  // range that lies inside the array.
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }

      int total = chars.Length;
      if (index < 0 || index > total) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }

      int available = total - index;
      if (count < 0 || count > available) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }

      return count * 2;
  }
  // Report how many bytes are produced by encoding the whole string "s":
  // two bytes for every character it contains.
  // Throws ArgumentNullException when "s" is null.
  public override int GetByteCount (String s)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }

      int length = s.Length;
      return length * 2;
  }
  // Encode "charCount" characters of "chars", starting at "charIndex",
  // into "bytes" beginning at "byteIndex".  Each character is stored as
  // two bytes in the byte order chosen when this encoding was created.
  // Returns the number of bytes written.
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for an index or count outside its array,
  // and ArgumentException when "bytes" has too little room for the output.
  public override int GetBytes (char[] chars, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex < 0 || charIndex > chars.Length) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }
      if (charCount < 0 || charCount > (chars.Length - charIndex)) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      }
      if (byteIndex < 0 || byteIndex > bytes.Length) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }

      int required = charCount * 2;
      if ((bytes.Length - byteIndex) < required) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      int outPos = byteIndex;
      int stop = charIndex + charCount;
      if (bigEndian) {
          // High-order byte first.
          for (int i = charIndex; i < stop; ++i) {
              char current = chars[i];
              bytes[outPos++] = (byte)(current >> 8);
              bytes[outPos++] = (byte)current;
          }
      } else {
          // Low-order byte first.
          for (int i = charIndex; i < stop; ++i) {
              char current = chars[i];
              bytes[outPos++] = (byte)current;
              bytes[outPos++] = (byte)(current >> 8);
          }
      }
      return outPos - byteIndex;
  }
  // Encode "charCount" characters of the string "s", starting at
  // "charIndex", into "bytes" beginning at "byteIndex".  Each character
  // is stored as two bytes in the byte order chosen when this encoding
  // was created.  Returns the number of bytes written.
  // Throws ArgumentNullException for a null string or array,
  // ArgumentOutOfRangeException for an index or count outside its
  // container, and ArgumentException when "bytes" has too little room.
  public override int GetBytes (String s, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex < 0 || charIndex > s.Length) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      }
      if (charCount < 0 || charCount > (s.Length - charIndex)) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      }
      if (byteIndex < 0 || byteIndex > bytes.Length) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }

      int required = charCount * 2;
      if ((bytes.Length - byteIndex) < required) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      int outPos = byteIndex;
      int stop = charIndex + charCount;
      if (bigEndian) {
          // High-order byte first.
          for (int i = charIndex; i < stop; ++i) {
              char current = s[i];
              bytes[outPos++] = (byte)(current >> 8);
              bytes[outPos++] = (byte)current;
          }
      } else {
          // Low-order byte first.
          for (int i = charIndex; i < stop; ++i) {
              char current = s[i];
              bytes[outPos++] = (byte)current;
              bytes[outPos++] = (byte)(current >> 8);
          }
      }
      return outPos - byteIndex;
  }
  // Report how many characters are produced by decoding "count" bytes of
  // "bytes" starting at "index".  Two bytes make one character; a
  // trailing odd byte is not counted.
  // Throws ArgumentNullException when "bytes" is null and
  // ArgumentOutOfRangeException when "index"/"count" do not describe a
  // range that lies inside the array.
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }

      int total = bytes.Length;
      if (index < 0 || index > total) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }

      int available = total - index;
      if (count < 0 || count > available) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }

      return count / 2;
  }
  // Decode "byteCount" bytes of "bytes", starting at "byteIndex", into
  // "chars" beginning at "charIndex".  Returns the number of characters
  // written (byteCount / 2; a trailing odd byte is ignored).
  //
  // If the range starts with the pair FE FF or FF FE, that byte order is
  // used for this call instead of the configured one.  The pair is not
  // skipped: it is decoded and stored as the first output character.
  // NOTE(review): other implementations may not sniff the byte order
  // here - confirm before relying on this behaviour.
  //
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for an index or count outside its array,
  // and ArgumentException when "chars" has too little room for the output.
  public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                char[] chars, int charIndex)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (byteIndex < 0 || byteIndex > bytes.Length) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      }
      if (charIndex < 0 || charIndex > chars.Length) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }

      // Start from the configured byte order and let a leading byte
      // order mark, when present, override it.
      bool isBigEndian = bigEndian;
      if (byteCount >= 2) {
          byte first = bytes[byteIndex];
          byte second = bytes[byteIndex + 1];
          if (first == (byte)0xFE && second == (byte)0xFF) {
              isBigEndian = true;
          } else if (first == (byte)0xFF && second == (byte)0xFE) {
              isBigEndian = false;
          }
      }

      // Validate that we have sufficient space in "chars".
      int pairs = byteCount / 2;
      if ((chars.Length - charIndex) < pairs) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      // Position of the high- and low-order byte inside each pair.
      int highOffset = isBigEndian ? 0 : 1;
      int lowOffset = 1 - highOffset;

      int src = byteIndex;
      int dst = charIndex;
      for (int n = 0; n < pairs; ++n) {
          int high = bytes[src + highOffset];
          int low = bytes[src + lowOffset];
          chars[dst++] = (char)((high << 8) | low);
          src += 2;
      }
      return dst - charIndex;
  }
  // Get the maximum number of bytes needed to encode "charCount"
  // characters: two bytes per character.
  // Throws ArgumentOutOfRangeException when "charCount" is negative, or
  // when the resulting byte count would not fit in an Int32.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
      }
      // Doubling anything above Int32.MaxValue / 2 wraps around to a
      // negative number, so reject it instead of returning garbage.
      if (charCount > (int.MaxValue / 2)) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      return charCount * 2;
  }
  // Get the maximum number of characters that decoding "byteCount" bytes
  // can produce: one character for every complete pair of bytes.
  // Throws ArgumentOutOfRangeException when "byteCount" is negative.
  public override int GetMaxCharCount (int byteCount)
  {
      bool negative = byteCount < 0;
      if (negative) {
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
      }
      int pairs = byteCount / 2;
      return pairs;
  }
  // Create a new decoder that starts out with this encoding's
  // configured byte order.  Each call returns a fresh object.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  // Get the byte order mark that identifies this encoding.
  // Returns FE FF for big-endian, FF FE for little-endian, or an empty
  // array when the encoding was created without a byte order mark.
  // A new array is returned on every call.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark) {
          return new byte [0];
      }
      if (bigEndian) {
          return new byte[] { (byte)0xFE, (byte)0xFF };
      }
      return new byte[] { (byte)0xFF, (byte)0xFE };
  }
  // Determine if this object is equal to another.  Two instances are
  // equal when "value" is a UnicodeEncoding with the same code page,
  // byte order and byte-order-mark setting; anything else (including
  // null) compares unequal.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = value as UnicodeEncoding;
      if (other == null) {
          return false;
      }
      return codePage == other.codePage &&
             bigEndian == other.bigEndian &&
             byteOrderMark == other.byteOrderMark;
  }
  // Get the hash code for this object.  The value is taken unchanged
  // from the base class; this override exists alongside Equals().
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  // Stateful decoder for two-byte Unicode data.  When a call ends in the
  // middle of a character, the dangling byte is remembered and combined
  // with the first byte supplied to the next call.
  private sealed class UnicodeDecoder : Decoder
  {
      // Byte order used when assembling characters.
      private bool bigEndian;

      // First byte of a character split across calls, or -1 when no
      // byte is pending.
      private int leftOverByte;

      // Create a decoder for the given byte order with no pending byte.
      public UnicodeDecoder (bool bigEndian)
      {
          this.bigEndian = bigEndian;
          leftOverByte = -1;
      }

      // Report how many characters decoding "count" bytes would yield,
      // taking a byte pending from an earlier call into account.
      // Does not modify the decoder state.
      // Throws ArgumentNullException when "bytes" is null and
      // ArgumentOutOfRangeException when "index"/"count" do not
      // describe a range that lies inside the array.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (index < 0 || index > bytes.Length) {
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          }
          if (count < 0 || count > (bytes.Length - index)) {
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
          }

          // A pending byte acts as one extra byte of input.
          int pending = (leftOverByte != -1) ? 1 : 0;
          return (count + pending) / 2;
      }

      // Decode "byteCount" bytes of "bytes", starting at "byteIndex",
      // into "chars" beginning at "charIndex", and return the number
      // of characters written.  A final unpaired byte is kept for the
      // next call.
      // Throws ArgumentNullException for a null array,
      // ArgumentOutOfRangeException for an index or count outside its
      // array, and ArgumentException when "chars" fills up.  In that
      // last case the characters decoded so far have already been
      // stored, and the pending-byte state is left as it was on entry.
      public override int GetChars (byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (chars == null) {
              throw new ArgumentNullException ("chars");
          }
          if (byteIndex < 0 || byteIndex > bytes.Length) {
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          }
          if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          }
          if (charIndex < 0 || charIndex > chars.Length) {
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
          }

          int src = byteIndex;
          int remaining = byteCount;
          int dst = charIndex;
          int capacity = chars.Length;
          // Work on a copy so the field only changes on success.
          int pending = leftOverByte;

          while (remaining > 0) {
              // The two bytes of the next character, in stream order.
              int first;
              int second;

              if (pending != -1) {
                  // Complete the character begun in a previous call.
                  first = pending;
                  second = bytes[src];
                  pending = -1;
                  src += 1;
                  remaining -= 1;
              } else if (remaining > 1) {
                  first = bytes[src];
                  second = bytes[src + 1];
                  src += 2;
                  remaining -= 2;
              } else {
                  // Only half a character is left: keep it for later.
                  pending = bytes[src];
                  break;
              }

              if (dst >= capacity) {
                  throw new ArgumentException (_("Arg_InsufficientSpace"));
              }
              chars[dst++] = bigEndian
                  ? (char)((first << 8) | second)
                  : (char)((second << 8) | first);
          }

          leftOverByte = pending;

          // Finished - return the converted length.
          return dst - charIndex;
      }
  } // class UnicodeDecoder
  391. #if NET_2_0
  // Report how many bytes are produced by encoding "count" characters
  // read through the pointer "chars": two bytes per character.  The
  // characters themselves are never read.
  // Throws ArgumentNullException when "chars" is null and
  // ArgumentOutOfRangeException when "count" is negative.
  [CLSCompliantAttribute(false)]
  public unsafe override int GetByteCount (char *chars, int count)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      // Without this check a negative count produced a negative size.
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_NonNegative"));
      }
      return count * 2;
  }
  397. #else
  // Encode the whole string "s" into a byte array.  The work is done
  // entirely by the base class implementation.
  public override byte [] GetBytes (String s)
  {
      byte[] encoded = base.GetBytes (s);
      return encoded;
  }
  402. #endif
  403. }; // class UnicodeEncoding
  404. }; // namespace System.Text