UnicodeEncoding.cs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003 Novell, Inc.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the "Software"),
  10. * to deal in the Software without restriction, including without limitation
  11. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12. * and/or sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included
  16. * in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. namespace System.Text
  27. {
  28. using System;
  29. [Serializable]
  30. public class UnicodeEncoding : Encoding
  31. {
  // Code page numbers that Windows uses for Unicode: the first is
  // passed to the base class for little-endian instances, the second
  // for big-endian instances.
  internal const int UNICODE_CODE_PAGE = 1200;
  internal const int BIG_UNICODE_CODE_PAGE = 1201;

#if !ECMA_COMPAT
  // Size, in bytes, of one character in this encoding.
  public const int CharSize = 2;
#endif

  // Internal state, assigned by the constructors.
  // bigEndian selects most-significant-byte-first output;
  // byteOrderMark controls whether GetPreamble returns a mark.
  private bool bigEndian;
  private bool byteOrderMark;
  // Constructors.

  // Create a little-endian encoding that reports a byte order mark.
  public UnicodeEncoding () : this (false, true)
  {
      // Nothing further to do: the chained constructor has already
      // stored bigEndian = false and byteOrderMark = true.
  }
  // Create an encoding with an explicit byte order and preamble choice.
  //   bigEndian     - true to store the high byte of each character first.
  //   byteOrderMark - true if GetPreamble should return a byte order mark.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
      this.bigEndian = bigEndian;
      this.byteOrderMark = byteOrderMark;

      // Descriptive names inherited from the base class; each one
      // depends only on the byte order.
      body_name = bigEndian ? "unicodeFFFE" : "utf-16";
      encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
      header_name = bigEndian ? "unicodeFFFE" : "utf-16";
      is_browser_save = !bigEndian;
      web_name = bigEndian ? "utf-16be" : "utf-16";

      // Windows reports the same code page number for
      // both the little-endian and big-endian forms.
      windows_code_page = UNICODE_CODE_PAGE;
  }
  // Get the number of bytes needed to encode "count" characters of
  // "chars" starting at "index".  Every character takes two bytes.
  // Throws ArgumentNullException if "chars" is null and
  // ArgumentOutOfRangeException if the range lies outside the array.
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      int length = chars.Length;
      if (index < 0 || index > length) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      int remaining = length - index;
      if (count < 0 || count > remaining) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }
      return 2 * count;
  }
  // Convenience wrapper for "GetByteCount" that measures a whole
  // string: two bytes per character.  Throws ArgumentNullException
  // if "s" is null.
  public override int GetByteCount (String s)
  {
      if (s != null) {
          return 2 * s.Length;
      }
      throw new ArgumentNullException ("s");
  }
  // Encode "charCount" characters of "chars", starting at "charIndex",
  // into "bytes" starting at "byteIndex".  Each character becomes two
  // bytes in the byte order chosen at construction.  Returns the number
  // of bytes written.
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for a range outside either array, and
  // ArgumentException if "bytes" has too little room.
  public override int GetBytes (char[] chars, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex < 0 || charIndex > chars.Length) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }
      int charsAvailable = chars.Length - charIndex;
      if (charCount < 0 || charCount > charsAvailable) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      }
      if (byteIndex < 0 || byteIndex > bytes.Length) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      int bytesAvailable = bytes.Length - byteIndex;
      if (bytesAvailable < (charCount * 2)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      int outPos = byteIndex;
      int stop = charIndex + charCount;
      if (bigEndian) {
          // High byte first.
          for (int inPos = charIndex; inPos < stop; ++inPos) {
              char current = chars[inPos];
              bytes[outPos++] = (byte)(current >> 8);
              bytes[outPos++] = (byte)current;
          }
      } else {
          // Low byte first.
          for (int inPos = charIndex; inPos < stop; ++inPos) {
              char current = chars[inPos];
              bytes[outPos++] = (byte)current;
              bytes[outPos++] = (byte)(current >> 8);
          }
      }
      return outPos - byteIndex;
  }
  // Convenience wrapper for "GetBytes" that reads from a string.
  // Encodes "charCount" characters of "s", starting at "charIndex",
  // into "bytes" starting at "byteIndex", and returns the number of
  // bytes written.
  // Throws ArgumentNullException for a null argument,
  // ArgumentOutOfRangeException for a range outside the string or the
  // array, and ArgumentException if "bytes" has too little room.
  public override int GetBytes (String s, int charIndex, int charCount,
                                byte[] bytes, int byteIndex)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (charIndex < 0 || charIndex > s.Length) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      }
      int charsAvailable = s.Length - charIndex;
      if (charCount < 0 || charCount > charsAvailable) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      }
      if (byteIndex < 0 || byteIndex > bytes.Length) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      int bytesAvailable = bytes.Length - byteIndex;
      if (bytesAvailable < (charCount * 2)) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      int outPos = byteIndex;
      int stop = charIndex + charCount;
      if (bigEndian) {
          // High byte first.
          for (int inPos = charIndex; inPos < stop; ++inPos) {
              char current = s[inPos];
              bytes[outPos++] = (byte)(current >> 8);
              bytes[outPos++] = (byte)current;
          }
      } else {
          // Low byte first.
          for (int inPos = charIndex; inPos < stop; ++inPos) {
              char current = s[inPos];
              bytes[outPos++] = (byte)current;
              bytes[outPos++] = (byte)(current >> 8);
          }
      }
      return outPos - byteIndex;
  }
  // Encode a whole string into a new byte array.  The work is
  // delegated unchanged to the base class implementation.
  public override byte [] GetBytes (String s)
  {
      byte[] encoded = base.GetBytes (s);
      return encoded;
  }
  // Get the number of characters needed to decode "count" bytes of
  // "bytes" starting at "index".
  //
  // A leading byte order mark (FE FF or FF FE) is excluded from the
  // total, because GetChars skips those two bytes without producing
  // a character.  A trailing odd byte is ignored, as it is in GetChars.
  //
  // Throws ArgumentNullException if "bytes" is null and
  // ArgumentOutOfRangeException if the range lies outside the array.
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (index < 0 || index > bytes.Length) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      if (count < 0 || count > (bytes.Length - index)) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }

      int payload = count;
      if (count >= 2) {
          byte first = bytes[index];
          byte second = bytes[index + 1];
          if ((first == (byte)0xFE && second == (byte)0xFF) ||
              (first == (byte)0xFF && second == (byte)0xFE)) {
              // Discount both bytes of the mark.  Subtracting only one
              // gives a result one too large for odd byte counts, which
              // disagrees with what GetChars actually returns.
              payload -= 2;
          }
      }
      return payload / 2;
  }
  // Decode "byteCount" bytes of "bytes", starting at "byteIndex", into
  // "chars" starting at "charIndex", and return the number of
  // characters written.
  //
  // If the range starts with a byte order mark (FE FF or FF FE), the
  // mark selects the byte order for this call and is not copied to the
  // output; otherwise the byte order chosen at construction is used.
  // A trailing odd byte is ignored.
  //
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for a range outside either array, and
  // ArgumentException if "chars" has too little room.
  public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                char[] chars, int charIndex)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (byteIndex < 0 || byteIndex > bytes.Length) {
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      }
      if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      }
      if (charIndex < 0 || charIndex > chars.Length) {
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      }

      // Determine the byte order in the incoming buffer, starting
      // from the configured order and letting a leading mark override it.
      bool isBigEndian = bigEndian;
      if (byteCount >= 2) {
          byte first = bytes[byteIndex];
          byte second = bytes[byteIndex + 1];
          bool bigMark = (first == (byte)0xFE && second == (byte)0xFF);
          bool littleMark = (first == (byte)0xFF && second == (byte)0xFE);
          if (bigMark || littleMark) {
              isBigEndian = bigMark;
              byteIndex += 2;
              byteCount -= 2;
          }
      }

      // Validate that we have sufficient space in "chars".
      int produced = byteCount / 2;
      if ((chars.Length - charIndex) < produced) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      // Convert the characters, one byte pair at a time.
      if (isBigEndian) {
          for (int i = 0; i < produced; ++i) {
              int high = (int)(bytes[byteIndex]);
              int low = (int)(bytes[byteIndex + 1]);
              chars[charIndex + i] = (char)((high << 8) | low);
              byteIndex += 2;
          }
      } else {
          for (int i = 0; i < produced; ++i) {
              int low = (int)(bytes[byteIndex]);
              int high = (int)(bytes[byteIndex + 1]);
              chars[charIndex + i] = (char)((high << 8) | low);
              byteIndex += 2;
          }
      }
      return produced;
  }
  // Get the maximum number of bytes needed to encode "charCount"
  // characters, which is two bytes per character.
  // Throws ArgumentOutOfRangeException if "charCount" is negative.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount >= 0) {
          return 2 * charCount;
      }
      throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
  }
  // Get the maximum number of characters needed to decode "byteCount"
  // bytes: one character per complete byte pair.
  // Throws ArgumentOutOfRangeException if "byteCount" is negative.
  public override int GetMaxCharCount (int byteCount)
  {
      if (byteCount >= 0) {
          int pairs = byteCount / 2;
          return pairs;
      }
      throw new ArgumentOutOfRangeException
          ("byteCount", _("ArgRange_NonNegative"));
  }
  // Get a new Unicode-specific decoder that starts out with this
  // instance's byte order.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  // Get the Unicode preamble: the two-byte byte order mark matching
  // this instance's byte order (FE FF for big-endian, FF FE for
  // little-endian), or an empty array when the instance was created
  // without a byte order mark.  A new array is returned on each call.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark) {
          return new byte [0];
      }
      if (bigEndian) {
          return new byte[] { (byte)0xFE, (byte)0xFF };
      }
      return new byte[] { (byte)0xFF, (byte)0xFE };
  }
  // Determine if this object is equal to another.  Two instances are
  // equal when "value" is a UnicodeEncoding with the same code page,
  // byte order and byte-order-mark setting.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = value as UnicodeEncoding;
      if (other == null) {
          return false;
      }
      if (codePage != other.codePage) {
          return false;
      }
      if (bigEndian != other.bigEndian) {
          return false;
      }
      return byteOrderMark == other.byteOrderMark;
  }
  // Get the hash code for this object.  The value comes straight from
  // the base class implementation.
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  // Unicode decoder implementation.
  //
  // The decoder keeps state between calls: the byte order currently in
  // effect, and the first byte of a character whose second byte has not
  // arrived yet.  Byte order marks are never copied to the output:
  // U+FEFF is dropped, and U+FFFE (a mark read in the wrong order)
  // flips the byte order used for the bytes that follow.
  private sealed class UnicodeDecoder : Decoder
  {
      // Byte order assumed for incoming data.
      private bool bigEndian;

      // First byte of a split character carried over from the
      // previous GetChars call, or -1 when there is none.
      private int leftOverByte;

      // Constructor.  "bigEndian" is the initial byte order.
      public UnicodeDecoder (bool bigEndian)
      {
          this.bigEndian = bigEndian;
          leftOverByte = -1;
      }

      // Override inherited methods.

      // Get the number of characters that decoding "count" bytes could
      // produce, counting a carried-over byte as part of the input.
      // The data is not inspected, so byte order marks that GetChars
      // drops are still counted; the result is an upper bound.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (index < 0 || index > bytes.Length) {
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          }
          if (count < 0 || count > (bytes.Length - index)) {
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
          }
          if (leftOverByte != -1) {
              return (count + 1) / 2;
          } else {
              return count / 2;
          }
      }

      // Decode "byteCount" bytes of "bytes", starting at "byteIndex",
      // into "chars" starting at "charIndex", and return the number of
      // characters written.  A trailing odd byte is remembered and
      // combined with the first byte of the next call.
      //
      // Throws ArgumentNullException for a null array,
      // ArgumentOutOfRangeException for a range outside either array,
      // and ArgumentException if "chars" runs out of room.  In that last
      // case the decoder's saved state is left as it was before the call.
      public override int GetChars (byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex)
      {
          if (bytes == null) {
              throw new ArgumentNullException ("bytes");
          }
          if (chars == null) {
              throw new ArgumentNullException ("chars");
          }
          if (byteIndex < 0 || byteIndex > bytes.Length) {
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          }
          if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          }
          if (charIndex < 0 || charIndex > chars.Length) {
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
          }

          // Convert the characters.  The loop works on local copies of
          // the state; the fields are written back only after the whole
          // buffer has been processed.
          int posn = charIndex;
          bool isBigEndian = bigEndian;
          int leftOver = leftOverByte;
          int length = chars.Length;
          char ch;
          while (byteCount > 0) {
              if (leftOver != -1) {
                  // Complete the character started in the previous call.
                  if (isBigEndian) {
                      ch = ((char)((leftOver << 8) | ((int)(bytes[byteIndex]))));
                  } else {
                      ch = ((char)(leftOver |
                                   (((int)(bytes[byteIndex])) << 8)));
                  }
                  leftOver = -1;
                  ++byteIndex;
                  --byteCount;
              } else if (byteCount > 1) {
                  // A complete byte pair is available.
                  if (isBigEndian) {
                      ch = ((char)((((int)(bytes[byteIndex])) << 8) |
                                   ((int)(bytes[byteIndex + 1]))));
                  } else {
                      ch = ((char)((((int)(bytes[byteIndex + 1])) << 8) |
                                   ((int)(bytes[byteIndex]))));
                  }
                  byteIndex += 2;
                  byteCount -= 2;
              } else {
                  // Only one byte remains: keep it for the next call.
                  leftOver = (int)(bytes[byteIndex]);
                  break;
              }
              if (ch == '\uFFFE') {
                  // Switch byte orders.  The local copy must be toggled:
                  // the loop reads it for every following character and
                  // it is what gets stored back into the field below.
                  isBigEndian = !isBigEndian;
              } else if (ch != '\uFEFF') {
                  // Ordinary character.
                  if (posn < length) {
                      chars[posn++] = ch;
                  } else {
                      throw new ArgumentException (_("Arg_InsufficientSpace"));
                  }
              }
          }
          leftOverByte = leftOver;
          bigEndian = isBigEndian;

          // Finished - return the converted length.
          return posn - charIndex;
      }
  } // class UnicodeDecoder
  410. }; // class UnicodeEncoding
  411. }; // namespace System.Text