UnicodeEncoding.cs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining
  8. * a copy of this software and associated documentation files (the "Software"),
  9. * to deal in the Software without restriction, including without limitation
  10. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11. * and/or sell copies of the Software, and to permit persons to whom the
  12. * Software is furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included
  15. * in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  21. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  22. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  23. * OTHER DEALINGS IN THE SOFTWARE.
  24. */
  25. namespace System.Text
  26. {
  27. using System;
  28. public class UnicodeEncoding : Encoding
  29. {
  // Code page numbers used by Windows for the two Unicode byte orders.
  internal const int UNICODE_CODE_PAGE     = 1200;
  internal const int BIG_UNICODE_CODE_PAGE = 1201;

  #if !ECMA_COMPAT
  // Size, in bytes, of a single character in this encoding.
  public const int CharSize = 2;
  #endif

  // Configuration of this instance.  Both values are fixed by the
  // constructors and never modified afterwards, hence "readonly".
  private readonly bool bigEndian;      // most significant byte comes first
  private readonly bool byteOrderMark;  // write a byte order mark when encoding
  // Construct the default encoding: little-endian byte order, with a
  // byte order mark.
  public UnicodeEncoding() : this(false, true)
  {
      // The two-argument constructor selects the code page and
      // records both settings.
  }
  // Construct an encoding with an explicit byte order and byte order
  // mark setting.  The code page passed to the base class follows the
  // requested byte order.
  public UnicodeEncoding(bool bigEndian, bool byteOrderMark)
      : base(bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
      this.byteOrderMark = byteOrderMark;
      this.bigEndian = bigEndian;
  }
  // Get the number of bytes needed to encode "count" characters of
  // "chars", starting at "index": two bytes per character, plus two
  // more for the byte order mark when this instance writes one.
  //
  // Throws ArgumentNullException if "chars" is null, and
  // ArgumentOutOfRangeException if "index" or "count" do not describe
  // a valid range within "chars".
  public override int GetByteCount(char[] chars, int index, int count)
  {
      if(chars == null)
      {
          throw new ArgumentNullException("chars");
      }
      if(!(index >= 0 && index <= chars.Length))
      {
          throw new ArgumentOutOfRangeException
              ("index", _("ArgRange_Array"));
      }
      if(!(count >= 0 && count <= (chars.Length - index)))
      {
          throw new ArgumentOutOfRangeException
              ("count", _("ArgRange_Array"));
      }

      int total = count * 2;
      if(byteOrderMark)
      {
          total += 2;
      }
      return total;
  }
  // Get the number of bytes needed to encode an entire string: two
  // bytes per character, plus two for the byte order mark when this
  // instance writes one.
  //
  // Throws ArgumentNullException if "s" is null.
  public override int GetByteCount(String s)
  {
      if(s == null)
      {
          throw new ArgumentNullException("s");
      }

      int total = s.Length * 2;
      if(byteOrderMark)
      {
          total += 2;
      }
      return total;
  }
  // Encode "charCount" characters of "chars", starting at "charIndex",
  // into "bytes" starting at "byteIndex".  When this instance has a
  // byte order mark enabled, the mark is written ahead of the
  // character data.  Returns the number of bytes written.
  //
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for an index or count outside its
  // array, and ArgumentException when "bytes" is too small.
  public override int GetBytes(char[] chars, int charIndex, int charCount,
                               byte[] bytes, int byteIndex)
  {
      if(chars == null)
      {
          throw new ArgumentNullException("chars");
      }
      if(bytes == null)
      {
          throw new ArgumentNullException("bytes");
      }
      if(!(charIndex >= 0 && charIndex <= chars.Length))
      {
          throw new ArgumentOutOfRangeException
              ("charIndex", _("ArgRange_Array"));
      }
      if(!(charCount >= 0 && charCount <= (chars.Length - charIndex)))
      {
          throw new ArgumentOutOfRangeException
              ("charCount", _("ArgRange_Array"));
      }
      if(!(byteIndex >= 0 && byteIndex <= bytes.Length))
      {
          throw new ArgumentOutOfRangeException
              ("byteIndex", _("ArgRange_Array"));
      }
      int required = charCount * 2 + (byteOrderMark ? 2 : 0);
      if((bytes.Length - byteIndex) < required)
      {
          throw new ArgumentException
              (_("Arg_InsufficientSpace"));
      }

      int output = byteIndex;
      int stop = charIndex + charCount;
      if(bigEndian)
      {
          // Mark and data are written most significant byte first.
          if(byteOrderMark)
          {
              bytes[output++] = (byte)0xFE;
              bytes[output++] = (byte)0xFF;
          }
          for(int input = charIndex; input < stop; ++input)
          {
              char value = chars[input];
              bytes[output++] = (byte)(value >> 8);
              bytes[output++] = (byte)value;
          }
      }
      else
      {
          // Mark and data are written least significant byte first.
          if(byteOrderMark)
          {
              bytes[output++] = (byte)0xFF;
              bytes[output++] = (byte)0xFE;
          }
          for(int input = charIndex; input < stop; ++input)
          {
              char value = chars[input];
              bytes[output++] = (byte)value;
              bytes[output++] = (byte)(value >> 8);
          }
      }
      return output - byteIndex;
  }
  // Encode "charCount" characters of the string "s", starting at
  // "charIndex", into "bytes" starting at "byteIndex".  When this
  // instance has a byte order mark enabled, the mark is written ahead
  // of the character data.  Returns the number of bytes written.
  //
  // Throws ArgumentNullException for a null argument,
  // ArgumentOutOfRangeException for an index or count that is out of
  // range, and ArgumentException when "bytes" is too small.
  public override int GetBytes(String s, int charIndex, int charCount,
                               byte[] bytes, int byteIndex)
  {
      if(s == null)
      {
          throw new ArgumentNullException("s");
      }
      if(bytes == null)
      {
          throw new ArgumentNullException("bytes");
      }
      if(!(charIndex >= 0 && charIndex <= s.Length))
      {
          throw new ArgumentOutOfRangeException
              ("charIndex", _("ArgRange_StringIndex"));
      }
      if(!(charCount >= 0 && charCount <= (s.Length - charIndex)))
      {
          throw new ArgumentOutOfRangeException
              ("charCount", _("ArgRange_StringRange"));
      }
      if(!(byteIndex >= 0 && byteIndex <= bytes.Length))
      {
          throw new ArgumentOutOfRangeException
              ("byteIndex", _("ArgRange_Array"));
      }
      int required = charCount * 2 + (byteOrderMark ? 2 : 0);
      if((bytes.Length - byteIndex) < required)
      {
          throw new ArgumentException
              (_("Arg_InsufficientSpace"));
      }

      int output = byteIndex;
      int stop = charIndex + charCount;
      if(bigEndian)
      {
          // Mark and data are written most significant byte first.
          if(byteOrderMark)
          {
              bytes[output++] = (byte)0xFE;
              bytes[output++] = (byte)0xFF;
          }
          for(int input = charIndex; input < stop; ++input)
          {
              char value = s[input];
              bytes[output++] = (byte)(value >> 8);
              bytes[output++] = (byte)value;
          }
      }
      else
      {
          // Mark and data are written least significant byte first.
          if(byteOrderMark)
          {
              bytes[output++] = (byte)0xFF;
              bytes[output++] = (byte)0xFE;
          }
          for(int input = charIndex; input < stop; ++input)
          {
              char value = s[input];
              bytes[output++] = (byte)value;
              bytes[output++] = (byte)(value >> 8);
          }
      }
      return output - byteIndex;
  }
  // Determine if the "count" bytes starting at "index" begin with the
  // big-endian byte order mark (0xFE 0xFF).  The caller must already
  // have validated "index" and "count" against "bytes".
  private static bool StartsWithBigEndianMark
              (byte[] bytes, int index, int count)
  {
      return (count >= 2 &&
              bytes[index] == (byte)0xFE &&
              bytes[index + 1] == (byte)0xFF);
  }

  // Determine if the "count" bytes starting at "index" begin with the
  // little-endian byte order mark (0xFF 0xFE).  The caller must already
  // have validated "index" and "count" against "bytes".
  private static bool StartsWithLittleEndianMark
              (byte[] bytes, int index, int count)
  {
      return (count >= 2 &&
              bytes[index] == (byte)0xFF &&
              bytes[index + 1] == (byte)0xFE);
  }

  // Get the number of characters that "GetChars" produces when decoding
  // "count" bytes of "bytes", starting at "index".  A byte order mark at
  // the start of that range is skipped by "GetChars", so its two bytes
  // are left out of the count.  An odd trailing byte is ignored.
  //
  // Throws ArgumentNullException if "bytes" is null, and
  // ArgumentOutOfRangeException if "index" or "count" do not describe
  // a valid range within "bytes".
  public override int GetCharCount(byte[] bytes, int index, int count)
  {
      if(bytes == null)
      {
          throw new ArgumentNullException("bytes");
      }
      if(index < 0 || index > bytes.Length)
      {
          throw new ArgumentOutOfRangeException
              ("index", _("ArgRange_Array"));
      }
      if(count < 0 || count > (bytes.Length - index))
      {
          throw new ArgumentOutOfRangeException
              ("count", _("ArgRange_Array"));
      }

      // Look for the mark at the start of the requested range, not at
      // the start of the array, and discount both of its bytes.
      if(StartsWithBigEndianMark(bytes, index, count) ||
         StartsWithLittleEndianMark(bytes, index, count))
      {
          return (count - 2) / 2;
      }
      return count / 2;
  }

  // Decode "byteCount" bytes of "bytes", starting at "byteIndex", into
  // "chars" starting at "charIndex".  A byte order mark at the start of
  // the range selects the byte order and is not copied to the output;
  // without one, the byte order configured on this instance is used.
  // An odd trailing byte is ignored.  Returns the number of characters
  // written.
  //
  // Throws ArgumentNullException for a null array,
  // ArgumentOutOfRangeException for an index or count outside its
  // array, and ArgumentException when "chars" is too small.
  public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                               char[] chars, int charIndex)
  {
      if(bytes == null)
      {
          throw new ArgumentNullException("bytes");
      }
      if(chars == null)
      {
          throw new ArgumentNullException("chars");
      }
      if(byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException
              ("byteIndex", _("ArgRange_Array"));
      }
      if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
      {
          throw new ArgumentOutOfRangeException
              ("byteCount", _("ArgRange_Array"));
      }
      if(charIndex < 0 || charIndex > chars.Length)
      {
          throw new ArgumentOutOfRangeException
              ("charIndex", _("ArgRange_Array"));
      }

      // Determine the byte order in the incoming buffer.  The mark is
      // looked for at "byteIndex", the first byte the caller asked us
      // to decode, rather than at the start of the array.
      bool isBigEndian;
      if(StartsWithBigEndianMark(bytes, byteIndex, byteCount))
      {
          isBigEndian = true;
          byteCount -= 2;
          byteIndex += 2;
      }
      else if(StartsWithLittleEndianMark(bytes, byteIndex, byteCount))
      {
          isBigEndian = false;
          byteCount -= 2;
          byteIndex += 2;
      }
      else
      {
          isBigEndian = bigEndian;
      }

      // Validate that we have sufficient space in "chars".
      if((chars.Length - charIndex) < (byteCount / 2))
      {
          throw new ArgumentException
              (_("Arg_InsufficientSpace"));
      }

      // Convert the characters, two bytes at a time.
      int posn = charIndex;
      if(isBigEndian)
      {
          while(byteCount >= 2)
          {
              chars[posn++] =
                  ((char)((((int)(bytes[byteIndex])) << 8) |
                           ((int)(bytes[byteIndex + 1]))));
              byteIndex += 2;
              byteCount -= 2;
          }
      }
      else
      {
          while(byteCount >= 2)
          {
              chars[posn++] =
                  ((char)((((int)(bytes[byteIndex + 1])) << 8) |
                           ((int)(bytes[byteIndex]))));
              byteIndex += 2;
              byteCount -= 2;
          }
      }
      return posn - charIndex;
  }
  // Get the maximum number of bytes needed to encode "charCount"
  // characters: two per character, plus two for the byte order mark
  // when this instance writes one.
  //
  // Throws ArgumentOutOfRangeException if "charCount" is negative.
  public override int GetMaxByteCount(int charCount)
  {
      if(!(charCount >= 0))
      {
          throw new ArgumentOutOfRangeException
              ("charCount", _("ArgRange_NonNegative"));
      }

      int total = charCount * 2;
      if(byteOrderMark)
      {
          total += 2;
      }
      return total;
  }
  // Get the maximum number of characters that decoding "byteCount"
  // bytes can produce: one character for every two bytes.
  //
  // Throws ArgumentOutOfRangeException if "byteCount" is negative.
  public override int GetMaxCharCount(int byteCount)
  {
      if(!(byteCount >= 0))
      {
          throw new ArgumentOutOfRangeException
              ("byteCount", _("ArgRange_NonNegative"));
      }

      int pairs = byteCount / 2;
      return pairs;
  }
  // Create a new decoder that starts out in this instance's byte order.
  public override Decoder GetDecoder()
  {
      Decoder decoder = new UnicodeDecoder(bigEndian);
      return decoder;
  }
  // Get the preamble for this encoding: the two-byte byte order mark
  // in this instance's byte order, or an empty array when the byte
  // order mark is disabled.  A new array is returned on every call.
  public override byte[] GetPreamble()
  {
      if(!byteOrderMark)
      {
          return new byte [0];
      }

      byte[] mark = new byte[2];
      mark[0] = (byte)(bigEndian ? 0xFE : 0xFF);
      mark[1] = (byte)(bigEndian ? 0xFF : 0xFE);
      return mark;
  }
  // Determine if "value" is a UnicodeEncoding with the same code page,
  // byte order and byte order mark setting as this instance.  Any
  // other object, including null, is not equal.
  public override bool Equals(Object value)
  {
      UnicodeEncoding other = (value as UnicodeEncoding);
      if(other == null)
      {
          return false;
      }
      if(codePage != other.codePage)
      {
          return false;
      }
      if(bigEndian != other.bigEndian)
      {
          return false;
      }
      return (byteOrderMark == other.byteOrderMark);
  }
  // Get the hash code for this object.  The value is whatever the
  // base class computes; nothing specific to this class is mixed in.
  public override int GetHashCode()
  {
      int hash = base.GetHashCode();
      return hash;
  }
  391. #if !ECMA_COMPAT
  // Get the mail body name for this encoding, which depends on the
  // byte order.
  public override String BodyName
  {
      get
      {
          return (bigEndian ? "unicodeFFFE" : "utf-16");
      }
  }
  // Get the human-readable name for this encoding, which depends on
  // the byte order.
  public override String EncodingName
  {
      get
      {
          return (bigEndian ? "Unicode (Big-Endian)" : "Unicode");
      }
  }
  // Get the mail agent header name for this encoding, which depends
  // on the byte order.
  public override String HeaderName
  {
      get
      {
          return (bigEndian ? "unicodeFFFE" : "utf-16");
      }
  }
  // Determine if this encoding can be saved from a Web browser.
  // Only the little-endian form reports true.
  public override bool IsBrowserSave
  {
      get
      {
          if(bigEndian)
          {
              return false;
          }
          return true;
      }
  }
  // Get the IANA-preferred Web name for this encoding, which depends
  // on the byte order.
  public override String WebName
  {
      get
      {
          return (bigEndian ? "unicodeFFFE" : "utf-16");
      }
  }
  // Get the Windows code page represented by this object.
  public override int WindowsCodePage
  {
      get
      {
          // The little-endian code page number is returned for both
          // byte orders, matching what Windows reports.
          int page = UNICODE_CODE_PAGE;
          return page;
      }
  }
  470. #endif // !ECMA_COMPAT
  // Unicode decoder implementation.  Unlike the one-shot methods on the
  // encoding, the decoder keeps state between calls: an odd trailing
  // byte is held until the next call, and the byte order that is
  // currently in effect is remembered.
  private sealed class UnicodeDecoder : Decoder
  {
      // Byte order in effect for the next call to "GetChars".
      private bool bigEndian;

      // Unpaired byte held over from the previous call, or -1 if none.
      private int leftOverByte;

      // Construct a decoder that starts in the given byte order, with
      // no byte held over.
      public UnicodeDecoder(bool bigEndian)
      {
          this.bigEndian = bigEndian;
          leftOverByte = -1;
      }

      // Get an upper bound on the number of characters that the next
      // call to "GetChars" produces for "count" bytes, allowing for a
      // byte held over from the previous call.  Byte order marks are
      // dropped by "GetChars", so the real result can be smaller.
      //
      // Throws ArgumentNullException if "bytes" is null, and
      // ArgumentOutOfRangeException if "index" or "count" do not
      // describe a valid range within "bytes".
      public override int GetCharCount(byte[] bytes, int index, int count)
      {
          if(bytes == null)
          {
              throw new ArgumentNullException("bytes");
          }
          if(index < 0 || index > bytes.Length)
          {
              throw new ArgumentOutOfRangeException
                  ("index", _("ArgRange_Array"));
          }
          if(count < 0 || count > (bytes.Length - index))
          {
              throw new ArgumentOutOfRangeException
                  ("count", _("ArgRange_Array"));
          }
          if(leftOverByte != -1)
          {
              // The held-over byte pairs up with the first new byte.
              return (count + 1) / 2;
          }
          else
          {
              return count / 2;
          }
      }

      // Decode "byteCount" bytes of "bytes", starting at "byteIndex",
      // into "chars" starting at "charIndex", continuing from the
      // state left by the previous call.  U+FEFF is dropped from the
      // output; U+FFFE (a byte-swapped mark) is dropped and reverses
      // the byte order for the data that follows.  Returns the number
      // of characters written.
      //
      // Throws ArgumentNullException for a null array,
      // ArgumentOutOfRangeException for an index or count outside its
      // array, and ArgumentException when "chars" fills up.  The
      // decoder state is only updated when the call succeeds.
      public override int GetChars(byte[] bytes, int byteIndex,
                                   int byteCount, char[] chars,
                                   int charIndex)
      {
          if(bytes == null)
          {
              throw new ArgumentNullException("bytes");
          }
          if(chars == null)
          {
              throw new ArgumentNullException("chars");
          }
          if(byteIndex < 0 || byteIndex > bytes.Length)
          {
              throw new ArgumentOutOfRangeException
                  ("byteIndex", _("ArgRange_Array"));
          }
          if(byteCount < 0 || byteCount > (bytes.Length - byteIndex))
          {
              throw new ArgumentOutOfRangeException
                  ("byteCount", _("ArgRange_Array"));
          }
          if(charIndex < 0 || charIndex > chars.Length)
          {
              throw new ArgumentOutOfRangeException
                  ("charIndex", _("ArgRange_Array"));
          }

          // Work on local copies of the state; they are written back
          // to the fields once the whole buffer has been processed.
          int posn = charIndex;
          bool isBigEndian = bigEndian;
          int leftOver = leftOverByte;
          int length = chars.Length;
          char ch;
          while(byteCount > 0)
          {
              if(leftOver != -1)
              {
                  // Pair the held-over byte with the next byte.
                  if(isBigEndian)
                  {
                      ch = ((char)((leftOver << 8) |
                                   ((int)(bytes[byteIndex]))));
                  }
                  else
                  {
                      ch = ((char)(leftOver |
                                   (((int)(bytes[byteIndex])) << 8)));
                  }
                  leftOver = -1;
                  ++byteIndex;
                  --byteCount;
              }
              else if(byteCount > 1)
              {
                  // Decode a complete two-byte unit.
                  if(isBigEndian)
                  {
                      ch = ((char)((((int)(bytes[byteIndex])) << 8) |
                                    ((int)(bytes[byteIndex + 1]))));
                  }
                  else
                  {
                      ch = ((char)((((int)(bytes[byteIndex + 1]))
                                        << 8) |
                                    ((int)(bytes[byteIndex]))));
                  }
                  byteIndex += 2;
                  byteCount -= 2;
              }
              else
              {
                  // A single byte remains: hold it for the next call.
                  leftOver = (int)(bytes[byteIndex]);
                  break;
              }
              if(ch == '\uFFFE')
              {
                  // Switch byte orders.  The local copy must be the one
                  // that is toggled: it drives the rest of this loop and
                  // is stored back into the field below.
                  isBigEndian = !isBigEndian;
              }
              else if(ch != '\uFEFF')
              {
                  // Ordinary character.
                  if(posn < length)
                  {
                      chars[posn++] = ch;
                  }
                  else
                  {
                      throw new ArgumentException
                          (_("Arg_InsufficientSpace"));
                  }
              }
          }
          leftOverByte = leftOver;
          bigEndian = isBigEndian;

          // Finished - return the converted length.
          return posn - charIndex;
      }

  } // class UnicodeDecoder
  605. }; // class UnicodeEncoding
  606. }; // namespace System.Text