UnicodeEncoding.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003, 2004 Novell, Inc.
  7. * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
  8. *
  9. * Permission is hereby granted, free of charge, to any person obtaining
  10. * a copy of this software and associated documentation files (the "Software"),
  11. * to deal in the Software without restriction, including without limitation
  12. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13. * and/or sell copies of the Software, and to permit persons to whom the
  14. * Software is furnished to do so, subject to the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be included
  17. * in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  22. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  23. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  24. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  25. * OTHER DEALINGS IN THE SOFTWARE.
  26. */
  27. namespace System.Text
  28. {
  29. using System;
  30. using System.Runtime.InteropServices;
  31. [Serializable]
  32. #if NET_2_0
  33. [ComVisible (true)]
  34. #endif
  35. [MonoTODO ("Serialization format not compatible with .NET")]
  36. public class UnicodeEncoding : Encoding
  37. {
  // Magic numbers used by Windows for Unicode. The three-argument
  // constructor hands one of these to the base class: 1200 when
  // little-endian is requested, 1201 when big-endian is requested.
  internal const int UNICODE_CODE_PAGE = 1200;
  internal const int BIG_UNICODE_CODE_PAGE = 1201;
  #if !ECMA_COMPAT
  // Size of characters in this encoding (the byte-count methods in this
  // class multiply character counts by the same factor of two).
  public const int CharSize = 2;
  #endif
  // Internal state; both flags are assigned by the constructors.
  // bigEndian selects the byte order used when copying characters.
  private bool bigEndian;
  // byteOrderMark decides whether GetPreamble returns a two-byte mark
  // or an empty array.
  private bool byteOrderMark;
  48. // Constructors.
  // Create a little-endian encoding that reports a byte order mark.
  // The chained constructor already stores both flags, so the body
  // has nothing left to do.
  public UnicodeEncoding () : this (false, true)
  {
  }

  // Create an encoding with the requested byte order and preamble
  // setting. "throwOnInvalidBytes" is passed on as false.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : this (bigEndian, byteOrderMark, false)
  {
  }
  #if NET_2_0
  public
  #endif
  UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
      : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
  #if NET_2_0
      // Pick the decoder fallback first, then install it in one call:
      // an exception fallback when the caller asked for strictness,
      // otherwise replacement with U+FFFD.
      DecoderFallback decoderFallback;
      if (throwOnInvalidBytes)
          decoderFallback = new DecoderExceptionFallback ();
      else
          decoderFallback = new DecoderReplacementFallback ("\uFFFD");
      SetFallbackInternal (null, decoderFallback);
  #else
      throwOnInvalid = throwOnInvalidBytes;
  #endif

      this.bigEndian = bigEndian;
      this.byteOrderMark = byteOrderMark;

      // The body, header and web names always share one value.
      string sharedName = bigEndian ? "unicodeFFFE" : "utf-16";
      body_name = sharedName;
      header_name = sharedName;
      web_name = sharedName;
      encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
      is_browser_save = !bigEndian;

      // Windows reports the same code page number for
      // both the little-endian and big-endian forms.
      windows_code_page = UNICODE_CODE_PAGE;
  }
  // Get the number of bytes needed to encode "count" characters of
  // "chars" starting at "index": two bytes per character.
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");

      int length = chars.Length;
      if (index < 0 || index > length)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > length - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      return 2 * count;
  }
  // Get the number of bytes needed to encode the whole of "s":
  // two bytes per character.
  public override int GetByteCount (String s)
  {
      if (s != null)
          return 2 * s.Length;

      throw new ArgumentNullException ("s");
  }
  112. #if NET_2_0
  // Get the number of bytes needed to encode "count" characters read
  // through a raw pointer: two bytes per character.
  //
  // Throws ArgumentNullException when "chars" is null and
  // ArgumentOutOfRangeException when "count" is negative or when the
  // resulting byte count would not fit in an int.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetByteCount (char* chars, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (count < 0)
          throw new ArgumentOutOfRangeException ("count");
      // Unlike the array overload there is no array length bounding
      // "count" here, so reject values whose doubling would wrap
      // around to a negative result.
      if (count > Int32.MaxValue / 2)
          throw new ArgumentOutOfRangeException ("count");
      return count * 2;
  }
  123. #endif
  // Get the bytes that result from encoding "charCount" characters of
  // "chars" starting at "charIndex", written to "bytes" from
  // "byteIndex" onwards. Returns the value produced by
  // GetBytesInternal, or 0 when there is nothing to encode.
  public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
                                       byte [] bytes, int byteIndex)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > chars.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      if (charCount < 0 || charCount > chars.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      if (charCount == 0)
          return 0;

      // Room left in the caller's buffer, measured before any substitution.
      int available = bytes.Length - byteIndex;

      // An empty destination is swapped for a one-byte scratch array
      // before pinning; "available" is still 0, so the space check in
      // GetBytesInternal rejects the call.
      byte [] target = (bytes.Length == 0) ? new byte [1] : bytes;

      fixed (char* source = chars)
      fixed (byte* dest = target) {
          return GetBytesInternal (source + charIndex, charCount, dest + byteIndex, available);
      }
  }
  152. #if !NET_2_0
  // Encode the whole of "s" into a freshly allocated array sized by
  // GetByteCount.
  public override byte [] GetBytes (String s)
  {
      if (s == null)
          throw new ArgumentNullException ("s");

      byte [] encoded = new byte [GetByteCount (s)];
      GetBytes (s, 0, s.Length, encoded, 0);
      return encoded;
  }
  162. #endif
  // Encode "charCount" characters of "s" starting at "charIndex" into
  // "bytes" from "byteIndex" onwards. Returns the value produced by
  // GetBytesInternal, or 0 when there is nothing to encode.
  public unsafe override int GetBytes (String s, int charIndex, int charCount,
                                       byte [] bytes, int byteIndex)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > s.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      if (charCount < 0 || charCount > s.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      // For consistency with the char [] overload.
      if (charCount == 0)
          return 0;

      // Room left in the caller's buffer, measured before any substitution.
      int available = bytes.Length - byteIndex;

      // An empty destination is swapped for a one-byte scratch array
      // before pinning; "available" is still 0, so the space check in
      // GetBytesInternal rejects the call.
      byte [] target = (bytes.Length == 0) ? new byte [1] : bytes;

      fixed (char* source = s)
      fixed (byte* dest = target) {
          return GetBytesInternal (source + charIndex, charCount, dest + byteIndex, available);
      }
  }
  191. #if NET_2_0
  // Pointer-based encode: validates the arguments and forwards to
  // GetBytesInternal, which performs the space check and the copy.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetBytes (char* chars, int charCount,
                                       byte* bytes, int byteCount)
  {
      // The destination is checked before the source.
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      int written = GetBytesInternal (chars, charCount, bytes, byteCount);
      return written;
  }
  207. #endif
  // Copy "charCount" characters from "chars" into "bytes" in this
  // encoding's byte order and return the number of bytes written
  // (charCount * 2).
  //
  // Throws ArgumentException when "byteCount" is too small to hold the
  // result. Callers are expected to have rejected null pointers and
  // negative counts already.
  private unsafe int GetBytesInternal (char* chars, int charCount,
                                       byte* bytes, int byteCount)
  {
      // Compare in character units. Doubling charCount first can wrap
      // to a negative number for counts above Int32.MaxValue / 2 (the
      // pointer overload places no upper bound on it), and a negative
      // value would slip past a "byteCount < count" test and reach
      // CopyChars as a bogus length.
      if (charCount > byteCount / 2)
          throw new ArgumentException (_("Arg_InsufficientSpace"));

      // Safe now: charCount <= byteCount / 2, so the product fits.
      int count = charCount * 2;
      CopyChars ((byte*) chars, bytes, count, bigEndian);
      return count;
  }
  // Get the number of characters needed to decode "count" bytes of
  // "bytes" starting at "index": one character per two bytes, with an
  // odd trailing byte not counted.
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int length = bytes.Length;
      if (index < 0 || index > length)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > length - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      int chars = count / 2;
      return chars;
  }
  231. #if NET_2_0
  // Pointer-based character count: one character per two bytes, with
  // an odd trailing byte not counted.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetCharCount (byte* bytes, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count");
      }

      int chars = count / 2;
      return chars;
  }
  242. #endif
  // Get the characters that result from decoding "byteCount" bytes of
  // "bytes" starting at "byteIndex", written to "chars" from
  // "charIndex" onwards. Returns the value produced by
  // GetCharsInternal, or 0 when there is nothing to decode.
  public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
                                       char [] chars, int charIndex)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      if (byteCount < 0 || byteCount > bytes.Length - byteIndex)
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      if (charIndex < 0 || charIndex > chars.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

      if (byteCount == 0)
          return 0;

      // Room left in the caller's buffer, measured before any substitution.
      int available = chars.Length - charIndex;

      // An empty destination is swapped for a one-element scratch array
      // before pinning; "available" is still 0 in that case.
      char [] target = (chars.Length == 0) ? new char [1] : chars;

      fixed (byte* source = bytes)
      fixed (char* dest = target) {
          return GetCharsInternal (source + byteIndex, byteCount, dest + charIndex, available);
      }
  }
  271. #if NET_2_0
  // Pointer-based decode: validates the arguments and forwards to
  // GetCharsInternal, which performs the space check and the copy.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetChars (byte* bytes, int byteCount,
                                       char* chars, int charCount)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      // The destination count is checked before the source count.
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      int decoded = GetCharsInternal (bytes, byteCount, chars, charCount);
      return decoded;
  }
  287. #endif
  // Decode "count" bytes of "bytes" starting at "index" straight into
  // a newly allocated string of count / 2 characters.
  [ComVisible (false)]
  public unsafe override String GetString (byte [] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (index < 0 || index > bytes.Length) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      if (count < 0 || count > (bytes.Length - index)) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }

      if (count == 0) {
          return string.Empty;
      }

      // Same arithmetic as GetCharCount: one character per two bytes.
      int length = count / 2;
      string result = string.InternalAllocateStr (length);

      // Fill the string's character storage in place.
      fixed (byte* source = bytes)
      fixed (char* dest = result) {
          GetCharsInternal (source + index, count, dest, length);
      }
      return result;
  }
  // Copy "byteCount" bytes from "bytes" into "chars" in this
  // encoding's byte order and return the number of characters
  // produced (byteCount / 2). Throws ArgumentException when
  // "charCount" is smaller than that.
  private unsafe int GetCharsInternal (byte* bytes, int byteCount,
                                       char* chars, int charCount)
  {
      int produced = byteCount / 2;

      // Validate that we have sufficient space in "chars".
      if (produced > charCount) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
      return produced;
  }
  // Return the encoder supplied by the base class; this class does
  // not define an encoder of its own.
  [ComVisible (false)]
  public override Encoder GetEncoder ()
  {
      Encoder encoder = base.GetEncoder ();
      return encoder;
  }
  // Get the maximum number of bytes needed to encode a
  // specified number of characters: two bytes per character.
  //
  // Throws ArgumentOutOfRangeException when "charCount" is negative
  // or when the resulting byte count would not fit in an int.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
      }
      // Doubling anything above Int32.MaxValue / 2 would wrap around
      // and hand the caller a negative buffer size; report it instead.
      if (charCount > Int32.MaxValue / 2) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      return charCount * 2;
  }
  // Get the maximum number of characters needed to decode a
  // specified number of bytes: one character per two bytes.
  public override int GetMaxCharCount (int byteCount)
  {
      if (byteCount >= 0)
          return byteCount / 2;

      throw new ArgumentOutOfRangeException
          ("byteCount", _("ArgRange_NonNegative"));
  }
  // Get a new Unicode-specific decoder that uses this instance's
  // byte order.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  // Get the Unicode preamble: FE FF for big-endian, FF FE for
  // little-endian, or an empty array when no byte order mark was
  // requested. A new array is returned on every call.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark)
          return new byte [0];

      if (bigEndian)
          return new byte [] { (byte) 0xFE, (byte) 0xFF };

      return new byte [] { (byte) 0xFF, (byte) 0xFE };
  }
  // Determine if this object is equal to another: the other object
  // must be a UnicodeEncoding with the same code page, byte order and
  // byte-order-mark setting.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = value as UnicodeEncoding;
      if (other == null)
          return false;

      if (codePage != other.codePage)
          return false;
      if (bigEndian != other.bigEndian)
          return false;
      return byteOrderMark == other.byteOrderMark;
  }
  // Get the hash code for this object; the base class value is used
  // unchanged.
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  // Copy "count" bytes of 16-bit units from "src" to "dest".
  //
  // When BitConverter.IsLittleEndian differs from "bigEndian" the data
  // is copied as-is with string.memcpy; otherwise the two bytes of
  // every unit are swapped. Only whole units are transferred: an odd
  // trailing byte is never read or written. The swap reads src and
  // writes dest one byte at a time, so the buffers are expected not to
  // overlap.
  private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
  {
      // Fewer than two bytes means no complete unit. This also stops a
      // negative length (for example from an overflowed size
      // computation in a caller) from reaching memcpy or the swap loop.
      if (count < 2)
          return;

      if (BitConverter.IsLittleEndian != bigEndian) {
          string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
          return;
      }

      // Bulk of the work: swap eight units (16 bytes) per iteration.
      while (count >= 16) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest [4] = src [5];
          dest [5] = src [4];
          dest [6] = src [7];
          dest [7] = src [6];
          dest [8] = src [9];
          dest [9] = src [8];
          dest [10] = src [11];
          dest [11] = src [10];
          dest [12] = src [13];
          dest [13] = src [12];
          dest [14] = src [15];
          dest [15] = src [14];
          dest += 16;
          src += 16;
          count -= 16;
      }

      // Fewer than 16 bytes remain; handle the 8-, 4- and 2-byte
      // pieces indicated by the low bits of the remaining count.
      if ((count & 8) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest [4] = src [5];
          dest [5] = src [4];
          dest [6] = src [7];
          dest [7] = src [6];
          dest += 8;
          src += 8;
      }
      if ((count & 4) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest += 4;
          src += 4;
      }
      if ((count & 2) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
      }
  }
  // Unicode decoder implementation. Unlike the stateless methods on
  // the encoding itself, the decoder remembers an odd trailing byte
  // from one GetChars call and combines it with the first byte of the
  // next call.
  private sealed class UnicodeDecoder : Decoder
  {
      private bool bigEndian;

      // The byte carried over from the previous GetChars call, or -1
      // when nothing is pending.
      private int leftOverByte;

      // Constructor.
      public UnicodeDecoder (bool bigEndian)
      {
          leftOverByte = -1;
          this.bigEndian = bigEndian;
      }

      // Number of characters GetChars would produce for this input,
      // taking a pending carried-over byte into account.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null)
              throw new ArgumentNullException ("bytes");
          if (index < 0 || index > bytes.Length)
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          if (count < 0 || count > bytes.Length - index)
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

          // A pending byte counts as one extra byte of input.
          int pending = (leftOverByte != -1) ? 1 : 0;
          return (count + pending) / 2;
      }

      // Decode "byteCount" bytes of "bytes" starting at "byteIndex"
      // into "chars" from "charIndex" onwards and return the number
      // of characters written.
      public unsafe override int GetChars (byte [] bytes, int byteIndex,
                                           int byteCount, char [] chars,
                                           int charIndex)
      {
          if (bytes == null)
              throw new ArgumentNullException ("bytes");
          if (chars == null)
              throw new ArgumentNullException ("chars");
          if (byteIndex < 0 || byteIndex > bytes.Length)
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          if (byteCount < 0 || byteCount > bytes.Length - byteIndex)
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          if (charIndex < 0 || charIndex > chars.Length)
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

          // Empty input leaves any pending byte untouched.
          if (byteCount == 0)
              return 0;

          int pending = leftOverByte;
          bool havePending = pending != -1;

          int produced = havePending ? (byteCount + 1) / 2 : byteCount / 2;
          if (chars.Length - charIndex < produced)
              throw new ArgumentException (_("Arg_InsufficientSpace"));

          if (havePending) {
              // Finish the unit started by the previous call: the
              // pending byte comes first in the stream, the first new
              // byte second.
              int next = bytes [byteIndex];
              int unit = bigEndian ? ((pending << 8) | next)
                                   : ((next << 8) | pending);
              chars [charIndex] = unchecked ((char) unit);
              charIndex++;
              byteIndex++;
              byteCount--;
          }

          // byteCount cannot be negative here, so this is the same
          // test as "any bit other than the lowest is set".
          if (byteCount >= 2) {
              fixed (byte* source = bytes)
              fixed (char* dest = chars) {
                  CopyChars (source + byteIndex, (byte*) (dest + charIndex), byteCount, bigEndian);
              }
          }

          // An odd number of remaining bytes leaves the last one
          // pending for the next call.
          leftOverByte = ((byteCount & 1) == 0) ? -1 : bytes [byteCount + byteIndex - 1];
          return produced;
      }
  } // class UnicodeDecoder
  565. }; // class UnicodeEncoding
  566. }; // namespace System.Text