UnicodeEncoding.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003, 2004 Novell, Inc.
  7. * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
  8. *
  9. * Permission is hereby granted, free of charge, to any person obtaining
  10. * a copy of this software and associated documentation files (the "Software"),
  11. * to deal in the Software without restriction, including without limitation
  12. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13. * and/or sell copies of the Software, and to permit persons to whom the
  14. * Software is furnished to do so, subject to the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be included
  17. * in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  22. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  23. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  24. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  25. * OTHER DEALINGS IN THE SOFTWARE.
  26. */
  27. namespace System.Text
  28. {
  29. using System;
  30. using System.Runtime.InteropServices;
  31. [Serializable]
  32. #if NET_2_0
  33. [ComVisible (true)]
  34. #endif
  35. [MonoTODO ("Serialization format not compatible with .NET")]
  36. public class UnicodeEncoding : Encoding
  37. {
  38. // Magic numbers used by Windows for Unicode.
  39. internal const int UNICODE_CODE_PAGE = 1200;
  40. internal const int BIG_UNICODE_CODE_PAGE = 1201;
  41. #if !ECMA_COMPAT
  42. // Size of characters in this encoding.
  43. public const int CharSize = 2;
  44. #endif
  45. // Internal state.
  46. private bool bigEndian;
  47. private bool byteOrderMark;
  48. // Constructors.
  // Creates a little-endian encoding that emits a byte order mark.
  // The chained constructors store both flags, so this body has
  // nothing left to initialize.
  public UnicodeEncoding () : this (false, true)
  {
  }

  // Creates an encoding with the requested byte order and preamble
  // behaviour. "false" is forwarded as throwOnInvalidBytes.
  //   bigEndian     - true for big-endian byte order, false for little-endian.
  //   byteOrderMark - true if GetPreamble should return a byte order mark.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : this (bigEndian, byteOrderMark, false)
  {
  }
  #if NET_2_0
  public
  #endif
  // Creates an encoding with the requested byte order and preamble
  // behaviour. The base class receives code page 1201 for big-endian
  // and 1200 for little-endian.
  //   bigEndian           - true for big-endian byte order.
  //   byteOrderMark       - true if GetPreamble should return a byte order mark.
  //   throwOnInvalidBytes - (NET_2_0 only) true selects the exception decoder
  //                         fallback, false the U+FFFD replacement fallback.
  UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
      : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
  #if NET_2_0
      if (throwOnInvalidBytes) {
          SetFallbackInternal (null, new DecoderExceptionFallback ());
      } else {
          SetFallbackInternal (null, new DecoderReplacementFallback ("\uFFFD"));
      }
  #endif
      this.bigEndian = bigEndian;
      this.byteOrderMark = byteOrderMark;

      // Body, header and web names are always the same string for a
      // given byte order.
      string charsetName = bigEndian ? "unicodeFFFE" : "utf-16";
      body_name = charsetName;
      header_name = charsetName;
      web_name = charsetName;
      encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
      is_browser_save = !bigEndian;

      // Windows reports the same code page number for
      // both the little-endian and big-endian forms.
      windows_code_page = UNICODE_CODE_PAGE;
  }
  // Returns the number of bytes needed to encode "count" characters of
  // "chars" starting at "index": two bytes per character.
  // Throws ArgumentNullException when "chars" is null and
  // ArgumentOutOfRangeException when "index"/"count" do not describe a
  // range inside the array.
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");

      int available = chars.Length;
      if (index < 0 || index > available)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > available - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      return 2 * count;
  }
  // Returns the number of bytes needed to encode the whole string:
  // two bytes per character.
  // Throws ArgumentNullException when "s" is null.
  public override int GetByteCount (String s)
  {
      if (s == null)
          throw new ArgumentNullException ("s");

      int charCount = s.Length;
      return 2 * charCount;
  }
  #if NET_2_0
  // Returns the number of bytes needed to encode "count" characters
  // read through a raw pointer: two bytes per character.
  // Throws ArgumentNullException when "chars" is null and
  // ArgumentOutOfRangeException when "count" is negative or when the
  // resulting byte count would not fit in an Int32.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetByteCount (char* chars, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (count < 0)
          throw new ArgumentOutOfRangeException ("count");
      // count * 2 would silently wrap to a negative or too-small value
      // once count exceeds half of Int32.MaxValue, so reject it instead.
      if (count > Int32.MaxValue / 2)
          throw new ArgumentOutOfRangeException ("count");
      return count * 2;
  }
  #endif
  // Encodes "charCount" characters of "chars", starting at "charIndex",
  // into "bytes" starting at "byteIndex". Returns the number of bytes
  // written (0 when charCount is 0).
  // Throws ArgumentNullException for a null array and
  // ArgumentOutOfRangeException for an index or count outside its array;
  // GetBytesInternal throws ArgumentException when the output space is
  // too small.
  public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
                                       byte [] bytes, int byteIndex)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > chars.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      if (charCount < 0 || charCount > (chars.Length - charIndex))
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      if (charCount == 0)
          return 0;

      // The free space is measured on the caller's array, before an
      // empty array is swapped for a one-element stand-in below.
      int spaceLeft = bytes.Length - byteIndex;

      // NOTE(review): presumably the stand-in exists so that "fixed"
      // never pins a zero-length array - confirm.
      byte [] pinnable = (bytes.Length == 0) ? new byte [1] : bytes;

      fixed (char* charPtr = chars)
      fixed (byte* bytePtr = pinnable) {
          return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, spaceLeft);
      }
  }
  #if !NET_2_0
  // Encodes the whole string into a newly allocated byte array sized
  // by GetByteCount.
  // Throws ArgumentNullException when "s" is null.
  public override byte [] GetBytes (String s)
  {
      if (s == null)
          throw new ArgumentNullException ("s");

      byte [] encoded = new byte [GetByteCount (s)];
      GetBytes (s, 0, s.Length, encoded, 0);
      return encoded;
  }
  #endif
  // Encodes "charCount" characters of "s", starting at "charIndex",
  // into "bytes" starting at "byteIndex". Returns the number of bytes
  // written (0 when charCount is 0).
  // Throws ArgumentNullException for a null argument and
  // ArgumentOutOfRangeException for an index or count outside the
  // string or array; GetBytesInternal throws ArgumentException when the
  // output space is too small.
  public unsafe override int GetBytes (String s, int charIndex, int charCount,
                                       byte [] bytes, int byteIndex)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > s.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      if (charCount < 0 || charCount > (s.Length - charIndex))
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      // For consistency with the char[] overload.
      if (charCount == 0)
          return 0;

      // The free space is measured on the caller's array, before an
      // empty array is swapped for a one-element stand-in below.
      int spaceLeft = bytes.Length - byteIndex;

      // NOTE(review): presumably the stand-in exists so that "fixed"
      // never pins a zero-length array - confirm.
      byte [] pinnable = (bytes.Length == 0) ? new byte [1] : bytes;

      fixed (char* charPtr = s)
      fixed (byte* bytePtr = pinnable) {
          return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, spaceLeft);
      }
  }
  #if NET_2_0
  // Encodes "charCount" characters read from "chars" into the buffer
  // at "bytes", which has room for "byteCount" bytes. Returns the
  // number of bytes written.
  // Throws ArgumentNullException for a null pointer and
  // ArgumentOutOfRangeException for a negative count; GetBytesInternal
  // throws ArgumentException when the output space is too small.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetBytes (char* chars, int charCount,
                                       byte* bytes, int byteCount)
  {
      // "bytes" is validated before "chars"; keep that order so the
      // reported parameter name does not change.
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      int written = GetBytesInternal (chars, charCount, bytes, byteCount);
      return written;
  }
  #endif
  // Shared worker for the GetBytes overloads: copies "charCount" UTF-16
  // code units from "chars" to "bytes" in this encoding's byte order and
  // returns the number of bytes written (charCount * 2).
  // Callers have already validated that both counts are non-negative.
  // Throws ArgumentException when "byteCount" is too small for the output.
  private unsafe int GetBytesInternal (char* chars, int charCount,
                                       byte* bytes, int byteCount)
  {
      // Compare in character units. Computing charCount * 2 first can
      // wrap to a negative number for very large counts, which would
      // slip past the space check and reach CopyChars with a bogus length.
      // For non-negative values, byteCount < charCount * 2 is equivalent
      // to charCount > byteCount / 2.
      if (charCount > byteCount / 2)
          throw new ArgumentException (_("Arg_InsufficientSpace"));

      // Safe now: charCount * 2 <= byteCount <= Int32.MaxValue.
      int count = charCount * 2;
      CopyChars ((byte*) chars, bytes, count, bigEndian);
      return count;
  }
  // Returns the number of characters produced by decoding "count" bytes
  // of "bytes" starting at "index": one character per complete byte
  // pair, so an odd trailing byte is not counted.
  // Throws ArgumentNullException when "bytes" is null and
  // ArgumentOutOfRangeException when "index"/"count" do not describe a
  // range inside the array.
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int available = bytes.Length;
      if (index < 0 || index > available)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > available - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      return count / 2;
  }
  #if NET_2_0
  // Returns the number of characters produced by decoding "count" bytes
  // read through a raw pointer: one character per complete byte pair.
  // Throws ArgumentNullException when "bytes" is null and
  // ArgumentOutOfRangeException when "count" is negative.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetCharCount (byte* bytes, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count");
      }

      int charCount = count / 2;
      return charCount;
  }
  #endif
  // Decodes "byteCount" bytes of "bytes", starting at "byteIndex", into
  // "chars" starting at "charIndex". Returns the number of characters
  // written (0 when byteCount is 0).
  // Throws ArgumentNullException for a null array and
  // ArgumentOutOfRangeException for an index or count outside its array;
  // GetCharsInternal throws ArgumentException when the output space is
  // too small.
  public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
                                       char [] chars, int charIndex)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      if (charIndex < 0 || charIndex > chars.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

      if (byteCount == 0)
          return 0;

      // The free space is measured on the caller's array, before an
      // empty array is swapped for a one-element stand-in below.
      int spaceLeft = chars.Length - charIndex;

      // NOTE(review): presumably the stand-in exists so that "fixed"
      // never pins a zero-length array - confirm.
      char [] pinnable = (chars.Length == 0) ? new char [1] : chars;

      fixed (byte* bytePtr = bytes)
      fixed (char* charPtr = pinnable) {
          return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, spaceLeft);
      }
  }
  #if NET_2_0
  // Decodes "byteCount" bytes read from "bytes" into the buffer at
  // "chars", which has room for "charCount" characters. Returns the
  // number of characters written.
  // Throws ArgumentNullException for a null pointer and
  // ArgumentOutOfRangeException for a negative count; GetCharsInternal
  // throws ArgumentException when the output space is too small.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetChars (byte* bytes, int byteCount,
                                       char* chars, int charCount)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      // "charCount" is validated before "byteCount"; keep that order so
      // the reported parameter name does not change.
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      int written = GetCharsInternal (bytes, byteCount, chars, charCount);
      return written;
  }
  #endif
  // Decodes "count" bytes of "bytes", starting at "index", directly
  // into a newly allocated string of count / 2 characters. Returns
  // string.Empty when "count" is 0.
  // Throws ArgumentNullException when "bytes" is null and
  // ArgumentOutOfRangeException when "index"/"count" do not describe a
  // range inside the array.
  [ComVisible (false)]
  public unsafe override String GetString (byte [] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (index < 0 || index > bytes.Length) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      if (count < 0 || count > (bytes.Length - index)) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }
      if (count == 0) {
          return string.Empty;
      }

      // One character per complete byte pair.
      int length = count / 2;
      string decoded = string.InternalAllocateStr (length);

      // The string's storage is filled in place through the pinned pointer.
      fixed (byte* bytePtr = bytes)
      fixed (char* charPtr = decoded) {
          GetCharsInternal (bytePtr + index, count, charPtr, length);
      }
      return decoded;
  }
  // Shared worker for GetChars and GetString: copies "byteCount" bytes
  // from "bytes" to "chars" in this encoding's byte order and returns
  // the number of characters produced (byteCount / 2).
  // Throws ArgumentException when "charCount" is smaller than that.
  private unsafe int GetCharsInternal (byte* bytes, int byteCount,
                                       char* chars, int charCount)
  {
      int produced = byteCount / 2;

      // Make sure the destination can hold every decoded character.
      if (produced > charCount) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
      return produced;
  }
  // Returns the encoder supplied by the base class; this class adds
  // no encoder of its own.
  [ComVisible (false)]
  public override Encoder GetEncoder ()
  {
      Encoder encoder = base.GetEncoder ();
      return encoder;
  }
  // Get the maximum number of bytes needed to encode a
  // specified number of characters: two bytes per character.
  // Throws ArgumentOutOfRangeException when "charCount" is negative or
  // when the resulting byte count would not fit in an Int32.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
      }
      // charCount * 2 would silently wrap to a negative or too-small
      // value once charCount exceeds half of Int32.MaxValue; a caller
      // sizing a buffer from that result would under-allocate.
      if (charCount > Int32.MaxValue / 2) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      return charCount * 2;
  }
  // Get the maximum number of characters needed to decode a
  // specified number of bytes: one character per complete byte pair.
  // Throws ArgumentOutOfRangeException when "byteCount" is negative.
  public override int GetMaxCharCount (int byteCount)
  {
      if (byteCount >= 0)
          return byteCount / 2;

      throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_NonNegative"));
  }
  // Returns a new UnicodeDecoder configured with this encoding's byte
  // order. Each call creates a separate decoder instance.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  // Returns the byte order mark for this encoding as a new array:
  // FE FF for big-endian, FF FE for little-endian, or an empty array
  // when the encoding was created without a byte order mark.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark)
          return new byte [0];

      if (bigEndian)
          return new byte[] { (byte)0xFE, (byte)0xFF };

      return new byte[] { (byte)0xFF, (byte)0xFE };
  }
  // Determines whether "value" is a UnicodeEncoding with the same code
  // page, byte order and byte-order-mark setting as this instance.
  // Returns false for null and for any other type.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = value as UnicodeEncoding;
      if (other == null)
          return false;

      return codePage == other.codePage
          && bigEndian == other.bigEndian
          && byteOrderMark == other.byteOrderMark;
  }
  // Returns the base class hash code unchanged. The byte order and
  // byte-order-mark flags compared by Equals are not mixed in here.
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  // Copies "count" bytes of UTF-16 data from "src" to "dest", converting
  // between the machine's byte order and the requested one.
  //   - When BitConverter.IsLittleEndian differs from "bigEndian" the two
  //     orders already agree, so the data is copied verbatim.
  //   - Otherwise the two bytes of every code unit are exchanged.
  // Only whole byte pairs are processed; an odd trailing byte is ignored.
  // Every caller in this file passes a non-negative "count".
  private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
  {
      if (BitConverter.IsLittleEndian != bigEndian) {
          // Clearing the low bit drops an odd trailing byte.
          string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
          return;
      }

      // Main loop: swap eight code units (16 bytes) per iteration.
      while (count >= 16) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest [4] = src [5];
          dest [5] = src [4];
          dest [6] = src [7];
          dest [7] = src [6];
          dest [8] = src [9];
          dest [9] = src [8];
          dest [10] = src [11];
          dest [11] = src [10];
          dest [12] = src [13];
          dest [13] = src [12];
          dest [14] = src [15];
          dest [15] = src [14];
          dest += 16;
          src += 16;
          count -= 16;
      }

      // Here 0 <= count <= 15. The set bits of "count" select which of
      // the 8-, 4- and 2-byte tail steps still have to run.
      if ((count & 8) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest [4] = src [5];
          dest [5] = src [4];
          dest [6] = src [7];
          dest [7] = src [6];
          dest += 8;
          src += 8;
      }

      if ((count & 4) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest += 4;
          src += 4;
      }

      if ((count & 2) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
      }
  }
  // Stateful UTF-16 decoder. When a call ends in the middle of a code
  // unit, the odd byte is kept in "leftOverByte" and combined with the
  // first byte of the next call.
  private sealed class UnicodeDecoder : Decoder
  {
      // Byte order of the incoming data.
      private bool bigEndian;
      // Pending first byte of a split code unit, or -1 when there is none.
      private int leftOverByte;

      // Creates a decoder for the given byte order with no pending byte.
      public UnicodeDecoder (bool bigEndian)
      {
          this.bigEndian = bigEndian;
          leftOverByte = -1;
      }

      // Returns the number of characters GetChars would produce for
      // "count" bytes, counting a pending byte from an earlier call as
      // one extra input byte.
      // Throws ArgumentNullException when "bytes" is null and
      // ArgumentOutOfRangeException when "index"/"count" do not describe
      // a range inside the array.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null)
              throw new ArgumentNullException ("bytes");
          if (index < 0 || index > bytes.Length)
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          if (count < 0 || count > (bytes.Length - index))
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

          if (leftOverByte == -1)
              return count / 2;
          return (count + 1) / 2;
      }

      // Decodes "byteCount" bytes of "bytes", starting at "byteIndex",
      // into "chars" starting at "charIndex", and returns the number of
      // characters written. A pending byte from the previous call is
      // completed first; an odd trailing byte is stored for the next call.
      // Throws ArgumentNullException for a null array,
      // ArgumentOutOfRangeException for an index or count outside its
      // array, and ArgumentException when "chars" has too little room.
      public unsafe override int GetChars (byte [] bytes, int byteIndex,
                                           int byteCount, char [] chars,
                                           int charIndex)
      {
          if (bytes == null)
              throw new ArgumentNullException ("bytes");
          if (chars == null)
              throw new ArgumentNullException ("chars");
          if (byteIndex < 0 || byteIndex > bytes.Length)
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          if (charIndex < 0 || charIndex > chars.Length)
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

          // An empty input leaves any pending byte untouched.
          if (byteCount == 0)
              return 0;

          int pending = leftOverByte;
          bool hasPending = pending != -1;

          int produced = hasPending ? (byteCount + 1) / 2 : byteCount / 2;
          if (chars.Length - charIndex < produced)
              throw new ArgumentException (_("Arg_InsufficientSpace"));

          if (hasPending) {
              // Finish the code unit that was split across two calls.
              int current = (int) bytes [byteIndex];
              int unit = bigEndian ? ((pending << 8) | current)
                                   : ((current << 8) | pending);
              chars [charIndex] = unchecked ((char) unit);
              charIndex++;
              byteIndex++;
              byteCount--;
          }

          // byteCount is non-negative here, so "at least one whole pair"
          // is the same as the original mask test against 0xFFFFFFFE.
          if (byteCount >= 2) {
              fixed (byte* bytePtr = bytes)
              fixed (char* charPtr = chars) {
                  CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);
              }
          }

          // Keep an odd trailing byte for the next call.
          if ((byteCount & 1) != 0)
              leftOverByte = bytes [byteCount + byteIndex - 1];
          else
              leftOverByte = -1;

          return produced;
      }
  } // class UnicodeDecoder
  563. }; // class UnicodeEncoding
  564. }; // namespace System.Text