UnicodeEncoding.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003, 2004 Novell, Inc.
  7. * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
  8. *
  9. * Permission is hereby granted, free of charge, to any person obtaining
  10. * a copy of this software and associated documentation files (the "Software"),
  11. * to deal in the Software without restriction, including without limitation
  12. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13. * and/or sell copies of the Software, and to permit persons to whom the
  14. * Software is furnished to do so, subject to the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be included
  17. * in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  22. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  23. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  24. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  25. * OTHER DEALINGS IN THE SOFTWARE.
  26. */
  27. namespace System.Text
  28. {
  29. using System;
  30. using System.Runtime.InteropServices;
  31. [Serializable]
  32. [ComVisible (true)]
  33. [MonoLimitation ("Serialization format not compatible with .NET")]
  34. public class UnicodeEncoding : Encoding
  35. {
  36. // Magic numbers used by Windows for Unicode.
  37. internal const int UNICODE_CODE_PAGE = 1200;
  38. internal const int BIG_UNICODE_CODE_PAGE = 1201;
  39. #if !ECMA_COMPAT
  40. // Size of characters in this encoding.
  41. public const int CharSize = 2;
  42. #endif
  43. // Internal state.
  44. private bool bigEndian;
  45. private bool byteOrderMark;
  46. // Constructors.
  47. public UnicodeEncoding () : this (false, true)
  48. {
  49. bigEndian = false;
  50. byteOrderMark = true;
  51. }
  52. public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
  53. : this (bigEndian, byteOrderMark, false)
  54. {
  55. }
  56. public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
  57. : base ((bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE))
  58. {
  59. if (throwOnInvalidBytes)
  60. SetFallbackInternal (null, new DecoderExceptionFallback ());
  61. else
  62. SetFallbackInternal (null, new DecoderReplacementFallback ("\uFFFD"));
  63. this.bigEndian = bigEndian;
  64. this.byteOrderMark = byteOrderMark;
  65. if (bigEndian){
  66. body_name = "unicodeFFFE";
  67. encoding_name = "Unicode (Big-Endian)";
  68. header_name = "unicodeFFFE";
  69. is_browser_save = false;
  70. web_name = "unicodeFFFE";
  71. } else {
  72. body_name = "utf-16";
  73. encoding_name = "Unicode";
  74. header_name = "utf-16";
  75. is_browser_save = true;
  76. web_name = "utf-16";
  77. }
  78. // Windows reports the same code page number for
  79. // both the little-endian and big-endian forms.
  80. windows_code_page = UNICODE_CODE_PAGE;
  81. }
  82. // Get the number of bytes needed to encode a character buffer.
  83. public override int GetByteCount (char[] chars, int index, int count)
  84. {
  85. if (chars == null) {
  86. throw new ArgumentNullException ("chars");
  87. }
  88. if (index < 0 || index > chars.Length) {
  89. throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
  90. }
  91. if (count < 0 || count > (chars.Length - index)) {
  92. throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
  93. }
  94. return count * 2;
  95. }
  96. public override int GetByteCount (String s)
  97. {
  98. if (s == null) {
  99. throw new ArgumentNullException ("s");
  100. }
  101. return s.Length * 2;
  102. }
  103. [CLSCompliantAttribute (false)]
  104. [ComVisible (false)]
  105. public unsafe override int GetByteCount (char* chars, int count)
  106. {
  107. if (chars == null)
  108. throw new ArgumentNullException ("chars");
  109. if (count < 0)
  110. throw new ArgumentOutOfRangeException ("count");
  111. return count * 2;
  112. }
  113. // Get the bytes that result from encoding a character buffer.
  114. public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
  115. byte [] bytes, int byteIndex)
  116. {
  117. if (chars == null) {
  118. throw new ArgumentNullException ("chars");
  119. }
  120. if (bytes == null) {
  121. throw new ArgumentNullException ("bytes");
  122. }
  123. if (charIndex < 0 || charIndex > chars.Length) {
  124. throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
  125. }
  126. if (charCount < 0 || charCount > (chars.Length - charIndex)) {
  127. throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
  128. }
  129. if (byteIndex < 0 || byteIndex > bytes.Length) {
  130. throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
  131. }
  132. if (charCount == 0)
  133. return 0;
  134. int byteCount = bytes.Length - byteIndex;
  135. if (bytes.Length == 0)
  136. bytes = new byte [1];
  137. fixed (char* charPtr = chars)
  138. fixed (byte* bytePtr = bytes)
  139. return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
  140. }
  141. public unsafe override int GetBytes (String s, int charIndex, int charCount,
  142. byte [] bytes, int byteIndex)
  143. {
  144. if (s == null) {
  145. throw new ArgumentNullException ("s");
  146. }
  147. if (bytes == null) {
  148. throw new ArgumentNullException ("bytes");
  149. }
  150. if (charIndex < 0 || charIndex > s.Length) {
  151. throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
  152. }
  153. if (charCount < 0 || charCount > (s.Length - charIndex)) {
  154. throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
  155. }
  156. if (byteIndex < 0 || byteIndex > bytes.Length) {
  157. throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
  158. }
  159. // For consistency
  160. if (charCount == 0)
  161. return 0;
  162. int byteCount = bytes.Length - byteIndex;
  163. if (bytes.Length == 0)
  164. bytes = new byte [1];
  165. fixed (char* charPtr = s)
  166. fixed (byte* bytePtr = bytes)
  167. return GetBytesInternal (charPtr + charIndex, charCount, bytePtr + byteIndex, byteCount);
  168. }
  169. [CLSCompliantAttribute (false)]
  170. [ComVisible (false)]
  171. public unsafe override int GetBytes (char* chars, int charCount,
  172. byte* bytes, int byteCount)
  173. {
  174. if (bytes == null)
  175. throw new ArgumentNullException ("bytes");
  176. if (chars == null)
  177. throw new ArgumentNullException ("chars");
  178. if (charCount < 0)
  179. throw new ArgumentOutOfRangeException ("charCount");
  180. if (byteCount < 0)
  181. throw new ArgumentOutOfRangeException ("byteCount");
  182. return GetBytesInternal (chars, charCount, bytes, byteCount);
  183. }
  184. private unsafe int GetBytesInternal (char* chars, int charCount,
  185. byte* bytes, int byteCount)
  186. {
  187. int count = charCount * 2;
  188. if (byteCount < count)
  189. throw new ArgumentException (_("Arg_InsufficientSpace"));
  190. CopyChars ((byte*) chars, bytes, count, bigEndian);
  191. return count;
  192. }
  193. // Get the number of characters needed to decode a byte buffer.
  194. public override int GetCharCount (byte[] bytes, int index, int count)
  195. {
  196. if (bytes == null) {
  197. throw new ArgumentNullException ("bytes");
  198. }
  199. if (index < 0 || index > bytes.Length) {
  200. throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
  201. }
  202. if (count < 0 || count > (bytes.Length - index)) {
  203. throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
  204. }
  205. return count / 2;
  206. }
  207. [CLSCompliantAttribute (false)]
  208. [ComVisible (false)]
  209. public unsafe override int GetCharCount (byte* bytes, int count)
  210. {
  211. if (bytes == null)
  212. throw new ArgumentNullException ("bytes");
  213. if (count < 0)
  214. throw new ArgumentOutOfRangeException ("count");
  215. return count / 2;
  216. }
  217. // Get the characters that result from decoding a byte buffer.
  218. public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
  219. char [] chars, int charIndex)
  220. {
  221. if (bytes == null) {
  222. throw new ArgumentNullException ("bytes");
  223. }
  224. if (chars == null) {
  225. throw new ArgumentNullException ("chars");
  226. }
  227. if (byteIndex < 0 || byteIndex > bytes.Length) {
  228. throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
  229. }
  230. if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
  231. throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
  232. }
  233. if (charIndex < 0 || charIndex > chars.Length) {
  234. throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
  235. }
  236. if (byteCount == 0)
  237. return 0;
  238. int charCount = chars.Length - charIndex;
  239. if (chars.Length == 0)
  240. chars = new char [1];
  241. fixed (byte* bytePtr = bytes)
  242. fixed (char* charPtr = chars)
  243. return GetCharsInternal (bytePtr + byteIndex, byteCount, charPtr + charIndex, charCount);
  244. }
  245. [CLSCompliantAttribute (false)]
  246. [ComVisible (false)]
  247. public unsafe override int GetChars (byte* bytes, int byteCount,
  248. char* chars, int charCount)
  249. {
  250. if (bytes == null)
  251. throw new ArgumentNullException ("bytes");
  252. if (chars == null)
  253. throw new ArgumentNullException ("chars");
  254. if (charCount < 0)
  255. throw new ArgumentOutOfRangeException ("charCount");
  256. if (byteCount < 0)
  257. throw new ArgumentOutOfRangeException ("byteCount");
  258. return GetCharsInternal (bytes, byteCount, chars, charCount);
  259. }
  260. // Decode a buffer of bytes into a string.
  261. [ComVisible (false)]
  262. public unsafe override String GetString (byte [] bytes, int index, int count)
  263. {
  264. if (bytes == null)
  265. throw new ArgumentNullException ("bytes");
  266. if (index < 0 || index > bytes.Length)
  267. throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
  268. if (count < 0 || count > (bytes.Length - index))
  269. throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
  270. if (count == 0)
  271. return string.Empty;
  272. // GetCharCountInternal
  273. int charCount = count / 2;
  274. string s = string.InternalAllocateStr (charCount);
  275. fixed (byte* bytePtr = bytes)
  276. fixed (char* charPtr = s)
  277. GetCharsInternal (bytePtr + index, count, charPtr, charCount);
  278. return s;
  279. }
  280. private unsafe int GetCharsInternal (byte* bytes, int byteCount,
  281. char* chars, int charCount)
  282. {
  283. int count = byteCount / 2;
  284. // Validate that we have sufficient space in "chars".
  285. if (charCount < count)
  286. throw new ArgumentException (_("Arg_InsufficientSpace"));
  287. CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
  288. return count;
  289. }
  290. [ComVisible (false)]
  291. public override Encoder GetEncoder ()
  292. {
  293. return(base.GetEncoder ());
  294. }
  295. // Get the maximum number of bytes needed to encode a
  296. // specified number of characters.
  297. public override int GetMaxByteCount (int charCount)
  298. {
  299. if (charCount < 0) {
  300. throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
  301. }
  302. return charCount * 2;
  303. }
  304. // Get the maximum number of characters needed to decode a
  305. // specified number of bytes.
  306. public override int GetMaxCharCount (int byteCount)
  307. {
  308. if (byteCount < 0) {
  309. throw new ArgumentOutOfRangeException
  310. ("byteCount", _("ArgRange_NonNegative"));
  311. }
  312. return byteCount / 2;
  313. }
  314. // Get a Unicode-specific decoder that is attached to this instance.
  315. public override Decoder GetDecoder ()
  316. {
  317. return new UnicodeDecoder (bigEndian);
  318. }
  319. // Get the Unicode preamble.
  320. public override byte[] GetPreamble ()
  321. {
  322. if (byteOrderMark) {
  323. byte[] preamble = new byte[2];
  324. if (bigEndian) {
  325. preamble[0] = (byte)0xFE;
  326. preamble[1] = (byte)0xFF;
  327. } else {
  328. preamble[0] = (byte)0xFF;
  329. preamble[1] = (byte)0xFE;
  330. }
  331. return preamble;
  332. }
  333. return EmptyArray<byte>.Value;
  334. }
  335. // Determine if this object is equal to another.
  336. public override bool Equals (Object value)
  337. {
  338. UnicodeEncoding enc = (value as UnicodeEncoding);
  339. if (enc != null) {
  340. return (codePage == enc.codePage &&
  341. bigEndian == enc.bigEndian &&
  342. byteOrderMark == enc.byteOrderMark);
  343. } else {
  344. return false;
  345. }
  346. }
  347. // Get the hash code for this object.
  348. public override int GetHashCode ()
  349. {
  350. return base.GetHashCode ();
  351. }
  352. private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
  353. {
  354. if (BitConverter.IsLittleEndian != bigEndian) {
  355. string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
  356. return;
  357. }
  358. switch (count) {
  359. case 0:
  360. return;
  361. case 1:
  362. return;
  363. case 2:
  364. goto Count2;
  365. case 3:
  366. goto Count2;
  367. case 4:
  368. goto Count4;
  369. case 5:
  370. goto Count4;
  371. case 6:
  372. goto Count4;
  373. case 7:
  374. goto Count4;
  375. case 8:
  376. goto Count8;
  377. case 9:
  378. goto Count8;
  379. case 10:
  380. goto Count8;
  381. case 11:
  382. goto Count8;
  383. case 12:
  384. goto Count8;
  385. case 13:
  386. goto Count8;
  387. case 14:
  388. goto Count8;
  389. case 15:
  390. goto Count8;
  391. }
  392. do {
  393. dest [0] = src [1];
  394. dest [1] = src [0];
  395. dest [2] = src [3];
  396. dest [3] = src [2];
  397. dest [4] = src [5];
  398. dest [5] = src [4];
  399. dest [6] = src [7];
  400. dest [7] = src [6];
  401. dest [8] = src [9];
  402. dest [9] = src [8];
  403. dest [10] = src [11];
  404. dest [11] = src [10];
  405. dest [12] = src [13];
  406. dest [13] = src [12];
  407. dest [14] = src [15];
  408. dest [15] = src [14];
  409. dest += 16;
  410. src += 16;
  411. count -= 16;
  412. } while ((count & unchecked ((int) 0xFFFFFFF0)) != 0);
  413. switch (count) {
  414. case 0:
  415. return;
  416. case 1:
  417. return;
  418. case 2:
  419. goto Count2;
  420. case 3:
  421. goto Count2;
  422. case 4:
  423. goto Count4;
  424. case 5:
  425. goto Count4;
  426. case 6:
  427. goto Count4;
  428. case 7:
  429. goto Count4;
  430. }
  431. Count8:;
  432. dest [0] = src [1];
  433. dest [1] = src [0];
  434. dest [2] = src [3];
  435. dest [3] = src [2];
  436. dest [4] = src [5];
  437. dest [5] = src [4];
  438. dest [6] = src [7];
  439. dest [7] = src [6];
  440. dest += 8;
  441. src += 8;
  442. if ((count & 4) == 0)
  443. goto TestCount2;
  444. Count4:;
  445. dest [0] = src [1];
  446. dest [1] = src [0];
  447. dest [2] = src [3];
  448. dest [3] = src [2];
  449. dest += 4;
  450. src += 4;
  451. TestCount2:;
  452. if ((count & 2) == 0)
  453. return;
  454. Count2:;
  455. dest [0] = src [1];
  456. dest [1] = src [0];
  457. }
  458. // Unicode decoder implementation.
  459. private sealed class UnicodeDecoder : Decoder
  460. {
  461. private bool bigEndian;
  462. private int leftOverByte;
  463. // Constructor.
  464. public UnicodeDecoder (bool bigEndian)
  465. {
  466. this.bigEndian = bigEndian;
  467. leftOverByte = -1;
  468. }
  469. // Override inherited methods.
  470. public override int GetCharCount (byte[] bytes, int index, int count)
  471. {
  472. if (bytes == null) {
  473. throw new ArgumentNullException ("bytes");
  474. }
  475. if (index < 0 || index > bytes.Length) {
  476. throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
  477. }
  478. if (count < 0 || count > (bytes.Length - index)) {
  479. throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
  480. }
  481. if (leftOverByte != -1) {
  482. return (count + 1) / 2;
  483. } else {
  484. return count / 2;
  485. }
  486. }
  487. public unsafe override int GetChars (byte [] bytes, int byteIndex,
  488. int byteCount, char [] chars,
  489. int charIndex)
  490. {
  491. if (bytes == null) {
  492. throw new ArgumentNullException ("bytes");
  493. }
  494. if (chars == null) {
  495. throw new ArgumentNullException ("chars");
  496. }
  497. if (byteIndex < 0 || byteIndex > bytes.Length) {
  498. throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
  499. }
  500. if (byteCount < 0 || byteCount > (bytes.Length - byteIndex)) {
  501. throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
  502. }
  503. if (charIndex < 0 || charIndex > chars.Length) {
  504. throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
  505. }
  506. if (byteCount == 0)
  507. return 0;
  508. int leftOver = leftOverByte;
  509. int count;
  510. if (leftOver != -1)
  511. count = (byteCount + 1) / 2;
  512. else
  513. count = byteCount / 2;
  514. if (chars.Length - charIndex < count)
  515. throw new ArgumentException (_("Arg_InsufficientSpace"));
  516. if (leftOver != -1) {
  517. if (bigEndian)
  518. chars [charIndex] = unchecked ((char) ((leftOver << 8) | (int) bytes [byteIndex]));
  519. else
  520. chars [charIndex] = unchecked ((char) (((int) bytes [byteIndex] << 8) | leftOver));
  521. charIndex++;
  522. byteIndex++;
  523. byteCount--;
  524. }
  525. if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0)
  526. fixed (byte* bytePtr = bytes)
  527. fixed (char* charPtr = chars)
  528. CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);
  529. if ((byteCount & 1) == 0)
  530. leftOverByte = -1;
  531. else
  532. leftOverByte = bytes [byteCount + byteIndex - 1];
  533. return count;
  534. }
  535. } // class UnicodeDecoder
  536. }; // class UnicodeEncoding
  537. }; // namespace System.Text