UnicodeEncoding.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003, 2004 Novell, Inc.
  7. * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
  8. *
  9. * Permission is hereby granted, free of charge, to any person obtaining
  10. * a copy of this software and associated documentation files (the "Software"),
  11. * to deal in the Software without restriction, including without limitation
  12. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13. * and/or sell copies of the Software, and to permit persons to whom the
  14. * Software is furnished to do so, subject to the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be included
  17. * in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  22. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  23. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  24. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  25. * OTHER DEALINGS IN THE SOFTWARE.
  26. */
  27. namespace System.Text
  28. {
  29. using System;
  30. using System.Runtime.InteropServices;
  31. [Serializable]
  32. [ComVisible (true)]
  33. [MonoLimitation ("Serialization format not compatible with .NET")]
  34. public class UnicodeEncoding : Encoding
  35. {
  // Code page identifiers handed to the base Encoding constructor:
  // 1200 selects the little-endian form, 1201 the big-endian form.
  internal const int UNICODE_CODE_PAGE = 1200;
  internal const int BIG_UNICODE_CODE_PAGE = 1201;

#if !ECMA_COMPAT
  // Size, in bytes, of one character in this encoding.
  public const int CharSize = 2;
#endif

  // Configuration captured by the constructors. Nothing in this class
  // assigns these after construction, hence readonly.
  private readonly bool bigEndian;      // true: most significant byte is emitted first
  private readonly bool byteOrderMark;  // true: GetPreamble returns a byte order mark
  // Constructors.

  // Creates a little-endian encoding that supplies a byte order mark.
  // The chained constructor already stores both settings, so nothing
  // needs to be assigned again here.
  public UnicodeEncoding () : this (false, true)
  {
  }

  // Creates an encoding with the requested byte order and preamble
  // behaviour; throwOnInvalidBytes is passed as false.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : this (bigEndian, byteOrderMark, false)
  {
  }
  // Creates an encoding with the requested byte order and preamble
  // behaviour. throwOnInvalidBytes selects the decoder fallback: an
  // exception fallback when true, a U+FFFD replacement fallback otherwise.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
      : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
      DecoderFallback decoderFallback;
      if (throwOnInvalidBytes)
          decoderFallback = new DecoderExceptionFallback ();
      else
          decoderFallback = new DecoderReplacementFallback ("\uFFFD");
      SetFallbackInternal (null, decoderFallback);

      this.bigEndian = bigEndian;
      this.byteOrderMark = byteOrderMark;

      // Body, header and web names are always identical for a given
      // byte order, so pick the shared name once.
      string sharedName = bigEndian ? "unicodeFFFE" : "utf-16";
      body_name = sharedName;
      header_name = sharedName;
      web_name = sharedName;
      encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
      is_browser_save = !bigEndian;

      // Windows reports the same code page number for
      // both the little-endian and big-endian forms.
      windows_code_page = UNICODE_CODE_PAGE;
  }
  // Returns the number of bytes produced by encoding "count" characters
  // of "chars" starting at "index": two bytes per character.
  // Throws ArgumentNullException for a null array and
  // ArgumentOutOfRangeException when the range lies outside the array.
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");

      int length = chars.Length;
      if (index < 0 || index > length)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > length - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      return 2 * count;
  }
  // Returns the number of bytes produced by encoding the whole string:
  // two bytes per character. Throws ArgumentNullException for null.
  public override int GetByteCount (String s)
  {
      if (s != null)
          return 2 * s.Length;

      throw new ArgumentNullException ("s");
  }
  // Returns the number of bytes produced by encoding "count" characters
  // read through a raw pointer: two bytes per character.
  // Throws ArgumentNullException when "chars" is null and
  // ArgumentOutOfRangeException when "count" is negative or so large
  // that the byte total does not fit in an int.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetByteCount (char* chars, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (count < 0)
          throw new ArgumentOutOfRangeException ("count");
      // Unlike the array overloads, nothing bounds "count" here, so
      // count * 2 could wrap around to a negative result.
      if (count > int.MaxValue / 2)
          throw new ArgumentOutOfRangeException ("count");
      return count * 2;
  }
  // Encodes "charCount" characters of "chars", starting at "charIndex",
  // into "bytes" starting at "byteIndex". Returns the number of bytes
  // written, or 0 immediately when there is nothing to encode.
  public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
                                       byte [] bytes, int byteIndex)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > chars.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      if (charCount < 0 || charCount > chars.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      if (charCount == 0)
          return 0;

      // Space left in the caller's array, measured before the
      // substitution below so an empty destination still counts as zero.
      int available = bytes.Length - byteIndex;

      // An empty destination is swapped for a one-element scratch array
      // before pinning (presumably so the pinned pointer is usable).
      byte [] target = (bytes.Length == 0) ? new byte [1] : bytes;

      fixed (char* source = chars)
      fixed (byte* sink = target) {
          return GetBytesInternal (source + charIndex, charCount, sink + byteIndex, available);
      }
  }
  // Encodes "charCount" characters of the string "s", starting at
  // "charIndex", into "bytes" starting at "byteIndex". Returns the
  // number of bytes written.
  public unsafe override int GetBytes (String s, int charIndex, int charCount,
                                       byte [] bytes, int byteIndex)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > s.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      if (charCount < 0 || charCount > s.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      // Same early exit as the char[] overload, for consistency.
      if (charCount == 0)
          return 0;

      // Space left in the caller's array, measured before the
      // substitution below so an empty destination still counts as zero.
      int available = bytes.Length - byteIndex;

      // An empty destination is swapped for a one-element scratch array
      // before pinning (presumably so the pinned pointer is usable).
      byte [] target = (bytes.Length == 0) ? new byte [1] : bytes;

      fixed (char* source = s)
      fixed (byte* sink = target) {
          return GetBytesInternal (source + charIndex, charCount, sink + byteIndex, available);
      }
  }
  // Pointer-based encode: validates the arguments, then hands the
  // buffers to GetBytesInternal and returns its byte count.
  // "bytes" is checked before "chars", and "charCount" before
  // "byteCount"; the first failing check determines the exception.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetBytes (char* chars, int charCount,
                                       byte* bytes, int byteCount)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      return GetBytesInternal (chars, charCount, bytes, byteCount);
  }
  // Shared encode helper: writes "charCount" characters from "chars"
  // into "bytes" in this encoding's byte order and returns the number
  // of bytes written (charCount * 2).
  // Throws ArgumentException when "byteCount" bytes cannot hold the
  // output. Callers are expected to have rejected negative counts.
  private unsafe int GetBytesInternal (char* chars, int charCount,
                                       byte* bytes, int byteCount)
  {
      // Compare in characters rather than bytes. Computing charCount * 2
      // first can wrap to a negative value for huge counts, which would
      // slip past the space check and hand CopyChars a negative length.
      if (charCount > byteCount / 2)
          throw new ArgumentException (_("Arg_InsufficientSpace"));

      int count = charCount * 2;
      CopyChars ((byte*) chars, bytes, count, bigEndian);
      return count;
  }
  // Returns the number of characters produced by decoding "count" bytes
  // of "bytes" starting at "index": one character per complete byte
  // pair (a trailing odd byte is not counted).
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int length = bytes.Length;
      if (index < 0 || index > length)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > length - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      return count / 2;
  }
  // Pointer-based variant: returns the number of complete byte pairs
  // in "count" bytes. Throws for a null pointer or a negative count.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetCharCount (byte* bytes, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count");
      }

      return count / 2;
  }
  // Decodes "byteCount" bytes of "bytes", starting at "byteIndex", into
  // "chars" starting at "charIndex". Returns the number of characters
  // written, or 0 immediately when there is nothing to decode.
  public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
                                       char [] chars, int charIndex)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      if (byteCount < 0 || byteCount > bytes.Length - byteIndex)
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      if (charIndex < 0 || charIndex > chars.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

      if (byteCount == 0)
          return 0;

      // Space left in the caller's array, measured before the
      // substitution below so an empty destination still counts as zero.
      int available = chars.Length - charIndex;

      // An empty destination is swapped for a one-element scratch array
      // before pinning (presumably so the pinned pointer is usable).
      char [] target = (chars.Length == 0) ? new char [1] : chars;

      fixed (byte* source = bytes)
      fixed (char* sink = target) {
          return GetCharsInternal (source + byteIndex, byteCount, sink + charIndex, available);
      }
  }
  // Pointer-based decode: validates the arguments, then hands the
  // buffers to GetCharsInternal and returns its character count.
  // "bytes" is checked before "chars", and "charCount" before
  // "byteCount"; the first failing check determines the exception.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetChars (byte* bytes, int byteCount,
                                       char* chars, int charCount)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      return GetCharsInternal (bytes, byteCount, chars, charCount);
  }
  // Decodes "count" bytes of "bytes", starting at "index", straight
  // into a newly allocated string. A zero count yields string.Empty.
  [ComVisible (false)]
  public unsafe override String GetString (byte [] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (index < 0 || index > bytes.Length) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      if (count < 0 || count > (bytes.Length - index)) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }
      if (count == 0) {
          return string.Empty;
      }

      // One character per complete byte pair, the same arithmetic
      // GetCharCount uses.
      int length = count / 2;
      string result = string.InternalAllocateStr (length);

      // Decode directly into the storage of the new string.
      fixed (byte* source = bytes)
      fixed (char* sink = result) {
          GetCharsInternal (source + index, count, sink, length);
      }
      return result;
  }
  // Shared decode helper: converts "byteCount" bytes into characters in
  // "chars" and returns the number of characters (byteCount / 2).
  // Throws ArgumentException when "charCount" characters cannot hold
  // the output.
  private unsafe int GetCharsInternal (byte* bytes, int byteCount,
                                       char* chars, int charCount)
  {
      int produced = byteCount / 2;

      // The destination must be able to take every decoded character.
      if (produced > charCount) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      // The full byte count is passed on to CopyChars as given.
      CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
      return produced;
  }
  // Returns the encoder supplied by the base class; this class adds no
  // encoder of its own.
  [ComVisible (false)]
  public override Encoder GetEncoder ()
  {
      Encoder encoder = base.GetEncoder ();
      return encoder;
  }
  // Get the maximum number of bytes needed to encode a
  // specified number of characters: two bytes per character.
  // Throws ArgumentOutOfRangeException when "charCount" is negative or
  // when the byte total would not fit in an int.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
      }
      // charCount * 2 would wrap around to a negative "maximum" beyond
      // this point; report the overflow instead.
      if (charCount > int.MaxValue / 2) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      return charCount * 2;
  }
  // Get the maximum number of characters needed to decode a
  // specified number of bytes.
  // Throws ArgumentOutOfRangeException when "byteCount" is negative.
  public override int GetMaxCharCount (int byteCount)
  {
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException
              ("byteCount", _("ArgRange_NonNegative"));
      }
      // Round up. The decoder from GetDecoder can hold one byte over
      // from an earlier call, so an odd number of new bytes may yield
      // (byteCount + 1) / 2 characters. Written this way to avoid
      // overflowing at int.MaxValue.
      return byteCount / 2 + (byteCount & 1);
  }
  // Creates a new UnicodeDecoder configured with this encoding's byte
  // order; each call returns a separate instance.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  // Returns the byte order mark for this encoding: FE FF when
  // big-endian, FF FE when little-endian, or an empty array when the
  // encoding was created without a byte order mark.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark)
          return new byte [0];

      if (bigEndian)
          return new byte [] { (byte) 0xFE, (byte) 0xFF };

      return new byte [] { (byte) 0xFF, (byte) 0xFE };
  }
  // Two instances are equal when the other object is a UnicodeEncoding
  // with the same code page, byte order and byte-order-mark setting.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = value as UnicodeEncoding;
      if (other == null)
          return false;

      return codePage == other.codePage
          && bigEndian == other.bigEndian
          && byteOrderMark == other.byteOrderMark;
  }

  // The hash code is whatever the base class computes.
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  // Copies "count" bytes of UTF-16 data from "src" to "dest", swapping
  // the two bytes of every pair when the requested byte order differs
  // from the host's. Only complete pairs are transferred; a trailing
  // odd byte is ignored. "count" is expected to be non-negative.
  private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
  {
      // Requested order equals the host order: plain copy of the even
      // part of the buffer.
      if (BitConverter.IsLittleEndian != bigEndian) {
          string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
          return;
      }

      int remaining = count;

      // Bulk phase: swap eight pairs (16 bytes) per iteration.
      while (remaining >= 16) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest [4] = src [5];
          dest [5] = src [4];
          dest [6] = src [7];
          dest [7] = src [6];
          dest [8] = src [9];
          dest [9] = src [8];
          dest [10] = src [11];
          dest [11] = src [10];
          dest [12] = src [13];
          dest [13] = src [12];
          dest [14] = src [15];
          dest [15] = src [14];
          dest += 16;
          src += 16;
          remaining -= 16;
      }

      // Tail phase: fewer than 16 bytes remain, so bits 3, 2 and 1 of
      // "remaining" say whether a run of 4, 2 and 1 pairs is still due.
      if ((remaining & 8) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest [4] = src [5];
          dest [5] = src [4];
          dest [6] = src [7];
          dest [7] = src [6];
          dest += 8;
          src += 8;
      }
      if ((remaining & 4) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest += 4;
          src += 4;
      }
      if ((remaining & 2) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
      }
  }
  // Stateful decoder: when a call ends in the middle of a byte pair, the
  // odd byte is remembered and combined with the first byte of the next
  // call.
  private sealed class UnicodeDecoder : Decoder
  {
      private bool bigEndian;

      // Byte held over from the previous GetChars call, or -1 when none.
      private int leftOverByte;

      // Creates a decoder for the given byte order with no pending byte.
      public UnicodeDecoder (bool bigEndian)
      {
          this.bigEndian = bigEndian;
          leftOverByte = -1;
      }

      // Returns how many characters GetChars would produce for this
      // range, counting a pending byte as one extra input byte.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null)
              throw new ArgumentNullException ("bytes");
          if (index < 0 || index > bytes.Length)
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          if (count < 0 || count > bytes.Length - index)
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

          int pending = (leftOverByte != -1) ? 1 : 0;
          return (count + pending) / 2;
      }

      // Decodes "byteCount" bytes starting at "byteIndex" into "chars"
      // starting at "charIndex" and returns the number of characters
      // written. Updates the pending byte for the next call.
      public unsafe override int GetChars (byte [] bytes, int byteIndex,
                                           int byteCount, char [] chars,
                                           int charIndex)
      {
          if (bytes == null)
              throw new ArgumentNullException ("bytes");
          if (chars == null)
              throw new ArgumentNullException ("chars");
          if (byteIndex < 0 || byteIndex > bytes.Length)
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          if (byteCount < 0 || byteCount > bytes.Length - byteIndex)
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          if (charIndex < 0 || charIndex > chars.Length)
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

          // An empty input leaves the pending byte untouched.
          if (byteCount == 0)
              return 0;

          int pending = leftOverByte;
          bool havePending = pending != -1;

          int produced = havePending ? (byteCount + 1) / 2 : byteCount / 2;
          if (chars.Length - charIndex < produced)
              throw new ArgumentException (_("Arg_InsufficientSpace"));

          if (havePending) {
              // Complete the split pair: the held byte came first in the
              // stream, the first new byte second.
              int next = (int) bytes [byteIndex];
              int codeUnit = bigEndian ? ((pending << 8) | next)
                                       : ((next << 8) | pending);
              chars [charIndex] = unchecked ((char) codeUnit);
              charIndex++;
              byteIndex++;
              byteCount--;
          }

          // Convert the remaining complete pairs in one go.
          if (byteCount >= 2) {
              fixed (byte* source = bytes)
              fixed (char* sink = chars) {
                  CopyChars (source + byteIndex, (byte*) (sink + charIndex), byteCount, bigEndian);
              }
          }

          // An odd remainder means the last byte starts a pair that the
          // next call has to finish.
          bool oddTail = (byteCount & 1) != 0;
          leftOverByte = oddTail ? (int) bytes [byteIndex + byteCount - 1] : -1;

          return produced;
      }
  } // class UnicodeDecoder
  537. }; // class UnicodeEncoding
  538. }; // namespace System.Text