UnicodeEncoding.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. /*
  2. * UnicodeEncoding.cs - Implementation of the
  3. * "System.Text.UnicodeEncoding" class.
  4. *
  5. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  6. * Copyright (C) 2003, 2004 Novell, Inc.
  7. * Copyright (C) 2006 Kornél Pál <http://www.kornelpal.hu/>
  8. *
  9. * Permission is hereby granted, free of charge, to any person obtaining
  10. * a copy of this software and associated documentation files (the "Software"),
  11. * to deal in the Software without restriction, including without limitation
  12. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  13. * and/or sell copies of the Software, and to permit persons to whom the
  14. * Software is furnished to do so, subject to the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be included
  17. * in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  22. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  23. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  24. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  25. * OTHER DEALINGS IN THE SOFTWARE.
  26. */
  27. namespace System.Text
  28. {
  29. using System;
  30. using System.Runtime.InteropServices;
  31. [Serializable]
  32. #if NET_2_0
  33. [ComVisible (true)]
  34. #endif
  35. [MonoTODO ("Serialization format not compatible with .NET")]
  36. public class UnicodeEncoding : Encoding
  37. {
  // Magic numbers used by Windows for Unicode: the code page
  // identifiers of the little-endian and big-endian forms.
  internal const int UNICODE_CODE_PAGE = 1200;
  internal const int BIG_UNICODE_CODE_PAGE = 1201;

#if !ECMA_COMPAT
  // Size of characters in this encoding, in bytes.
  public const int CharSize = 2;
#endif

  // Internal state. Both flags are assigned only from the
  // constructors, so they are declared readonly.
  private readonly bool bigEndian;
  private readonly bool byteOrderMark;
  48. // Constructors.
  // Default form: little-endian with a byte order mark.
  public UnicodeEncoding ()
      : this (false, true)
  {
      // The chained constructor has already stored bigEndian = false
      // and byteOrderMark = true; nothing further to initialise.
  }
  // Create an encoding with an explicit byte order and preamble choice.
  // The base class receives code page 1201 for big-endian and 1200
  // for little-endian.
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark)
      : base (bigEndian ? BIG_UNICODE_CODE_PAGE : UNICODE_CODE_PAGE)
  {
      this.byteOrderMark = byteOrderMark;
      this.bigEndian = bigEndian;

      // The body, header and web names are always the same string.
      string name = bigEndian ? "unicodeFFFE" : "utf-16";
      body_name = name;
      header_name = name;
      web_name = name;

      encoding_name = bigEndian ? "Unicode (Big-Endian)" : "Unicode";
      is_browser_save = !bigEndian;

      // Windows reports the same code page number for
      // both the little-endian and big-endian forms.
      windows_code_page = UNICODE_CODE_PAGE;
  }
  76. #if NET_2_0
  // Overload taking a throwOnInvalidBytes flag. The flag is accepted
  // but not stored or acted upon; construction is delegated to the
  // two-argument constructor.
  [MonoTODO ("Implement throwOnInvalidBytes")]
  public UnicodeEncoding (bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
      : this (bigEndian, byteOrderMark)
  {
      // Intentionally empty.
  }
  82. #endif
  // Get the number of bytes needed to encode a character buffer.
  // Every character takes two bytes, so the answer is twice "count"
  // once the (index, count) range has been validated against "chars".
  public override int GetByteCount (char[] chars, int index, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");

      int length = chars.Length;
      if (index < 0 || index > length)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > length - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      return 2 * count;
  }
  // Get the number of bytes needed to encode a whole string:
  // two bytes for each character in "s".
  public override int GetByteCount (String s)
  {
      if (s != null)
          return 2 * s.Length;

      throw new ArgumentNullException ("s");
  }
  104. #if NET_2_0
  // Pointer overload: get the number of bytes needed to encode
  // "count" characters starting at "chars" (two bytes each).
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetByteCount (char* chars, int count)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count");
      }

      return 2 * count;
  }
  115. #endif
  // Get the bytes that result from encoding a character buffer.
  // Validates the arguments, then pins both arrays and lets
  // GetBytesInternal do the copy. Returns the number of bytes written.
  public unsafe override int GetBytes (char [] chars, int charIndex, int charCount,
                                       byte [] bytes, int byteIndex)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > chars.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      if (charCount < 0 || charCount > chars.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      // Nothing to encode.
      if (charCount == 0)
          return 0;

      // Room left in the caller's array, measured before any substitution.
      int available = bytes.Length - byteIndex;

      // An empty output array is replaced by a one-element dummy before
      // pinning; "available" is still 0, so GetBytesInternal rejects it.
      byte [] target = (bytes.Length == 0) ? new byte [1] : bytes;

      fixed (char* source = chars) {
          fixed (byte* destination = target) {
              return GetBytesInternal (source + charIndex, charCount, destination + byteIndex, available);
          }
      }
  }
  144. #if !NET_2_0
  // Encode a whole string into a freshly allocated byte array
  // sized by GetByteCount.
  public override byte [] GetBytes (String s)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }

      byte [] result = new byte [GetByteCount (s)];
      GetBytes (s, 0, s.Length, result, 0);
      return result;
  }
  154. #endif
  // Encode part of a string into a byte array. Validates the
  // arguments, then pins the string and the array and lets
  // GetBytesInternal do the copy. Returns the number of bytes written.
  public unsafe override int GetBytes (String s, int charIndex, int charCount,
                                       byte [] bytes, int byteIndex)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > s.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_StringIndex"));
      if (charCount < 0 || charCount > s.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_StringRange"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      // For consistency with the char [] overload.
      if (charCount == 0)
          return 0;

      // Room left in the caller's array, measured before any substitution.
      int available = bytes.Length - byteIndex;

      // An empty output array is replaced by a one-element dummy before
      // pinning; "available" is still 0, so GetBytesInternal rejects it.
      byte [] target = (bytes.Length == 0) ? new byte [1] : bytes;

      fixed (char* source = s) {
          fixed (byte* destination = target) {
              return GetBytesInternal (source + charIndex, charCount, destination + byteIndex, available);
          }
      }
  }
  183. #if NET_2_0
  // Pointer overload: encode "charCount" characters from "chars" into
  // the "byteCount"-byte buffer at "bytes". Arguments are checked in
  // the order bytes, chars, charCount, byteCount.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetBytes (char* chars, int charCount,
                                       byte* bytes, int byteCount)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      int written = GetBytesInternal (chars, charCount, bytes, byteCount);
      return written;
  }
  199. #endif
  // Encode "charCount" characters starting at "chars" into the buffer
  // at "bytes", which has room for "byteCount" bytes. Callers have
  // already checked that both counts are non-negative.
  //
  // Returns the number of bytes written (two per character).
  // Throws ArgumentException when the output buffer is too small.
  private unsafe int GetBytesInternal (char* chars, int charCount,
                                       byte* bytes, int byteCount)
  {
      // Compare in units of characters instead of computing
      // charCount * 2 first: for very large counts the product wraps to
      // a negative number, which would slip past the space check and
      // hand CopyChars a negative length.
      if (charCount > byteCount / 2)
          throw new ArgumentException (_("Arg_InsufficientSpace"));

      // Cannot overflow: charCount <= byteCount / 2.
      int count = charCount * 2;
      CopyChars ((byte*) chars, bytes, count, bigEndian);
      return count;
  }
  // Get the number of characters needed to decode a byte buffer.
  // Two bytes make one character; an odd trailing byte is not counted.
  public override int GetCharCount (byte[] bytes, int index, int count)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int length = bytes.Length;
      if (index < 0 || index > length)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > length - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      int chars = count / 2;
      return chars;
  }
  223. #if NET_2_0
  // Pointer overload: get the number of characters that "count"
  // bytes decode to (half of "count", rounded down).
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetCharCount (byte* bytes, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count");
      }

      int chars = count / 2;
      return chars;
  }
  234. #endif
  // Get the characters that result from decoding a byte buffer.
  // Validates the arguments, then pins both arrays and lets
  // GetCharsInternal do the copy. Returns the number of characters written.
  public unsafe override int GetChars (byte [] bytes, int byteIndex, int byteCount,
                                       char [] chars, int charIndex)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      if (byteCount < 0 || byteCount > bytes.Length - byteIndex)
          throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
      if (charIndex < 0 || charIndex > chars.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

      // Nothing to decode.
      if (byteCount == 0)
          return 0;

      // Room left in the caller's array, measured before any substitution.
      int available = chars.Length - charIndex;

      // An empty output array is replaced by a one-element dummy
      // before pinning; "available" still reflects the caller's array.
      char [] target = (chars.Length == 0) ? new char [1] : chars;

      fixed (byte* source = bytes) {
          fixed (char* destination = target) {
              return GetCharsInternal (source + byteIndex, byteCount, destination + charIndex, available);
          }
      }
  }
  263. #if NET_2_0
  // Pointer overload: decode "byteCount" bytes from "bytes" into the
  // "charCount"-character buffer at "chars". Arguments are checked in
  // the order bytes, chars, charCount, byteCount.
  [CLSCompliantAttribute (false)]
  [ComVisible (false)]
  public unsafe override int GetChars (byte* bytes, int byteCount,
                                       char* chars, int charCount)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      int written = GetCharsInternal (bytes, byteCount, chars, charCount);
      return written;
  }
  279. #endif
  // Decode a buffer of bytes into a string. The string is allocated
  // at its final length and filled in place through a pinned pointer.
  [ComVisible (false)]
  public unsafe override String GetString (byte [] bytes, int index, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (index < 0 || index > bytes.Length) {
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      }
      if (count < 0 || count > (bytes.Length - index)) {
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));
      }
      if (count == 0) {
          return string.Empty;
      }

      // One character per complete pair of bytes.
      int length = count / 2;
      string result = string.InternalAllocateStr (length);

      fixed (byte* source = bytes) {
          fixed (char* destination = result) {
              GetCharsInternal (source + index, count, destination, length);
          }
      }
      return result;
  }
  // Decode "byteCount" bytes starting at "bytes" into the buffer at
  // "chars", which has room for "charCount" characters. Returns the
  // number of characters produced (byteCount / 2). Throws
  // ArgumentException when the output buffer is too small.
  private unsafe int GetCharsInternal (byte* bytes, int byteCount,
                                       char* chars, int charCount)
  {
      int produced = byteCount / 2;

      // Validate that we have sufficient space in "chars".
      if (produced > charCount) {
          throw new ArgumentException (_("Arg_InsufficientSpace"));
      }

      CopyChars (bytes, (byte*) chars, byteCount, bigEndian);
      return produced;
  }
  // Get the maximum number of bytes needed to encode a
  // specified number of characters (two bytes per character).
  //
  // Throws ArgumentOutOfRangeException when "charCount" is negative
  // or when the resulting byte count does not fit in an int.
  public override int GetMaxByteCount (int charCount)
  {
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_NonNegative"));
      }

      // Doubling a value above int.MaxValue / 2 would wrap around to a
      // negative number; report it instead of returning garbage.
      if (charCount > int.MaxValue / 2) {
          throw new ArgumentOutOfRangeException ("charCount");
      }

      return charCount * 2;
  }
  // Get the maximum number of characters needed to decode a
  // specified number of bytes.
  //
  // Throws ArgumentOutOfRangeException when "byteCount" is negative.
  public override int GetMaxCharCount (int byteCount)
  {
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException
              ("byteCount", _("ArgRange_NonNegative"));
      }

      // Round up rather than down: the decoder returned by GetDecoder
      // reports (count + 1) / 2 characters when it is holding a leftover
      // byte from an earlier call, so an odd number of bytes can yield
      // one more character than byteCount / 2. Written without
      // "byteCount + 1" so that int.MaxValue does not overflow.
      return (byteCount / 2) + (byteCount & 1);
  }
  // Get a Unicode-specific decoder that uses the same byte order
  // as this instance. A new decoder object is created on every call.
  public override Decoder GetDecoder ()
  {
      Decoder decoder = new UnicodeDecoder (bigEndian);
      return decoder;
  }
  // Get the Unicode preamble: FE FF for big-endian, FF FE for
  // little-endian, or an empty array when this instance was created
  // without a byte order mark. A new array is returned on every call.
  public override byte[] GetPreamble ()
  {
      if (!byteOrderMark)
          return new byte [0];

      if (bigEndian)
          return new byte [] { 0xFE, 0xFF };

      return new byte [] { 0xFF, 0xFE };
  }
  // Determine if this object is equal to another. Two instances are
  // equal when "value" is a UnicodeEncoding with the same code page,
  // byte order and byte-order-mark setting.
  public override bool Equals (Object value)
  {
      UnicodeEncoding other = value as UnicodeEncoding;
      if (other == null)
          return false;

      return codePage == other.codePage
          && bigEndian == other.bigEndian
          && byteOrderMark == other.byteOrderMark;
  }
  // Get the hash code for this object. The value comes straight
  // from the base class; nothing specific to this class is mixed in.
  public override int GetHashCode ()
  {
      int hash = base.GetHashCode ();
      return hash;
  }
  // Copy "count" bytes of two-byte code units from "src" to "dest".
  // When BitConverter.IsLittleEndian differs from "bigEndian" the data
  // is block-copied unchanged; otherwise the two bytes of every code
  // unit are exchanged. An odd trailing byte is never copied.
  private unsafe static void CopyChars (byte* src, byte* dest, int count, bool bigEndian)
  {
      if (BitConverter.IsLittleEndian != bigEndian) {
          // No swapping needed: copy with the low bit of the length cleared.
          string.memcpy (dest, src, count & unchecked ((int) 0xFFFFFFFE));
          return;
      }

      // Main loop: swap eight code units (16 bytes) per iteration for
      // as long as any bit above the low four is set in "count".
      while ((count & unchecked ((int) 0xFFFFFFF0)) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest [4] = src [5];
          dest [5] = src [4];
          dest [6] = src [7];
          dest [7] = src [6];
          dest [8] = src [9];
          dest [9] = src [8];
          dest [10] = src [11];
          dest [11] = src [10];
          dest [12] = src [13];
          dest [13] = src [12];
          dest [14] = src [15];
          dest [15] = src [14];
          dest += 16;
          src += 16;
          count -= 16;
      }

      // At most 15 bytes remain. Bits 3, 2 and 1 of "count" tell whether
      // a group of 8, 4 and 2 bytes is still pending; bit 0 (a lone
      // trailing byte) is ignored.
      if ((count & 8) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest [4] = src [5];
          dest [5] = src [4];
          dest [6] = src [7];
          dest [7] = src [6];
          dest += 8;
          src += 8;
      }

      if ((count & 4) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
          dest [2] = src [3];
          dest [3] = src [2];
          dest += 4;
          src += 4;
      }

      if ((count & 2) != 0) {
          dest [0] = src [1];
          dest [1] = src [0];
      }
  }
  // Unicode decoder implementation. Unlike the stateless GetChars of
  // the encoding, it remembers the last byte of an odd-length input
  // and pairs it with the first byte of the next call.
  private sealed class UnicodeDecoder : Decoder
  {
      private bool bigEndian;

      // Byte carried over from the previous GetChars call, or -1 when
      // there is none.
      private int leftOverByte;

      // Constructor.
      public UnicodeDecoder (bool bigEndian)
      {
          leftOverByte = -1;
          this.bigEndian = bigEndian;
      }

      // Get the number of characters that decoding "count" bytes
      // would produce, counting a carried-over byte as one extra
      // input byte.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          if (bytes == null)
              throw new ArgumentNullException ("bytes");
          if (index < 0 || index > bytes.Length)
              throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
          if (count < 0 || count > (bytes.Length - index))
              throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

          int carried = (leftOverByte != -1) ? 1 : 0;
          return (count + carried) / 2;
      }

      // Decode bytes into characters, consuming any carried-over byte
      // first and saving a new one when an odd byte is left at the end.
      // Returns the number of characters written.
      public unsafe override int GetChars (byte [] bytes, int byteIndex,
                                           int byteCount, char [] chars,
                                           int charIndex)
      {
          if (bytes == null)
              throw new ArgumentNullException ("bytes");
          if (chars == null)
              throw new ArgumentNullException ("chars");
          if (byteIndex < 0 || byteIndex > bytes.Length)
              throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
          if (byteCount < 0 || byteCount > (bytes.Length - byteIndex))
              throw new ArgumentOutOfRangeException ("byteCount", _("ArgRange_Array"));
          if (charIndex < 0 || charIndex > chars.Length)
              throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));

          // An empty input leaves the carried-over byte untouched.
          if (byteCount == 0)
              return 0;

          int carried = leftOverByte;
          bool hasCarried = (carried != -1);

          int produced = hasCarried ? (byteCount + 1) / 2 : byteCount / 2;
          if (chars.Length - charIndex < produced)
              throw new ArgumentException (_("Arg_InsufficientSpace"));

          if (hasCarried) {
              // The carried byte is the first byte of the code unit, so
              // it is the high byte in big-endian order and the low
              // byte in little-endian order.
              int next = (int) bytes [byteIndex];
              int codeUnit = bigEndian
                  ? ((carried << 8) | next)
                  : ((next << 8) | carried);
              chars [charIndex] = unchecked ((char) codeUnit);

              charIndex++;
              byteIndex++;
              byteCount--;
          }

          // Bulk-copy the remaining complete pairs, if there are any.
          if ((byteCount & unchecked ((int) 0xFFFFFFFE)) != 0) {
              fixed (byte* bytePtr = bytes) {
                  fixed (char* charPtr = chars) {
                      CopyChars (bytePtr + byteIndex, (byte*) (charPtr + charIndex), byteCount, bigEndian);
                  }
              }
          }

          // An odd remainder means the final byte has no partner yet.
          if ((byteCount & 1) != 0)
              leftOverByte = bytes [byteIndex + byteCount - 1];
          else
              leftOverByte = -1;

          return produced;
      }
  } // class UnicodeDecoder
  552. }; // class UnicodeEncoding
  553. }; // namespace System.Text