Encoding.cs 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194
  1. /*
  2. * Encoding.cs - Implementation of the "System.Text.Encoding" class.
  3. *
  4. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  5. * Copyright (c) 2002, Ximian, Inc.
  6. * Copyright (c) 2003, 2004 Novell, Inc.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the "Software"),
  10. * to deal in the Software without restriction, including without limitation
  11. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12. * and/or sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included
  16. * in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. namespace System.Text
  27. {
  28. using System;
  29. using System.Reflection;
  30. using System.Globalization;
  31. using System.Security;
  32. using System.Runtime.CompilerServices;
  33. using System.Runtime.InteropServices;
  34. [Serializable]
  35. #if NET_2_0
  36. [ComVisible (true)]
  37. #endif
  38. public abstract class Encoding
  39. #if NET_2_0
  40. : ICloneable
  41. #endif
  42. {
  43. // Code page used by this encoding.
  44. internal int codePage;
  45. internal int windows_code_page;
  46. bool is_readonly = true;
  // Parameterless constructor. Assigns nothing, so "codePage" and
  // "windows_code_page" keep their default value of zero.
  protected Encoding ()
  {
  }
  // Construct an encoding for a specific code page. The same number is
  // stored as both the code page and the Windows code page. Under NET_2_0
  // the default encoder/decoder fallbacks are chosen from the code page.
  #if ECMA_COMPAT
  protected internal
  #else
  protected
  #endif
  Encoding (int codePage)
  {
      windows_code_page = codePage;
      this.codePage = codePage;
  #if NET_2_0
      switch (codePage) {
      case 1200: // UTF16
      case 1201: // UTF16
      case 12000: // UTF32
      case 12001: // UTF32
      case 65000: // UTF7
      case 65001: // UTF8
          // The Unicode encodings get a replacement fallback built
          // from the empty string.
          decoder_fallback = new DecoderReplacementFallback (String.Empty);
          encoder_fallback = new EncoderReplacementFallback (String.Empty);
          break;
      default:
          // Every other code page, including 20127 (ASCII) and
          // 54936 (GB18030), uses the shared replacement fallbacks.
          // MS has "InternalBestFit{Decoder|Encoder}Fallback
          // here, but we dunno what they are for.
          decoder_fallback = DecoderFallback.ReplacementFallback;
          encoder_fallback = EncoderFallback.ReplacementFallback;
          break;
      }
  #endif
  }
  // Pass-through helper that hands its argument back unchanged.
  // Kept until we change the callers.
  internal static string _ (string arg)
  {
      return arg;
  }
  88. #if NET_2_0
  89. DecoderFallback decoder_fallback;
  90. EncoderFallback encoder_fallback;
  // Reports the "is_readonly" flag. While it is true the fallback
  // property setters throw InvalidOperationException.
  [ComVisible (false)]
  public bool IsReadOnly {
      get {
          return is_readonly;
      }
  }
  // Base implementation always answers false; it is virtual so
  // derived encodings can report otherwise.
  [ComVisible (false)]
  public virtual bool IsSingleByte {
      get {
          return false;
      }
  }
  // Decoder fallback associated with this encoding.
  //
  // get: returns the stored fallback, lazily creating a
  //      DecoderReplacementFallback built from the empty string when
  //      none has been assigned yet.
  // set: throws InvalidOperationException when the encoding is
  //      read-only, ArgumentNullException when "value" is null.
  [MonoTODO ("not used yet")]
  [ComVisible (false)]
  public DecoderFallback DecoderFallback {
      get {
          if (decoder_fallback == null)
              decoder_fallback = new DecoderReplacementFallback (String.Empty);
          return decoder_fallback;
      }
      set {
          if (IsReadOnly)
              throw new InvalidOperationException ("This Encoding is readonly.");
          // Name the offending argument, like every other null check
          // in this class does.
          if (value == null)
              throw new ArgumentNullException ("value");
          decoder_fallback = value;
      }
  }
  // Encoder fallback associated with this encoding.
  //
  // get: returns the stored fallback, lazily creating an
  //      EncoderReplacementFallback built from the empty string when
  //      none has been assigned yet.
  // set: throws InvalidOperationException when the encoding is
  //      read-only, ArgumentNullException when "value" is null.
  [MonoTODO ("not used yet")]
  [ComVisible (false)]
  public EncoderFallback EncoderFallback {
      get {
          if (encoder_fallback == null)
              encoder_fallback = new EncoderReplacementFallback (String.Empty);
          return encoder_fallback;
      }
      set {
          if (IsReadOnly)
              throw new InvalidOperationException ("This Encoding is readonly.");
          // Name the offending argument, like every other null check
          // in this class does.
          if (value == null)
              throw new ArgumentNullException ("value");
          encoder_fallback = value;
      }
  }
  // Replace the stored fallbacks without consulting IsReadOnly.
  // A null argument leaves the corresponding fallback untouched.
  internal void SetFallbackInternal (EncoderFallback e, DecoderFallback d)
  {
      if (d != null) {
          decoder_fallback = d;
      }
      if (e != null) {
          encoder_fallback = e;
      }
  }
  138. #endif
  // Re-encode a whole byte array: decode it with "srcEncoding", then
  // encode the resulting characters with "dstEncoding".
  // Throws ArgumentNullException for any null argument.
  public static byte[] Convert (Encoding srcEncoding, Encoding dstEncoding,
                                byte[] bytes)
  {
      if (srcEncoding == null)
          throw new ArgumentNullException ("srcEncoding");
      if (dstEncoding == null)
          throw new ArgumentNullException ("dstEncoding");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      char[] decoded = srcEncoding.GetChars (bytes, 0, bytes.Length);
      return dstEncoding.GetBytes (decoded);
  }
  // Re-encode "count" bytes starting at "index": decode them with
  // "srcEncoding", then encode the characters with "dstEncoding".
  // Throws ArgumentNullException for a null argument and
  // ArgumentOutOfRangeException when index/count fall outside "bytes".
  public static byte[] Convert (Encoding srcEncoding, Encoding dstEncoding,
                                byte[] bytes, int index, int count)
  {
      if (srcEncoding == null)
          throw new ArgumentNullException ("srcEncoding");
      if (dstEncoding == null)
          throw new ArgumentNullException ("dstEncoding");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      if (index < 0 || index > bytes.Length)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));

      int available = bytes.Length - index;
      if (count < 0 || count > available)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      char[] decoded = srcEncoding.GetChars (bytes, index, count);
      return dstEncoding.GetBytes (decoded);
  }
  // Two encodings are equal when their code pages match and, under
  // NET_2_0, their decoder and encoder fallbacks are equal as well.
  // Anything that is not an Encoding compares unequal.
  public override bool Equals (Object obj)
  {
      Encoding other = obj as Encoding;
      if (other == null) {
          return false;
      }
  #if NET_2_0
      if (codePage != other.codePage) {
          return false;
      }
      if (!DecoderFallback.Equals (other.DecoderFallback)) {
          return false;
      }
      return EncoderFallback.Equals (other.EncoderFallback);
  #else
      return codePage == other.codePage;
  #endif
  }
  // Get the number of bytes needed to encode "count" characters of
  // "chars" starting at "index". Implemented by each concrete encoding.
  public abstract int GetByteCount (char[] chars, int index, int count);
  // Convenience wrapper: byte count for a whole string. The string is
  // copied to a char array and handed to the abstract overload.
  // Throws ArgumentNullException when "s" is null.
  public virtual int GetByteCount (String s)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }
      char[] buffer = s.ToCharArray ();
      return GetByteCount (buffer, 0, buffer.Length);
  }
  // Convenience wrapper: byte count for a whole char array.
  // Throws ArgumentNullException when "chars" is null.
  public virtual int GetByteCount (char[] chars)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      return GetByteCount (chars, 0, chars.Length);
  }
  // Encode "charCount" characters of "chars", starting at "charIndex",
  // into "bytes" starting at "byteIndex". The int result is presumably
  // the number of bytes written - confirm against the implementations.
  public abstract int GetBytes (char[] chars, int charIndex, int charCount,
                                byte[] bytes, int byteIndex);
  // Convenience wrapper for "GetBytes": encode "charCount" characters
  // of the string "s", starting at "charIndex", into "bytes" starting
  // at "byteIndex", and return whatever the underlying encoder returns.
  //
  // Throws ArgumentNullException when "s" (or, under NET_2_0, "bytes")
  // is null, and ArgumentOutOfRangeException when an index or count
  // falls outside its buffer.
  public virtual int GetBytes (String s, int charIndex, int charCount,
                               byte[] bytes, int byteIndex)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
  #if NET_2_0
      // "bytes" is dereferenced below, so reject null explicitly
      // instead of failing with a NullReferenceException.
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (charIndex < 0 || charIndex > s.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      // Compare against the remaining length rather than computing
      // charIndex + charCount: the sum can overflow and slip past the
      // check, and the values are used for raw pointer access below.
      if (charCount < 0 || charCount > s.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      // Nothing to encode, or no room left to write anything.
      if (charCount == 0 || bytes.Length == byteIndex)
          return 0;

      unsafe {
          fixed (char* cptr = s) {
              fixed (byte* bptr = bytes) {
                  return GetBytes (cptr + charIndex,
                                   charCount,
                                   bptr + byteIndex,
                                   bytes.Length - byteIndex);
              }
          }
      }
  #else
      return GetBytes (s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
  #endif
  }
  // Encode a whole string into a freshly allocated byte array.
  // Throws ArgumentNullException when "s" is null. Under NET_2_0 an
  // empty string, or a zero byte count, yields an empty array without
  // invoking the encoder.
  public virtual byte[] GetBytes (String s)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
  #if NET_2_0
      if (s.Length == 0)
          return new byte [0];

      int needed = GetByteCount (s);
      if (needed == 0)
          return new byte [0];

      byte [] result = new byte [needed];
      unsafe {
          fixed (char* src = s) {
              fixed (byte* dst = result) {
                  GetBytes (src, s.Length, dst, needed);
              }
          }
      }
      return result;
  #else
      char[] source = s.ToCharArray ();
      byte[] result = new byte [GetByteCount (source, 0, source.Length)];
      GetBytes (source, 0, source.Length, result, 0);
      return result;
  #endif
  }
  // Encode "count" characters of "chars", starting at "index", into a
  // new array sized by GetByteCount. Argument validation is left to the
  // abstract overloads it calls.
  public virtual byte[] GetBytes (char[] chars, int index, int count)
  {
      byte[] result = new byte [GetByteCount (chars, index, count)];
      GetBytes (chars, index, count, result, 0);
      return result;
  }
  // Encode a whole char array into a new array sized by GetByteCount.
  // Throws ArgumentNullException when "chars" is null.
  public virtual byte[] GetBytes (char[] chars)
  {
      // chars.Length is read below; reject null up front so the caller
      // gets ArgumentNullException like the sibling wrappers, not a
      // NullReferenceException.
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      int numBytes = GetByteCount (chars, 0, chars.Length);
      byte[] bytes = new byte [numBytes];
      GetBytes (chars, 0, chars.Length, bytes, 0);
      return bytes;
  }
  // Get the number of characters that decoding "count" bytes of
  // "bytes", starting at "index", requires. Implemented by each
  // concrete encoding.
  public abstract int GetCharCount (byte[] bytes, int index, int count);
  // Convenience wrapper: character count for a whole byte array.
  // Throws ArgumentNullException when "bytes" is null.
  public virtual int GetCharCount (byte[] bytes)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      return GetCharCount (bytes, 0, bytes.Length);
  }
  // Decode "byteCount" bytes of "bytes", starting at "byteIndex", into
  // "chars" starting at "charIndex". The int result is presumably the
  // number of characters written - confirm against the implementations.
  public abstract int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                char[] chars, int charIndex);
  // Convenience wrapper: decode "count" bytes starting at "index" into
  // a new char array sized by GetCharCount. Argument validation is left
  // to the abstract overloads it calls.
  public virtual char[] GetChars (byte[] bytes, int index, int count)
  {
      char[] result = new char [GetCharCount (bytes, index, count)];
      GetChars (bytes, index, count, result, 0);
      return result;
  }
  // Decode a whole byte array into a new char array sized by
  // GetCharCount. Throws ArgumentNullException when "bytes" is null.
  public virtual char[] GetChars (byte[] bytes)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int length = bytes.Length;
      char[] result = new char [GetCharCount (bytes, 0, length)];
      GetChars (bytes, 0, length, result, 0);
      return result;
  }
  // Default decoder: a ForwardingDecoder constructed around this
  // encoding, so decode requests are forwarded to this object.
  public virtual Decoder GetDecoder ()
  {
      Decoder forwarding = new ForwardingDecoder (this);
      return forwarding;
  }
  // Default encoder: a ForwardingEncoder constructed around this
  // encoding, so encode requests are forwarded to this object.
  public virtual Encoder GetEncoder ()
  {
      Encoder forwarding = new ForwardingEncoder (this);
      return forwarding;
  }
  327. // Loaded copy of the "I18N" assembly. We need to move
  328. // this into a class in "System.Private" eventually.
  329. private static Assembly i18nAssembly;
  330. private static bool i18nDisabled;
  // Invoke the public instance method "name" on the "I18N" manager
  // object, passing "args". Runs entirely under "lockobj".
  //
  // Returns null when the assembly, the manager class, the manager
  // instance or the method cannot be obtained. When the engine reports
  // NotImplementedException for loading or reflection, I18N support is
  // switched off for all later calls.
  private static Object InvokeI18N (String name, params Object[] args)
  {
      lock (lockobj) {
          // Bail out if we previously detected that there
          // is insufficient engine support for I18N handling.
          if (i18nDisabled) {
              return null;
          }

          // Find or load the "I18N" assembly.
          if (i18nAssembly == null) {
              try {
                  i18nAssembly = Assembly.Load (Consts.AssemblyI18N);
              } catch (NotImplementedException) {
                  // Assembly loading unsupported by the engine.
                  i18nDisabled = true;
                  return null;
              } catch (SystemException) {
                  return null;
              }
              if (i18nAssembly == null) {
                  return null;
              }
          }

          // Find the "I18N.Common.Manager" class.
          Type managerType;
          try {
              managerType = i18nAssembly.GetType ("I18N.Common.Manager");
          } catch (NotImplementedException) {
              // "GetType" is not supported by the engine.
              i18nDisabled = true;
              return null;
          }
          if (managerType == null) {
              return null;
          }

          // Get the value of the static "PrimaryManager" property.
          Object primary;
          try {
              primary = managerType.InvokeMember
                  ("PrimaryManager",
                   BindingFlags.GetProperty |
                   BindingFlags.Static |
                   BindingFlags.Public,
                   null, null, null, null, null, null);
          } catch (MissingMethodException) {
              return null;
          } catch (SecurityException) {
              return null;
          } catch (NotImplementedException) {
              // "InvokeMember" is not supported by the engine.
              i18nDisabled = true;
              return null;
          }
          if (primary == null) {
              return null;
          }

          // Invoke the requested method on the manager.
          try {
              return managerType.InvokeMember
                  (name,
                   BindingFlags.InvokeMethod |
                   BindingFlags.Instance |
                   BindingFlags.Public,
                   null, primary, args, null, null, null);
          } catch (MissingMethodException) {
              return null;
          } catch (SecurityException) {
              return null;
          }
      }
  }
  // Get an encoding for a specific code page.
  //
  // Lookup order: the builtin encodings, the I18N manager, a type named
  // "System.Text.CP<codePage>" in this assembly, then the same type
  // name resolved through Type.GetType. Instances obtained from the
  // last three sources are marked read-only before being returned.
  // Throws NotSupportedException when nothing handles the code page.
  #if ECMA_COMPAT
  private
  #else
  public
  #endif
  static Encoding GetEncoding (int codePage)
  {
      // Check for the builtin code pages first.
      switch (codePage) {
      case 0:
          return Default;
      case ASCIIEncoding.ASCII_CODE_PAGE:
          return ASCII;
      case UTF7Encoding.UTF7_CODE_PAGE:
          return UTF7;
      case UTF8Encoding.UTF8_CODE_PAGE:
          return UTF8;
  #if NET_2_0
      case UTF32Encoding.UTF32_CODE_PAGE:
          return UTF32;
      case UTF32Encoding.BIG_UTF32_CODE_PAGE:
          return BigEndianUTF32;
  #endif
      case UnicodeEncoding.UNICODE_CODE_PAGE:
          return Unicode;
      case UnicodeEncoding.BIG_UNICODE_CODE_PAGE:
          return BigEndianUnicode;
      case Latin1Encoding.ISOLATIN_CODE_PAGE:
          return ISOLatin1;
      default:
          break;
      }

      // Try to obtain a code page handler from the I18N handler.
      Encoding found = (Encoding) InvokeI18N ("GetEncoding", codePage);
      if (found != null) {
          found.is_readonly = true;
          return found;
      }

      // Build a code page class name and look for it first in this
      // assembly, then in any assembly, in case the application
      // has provided its own code page handler.
      String className = "System.Text.CP" + codePage.ToString ();
      Type handler = Assembly.GetExecutingAssembly ().GetType (className);
      if (handler == null) {
          handler = Type.GetType (className);
      }

      // We have no idea how to handle this code page.
      if (handler == null) {
          throw new NotSupportedException
              (String.Format ("CodePage {0} not supported", codePage.ToString ()));
      }

      found = (Encoding) Activator.CreateInstance (handler);
      found.is_readonly = true;
      return found;
  }
  465. #if !ECMA_COMPAT
  466. #if NET_2_0
  // Produce a shallow copy (MemberwiseClone) of this encoding. The copy
  // is marked writable, so its fallbacks may be replaced.
  [ComVisible (false)]
  public virtual object Clone ()
  {
      Encoding copy = (Encoding) MemberwiseClone ();
      copy.is_readonly = false;
      return copy;
  }
  // Get a writable clone of the encoding for "codePage" with the given
  // fallbacks installed. Throws ArgumentNullException when either
  // fallback is null; code page lookup failures come from
  // GetEncoding (int).
  public static Encoding GetEncoding (int codePage,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (encoderFallback == null) {
          throw new ArgumentNullException ("encoderFallback");
      }
      if (decoderFallback == null) {
          throw new ArgumentNullException ("decoderFallback");
      }

      Encoding shared = GetEncoding (codePage);
      Encoding copy = shared.Clone () as Encoding;
      copy.is_readonly = false;
      copy.encoder_fallback = encoderFallback;
      copy.decoder_fallback = decoderFallback;
      return copy;
  }
  // Get a writable clone of the encoding called "name" with the given
  // fallbacks installed. Throws ArgumentNullException when either
  // fallback is null; name lookup failures come from
  // GetEncoding (string).
  public static Encoding GetEncoding (string name,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (encoderFallback == null) {
          throw new ArgumentNullException ("encoderFallback");
      }
      if (decoderFallback == null) {
          throw new ArgumentNullException ("decoderFallback");
      }

      Encoding shared = GetEncoding (name);
      Encoding copy = shared.Clone () as Encoding;
      copy.is_readonly = false;
      copy.encoder_fallback = encoderFallback;
      copy.decoder_fallback = decoderFallback;
      return copy;
  }
  500. static EncodingInfo [] encoding_infos;
  // Return one EncodingInfo per entry of a fixed code page list. The
  // array is built on first use and cached in "encoding_infos"; every
  // caller receives that same cached array.
  //
  // FIXME: As everyone would agree, this implementation is so *hacky*
  // and could be very easily broken. But since there is a test for
  // this method to make sure that this method always returns
  // the same number and content of encoding infos, this won't
  // matter practically.
  public static EncodingInfo[] GetEncodings ()
  {
      if (encoding_infos == null) {
          int [] codepages = new int [] {
              37, 437, 500, 708,
              850, 852, 855, 857, 858, 860, 861, 862, 863,
              864, 865, 866, 869, 870, 874, 875,
              932, 936, 949, 950,
              1026, 1047, 1140, 1141, 1142, 1143, 1144,
              1145, 1146, 1147, 1148, 1149,
              1200, 1201, 1250, 1251, 1252, 1253, 1254,
              1255, 1256, 1257, 1258,
              10000, 10079, 12000, 12001,
              20127, 20273, 20277, 20278, 20280, 20284,
              20285, 20290, 20297, 20420, 20424, 20866,
              20871, 21025, 21866, 28591, 28592, 28593,
              28594, 28595, 28596, 28597, 28598, 28599,
              28605, 38598,
              50220, 50221, 50222, 51932, 51949, 54936,
              57002, 57003, 57004, 57005, 57006, 57007,
              57008, 57009, 57010, 57011,
              65000, 65001};

          // Fill a local array and publish it only once complete, so
          // a concurrent caller can never observe null entries.
          EncodingInfo [] infos = new EncodingInfo [codepages.Length];
          for (int i = 0; i < codepages.Length; i++)
              infos [i] = new EncodingInfo (codepages [i]);
          encoding_infos = infos;
      }
      return encoding_infos;
  }
  // Shorthand for IsAlwaysNormalized (NormalizationForm.FormC).
  public bool IsAlwaysNormalized ()
  {
      NormalizationForm form = NormalizationForm.FormC;
      return IsAlwaysNormalized (form);
  }
  // Base implementation: true only for form C when this instance is
  // an ASCIIEncoding.
  public virtual bool IsAlwaysNormalized (NormalizationForm form)
  {
      // umm, ASCIIEncoding should have overridden this method, no?
      if (form != NormalizationForm.FormC) {
          return false;
      }
      return this is ASCIIEncoding;
  }
  543. #endif
  // Table of builtin web encoding names and the corresponding code pages.
  // Layout: an int code page followed by the names that map to it.
  //
  // GetEncoding (String) lower-cases the requested name and replaces
  // '-' with '_' before comparing with "==", so every name here must be
  // lower-case and use '_'. A mixed-case entry can never match.
  private static readonly object[] encodings =
  {
      ASCIIEncoding.ASCII_CODE_PAGE,
      "ascii", "us_ascii", "us", "ansi_x3.4_1968",
      "ansi_x3.4_1986", "cp367", "csascii", "ibm367",
      "iso_ir_6", "iso646_us", "iso_646.irv:1991",
      UTF7Encoding.UTF7_CODE_PAGE,
      "utf_7", "csunicode11utf7", "unicode_1_1_utf_7",
      "unicode_2_0_utf_7", "x_unicode_1_1_utf_7",
      "x_unicode_2_0_utf_7",
      UTF8Encoding.UTF8_CODE_PAGE,
      "utf_8", "unicode_1_1_utf_8", "unicode_2_0_utf_8",
      "x_unicode_1_1_utf_8", "x_unicode_2_0_utf_8",
      UnicodeEncoding.UNICODE_CODE_PAGE,
      "utf_16", "utf_16le", "ucs_2", "unicode",
      "iso_10646_ucs2",
      UnicodeEncoding.BIG_UNICODE_CODE_PAGE,
      "unicodefffe", "utf_16be",
  #if NET_2_0
      UTF32Encoding.UTF32_CODE_PAGE,
      "utf_32", "utf_32le", "ucs_4",
      UTF32Encoding.BIG_UTF32_CODE_PAGE,
      "utf_32be",
  #endif
      Latin1Encoding.ISOLATIN_CODE_PAGE,
      "iso_8859_1", "latin1"
  };
  // Get an encoding object for a specific web encoding name.
  //
  // The name is lower-cased and '-' is replaced by '_' before it is
  // looked up in the builtin table. Failing that, the original name is
  // offered to the I18N manager, and finally a type named
  // "System.Text.ENC<converted name>" is searched for, first in this
  // assembly and then through Type.GetType.
  // Throws ArgumentNullException for a null name and
  // NotSupportedException when nothing handles it.
  public static Encoding GetEncoding (String name)
  {
      // Validate the parameters.
      if (name == null)
          throw new ArgumentNullException ("name");

      string converted = name.ToLowerInvariant ().Replace ('-', '_');

      // Search the table for a name match. An int entry sets the code
      // page that the following names belong to.
      int currentCodePage = 0;
      foreach (object entry in encodings) {
          if (entry is int) {
              currentCodePage = (int) entry;
          } else if (converted == ((string) entry)) {
              return GetEncoding (currentCodePage);
          }
      }

      // Try to obtain a web encoding handler from the I18N handler.
      Encoding fromI18N = (Encoding) InvokeI18N ("GetEncoding", name);
      if (fromI18N != null)
          return fromI18N;

      // Build a web encoding class name and look for it first in this
      // assembly, then in any assembly, in case the application
      // has provided its own code page handler.
      String className = "System.Text.ENC" + converted;
      Type handler = Assembly.GetExecutingAssembly ().GetType (className);
      if (handler == null)
          handler = Type.GetType (className);

      // We have no idea how to handle this encoding name.
      if (handler == null)
          throw new NotSupportedException (String.Format ("Encoding name `{0}' not supported", name));

      return (Encoding) Activator.CreateInstance (handler);
  }
  613. #endif // !ECMA_COMPAT
  // Get a hash code for this instance. Under NET_2_0 it combines the
  // decoder fallback hash, the encoder fallback hash and the code page,
  // the same three values that Equals compares; otherwise it is just
  // the code page.
  public override int GetHashCode ()
  {
  #if NET_2_0
      // The parentheses are required: "+" binds tighter than "<<", so
      // the unparenthesised form shifts by (24 + hash) and (16 + codePage)
      // instead of adding the three shifted components.
      return (DecoderFallback.GetHashCode () << 24)
          + (EncoderFallback.GetHashCode () << 16)
          + codePage;
  #else
      return codePage;
  #endif
  }
  // Get the maximum number of bytes needed to encode "charCount"
  // characters. Implemented by each concrete encoding.
  public abstract int GetMaxByteCount (int charCount);
  // Get the maximum number of characters needed to decode "byteCount"
  // bytes. Implemented by each concrete encoding.
  public abstract int GetMaxCharCount (int byteCount);
  // Get the identifying preamble for this encoding. The base
  // implementation has none and returns a new empty array on each call.
  public virtual byte[] GetPreamble ()
  {
      byte[] none = new byte [0];
      return none;
  }
  // Decode "count" bytes of "bytes", starting at "index", into a
  // string built from the result of GetChars.
  public virtual String GetString (byte[] bytes, int index, int count)
  {
      char[] decoded = GetChars (bytes, index, count);
      return new String (decoded);
  }
  // Decode a whole byte array into a string.
  // Throws ArgumentNullException when "bytes" is null.
  public virtual String GetString (byte[] bytes)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      return GetString (bytes, 0, bytes.Length);
  }
  645. #if !ECMA_COMPAT
  646. internal string body_name;
  647. internal string encoding_name;
  648. internal string header_name;
  649. internal bool is_mail_news_display;
  650. internal bool is_mail_news_save;
  651. internal bool is_browser_save = false;
  652. internal bool is_browser_display = false;
  653. internal string web_name;
  // Get the mail body name for this encoding (the "body_name" field).
  public virtual String BodyName
  {
      get { return body_name; }
  }
  // Get the code page represented by this object (the "codePage" field).
  public virtual int CodePage
  {
      get { return codePage; }
  }
  // Get the human-readable name for this encoding (the
  // "encoding_name" field).
  public virtual String EncodingName
  {
      get { return encoding_name; }
  }
  // Get the mail agent header name for this encoding (the
  // "header_name" field).
  public virtual String HeaderName
  {
      get { return header_name; }
  }
  // Determine if this encoding can be displayed in a Web browser
  // (the "is_browser_display" field, false unless set elsewhere).
  public virtual bool IsBrowserDisplay
  {
      get { return is_browser_display; }
  }
  // Determine if this encoding can be saved from a Web browser
  // (the "is_browser_save" field, false unless set elsewhere).
  public virtual bool IsBrowserSave
  {
      get { return is_browser_save; }
  }
  // Determine if this encoding can be displayed in a mail/news agent
  // (the "is_mail_news_display" field).
  public virtual bool IsMailNewsDisplay
  {
      get { return is_mail_news_display; }
  }
  // Determine if this encoding can be saved from a mail/news agent
  // (the "is_mail_news_save" field).
  public virtual bool IsMailNewsSave
  {
      get { return is_mail_news_save; }
  }
  // Get the IANA-preferred Web name for this encoding (the
  // "web_name" field).
  public virtual String WebName
  {
      get { return web_name; }
  }
  // Get the Windows code page represented by this object. The value
  // comes from "windows_code_page", which the int constructor sets to
  // the same number as "codePage".
  public virtual int WindowsCodePage
  {
      get { return windows_code_page; }
  }
  726. #endif // !ECMA_COMPAT
  727. // Storage for standard encoding objects.
  728. static volatile Encoding asciiEncoding;
  729. static volatile Encoding bigEndianEncoding;
  730. static volatile Encoding defaultEncoding;
  731. static volatile Encoding utf7Encoding;
  732. static volatile Encoding utf8EncodingWithMarkers;
  733. static volatile Encoding utf8EncodingWithoutMarkers;
  734. static volatile Encoding unicodeEncoding;
  735. static volatile Encoding isoLatin1Encoding;
  736. static volatile Encoding unixConsoleEncoding;
  737. #if NET_2_0
  738. static volatile Encoding utf32Encoding;
  739. static volatile Encoding bigEndianUTF32Encoding;
  740. #endif
  741. static readonly object lockobj = new object ();
  // Get the standard ASCII encoding object. It is created on first
  // use under "lockobj" and cached in "asciiEncoding".
  public static Encoding ASCII
  {
      get {
          if (asciiEncoding == null) {
              lock (lockobj) {
                  if (asciiEncoding == null) {
                      Encoding created = new ASCIIEncoding ();
                      created.is_readonly = true;
                      asciiEncoding = created;
                  }
              }
          }
          return asciiEncoding;
      }
  }
  // Get the standard big-endian Unicode encoding object, built as
  // UnicodeEncoding (true, true). It is created on first use under
  // "lockobj" and cached in "bigEndianEncoding".
  public static Encoding BigEndianUnicode
  {
      get {
          if (bigEndianEncoding == null) {
              lock (lockobj) {
                  if (bigEndianEncoding == null) {
                      Encoding created = new UnicodeEncoding (true, true);
                      created.is_readonly = true;
                      bigEndianEncoding = created;
                  }
              }
          }
          return bigEndianEncoding;
      }
  }
  // Implemented inside the runtime (InternalCall). As used by the
  // "Default" property: "code_page" is updated through the ref
  // parameter, and when it comes back as -1 the returned string is
  // used as an encoding name instead.
  [MethodImpl (MethodImplOptions.InternalCall)]
  internal static extern string InternalCodePage (ref int code_page);
  // Get the default encoding object. On first use the runtime is asked
  // for the system code page; the answer is resolved through
  // GetEncoding, falling back to UTF8Unmarked when the code page or
  // name is not supported. The result is cached in "defaultEncoding".
  public static Encoding Default
  {
      get {
          if (defaultEncoding == null) {
              lock (lockobj) {
                  if (defaultEncoding == null) {
                      // See if the underlying system knows what
                      // code page handler we should be using.
                      int code_page = 1;
                      string code_page_name = InternalCodePage (ref code_page);

                      Encoding resolved;
                      try {
                          if (code_page == -1) {
                              // The runtime answered with a name only.
                              resolved = GetEncoding (code_page_name);
                          } else {
                              // map the codepage from internal to our numbers
                              code_page &= 0x0fffffff;
                              switch (code_page) {
                              case 1:
                                  code_page = ASCIIEncoding.ASCII_CODE_PAGE;
                                  break;
                              case 2:
                                  code_page = UTF7Encoding.UTF7_CODE_PAGE;
                                  break;
                              case 3:
                                  code_page = UTF8Encoding.UTF8_CODE_PAGE;
                                  break;
                              case 4:
                                  code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
                                  break;
                              case 5:
                                  code_page = UnicodeEncoding.BIG_UNICODE_CODE_PAGE;
                                  break;
                              case 6:
                                  code_page = Latin1Encoding.ISOLATIN_CODE_PAGE;
                                  break;
                              }
                              resolved = GetEncoding (code_page);
                          }
                      } catch (NotSupportedException) {
                          resolved = UTF8Unmarked;
                      }

                      resolved.is_readonly = true;
                      defaultEncoding = resolved;
                  }
              }
          }
          return defaultEncoding;
      }
  }
  // Get the ISO Latin1 encoding object. It is created on first use
  // under "lockobj" and cached in "isoLatin1Encoding".
  private static Encoding ISOLatin1
  {
      get {
          if (isoLatin1Encoding == null) {
              lock (lockobj) {
                  if (isoLatin1Encoding == null) {
                      Encoding created = new Latin1Encoding ();
                      created.is_readonly = true;
                      isoLatin1Encoding = created;
                  }
              }
          }
          return isoLatin1Encoding;
      }
  }
  // Shared UTF-7 encoding object, created on first use and flagged
  // read-only. Private when ECMA_COMPAT is defined, public otherwise.
  #if ECMA_COMPAT
  private
  #else
  public
  #endif
  static Encoding UTF7
  {
      get {
          // Fast path: the instance already exists.
          if (utf7Encoding != null)
              return utf7Encoding;

          lock (lockobj) {
              // Check again now that the lock is held.
              if (utf7Encoding == null) {
                  utf7Encoding = new UTF7Encoding ();
                  utf7Encoding.is_readonly = true;
              }
          }
          return utf7Encoding;
      }
  }
  // Shared UTF-8 encoding object, built with UTF8Encoding (true),
  // created on first use and flagged read-only.
  public static Encoding UTF8
  {
      get {
          // Fast path: the instance already exists.
          if (utf8EncodingWithMarkers != null)
              return utf8EncodingWithMarkers;

          lock (lockobj) {
              // Check again now that the lock is held.
              if (utf8EncodingWithMarkers == null) {
                  utf8EncodingWithMarkers = new UTF8Encoding (true);
                  utf8EncodingWithMarkers.is_readonly = true;
              }
          }
          return utf8EncodingWithMarkers;
      }
  }
  //
  // Internal only, for use by the class libraries: the unmarked,
  // non-input-validating UTF-8 encoding, built with
  // UTF8Encoding (false, false), created on first use and flagged
  // read-only.
  //
  internal static Encoding UTF8Unmarked {
      get {
          // Fast path: the instance already exists.
          if (utf8EncodingWithoutMarkers != null)
              return utf8EncodingWithoutMarkers;

          lock (lockobj) {
              // Check again now that the lock is held.
              if (utf8EncodingWithoutMarkers == null) {
                  utf8EncodingWithoutMarkers = new UTF8Encoding (false, false);
                  utf8EncodingWithoutMarkers.is_readonly = true;
              }
          }
          return utf8EncodingWithoutMarkers;
      }
  }
  // Shared little-endian Unicode encoding object, built with
  // UnicodeEncoding (false, true), created on first use and flagged
  // read-only.
  public static Encoding Unicode
  {
      get {
          // Fast path: the instance already exists.
          if (unicodeEncoding != null)
              return unicodeEncoding;

          lock (lockobj) {
              // Check again now that the lock is held.
              if (unicodeEncoding == null) {
                  unicodeEncoding = new UnicodeEncoding (false, true);
                  unicodeEncoding.is_readonly = true;
              }
          }
          return unicodeEncoding;
      }
  }
  892. #if NET_2_0
  // Shared little-endian UTF-32 encoding object, built with
  // UTF32Encoding (false, true), created on first use and flagged
  // read-only.
  public static Encoding UTF32
  {
      get {
          // Fast path: the instance already exists.
          if (utf32Encoding != null)
              return utf32Encoding;

          lock (lockobj) {
              // Check again now that the lock is held.
              if (utf32Encoding == null) {
                  utf32Encoding = new UTF32Encoding (false, true);
                  utf32Encoding.is_readonly = true;
              }
          }
          return utf32Encoding;
      }
  }
  // Shared big-endian UTF-32 encoding object, built with
  // UTF32Encoding (true, true), created on first use and flagged
  // read-only.
  private static Encoding BigEndianUTF32
  {
      get {
          // Fast path: the instance already exists.
          if (bigEndianUTF32Encoding != null)
              return bigEndianUTF32Encoding;

          lock (lockobj) {
              // Check again now that the lock is held.
              if (bigEndianUTF32Encoding == null) {
                  bigEndianUTF32Encoding = new UTF32Encoding (true, true);
                  bigEndianUTF32Encoding.is_readonly = true;
              }
          }
          return bigEndianUTF32Encoding;
      }
  }
  923. #endif
  // Decoder that does no decoding itself: each call is passed straight
  // to the Encoding it was constructed with.
  private sealed class ForwardingDecoder : Decoder
  {
      // Encoding that performs the actual work.
      private Encoding encoding;

      // Remember the target encoding and, under NET_2_0, take over its
      // decoder fallback.
      public ForwardingDecoder (Encoding enc)
      {
          this.encoding = enc;
  #if NET_2_0
          this.Fallback = this.encoding.DecoderFallback;
  #endif
      }

      // Forwarded to Encoding.GetCharCount (byte[], int, int).
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          int charTotal = this.encoding.GetCharCount (bytes, index, count);
          return charTotal;
      }

      // Forwarded to Encoding.GetChars (byte[], int, int, char[], int).
      public override int GetChars (byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex)
      {
          int charsWritten = this.encoding.GetChars (
              bytes, byteIndex, byteCount, chars, charIndex);
          return charsWritten;
      }
  } // class ForwardingDecoder
  // Encoder that does no encoding itself: each call is passed straight
  // to the Encoding it was constructed with. The flush argument is
  // not used by either method.
  private sealed class ForwardingEncoder : Encoder
  {
      // Encoding that performs the actual work.
      private Encoding encoding;

      // Remember the target encoding and, under NET_2_0, take over its
      // encoder fallback.
      public ForwardingEncoder (Encoding enc)
      {
          this.encoding = enc;
  #if NET_2_0
          this.Fallback = this.encoding.EncoderFallback;
  #endif
      }

      // Forwarded to Encoding.GetByteCount (char[], int, int).
      public override int GetByteCount (char[] chars, int index, int count, bool flush)
      {
          int byteTotal = this.encoding.GetByteCount (chars, index, count);
          return byteTotal;
      }

      // Forwarded to Encoding.GetBytes (char[], int, int, byte[], int).
      // Note: despite its name, byteCount is passed on as the fifth
      // argument, i.e. the position in bytes where writing starts.
      public override int GetBytes (char[] chars, int charIndex,
                                    int charCount, byte[] bytes,
                                    int byteCount, bool flush)
      {
          int bytesWritten = this.encoding.GetBytes (
              chars, charIndex, charCount, bytes, byteCount);
          return bytesWritten;
      }
  } // class ForwardingEncoder
  972. #if NET_2_0
  // Pointer overload of GetByteCount.
  // Copies count characters starting at chars into a managed array
  // and returns the result of GetByteCount (char[]) on that copy.
  // Throws ArgumentNullException when chars is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetByteCount (char *chars, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (count < 0)
          throw new ArgumentOutOfRangeException ("count");

      char [] managed = new char [count];
      char *src = chars;
      for (int i = 0; i != count; i++, src++)
          managed [i] = *src;

      return GetByteCount (managed);
  }
  // Pointer overload of GetCharCount.
  // Copies count bytes starting at bytes into a managed array and
  // returns the result of GetCharCount (byte[], int, int) over the
  // whole copy.
  // Throws ArgumentNullException when bytes is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetCharCount (byte *bytes, int count)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (count < 0)
          throw new ArgumentOutOfRangeException ("count");

      byte [] managed = new byte [count];
      byte *src = bytes;
      for (int i = 0; i != count; i++, src++)
          managed [i] = *src;

      return GetCharCount (managed, 0, count);
  }
  // Pointer overload of GetChars.
  // Copies byteCount bytes from bytes into a managed array, decodes
  // them with GetChars (byte[], int, int) and copies the resulting
  // characters to chars. Returns the number of characters written.
  // Throws ArgumentNullException for a null pointer,
  // ArgumentOutOfRangeException for a negative count, and
  // ArgumentException when more than charCount characters are produced
  // (nothing is written to chars in that case).
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetChars (byte *bytes, int byteCount, char *chars, int charCount)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (charCount < 0)
          throw new ArgumentOutOfRangeException ("charCount");
      if (byteCount < 0)
          throw new ArgumentOutOfRangeException ("byteCount");

      // Managed copy of the input bytes.
      byte [] input = new byte [byteCount];
      byte *src = bytes;
      for (int i = 0; i != byteCount; i++, src++)
          input [i] = *src;

      char [] decoded = GetChars (input, 0, byteCount);
      int produced = decoded.Length;
      if (produced > charCount)
          throw new ArgumentException ("charCount is less than the number of characters produced", "charCount");

      // Copy the decoded characters out to the caller's buffer.
      char *dst = chars;
      for (int i = 0; i != produced; i++, dst++)
          *dst = decoded [i];

      return produced;
  }
  // Pointer overload of GetBytes.
  // Copies charCount characters from chars into a managed array,
  // encodes them with GetBytes (char[], int, int) and copies the
  // resulting bytes to bytes. Returns the number of bytes written.
  // Throws ArgumentNullException for a null pointer,
  // ArgumentOutOfRangeException for a negative count, and
  // ArgumentException when more than byteCount bytes are produced
  // (nothing is written to bytes in that case).
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetBytes (char *chars, int charCount, byte *bytes, int byteCount)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (charCount < 0)
          throw new ArgumentOutOfRangeException ("charCount");
      if (byteCount < 0)
          throw new ArgumentOutOfRangeException ("byteCount");

      // Managed copy of the input characters.
      char [] c = new char [charCount];
      for (int i = 0; i < charCount; i++)
          c [i] = chars [i];

      byte [] b = GetBytes (c, 0, charCount);
      int top = b.Length;
      if (top > byteCount)
          throw new ArgumentException ("byteCount is less than the number of bytes produced", "byteCount");

      // Copy the encoded bytes out to the caller's buffer.
      for (int i = 0; i < top; i++)
          bytes [i] = b [i];

      return top;
  }
  1045. #endif
  1046. }; // class Encoding
  1047. }; // namespace System.Text