Encoding.cs 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215
  1. /*
  2. * Encoding.cs - Implementation of the "System.Text.Encoding" class.
  3. *
  4. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  5. * Copyright (c) 2002, Ximian, Inc.
  6. * Copyright (c) 2003, 2004 Novell, Inc.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the "Software"),
  10. * to deal in the Software without restriction, including without limitation
  11. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12. * and/or sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included
  16. * in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. namespace System.Text
  27. {
  28. using System;
  29. using System.Reflection;
  30. using System.Globalization;
  31. using System.Security;
  32. using System.Runtime.CompilerServices;
  33. using System.Runtime.InteropServices;
  34. [Serializable]
  35. #if NET_2_0
  36. [ComVisible (true)]
  37. #endif
  38. public abstract class Encoding
  39. #if NET_2_0
  40. : ICloneable
  41. #endif
  42. {
  43. // Code page used by this encoding.
  44. internal int codePage;
  45. internal int windows_code_page;
  46. bool is_readonly = true;
  // Parameterless constructor for derived classes. It assigns nothing,
  // so codePage and windows_code_page keep their default value of 0.
  protected Encoding () {
  }
  // Constructor taking the code page number. The same number is stored
  // as both the code page and the Windows code page. Under NET_2_0 it
  // also picks the initial encoder/decoder fallbacks for the code page.
  #if ECMA_COMPAT
  protected internal
  #else
  protected
  #endif
  Encoding (int codePage)
  {
      windows_code_page = codePage;
      this.codePage = codePage;
  #if NET_2_0
      switch (codePage) {
      case 1200:  // UTF16
      case 1201:  // UTF16
      case 12000: // UTF32
      case 12001: // UTF32
      case 65000: // UTF7
      case 65001: // UTF8
          // The Unicode encodings start with an empty replacement string.
          decoder_fallback = new DecoderReplacementFallback (String.Empty);
          encoder_fallback = new EncoderReplacementFallback (String.Empty);
          break;
      case 20127: // ASCII
      case 54936: // GB18030
      default:
          // MS has "InternalBestFit{Decoder|Encoder}Fallback" for the
          // default case, but we dunno what they are for, so every
          // remaining code page gets the stock replacement fallbacks.
          decoder_fallback = DecoderFallback.ReplacementFallback;
          encoder_fallback = EncoderFallback.ReplacementFallback;
          break;
      }
  #endif
  }
  // Pass-through for message strings: returns its argument unchanged.
  // Kept only until the callers are changed.
  internal static string _ (string arg)
  {
      string message = arg;
      return message;
  }
  88. #if NET_2_0
  89. DecoderFallback decoder_fallback;
  90. EncoderFallback encoder_fallback;
  // Reports the is_readonly flag; while it is true the fallback
  // property setters throw InvalidOperationException.
  [ComVisible (false)]
  public bool IsReadOnly
  {
      get
      {
          return is_readonly;
      }
  }
  // Base implementation always answers false; it is virtual so that
  // derived encodings can report otherwise.
  [ComVisible (false)]
  public virtual bool IsSingleByte
  {
      get
      {
          return false;
      }
  }
  // Decoder fallback attached to this encoding.
  // get: lazily creates a replacement fallback with an empty string
  //      when none has been assigned yet.
  // set: throws InvalidOperationException on a read-only encoding and
  //      ArgumentNullException for a null value (checked in that order).
  [MonoTODO ("not used yet")]
  [ComVisible (false)]
  public DecoderFallback DecoderFallback
  {
      get
      {
          if (decoder_fallback != null)
              return decoder_fallback;
          decoder_fallback = new DecoderReplacementFallback (String.Empty);
          return decoder_fallback;
      }
      set
      {
          if (IsReadOnly) {
              throw new InvalidOperationException ("This Encoding is readonly.");
          }
          if (value == null) {
              throw new ArgumentNullException ();
          }
          decoder_fallback = value;
      }
  }
  // Encoder fallback attached to this encoding.
  // get: lazily creates a replacement fallback with an empty string
  //      when none has been assigned yet.
  // set: throws InvalidOperationException on a read-only encoding and
  //      ArgumentNullException for a null value (checked in that order).
  [MonoTODO ("not used yet")]
  [ComVisible (false)]
  public EncoderFallback EncoderFallback
  {
      get
      {
          if (encoder_fallback != null)
              return encoder_fallback;
          encoder_fallback = new EncoderReplacementFallback (String.Empty);
          return encoder_fallback;
      }
      set
      {
          if (IsReadOnly) {
              throw new InvalidOperationException ("This Encoding is readonly.");
          }
          if (value == null) {
              throw new ArgumentNullException ();
          }
          encoder_fallback = value;
      }
  }
  // Internal back door that replaces the fallbacks without consulting
  // IsReadOnly. A null argument leaves the corresponding fallback as is.
  internal void SetFallbackInternal (EncoderFallback e, DecoderFallback d)
  {
      if (e != null) {
          encoder_fallback = e;
      }
      if (d != null) {
          decoder_fallback = d;
      }
  }
  138. #endif
  // Convert a whole byte array from one encoding to another by decoding
  // it with srcEncoding and re-encoding the characters with dstEncoding.
  // Throws ArgumentNullException when any argument is null.
  public static byte[] Convert (Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes)
  {
      if (srcEncoding == null)
          throw new ArgumentNullException ("srcEncoding");
      if (dstEncoding == null)
          throw new ArgumentNullException ("dstEncoding");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      char[] decoded = srcEncoding.GetChars (bytes, 0, bytes.Length);
      return dstEncoding.GetBytes (decoded);
  }
  // Convert the slice bytes[index .. index + count) from srcEncoding to
  // dstEncoding. Throws ArgumentNullException for null arguments and
  // ArgumentOutOfRangeException when the slice does not fit in the array.
  public static byte[] Convert (Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes, int index, int count)
  {
      if (srcEncoding == null)
          throw new ArgumentNullException ("srcEncoding");
      if (dstEncoding == null)
          throw new ArgumentNullException ("dstEncoding");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int available = bytes.Length;
      if (index < 0 || index > available)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > available - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      char[] decoded = srcEncoding.GetChars (bytes, index, count);
      return dstEncoding.GetBytes (decoded);
  }
  // Two encodings are equal when their code pages match and, under
  // NET_2_0, their decoder and encoder fallbacks are equal as well.
  // Anything that is not an Encoding compares unequal.
  public override bool Equals (Object obj)
  {
      Encoding other = obj as Encoding;
      if (other == null)
          return false;
  #if NET_2_0
      if (codePage != other.codePage)
          return false;
      if (!DecoderFallback.Equals (other.DecoderFallback))
          return false;
      return EncoderFallback.Equals (other.EncoderFallback);
  #else
      return codePage == other.codePage;
  #endif
  }
  // Number of bytes produced by encoding "count" characters of "chars"
  // starting at "index". Implemented by each concrete encoding.
  public abstract int GetByteCount (char[] chars, int index, int count);
  // Convenience wrapper for "GetByteCount" taking a string. Throws
  // ArgumentNullException for null and returns 0 for the empty string.
  public virtual int GetByteCount (String s)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }
      int length = s.Length;
      if (length == 0) {
          return 0;
      }
  #if NET_2_0
      unsafe {
          // Pin the string and hand its characters to the pointer overload.
          fixed (char* first = s) {
              return GetByteCount (first, length);
          }
      }
  #else
      char[] buffer = s.ToCharArray ();
      return GetByteCount (buffer, 0, buffer.Length);
  #endif
  }
  // Convenience wrapper for "GetByteCount" covering a whole character
  // array. Throws ArgumentNullException when the array is null.
  public virtual int GetByteCount (char[] chars)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      return GetByteCount (chars, 0, chars.Length);
  }
  // Encode charCount characters of "chars" starting at charIndex into
  // "bytes" starting at byteIndex. Implemented by each concrete encoding.
  public abstract int GetBytes (char[] chars, int charIndex, int charCount,
      byte[] bytes, int byteIndex);
  // Convenience wrapper for "GetBytes": encodes charCount characters of
  // the string s, starting at charIndex, into "bytes" starting at
  // byteIndex, and returns whatever the underlying GetBytes call returns.
  //
  // Throws ArgumentNullException when s or bytes is null. Under NET_2_0 it
  // throws ArgumentOutOfRangeException when an index or count is outside
  // its buffer, and returns 0 without encoding when charCount is 0 or
  // byteIndex is at the end of "bytes".
  public virtual int GetBytes (String s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
      // Reject a null destination up front: the NET_2_0 range checks
      // below read bytes.Length and would otherwise fail with a
      // NullReferenceException instead of an argument exception.
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
  #if NET_2_0
      if (charIndex < 0 || charIndex > s.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      // Compared by subtraction so that a huge charCount cannot wrap
      // charIndex + charCount around to a small or negative number.
      if (charCount < 0 || charCount > s.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));
      if (charCount == 0 || bytes.Length == byteIndex)
          return 0;
      unsafe {
          // Pin both buffers and delegate to the pointer overload.
          fixed (char* cptr = s) {
              fixed (byte* bptr = bytes) {
                  return GetBytes (cptr + charIndex,
                      charCount,
                      bptr + byteIndex,
                      bytes.Length - byteIndex);
              }
          }
      }
  #else
      return GetBytes (s.ToCharArray (), charIndex, charCount, bytes, byteIndex);
  #endif
  }
  // Encode a whole string into a freshly allocated byte array.
  // Throws ArgumentNullException when s is null.
  public virtual byte[] GetBytes (String s)
  {
      if (s == null) {
          throw new ArgumentNullException ("s");
      }
  #if NET_2_0
      if (s.Length == 0) {
          return new byte [0];
      }
      int needed = GetByteCount (s);
      if (needed == 0) {
          return new byte [0];
      }
      byte [] result = new byte [needed];
      unsafe {
          // Pin source and destination, then use the pointer overload.
          fixed (char* src = s) {
              fixed (byte* dst = result) {
                  GetBytes (src, s.Length, dst, needed);
              }
          }
      }
      return result;
  #else
      char[] buffer = s.ToCharArray ();
      byte[] result = new byte [GetByteCount (buffer, 0, buffer.Length)];
      GetBytes (buffer, 0, buffer.Length, result, 0);
      return result;
  #endif
  }
  // Encode "count" characters of "chars" starting at "index" into a new
  // array sized by GetByteCount. Argument validation is left to the
  // GetByteCount/GetBytes overloads this method calls.
  public virtual byte[] GetBytes (char[] chars, int index, int count)
  {
      byte[] result = new byte [GetByteCount (chars, index, count)];
      GetBytes (chars, index, count, result, 0);
      return result;
  }
  // Encode a whole character array into a new byte array sized by
  // GetByteCount. Throws ArgumentNullException when "chars" is null,
  // matching GetByteCount (char[]); without the check, reading
  // chars.Length would fail with a NullReferenceException.
  public virtual byte[] GetBytes (char[] chars)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      int numBytes = GetByteCount (chars, 0, chars.Length);
      byte[] bytes = new byte [numBytes];
      GetBytes (chars, 0, chars.Length, bytes, 0);
      return bytes;
  }
  // Number of characters produced by decoding "count" bytes of "bytes"
  // starting at "index". Implemented by each concrete encoding.
  public abstract int GetCharCount (byte[] bytes, int index, int count);
  // Convenience wrapper for "GetCharCount" covering a whole byte array.
  // Throws ArgumentNullException when the array is null.
  public virtual int GetCharCount (byte[] bytes)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      int length = bytes.Length;
      return GetCharCount (bytes, 0, length);
  }
  // Decode byteCount bytes of "bytes" starting at byteIndex into "chars"
  // starting at charIndex. Implemented by each concrete encoding.
  public abstract int GetChars (byte[] bytes, int byteIndex, int byteCount,
      char[] chars, int charIndex);
  // Convenience wrapper for "GetChars": decodes "count" bytes starting at
  // "index" into a new array sized by GetCharCount. Argument validation
  // is left to the overloads this method calls.
  public virtual char[] GetChars (byte[] bytes, int index, int count)
  {
      char[] result = new char [GetCharCount (bytes, index, count)];
      GetChars (bytes, index, count, result, 0);
      return result;
  }
  // Decode a whole byte array into a new character array.
  // Throws ArgumentNullException when the array is null.
  public virtual char[] GetChars (byte[] bytes)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int length = bytes.Length;
      char[] result = new char [GetCharCount (bytes, 0, length)];
      GetChars (bytes, 0, length, result, 0);
      return result;
  }
  // Create a decoder that forwards its requests to this encoding object.
  public virtual Decoder GetDecoder ()
  {
      Decoder forwarding = new ForwardingDecoder (this);
      return forwarding;
  }
  // Create an encoder that forwards its requests to this encoding object.
  public virtual Encoder GetEncoder ()
  {
      Encoder forwarding = new ForwardingEncoder (this);
      return forwarding;
  }
  335. // Loaded copy of the "I18N" assembly. We need to move
  336. // this into a class in "System.Private" eventually.
  337. private static Assembly i18nAssembly;
  338. private static bool i18nDisabled;
  // Call the public instance method "name" on the "I18N" manager object,
  // passing "args", and return its result. Returns null when the I18N
  // assembly, the manager class, the manager instance or the method is
  // unavailable. The whole operation runs under lockobj.
  //
  // A NotImplementedException from assembly loading or reflection sets
  // i18nDisabled, after which every later call returns null immediately.
  private static Object InvokeI18N (String name, params Object[] args)
  {
      lock (lockobj) {
          // Insufficient engine support for I18N was detected earlier.
          if (i18nDisabled)
              return null;

          // Find or load the "I18N" assembly.
          if (i18nAssembly == null) {
              try {
                  try {
                      i18nAssembly = Assembly.Load (Consts.AssemblyI18N);
                  } catch (NotImplementedException) {
                      // Assembly loading unsupported by the engine.
                      i18nDisabled = true;
                      return null;
                  }
                  if (i18nAssembly == null)
                      return null;
              } catch (SystemException) {
                  return null;
              }
          }

          // Find the "I18N.Common.Manager" class.
          Type managerType;
          try {
              managerType = i18nAssembly.GetType ("I18N.Common.Manager");
          } catch (NotImplementedException) {
              // "GetType" is not supported by the engine.
              i18nDisabled = true;
              return null;
          }
          if (managerType == null)
              return null;

          // Read the static "PrimaryManager" property.
          const BindingFlags staticProperty =
              BindingFlags.GetProperty | BindingFlags.Static | BindingFlags.Public;
          Object primary;
          try {
              primary = managerType.InvokeMember
                  ("PrimaryManager", staticProperty,
                   null, null, null, null, null, null);
          } catch (MissingMethodException) {
              return null;
          } catch (SecurityException) {
              return null;
          } catch (NotImplementedException) {
              // "InvokeMember" is not supported by the engine.
              i18nDisabled = true;
              return null;
          }
          if (primary == null)
              return null;

          // Invoke the requested method on the manager.
          const BindingFlags instanceMethod =
              BindingFlags.InvokeMethod | BindingFlags.Instance | BindingFlags.Public;
          try {
              return managerType.InvokeMember
                  (name, instanceMethod,
                   null, primary, args, null, null, null);
          } catch (MissingMethodException) {
              return null;
          } catch (SecurityException) {
              return null;
          }
      }
  }
  // Get the encoding object for a code page number.
  // Lookup order: the builtin encodings, the I18N manager, a type named
  // "System.Text.CP<codePage>" in this assembly, then the same type name
  // through Type.GetType. Throws ArgumentOutOfRangeException outside
  // 0..65535 and NotSupportedException when no handler is found.
  #if ECMA_COMPAT
  private
  #else
  public
  #endif
  static Encoding GetEncoding (int codePage)
  {
      if (codePage < 0 || codePage > 0xffff) {
          throw new ArgumentOutOfRangeException ("codepage",
              "Valid values are between 0 and 65535, inclusive.");
      }

      // Check for the builtin code pages first.
      switch (codePage) {
      case 0:
          return Default;
      case ASCIIEncoding.ASCII_CODE_PAGE:
          return ASCII;
      case UTF7Encoding.UTF7_CODE_PAGE:
          return UTF7;
      case UTF8Encoding.UTF8_CODE_PAGE:
          return UTF8;
  #if NET_2_0
      case UTF32Encoding.UTF32_CODE_PAGE:
          return UTF32;
      case UTF32Encoding.BIG_UTF32_CODE_PAGE:
          return BigEndianUTF32;
  #endif
      case UnicodeEncoding.UNICODE_CODE_PAGE:
          return Unicode;
      case UnicodeEncoding.BIG_UNICODE_CODE_PAGE:
          return BigEndianUnicode;
      case Latin1Encoding.ISOLATIN_CODE_PAGE:
          return ISOLatin1;
      default:
          break;
      }

      // Try to obtain a code page handler from the I18N handler.
      Encoding found = (Encoding) InvokeI18N ("GetEncoding", codePage);
      if (found != null) {
          found.is_readonly = true;
          return found;
      }

      // Look for a converter class in this assembly first, then in any
      // assembly, in case the application provides its own handler.
      String typeName = "System.Text.CP" + codePage.ToString ();
      Type handler = Assembly.GetExecutingAssembly ().GetType (typeName);
      if (handler == null) {
          handler = Type.GetType (typeName);
      }
      if (handler == null) {
          // We have no idea how to handle this code page.
          throw new NotSupportedException
              (String.Format ("CodePage {0} not supported", codePage.ToString ()));
      }

      found = (Encoding) Activator.CreateInstance (handler);
      found.is_readonly = true;
      return found;
  }
  476. #if !ECMA_COMPAT
  477. #if NET_2_0
  // Shallow copy of this encoding. Unlike the original, the copy is
  // writable (is_readonly is cleared), so its fallbacks can be changed.
  [ComVisible (false)]
  public virtual object Clone ()
  {
      Encoding copy = (Encoding) MemberwiseClone ();
      copy.is_readonly = false;
      return copy;
  }
  // Get a writable clone of the encoding for "codePage" with the given
  // fallbacks installed. Throws ArgumentNullException when either
  // fallback is null.
  public static Encoding GetEncoding (int codePage,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (encoderFallback == null) {
          throw new ArgumentNullException ("encoderFallback");
      }
      if (decoderFallback == null) {
          throw new ArgumentNullException ("decoderFallback");
      }

      Encoding shared = GetEncoding (codePage);
      Encoding copy = shared.Clone () as Encoding;
      copy.is_readonly = false;
      copy.encoder_fallback = encoderFallback;
      copy.decoder_fallback = decoderFallback;
      return copy;
  }
  // Get a writable clone of the encoding called "name" with the given
  // fallbacks installed. Throws ArgumentNullException when either
  // fallback is null.
  public static Encoding GetEncoding (string name,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (encoderFallback == null) {
          throw new ArgumentNullException ("encoderFallback");
      }
      if (decoderFallback == null) {
          throw new ArgumentNullException ("decoderFallback");
      }

      Encoding shared = GetEncoding (name);
      Encoding copy = shared.Clone () as Encoding;
      copy.is_readonly = false;
      copy.encoder_fallback = encoderFallback;
      copy.decoder_fallback = decoderFallback;
      return copy;
  }
  511. static EncodingInfo [] encoding_infos;
  // Return one EncodingInfo per code page in a hard-coded list. The array
  // is built on the first call and the same cached array is returned
  // afterwards.
  // FIXME: this hard-coded list is hacky and easily broken, but a test
  // checks that the method always returns the same number and content
  // of encoding infos, so it does not matter in practice.
  public static EncodingInfo[] GetEncodings ()
  {
      if (encoding_infos != null)
          return encoding_infos;

      int [] known = {
          37, 437, 500, 708,
          850, 852, 855, 857, 858, 860, 861, 862, 863,
          864, 865, 866, 869, 870, 874, 875,
          932, 936, 949, 950,
          1026, 1047, 1140, 1141, 1142, 1143, 1144,
          1145, 1146, 1147, 1148, 1149,
          1200, 1201, 1250, 1251, 1252, 1253, 1254,
          1255, 1256, 1257, 1258,
          10000, 10079, 12000, 12001,
          20127, 20273, 20277, 20278, 20280, 20284,
          20285, 20290, 20297, 20420, 20424, 20866,
          20871, 21025, 21866, 28591, 28592, 28593,
          28594, 28595, 28596, 28597, 28598, 28599,
          28605, 38598,
          50220, 50221, 50222, 51932, 51949, 54936,
          57002, 57003, 57004, 57005, 57006, 57007,
          57008, 57009, 57010, 57011,
          65000, 65001
      };

      encoding_infos = new EncodingInfo [known.Length];
      for (int slot = 0; slot < known.Length; ++slot) {
          encoding_infos [slot] = new EncodingInfo (known [slot]);
      }
      return encoding_infos;
  }
  // Shorthand for IsAlwaysNormalized (NormalizationForm.FormC).
  [ComVisible (false)]
  public bool IsAlwaysNormalized ()
  {
      NormalizationForm form = NormalizationForm.FormC;
      return IsAlwaysNormalized (form);
  }
  // Base implementation: true only for form C on an ASCIIEncoding
  // instance. (ASCIIEncoding arguably should override this itself.)
  [ComVisible (false)]
  public virtual bool IsAlwaysNormalized (NormalizationForm form)
  {
      if (form != NormalizationForm.FormC)
          return false;
      return this is ASCIIEncoding;
  }
  556. #endif
  // Table of builtin web encoding names and the corresponding code pages.
  // Layout: an int code page followed by the string names that map to it.
  // Every name must be lower case with '_' in place of '-', because
  // GetEncoding (String) normalizes the requested name that way before
  // an exact string comparison; an upper-case entry can never match.
  private static readonly object[] encodings =
  {
      ASCIIEncoding.ASCII_CODE_PAGE,
      "ascii", "us_ascii", "us", "ansi_x3.4_1968",
      "ansi_x3.4_1986", "cp367", "csascii", "ibm367",
      "iso_ir_6", "iso646_us", "iso_646.irv:1991",
      UTF7Encoding.UTF7_CODE_PAGE,
      "utf_7", "csunicode11utf7", "unicode_1_1_utf_7",
      "unicode_2_0_utf_7", "x_unicode_1_1_utf_7",
      "x_unicode_2_0_utf_7",
      UTF8Encoding.UTF8_CODE_PAGE,
      "utf_8", "unicode_1_1_utf_8", "unicode_2_0_utf_8",
      "x_unicode_1_1_utf_8", "x_unicode_2_0_utf_8",
      UnicodeEncoding.UNICODE_CODE_PAGE,
      "utf_16", "utf_16le", "ucs_2", "unicode",
      "iso_10646_ucs2",
      UnicodeEncoding.BIG_UNICODE_CODE_PAGE,
      "unicodefffe", "utf_16be",
  #if NET_2_0
      UTF32Encoding.UTF32_CODE_PAGE,
      "utf_32", "utf_32le", "ucs_4",
      UTF32Encoding.BIG_UTF32_CODE_PAGE,
      "utf_32be",
  #endif
      Latin1Encoding.ISOLATIN_CODE_PAGE,
      "iso_8859_1", "latin1"
  };
  // Get an encoding object for a specific web encoding name.
  // The name is lower-cased and '-' is replaced by '_' before it is
  // looked up in the builtin table. Failing that, the I18N manager is
  // asked, then a type named "System.Text.ENC<name>" is searched in this
  // assembly and finally through Type.GetType.
  // Throws ArgumentNullException for a null name and ArgumentException
  // when nothing handles the name.
  public static Encoding GetEncoding (String name)
  {
      if (name == null)
          throw new ArgumentNullException ("name");

      string converted = name.ToLowerInvariant ().Replace ('-', '_');

      // Walk the table: an int entry sets the current code page and the
      // strings after it are the names for that code page.
      int currentCodePage = 0;
      foreach (object entry in encodings) {
          if (entry is int) {
              currentCodePage = (int) entry;
              continue;
          }
          if (converted == (string) entry)
              return GetEncoding (currentCodePage);
      }

      // Try to obtain a web encoding handler from the I18N handler.
      Encoding fromI18N = (Encoding) InvokeI18N ("GetEncoding", name);
      if (fromI18N != null)
          return fromI18N;

      // Look for a converter class in this assembly first, then in any
      // assembly, in case the application provides its own handler.
      String typeName = "System.Text.ENC" + converted;
      Type handler = Assembly.GetExecutingAssembly ().GetType (typeName);
      if (handler == null)
          handler = Type.GetType (typeName);
      if (handler != null)
          return (Encoding) Activator.CreateInstance (handler);

      // We have no idea how to handle this encoding name.
      throw new ArgumentException (
          String.Format ("Encoding name '{0}' not supported", name), "name");
  }
  627. #endif // !ECMA_COMPAT
  // Get a hash code for this instance. Under NET_2_0 it combines the two
  // fallback hash codes with the code page, mirroring the fields that
  // Equals compares; otherwise it is just the code page.
  public override int GetHashCode ()
  {
  #if NET_2_0
      // The parentheses are required: '+' binds tighter than '<<', so
      // without them the sums would become the shift counts.
      return (DecoderFallback.GetHashCode () << 24)
          + (EncoderFallback.GetHashCode () << 16)
          + codePage;
  #else
      return codePage;
  #endif
  }
  // Upper bound on the number of bytes needed to encode charCount
  // characters. Implemented by each concrete encoding.
  public abstract int GetMaxByteCount (int charCount);
  // Upper bound on the number of characters needed to decode byteCount
  // bytes. Implemented by each concrete encoding.
  public abstract int GetMaxCharCount (int byteCount);
  // Identifying preamble for this encoding. The base implementation has
  // none and returns a new empty array on every call.
  public virtual byte[] GetPreamble ()
  {
      byte[] none = new byte [0];
      return none;
  }
  // Decode "count" bytes of "bytes" starting at "index" into a string,
  // by way of GetChars.
  public virtual String GetString (byte[] bytes, int index, int count)
  {
      char[] decoded = GetChars (bytes, index, count);
      return new String (decoded);
  }
  // Decode a whole byte array into a string.
  // Throws ArgumentNullException when the array is null.
  public virtual String GetString (byte[] bytes)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      int length = bytes.Length;
      return GetString (bytes, 0, length);
  }
  659. #if !ECMA_COMPAT
  660. internal string body_name;
  661. internal string encoding_name;
  662. internal string header_name;
  663. internal bool is_mail_news_display;
  664. internal bool is_mail_news_save;
  665. internal bool is_browser_save = false;
  666. internal bool is_browser_display = false;
  667. internal string web_name;
  // Mail body name for this encoding; returns the body_name field.
  public virtual String BodyName {
      get { return body_name; }
  }
  // Code page number represented by this object; returns the codePage field.
  public virtual int CodePage {
      get { return codePage; }
  }
  // Human-readable name for this encoding; returns the encoding_name field.
  public virtual String EncodingName {
      get { return encoding_name; }
  }
  // Mail agent header name for this encoding; returns the header_name field.
  public virtual String HeaderName {
      get { return header_name; }
  }
  // Whether this encoding can be displayed in a Web browser; returns
  // the is_browser_display field.
  public virtual bool IsBrowserDisplay {
      get { return is_browser_display; }
  }
  // Whether this encoding can be saved from a Web browser; returns
  // the is_browser_save field.
  public virtual bool IsBrowserSave {
      get { return is_browser_save; }
  }
  // Whether this encoding can be displayed in a mail/news agent; returns
  // the is_mail_news_display field.
  public virtual bool IsMailNewsDisplay {
      get { return is_mail_news_display; }
  }
  // Whether this encoding can be saved from a mail/news agent; returns
  // the is_mail_news_save field.
  public virtual bool IsMailNewsSave {
      get { return is_mail_news_save; }
  }
  // IANA-preferred Web name for this encoding; returns the web_name field.
  public virtual String WebName {
      get { return web_name; }
  }
  // Windows code page represented by this object; returns the
  // windows_code_page field. The int constructor stores the same number
  // here as in codePage, since this implementation makes no distinction
  // between normal and Windows code pages.
  public virtual int WindowsCodePage {
      get { return windows_code_page; }
  }
  740. #endif // !ECMA_COMPAT
  741. // Storage for standard encoding objects.
  742. static volatile Encoding asciiEncoding;
  743. static volatile Encoding bigEndianEncoding;
  744. static volatile Encoding defaultEncoding;
  745. static volatile Encoding utf7Encoding;
  746. static volatile Encoding utf8EncodingWithMarkers;
  747. static volatile Encoding utf8EncodingWithoutMarkers;
  748. static volatile Encoding unicodeEncoding;
  749. static volatile Encoding isoLatin1Encoding;
  750. static volatile Encoding unixConsoleEncoding;
  751. #if NET_2_0
  752. static volatile Encoding utf32Encoding;
  753. static volatile Encoding bigEndianUTF32Encoding;
  754. #endif
  755. static readonly object lockobj = new object ();
  // Shared ASCII encoding object. It is created on first use; the field
  // is tested once without the lock and again while holding lockobj.
  public static Encoding ASCII
  {
      get {
          if (asciiEncoding != null)
              return asciiEncoding;
          lock (lockobj) {
              if (asciiEncoding == null) {
                  asciiEncoding = new ASCIIEncoding ();
                  asciiEncoding.is_readonly = true;
              }
          }
          return asciiEncoding;
      }
  }
  // Shared big-endian Unicode encoding object, built as
  // UnicodeEncoding (true, true). It is created on first use; the field
  // is tested once without the lock and again while holding lockobj.
  public static Encoding BigEndianUnicode
  {
      get {
          if (bigEndianEncoding != null)
              return bigEndianEncoding;
          lock (lockobj) {
              if (bigEndianEncoding == null) {
                  bigEndianEncoding = new UnicodeEncoding (true, true);
                  bigEndianEncoding.is_readonly = true;
              }
          }
          return bigEndianEncoding;
      }
  }
  // Runtime-provided (InternalCall) query used by the Default property.
  // As Default consumes it: code_page is updated through the ref
  // parameter, and when it comes back as -1 the returned string is used
  // as an encoding name instead.
  [MethodImpl (MethodImplOptions.InternalCall)]
  internal static extern string InternalCodePage (ref int code_page);
  // Default encoding object. On first use it asks the runtime, through
  // InternalCodePage, which code page to use, and falls back to
  // UTF8Unmarked when that code page or name is not supported. The field
  // is tested once without the lock and again while holding lockobj.
  public static Encoding Default
  {
      get {
          if (defaultEncoding != null)
              return defaultEncoding;

          lock (lockobj) {
              if (defaultEncoding == null) {
                  // See if the underlying system knows what
                  // code page handler we should be using.
                  int page = 1;
                  string pageName = InternalCodePage (ref page);
                  try {
                      if (page == -1) {
                          defaultEncoding = GetEncoding (pageName);
                      } else {
                          // Map the runtime's internal numbering
                          // (1..6) to our code page numbers.
                          page &= 0x0fffffff;
                          switch (page) {
                          case 1:
                              page = ASCIIEncoding.ASCII_CODE_PAGE;
                              break;
                          case 2:
                              page = UTF7Encoding.UTF7_CODE_PAGE;
                              break;
                          case 3:
                              page = UTF8Encoding.UTF8_CODE_PAGE;
                              break;
                          case 4:
                              page = UnicodeEncoding.UNICODE_CODE_PAGE;
                              break;
                          case 5:
                              page = UnicodeEncoding.BIG_UNICODE_CODE_PAGE;
                              break;
                          case 6:
                              page = Latin1Encoding.ISOLATIN_CODE_PAGE;
                              break;
                          }
                          defaultEncoding = GetEncoding (page);
                      }
                  } catch (NotSupportedException) {
                      // The code page is not supported on the
                      // underlying platform.
                      defaultEncoding = UTF8Unmarked;
                  } catch (ArgumentException) {
                      // The name is not a valid code page, or is
                      // not supported by the underlying OS.
                      defaultEncoding = UTF8Unmarked;
                  }
                  defaultEncoding.is_readonly = true;
              }
          }
          return defaultEncoding;
      }
  }
  // Get the ISO Latin1 encoding object.
  //
  // The shared Latin1Encoding instance is created on first use and cached
  // in isoLatin1Encoding.  Creation happens under lockobj with a second
  // null test inside the lock, so only one instance is ever stored.
  //
  // The new object is flagged with is_readonly = true BEFORE it is stored
  // in the static field, so a caller on the lock-free path never receives
  // an instance whose flag has not been set yet.
  // NOTE(review): ordering of the final store relies on how
  // isoLatin1Encoding is declared (outside this block) -- confirm it is
  // volatile.
  private static Encoding ISOLatin1
  {
      get {
          if (isoLatin1Encoding == null) {
              lock (lockobj) {
                  if (isoLatin1Encoding == null) {
                      Latin1Encoding created = new Latin1Encoding ();
                      created.is_readonly = true;
                      isoLatin1Encoding = created;
                  }
              }
          }
          return isoLatin1Encoding;
      }
  }
  // Get the standard UTF-7 encoding object.
  //
  // The property is private when ECMA_COMPAT is defined and public
  // otherwise.  The shared UTF7Encoding instance is created on first use
  // and cached in utf7Encoding.  Creation happens under lockobj with a
  // second null test inside the lock, so only one instance is ever stored.
  //
  // The new object is flagged with is_readonly = true BEFORE it is stored
  // in the static field, so a caller on the lock-free path never receives
  // an instance whose flag has not been set yet.
  // NOTE(review): ordering of the final store relies on how utf7Encoding
  // is declared (outside this block) -- confirm it is volatile.
#if ECMA_COMPAT
  private
#else
  public
#endif
  static Encoding UTF7
  {
      get {
          if (utf7Encoding == null) {
              lock (lockobj) {
                  if (utf7Encoding == null) {
                      UTF7Encoding created = new UTF7Encoding ();
                      created.is_readonly = true;
                      utf7Encoding = created;
                  }
              }
          }
          return utf7Encoding;
      }
  }
  // Get the standard UTF-8 encoding object.
  //
  // The shared instance is created on first use as new UTF8Encoding (true)
  // and cached in utf8EncodingWithMarkers.  Creation happens under lockobj
  // with a second null test inside the lock, so only one instance is ever
  // stored.
  //
  // The new object is flagged with is_readonly = true BEFORE it is stored
  // in the static field, so a caller on the lock-free path never receives
  // an instance whose flag has not been set yet.
  // NOTE(review): ordering of the final store relies on how
  // utf8EncodingWithMarkers is declared (outside this block) -- confirm it
  // is volatile.
  public static Encoding UTF8
  {
      get {
          if (utf8EncodingWithMarkers == null) {
              lock (lockobj) {
                  if (utf8EncodingWithMarkers == null) {
                      UTF8Encoding created = new UTF8Encoding (true);
                      created.is_readonly = true;
                      utf8EncodingWithMarkers = created;
                  }
              }
          }
          return utf8EncodingWithMarkers;
      }
  }
  //
  // Internal only, for use by the class libraries: a UTF-8 encoding that
  // is unmarked and does not validate its input.
  //
  // The shared instance is created on first use as
  // new UTF8Encoding (false, false) and cached in
  // utf8EncodingWithoutMarkers.  Creation happens under lockobj with a
  // second null test inside the lock, so only one instance is ever stored.
  //
  // The new object is flagged with is_readonly = true BEFORE it is stored
  // in the static field, so a caller on the lock-free path never receives
  // an instance whose flag has not been set yet.
  // NOTE(review): ordering of the final store relies on how
  // utf8EncodingWithoutMarkers is declared (outside this block) -- confirm
  // it is volatile.
  //
  internal static Encoding UTF8Unmarked {
      get {
          if (utf8EncodingWithoutMarkers == null) {
              lock (lockobj) {
                  if (utf8EncodingWithoutMarkers == null) {
                      UTF8Encoding created = new UTF8Encoding (false, false);
                      created.is_readonly = true;
                      utf8EncodingWithoutMarkers = created;
                  }
              }
          }
          return utf8EncodingWithoutMarkers;
      }
  }
  // Get the standard little-endian Unicode encoding object.
  //
  // The shared instance is created on first use as
  // new UnicodeEncoding (false, true) and cached in unicodeEncoding.
  // Creation happens under lockobj with a second null test inside the
  // lock, so only one instance is ever stored.
  //
  // The new object is flagged with is_readonly = true BEFORE it is stored
  // in the static field, so a caller on the lock-free path never receives
  // an instance whose flag has not been set yet.
  // NOTE(review): ordering of the final store relies on how
  // unicodeEncoding is declared (outside this block) -- confirm it is
  // volatile.
  public static Encoding Unicode
  {
      get {
          if (unicodeEncoding == null) {
              lock (lockobj) {
                  if (unicodeEncoding == null) {
                      UnicodeEncoding created = new UnicodeEncoding (false, true);
                      created.is_readonly = true;
                      unicodeEncoding = created;
                  }
              }
          }
          return unicodeEncoding;
      }
  }
  911. #if NET_2_0
  // Get the standard little-endian UTF-32 encoding object.
  //
  // The shared instance is created on first use as
  // new UTF32Encoding (false, true) and cached in utf32Encoding.
  // Creation happens under lockobj with a second null test inside the
  // lock, so only one instance is ever stored.
  //
  // The new object is flagged with is_readonly = true BEFORE it is stored
  // in the static field, so a caller on the lock-free path never receives
  // an instance whose flag has not been set yet.
  // NOTE(review): ordering of the final store relies on how utf32Encoding
  // is declared (outside this block) -- confirm it is volatile.
  public static Encoding UTF32
  {
      get {
          if (utf32Encoding == null) {
              lock (lockobj) {
                  if (utf32Encoding == null) {
                      UTF32Encoding created = new UTF32Encoding (false, true);
                      created.is_readonly = true;
                      utf32Encoding = created;
                  }
              }
          }
          return utf32Encoding;
      }
  }
  // Get the standard big-endian UTF-32 encoding object.
  //
  // The shared instance is created on first use as
  // new UTF32Encoding (true, true) and cached in bigEndianUTF32Encoding.
  // Creation happens under lockobj with a second null test inside the
  // lock, so only one instance is ever stored.
  //
  // The new object is flagged with is_readonly = true BEFORE it is stored
  // in the static field, so a caller on the lock-free path never receives
  // an instance whose flag has not been set yet.
  // NOTE(review): ordering of the final store relies on how
  // bigEndianUTF32Encoding is declared (outside this block) -- confirm it
  // is volatile.
  internal static Encoding BigEndianUTF32
  {
      get {
          if (bigEndianUTF32Encoding == null) {
              lock (lockobj) {
                  if (bigEndianUTF32Encoding == null) {
                      UTF32Encoding created = new UTF32Encoding (true, true);
                      created.is_readonly = true;
                      bigEndianUTF32Encoding = created;
                  }
              }
          }
          return bigEndianUTF32Encoding;
      }
  }
  942. #endif
  // Decoder that keeps no state of its own: every call is handed straight
  // to the Encoding it was constructed with.
  private sealed class ForwardingDecoder : Decoder
  {
      // Encoding that performs the actual decoding.
      private Encoding encoding;

      // Remember the target encoding; under NET_2_0 also adopt its
      // decoder fallback.
      public ForwardingDecoder (Encoding enc)
      {
          this.encoding = enc;
#if NET_2_0
          this.Fallback = this.encoding.DecoderFallback;
#endif
      }

      // Number of characters the wrapped encoding reports for the given
      // byte range.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          int charCount = this.encoding.GetCharCount (bytes, index, count);
          return charCount;
      }

      // Decode the given byte range into chars starting at charIndex by
      // calling the wrapped encoding; returns whatever that call returns.
      public override int GetChars (byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex)
      {
          int written = this.encoding.GetChars (bytes, byteIndex, byteCount, chars, charIndex);
          return written;
      }
  } // class ForwardingDecoder
  // Encoder that keeps no state of its own: every call is handed straight
  // to the Encoding it was constructed with.  The "flush" argument of
  // both overrides is accepted but not used.
  private sealed class ForwardingEncoder : Encoder
  {
      // Encoding that performs the actual encoding; set once in the
      // constructor.
      private readonly Encoding encoding;

      // Remember the target encoding; under NET_2_0 also adopt its
      // encoder fallback.
      public ForwardingEncoder (Encoding enc)
      {
          encoding = enc;
#if NET_2_0
          Fallback = encoding.EncoderFallback;
#endif
      }

      // Number of bytes the wrapped encoding reports for the given
      // character range.
      public override int GetByteCount (char[] chars, int index, int count, bool flush)
      {
          return encoding.GetByteCount (chars, index, count);
      }

      // Encode the given character range into "bytes" by calling the
      // wrapped encoding; returns whatever that call returns.
      //
      // The fifth parameter is the starting offset in "bytes" (it is
      // forwarded as the index argument of Encoding.GetBytes), so it is
      // named byteIndex rather than byteCount.
      public override int GetBytes (char[] chars, int charIndex,
                                    int charCount, byte[] bytes,
                                    int byteIndex, bool flush)
      {
          return encoding.GetBytes (chars, charIndex, charCount, bytes, byteIndex);
      }
  } // class ForwardingEncoder
  991. #if NET_2_0
  // Count the bytes needed to encode "count" characters read from the
  // pointer "chars".  The characters are copied into a temporary managed
  // array, which is then measured by the char[] overload of GetByteCount.
  //
  // Throws ArgumentNullException when chars is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetByteCount (char *chars, int count)
  {
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count");
      }

      char [] managed = new char [count];
      int pos = 0;
      while (pos < count) {
          managed [pos] = chars [pos];
          pos++;
      }
      return GetByteCount (managed);
  }
  // Count the characters produced by decoding "count" bytes read from the
  // pointer "bytes".  The bytes are copied into a temporary managed
  // array, which is then measured by the (byte[], int, int) overload of
  // GetCharCount.
  //
  // Throws ArgumentNullException when bytes is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetCharCount (byte *bytes, int count)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (count < 0) {
          throw new ArgumentOutOfRangeException ("count");
      }

      byte [] managed = new byte [count];
      int pos = 0;
      while (pos < count) {
          managed [pos] = bytes [pos];
          pos++;
      }
      return GetCharCount (managed, 0, count);
  }
  // Decode "byteCount" bytes read from the pointer "bytes" and write the
  // resulting characters through the pointer "chars".
  //
  // The input is copied into a temporary managed array and decoded with
  // the (byte[], int, int) overload of GetChars; the decoded characters
  // are then copied out.  Returns the number of characters written.
  //
  // Throws ArgumentNullException when bytes or chars is null,
  // ArgumentOutOfRangeException when charCount or byteCount is negative,
  // and ArgumentException when more than charCount characters were
  // produced (nothing is written to "chars" in that case).
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetChars (byte *bytes, int byteCount, char *chars, int charCount)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      if (chars == null) {
          throw new ArgumentNullException ("chars");
      }
      if (charCount < 0) {
          throw new ArgumentOutOfRangeException ("charCount");
      }
      if (byteCount < 0) {
          throw new ArgumentOutOfRangeException ("byteCount");
      }

      // Bring the unmanaged input into a managed buffer.
      byte [] input = new byte [byteCount];
      int src = 0;
      while (src < byteCount) {
          input [src] = bytes [src];
          src++;
      }

      char [] decoded = GetChars (input, 0, byteCount);
      int produced = decoded.Length;
      if (produced > charCount) {
          throw new ArgumentException ("charCount is less than the number of characters produced", "charCount");
      }

      // Copy the decoded characters to the caller's buffer.
      int dst = 0;
      while (dst < produced) {
          chars [dst] = decoded [dst];
          dst++;
      }
      return produced;
  }
  // Encode "charCount" characters read from the pointer "chars" and write
  // the resulting bytes through the pointer "bytes".
  //
  // The input is copied into a temporary managed array and encoded with
  // the (char[], int, int) overload of GetBytes; the encoded bytes are
  // then copied out.  Returns the number of bytes written.
  //
  // Throws ArgumentNullException when bytes or chars is null,
  // ArgumentOutOfRangeException when charCount or byteCount is negative,
  // and ArgumentException when more than byteCount bytes were produced
  // (nothing is written to "bytes" in that case).
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetBytes (char *chars, int charCount, byte *bytes, int byteCount)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (charCount < 0)
          throw new ArgumentOutOfRangeException ("charCount");
      if (byteCount < 0)
          throw new ArgumentOutOfRangeException ("byteCount");

      // Bring the unmanaged input into a managed buffer.
      char [] c = new char [charCount];
      for (int i = 0; i < charCount; i++)
          c [i] = chars [i];

      byte [] b = GetBytes (c, 0, charCount);
      int top = b.Length;
      if (top > byteCount)
          throw new ArgumentException ("byteCount is less than the number of bytes produced", "byteCount");

      // Copy the encoded bytes to the caller's buffer.
      for (int i = 0; i < top; i++)
          bytes [i] = b [i];
      return top;
  }
  1064. #endif
  1065. }; // class Encoding
  1066. }; // namespace System.Text