Encoding.cs 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213
  1. /*
  2. * Encoding.cs - Implementation of the "System.Text.Encoding" class.
  3. *
  4. * Copyright (c) 2001, 2002 Southern Storm Software, Pty Ltd
  5. * Copyright (c) 2002, Ximian, Inc.
  6. * Copyright (c) 2003, 2004 Novell, Inc.
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining
  9. * a copy of this software and associated documentation files (the "Software"),
  10. * to deal in the Software without restriction, including without limitation
  11. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12. * and/or sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included
  16. * in all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. namespace System.Text
  27. {
  28. using System;
  29. using System.Reflection;
  30. using System.Globalization;
  31. using System.Security;
  32. using System.Runtime.CompilerServices;
  33. using System.Runtime.InteropServices;
  34. [Serializable]
  35. #if NET_2_0
  36. [ComVisible (true)]
  37. #endif
  38. public abstract class Encoding
  39. #if NET_2_0
  40. : ICloneable
  41. #endif
  42. {
  // Code page used by this encoding.
  internal int codePage;
  // Windows code page reported by WindowsCodePage; the code-page constructor
  // initializes it to the same value as codePage.
  internal int windows_code_page;
  // Read-only flag. Instances start out read-only; Clone () clears the flag
  // on the copy it returns.
  bool is_readonly = true;
  // Default constructor. Assigns nothing, so codePage and windows_code_page
  // keep their default value of zero.
  protected Encoding ()
  {
  }
  #if ECMA_COMPAT
  protected internal
  #else
  protected
  #endif
  Encoding (int codePage)
  {
      // The logical and the Windows code page start out identical.
      windows_code_page = codePage;
      this.codePage = codePage;
  #if NET_2_0
      switch (codePage) {
      case 1200: // UTF16
      case 1201: // UTF16
      case 12000: // UTF32
      case 12001: // UTF32
      case 65000: // UTF7
      case 65001: // UTF8
          // The Unicode encodings get fresh replacement fallbacks whose
          // replacement string is empty.
          decoder_fallback = new DecoderReplacementFallback (String.Empty);
          encoder_fallback = new EncoderReplacementFallback (String.Empty);
          break;
      default:
          // Every other code page, ASCII (20127) and GB18030 (54936)
          // included, uses the static ReplacementFallback values.
          // MS has "InternalBestFit{Decoder|Encoder}Fallback" here,
          // but we dunno what they are for.
          decoder_fallback = DecoderFallback.ReplacementFallback;
          encoder_fallback = EncoderFallback.ReplacementFallback;
          break;
      }
  #endif
  }
  // Identity helper: returns its argument unchanged. Callers in this file wrap
  // exception message keys such as "ArgRange_Array" in it; it stays
  // until we change the callers.
  internal static string _ (string arg) {
  return arg;
  }
  88. #if NET_2_0
  // Backing fields for the DecoderFallback / EncoderFallback properties.
  // Either may still be null; the property getters create an empty-string
  // replacement fallback on first read in that case.
  DecoderFallback decoder_fallback;
  EncoderFallback encoder_fallback;
  // True while the fallback setters must reject changes on this instance.
  [ComVisible (false)]
  public bool IsReadOnly
  {
      get {
          return is_readonly;
      }
  }
  // The base implementation always answers false; being virtual, derived
  // encodings can override it.
  [ComVisible (false)]
  public virtual bool IsSingleByte
  {
      get {
          return false;
      }
  }
  // Gets or sets the decoder fallback of this encoding.
  //
  // get: returns the stored fallback, first creating a replacement
  //      fallback with an empty replacement string if none is stored.
  // set: throws InvalidOperationException when the instance is read-only
  //      and ArgumentNullException (naming "value") when given null.
  [MonoTODO ("not used yet")]
  [ComVisible (false)]
  public DecoderFallback DecoderFallback {
      get {
          if (decoder_fallback == null)
              decoder_fallback = new DecoderReplacementFallback (String.Empty);
          return decoder_fallback;
      }
      set {
          if (IsReadOnly)
              throw new InvalidOperationException ("This Encoding is readonly.");
          // Name the offending argument so the exception is diagnosable.
          if (value == null)
              throw new ArgumentNullException ("value");
          decoder_fallback = value;
      }
  }
  // Gets or sets the encoder fallback of this encoding.
  //
  // get: returns the stored fallback, first creating a replacement
  //      fallback with an empty replacement string if none is stored.
  // set: throws InvalidOperationException when the instance is read-only
  //      and ArgumentNullException (naming "value") when given null.
  [MonoTODO ("not used yet")]
  [ComVisible (false)]
  public EncoderFallback EncoderFallback {
      get {
          if (encoder_fallback == null)
              encoder_fallback = new EncoderReplacementFallback (String.Empty);
          return encoder_fallback;
      }
      set {
          if (IsReadOnly)
              throw new InvalidOperationException ("This Encoding is readonly.");
          // Name the offending argument so the exception is diagnosable.
          if (value == null)
              throw new ArgumentNullException ("value");
          encoder_fallback = value;
      }
  }
  // Overwrites the stored fallbacks without consulting IsReadOnly.
  // A null argument leaves the corresponding fallback as it was.
  internal void SetFallbackInternal (EncoderFallback e, DecoderFallback d)
  {
      if (d != null) {
          decoder_fallback = d;
      }
      if (e != null) {
          encoder_fallback = e;
      }
  }
  138. #endif
  // Convert a whole byte array from one encoding to another: the bytes are
  // decoded with srcEncoding and the resulting characters re-encoded with
  // dstEncoding. Any null argument raises ArgumentNullException.
  public static byte[] Convert (Encoding srcEncoding, Encoding dstEncoding,
                                byte[] bytes)
  {
      if (srcEncoding == null)
          throw new ArgumentNullException ("srcEncoding");
      if (dstEncoding == null)
          throw new ArgumentNullException ("dstEncoding");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      char[] decoded = srcEncoding.GetChars (bytes, 0, bytes.Length);
      return dstEncoding.GetBytes (decoded);
  }
  // Convert the slice bytes[index .. index + count) from srcEncoding to
  // dstEncoding. Null arguments raise ArgumentNullException; a slice that
  // does not fit inside the array raises ArgumentOutOfRangeException.
  public static byte[] Convert (Encoding srcEncoding, Encoding dstEncoding,
                                byte[] bytes, int index, int count)
  {
      if (srcEncoding == null)
          throw new ArgumentNullException ("srcEncoding");
      if (dstEncoding == null)
          throw new ArgumentNullException ("dstEncoding");
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int total = bytes.Length;
      if (index < 0 || index > total)
          throw new ArgumentOutOfRangeException ("index", _("ArgRange_Array"));
      if (count < 0 || count > total - index)
          throw new ArgumentOutOfRangeException ("count", _("ArgRange_Array"));

      char[] decoded = srcEncoding.GetChars (bytes, index, count);
      return dstEncoding.GetBytes (decoded);
  }
  // Determine if two Encoding objects are equal. Anything that is not an
  // Encoding compares unequal. Under NET_2_0 the code page and both
  // fallbacks must match; otherwise only the code page is compared.
  public override bool Equals (Object obj)
  {
      Encoding other = obj as Encoding;
      if (other == null)
          return false;
  #if NET_2_0
      if (codePage != other.codePage)
          return false;
      if (!DecoderFallback.Equals (other.DecoderFallback))
          return false;
      return EncoderFallback.Equals (other.EncoderFallback);
  #else
      return codePage == other.codePage;
  #endif
  }
  // Get the number of bytes needed to encode a character buffer.
  // Abstract: every concrete encoding supplies the implementation.
  public abstract int GetByteCount (char[] chars, int index, int count);
  // Convenience wrappers for "GetByteCount".
  //
  // String overload: throws ArgumentNullException for null, answers 0 for
  // the empty string, and otherwise forwards to the pointer overload
  // (NET_2_0) or to the char[] overload on a copy of the characters.
  public virtual int GetByteCount (String s)
  {
      if (s == null)
          throw new ArgumentNullException ("s");

      int length = s.Length;
      if (length == 0)
          return 0;
  #if NET_2_0
      unsafe {
          fixed (char* first = s) {
              return GetByteCount (first, length);
          }
      }
  #else
      char[] copy = s.ToCharArray ();
      return GetByteCount (copy, 0, copy.Length);
  #endif
  }
  // Whole-array overload: forwards to GetByteCount (chars, 0, chars.Length)
  // and throws ArgumentNullException when chars is null.
  public virtual int GetByteCount (char[] chars)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      return GetByteCount (chars, 0, chars.Length);
  }
  // Get the bytes that result from encoding a character buffer.
  // Abstract: every concrete encoding supplies the implementation. The
  // convenience overloads in this class all forward to it.
  public abstract int GetBytes (char[] chars, int charIndex, int charCount,
  byte[] bytes, int byteIndex);
  // Convenience wrappers for "GetBytes".
  //
  // Encodes charCount characters of s, starting at charIndex, into bytes
  // starting at byteIndex, and returns whatever the underlying GetBytes
  // overload returns (0 when there is nothing to encode or no room left).
  //
  // Throws ArgumentNullException when s or bytes is null. Under NET_2_0 it
  // throws ArgumentOutOfRangeException when charIndex, charCount or
  // byteIndex fall outside their buffers; otherwise range checking is left
  // to the char[] overload.
  public virtual int GetBytes (String s, int charIndex, int charCount,
                               byte[] bytes, int byteIndex)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
      // A null destination used to surface as a NullReferenceException from
      // the bytes.Length reads below; report it as an argument error instead.
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
  #if NET_2_0
      if (charIndex < 0 || charIndex > s.Length)
          throw new ArgumentOutOfRangeException ("charIndex", _("ArgRange_Array"));
      // Compared by subtraction so that charIndex + charCount cannot overflow
      // and slip past the range check.
      if (charCount < 0 || charCount > s.Length - charIndex)
          throw new ArgumentOutOfRangeException ("charCount", _("ArgRange_Array"));
      if (byteIndex < 0 || byteIndex > bytes.Length)
          throw new ArgumentOutOfRangeException ("byteIndex", _("ArgRange_Array"));

      if (charCount == 0 || bytes.Length == byteIndex)
          return 0;

      unsafe {
          fixed (char* cptr = s) {
              fixed (byte* bptr = bytes) {
                  return GetBytes (cptr + charIndex,
                                   charCount,
                                   bptr + byteIndex,
                                   bytes.Length - byteIndex);
              }
          }
      }
  #else
      return GetBytes (s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
  #endif
  }
  // Encodes a whole string into a freshly allocated byte array whose size
  // comes from GetByteCount. Throws ArgumentNullException for a null string.
  public virtual byte[] GetBytes (String s)
  {
      if (s == null)
          throw new ArgumentNullException ("s");
  #if NET_2_0
      if (s.Length == 0)
          return new byte [0];

      int needed = GetByteCount (s);
      if (needed == 0)
          return new byte [0];

      byte [] encoded = new byte [needed];
      unsafe {
          fixed (char* src = s) {
              fixed (byte* dst = encoded) {
                  GetBytes (src, s.Length, dst, needed);
              }
          }
      }
      return encoded;
  #else
      char[] source = s.ToCharArray ();
      byte[] encoded = new byte [GetByteCount (source, 0, source.Length)];
      GetBytes (source, 0, source.Length, encoded, 0);
      return encoded;
  #endif
  }
  // Encodes chars[index .. index + count) into a new array sized by
  // GetByteCount. Argument checking is left to the overloads it calls.
  public virtual byte[] GetBytes (char[] chars, int index, int count)
  {
      byte[] encoded = new byte [GetByteCount (chars, index, count)];
      GetBytes (chars, index, count, encoded, 0);
      return encoded;
  }
  // Encodes an entire character array into a new byte array sized by
  // GetByteCount.
  //
  // Throws ArgumentNullException when chars is null, matching
  // GetByteCount (char[]) and GetChars (byte[]); previously the
  // chars.Length read raised a NullReferenceException instead.
  public virtual byte[] GetBytes (char[] chars)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");

      int numBytes = GetByteCount (chars, 0, chars.Length);
      byte[] bytes = new byte [numBytes];
      GetBytes (chars, 0, chars.Length, bytes, 0);
      return bytes;
  }
  // Get the number of characters needed to decode a byte buffer.
  // Abstract: every concrete encoding supplies the implementation.
  public abstract int GetCharCount (byte[] bytes, int index, int count);
  // Convenience wrappers for "GetCharCount".
  //
  // Whole-array overload: forwards to GetCharCount (bytes, 0, bytes.Length)
  // and throws ArgumentNullException when bytes is null.
  public virtual int GetCharCount (byte[] bytes)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      int length = bytes.Length;
      return GetCharCount (bytes, 0, length);
  }
  // Get the characters that result from decoding a byte buffer.
  // Abstract: every concrete encoding supplies the implementation. The
  // convenience overloads in this class all forward to it.
  public abstract int GetChars (byte[] bytes, int byteIndex, int byteCount,
  char[] chars, int charIndex);
  // Convenience wrappers for "GetChars".
  //
  // Decodes bytes[index .. index + count) into a new char array sized by
  // GetCharCount. Argument checking is left to the overloads it calls.
  public virtual char[] GetChars (byte[] bytes, int index, int count)
  {
      char[] decoded = new char [GetCharCount (bytes, index, count)];
      GetChars (bytes, index, count, decoded, 0);
      return decoded;
  }
  // Decodes an entire byte array into a new char array sized by
  // GetCharCount. Throws ArgumentNullException when bytes is null.
  public virtual char[] GetChars (byte[] bytes)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");

      int length = bytes.Length;
      char[] decoded = new char [GetCharCount (bytes, 0, length)];
      GetChars (bytes, 0, length, decoded, 0);
      return decoded;
  }
  // Get a decoder that forwards requests to this object
  // (a ForwardingDecoder constructed around this encoding).
  public virtual Decoder GetDecoder ()
  {
      Decoder forwarding = new ForwardingDecoder (this);
      return forwarding;
  }
  // Get an encoder that forwards requests to this object
  // (a ForwardingEncoder constructed around this encoding).
  public virtual Encoder GetEncoder ()
  {
      Encoder forwarding = new ForwardingEncoder (this);
      return forwarding;
  }
  // Loaded copy of the "I18N" assembly. We need to move
  // this into a class in "System.Private" eventually.
  // Stays null until InvokeI18N loads it successfully.
  private static Assembly i18nAssembly;
  // Set by InvokeI18N once the engine reports (via NotImplementedException)
  // that it cannot support the reflection calls needed; later calls then
  // return null immediately.
  private static bool i18nDisabled;
  // Invoke a specific method on the "I18N" manager object.
  // Returns NULL if the method failed.
  //
  // name is the public instance method to call on the object returned by
  // the static "PrimaryManager" property of "I18N.Common.Manager"; args are
  // passed through to it. The whole sequence runs under lockobj.
  private static Object InvokeI18N (String name, params Object[] args)
  {
      const BindingFlags staticGetter =
          BindingFlags.GetProperty | BindingFlags.Static | BindingFlags.Public;
      const BindingFlags instanceCall =
          BindingFlags.InvokeMethod | BindingFlags.Instance | BindingFlags.Public;

      lock (lockobj) {
          // Bail out if we previously detected that there
          // is insufficient engine support for I18N handling.
          if (i18nDisabled)
              return null;

          // Find or load the "I18N" assembly.
          if (i18nAssembly == null) {
              try {
                  i18nAssembly = Assembly.Load (Consts.AssemblyI18N);
              } catch (NotImplementedException) {
                  // Assembly loading unsupported by the engine.
                  i18nDisabled = true;
                  return null;
              } catch (SystemException) {
                  return null;
              }
              if (i18nAssembly == null)
                  return null;
          }

          // Find the "I18N.Common.Manager" class.
          Type managerType;
          try {
              managerType = i18nAssembly.GetType ("I18N.Common.Manager");
          } catch (NotImplementedException) {
              // "GetType" is not supported by the engine.
              i18nDisabled = true;
              return null;
          }
          if (managerType == null)
              return null;

          // Get the value of the "PrimaryManager" property.
          Object primaryManager;
          try {
              primaryManager = managerType.InvokeMember
                  ("PrimaryManager", staticGetter,
                   null, null, null, null, null, null);
          } catch (MissingMethodException) {
              return null;
          } catch (SecurityException) {
              return null;
          } catch (NotImplementedException) {
              // "InvokeMember" is not supported by the engine.
              i18nDisabled = true;
              return null;
          }
          if (primaryManager == null)
              return null;

          // Invoke the requested method on the manager.
          try {
              return managerType.InvokeMember
                  (name, instanceCall,
                   null, primaryManager, args, null, null, null);
          } catch (MissingMethodException) {
              return null;
          } catch (SecurityException) {
              return null;
          }
      }
  }
  // Get an encoder for a specific code page.
  //
  // Lookup order: the builtin code pages, then the I18N manager, then a
  // type named "System.Text.CP<codePage>" in this assembly, then the same
  // type name resolved through Type.GetType. Throws
  // ArgumentOutOfRangeException outside 0..65535 and NotSupportedException
  // when nothing handles the code page.
  #if ECMA_COMPAT
  private
  #else
  public
  #endif
  static Encoding GetEncoding (int codePage)
  {
      if (codePage < 0 || codePage > 0xffff)
          throw new ArgumentOutOfRangeException ("codepage",
              "Valid values are between 0 and 65535, inclusive.");

      // Check for the builtin code pages first.
      switch (codePage) {
      case 0:
          return Default;
      case ASCIIEncoding.ASCII_CODE_PAGE:
          return ASCII;
      case UTF7Encoding.UTF7_CODE_PAGE:
          return UTF7;
      case UTF8Encoding.UTF8_CODE_PAGE:
          return UTF8;
  #if NET_2_0
      case UTF32Encoding.UTF32_CODE_PAGE:
          return UTF32;
      case UTF32Encoding.BIG_UTF32_CODE_PAGE:
          return BigEndianUTF32;
  #endif
      case UnicodeEncoding.UNICODE_CODE_PAGE:
          return Unicode;
      case UnicodeEncoding.BIG_UNICODE_CODE_PAGE:
          return BigEndianUnicode;
      case Latin1Encoding.ISOLATIN_CODE_PAGE:
          return ISOLatin1;
      default:
          break;
      }

      // Try to obtain a code page handler from the I18N handler.
      Encoding found = (Encoding)(InvokeI18N ("GetEncoding", codePage));
      if (found != null) {
          found.is_readonly = true;
          return found;
      }

      // Build a code page class name and look for it in this assembly
      // first, then in any assembly, in case the application has
      // provided its own code page handler.
      String cpName = "System.Text.CP" + codePage.ToString ();
      Type handler = Assembly.GetExecutingAssembly ().GetType (cpName);
      if (handler == null)
          handler = Type.GetType (cpName);
      if (handler != null) {
          found = (Encoding)(Activator.CreateInstance (handler));
          found.is_readonly = true;
          return found;
      }

      // We have no idea how to handle this code page.
      throw new NotSupportedException
          (String.Format ("CodePage {0} not supported", codePage.ToString ()));
  }
  476. #if !ECMA_COMPAT
  477. #if NET_2_0
  // Returns a shallow (MemberwiseClone) copy of this encoding whose
  // read-only flag is cleared, so its fallbacks can be replaced.
  [ComVisible (false)]
  public virtual object Clone ()
  {
      Encoding copy = (Encoding) MemberwiseClone ();
      copy.is_readonly = false;
      return copy;
  }
  // Returns a writable clone of the encoding for codePage with the given
  // fallbacks installed. Both fallbacks are required: a null one raises
  // ArgumentNullException before any lookup happens.
  public static Encoding GetEncoding (int codePage,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (encoderFallback == null)
          throw new ArgumentNullException ("encoderFallback");
      if (decoderFallback == null)
          throw new ArgumentNullException ("decoderFallback");

      Encoding source = GetEncoding (codePage);
      Encoding result = source.Clone () as Encoding;
      result.is_readonly = false;
      result.encoder_fallback = encoderFallback;
      result.decoder_fallback = decoderFallback;
      return result;
  }
  // Returns a writable clone of the encoding registered under name with
  // the given fallbacks installed. Both fallbacks are required: a null one
  // raises ArgumentNullException before any lookup happens.
  public static Encoding GetEncoding (string name,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (encoderFallback == null)
          throw new ArgumentNullException ("encoderFallback");
      if (decoderFallback == null)
          throw new ArgumentNullException ("decoderFallback");

      Encoding source = GetEncoding (name);
      Encoding result = source.Clone () as Encoding;
      result.is_readonly = false;
      result.encoder_fallback = encoderFallback;
      result.decoder_fallback = decoderFallback;
      return result;
  }
  // Cache filled by the first GetEncodings () call.
  static EncodingInfo [] encoding_infos;
  // FIXME: As everyone would agree, this implementation is so *hacky*
  // and could be very easily broken. But since there is a test for
  // this method to make sure that this method always returns
  // the same number and content of encoding infos, this won't
  // matter practically.
  //
  // Returns one EncodingInfo per entry of a fixed code page list. The
  // array is built once and the same instance is returned afterwards.
  public static EncodingInfo[] GetEncodings ()
  {
      if (encoding_infos != null)
          return encoding_infos;

      int [] codepages = {
          37, 437, 500, 708,
          850, 852, 855, 857, 858, 860, 861, 862, 863,
          864, 865, 866, 869, 870, 874, 875,
          932, 936, 949, 950,
          1026, 1047, 1140, 1141, 1142, 1143, 1144,
          1145, 1146, 1147, 1148, 1149,
          1200, 1201, 1250, 1251, 1252, 1253, 1254,
          1255, 1256, 1257, 1258,
          10000, 10079, 12000, 12001,
          20127, 20273, 20277, 20278, 20280, 20284,
          20285, 20290, 20297, 20420, 20424, 20866,
          20871, 21025, 21866, 28591, 28592, 28593,
          28594, 28595, 28596, 28597, 28598, 28599,
          28605, 38598,
          50220, 50221, 50222, 51932, 51949, 54936,
          57002, 57003, 57004, 57005, 57006, 57007,
          57008, 57009, 57010, 57011,
          65000, 65001};

      // The static field is assigned before the slots are filled, exactly
      // as the array has always been published.
      EncodingInfo [] infos = new EncodingInfo [codepages.Length];
      encoding_infos = infos;
      int slot = 0;
      foreach (int page in codepages)
          infos [slot++] = new EncodingInfo (page);

      return encoding_infos;
  }
  // Shorthand for IsAlwaysNormalized (NormalizationForm.FormC).
  public bool IsAlwaysNormalized ()
  {
      bool normalized = IsAlwaysNormalized (NormalizationForm.FormC);
      return normalized;
  }
  // Base answer: true only for form C on an ASCIIEncoding instance.
  public virtual bool IsAlwaysNormalized (NormalizationForm form)
  {
      if (form != NormalizationForm.FormC)
          return false;
      // umm, ASCIIEncoding should have overriden this method, no?
      return this is ASCIIEncoding;
  }
  554. #endif
  // Table of builtin web encoding names and the corresponding code pages.
  //
  // Layout: each boxed int code page is followed by the names that map to
  // it. GetEncoding (String) lower-cases the requested name and replaces
  // '-' with '_' before comparing with ==, so every name here must be
  // lower case with underscores. The UTF-16LE / UTF-32LE / UTF-32BE
  // aliases used to be upper case and therefore could never match.
  private static readonly object[] encodings =
  {
      ASCIIEncoding.ASCII_CODE_PAGE,
      "ascii", "us_ascii", "us", "ansi_x3.4_1968",
      "ansi_x3.4_1986", "cp367", "csascii", "ibm367",
      "iso_ir_6", "iso646_us", "iso_646.irv:1991",
      UTF7Encoding.UTF7_CODE_PAGE,
      "utf_7", "csunicode11utf7", "unicode_1_1_utf_7",
      "unicode_2_0_utf_7", "x_unicode_1_1_utf_7",
      "x_unicode_2_0_utf_7",
      UTF8Encoding.UTF8_CODE_PAGE,
      "utf_8", "unicode_1_1_utf_8", "unicode_2_0_utf_8",
      "x_unicode_1_1_utf_8", "x_unicode_2_0_utf_8",
      UnicodeEncoding.UNICODE_CODE_PAGE,
      "utf_16", "utf_16le", "ucs_2", "unicode",
      "iso_10646_ucs2",
      UnicodeEncoding.BIG_UNICODE_CODE_PAGE,
      "unicodefffe", "utf_16be",
  #if NET_2_0
      UTF32Encoding.UTF32_CODE_PAGE,
      "utf_32", "utf_32le", "ucs_4",
      UTF32Encoding.BIG_UTF32_CODE_PAGE,
      "utf_32be",
  #endif
      Latin1Encoding.ISOLATIN_CODE_PAGE,
      "iso_8859_1", "latin1"
  };
  // Get an encoding object for a specific web encoding name.
  //
  // The name is lower-cased and '-' is replaced with '_' before it is
  // compared with the builtin table. Unknown names are then offered to the
  // I18N manager (with the original spelling) and finally looked up as a
  // type named "System.Text.ENC<converted name>". Throws
  // ArgumentNullException for null and ArgumentException when nothing matches.
  public static Encoding GetEncoding (String name)
  {
      // Validate the parameters.
      if (name == null)
          throw new ArgumentNullException ("name");

      string converted = name.ToLowerInvariant ().Replace ('-', '_');

      // Search the table for a name match. An int entry announces the
      // code page for the names that follow it.
      int code = 0;
      foreach (object entry in encodings) {
          if (entry is int)
              code = (int) entry;
          else if (converted == (string) entry)
              return GetEncoding (code);
      }

      // Try to obtain a web encoding handler from the I18N handler.
      Encoding enc = (Encoding)(InvokeI18N ("GetEncoding", name));
      if (enc != null)
          return enc;

      // Build a web encoding class name and look for it in this assembly
      // first, then in any assembly, in case the application has
      // provided its own code page handler.
      String encName = "System.Text.ENC" + converted;
      Type handler = Assembly.GetExecutingAssembly ().GetType (encName);
      if (handler == null)
          handler = Type.GetType (encName);
      if (handler != null)
          return (Encoding)(Activator.CreateInstance (handler));

      // We have no idea how to handle this encoding name.
      throw new ArgumentException (
          String.Format ("Encoding name '{0}' not supported", name), "name");
  }
  625. #endif // !ECMA_COMPAT
  // Get a hash code for this instance.
  //
  // Under NET_2_0 the hash combines the decoder fallback hash (shifted
  // left 24 bits), the encoder fallback hash (shifted left 16 bits) and
  // the code page, the same three values Equals compares. Otherwise it
  // is just the code page.
  public override int GetHashCode ()
  {
  #if NET_2_0
      // The shifts must be parenthesized: '+' binds tighter than '<<', so
      // the unparenthesized form shifted by (24 + hash) and (16 + codePage)
      // instead of adding the shifted hashes.
      unchecked {
          return (DecoderFallback.GetHashCode () << 24)
              + (EncoderFallback.GetHashCode () << 16)
              + codePage;
      }
  #else
      return codePage;
  #endif
  }
  // Get the maximum number of bytes needed to encode a
  // specified number of characters.
  // Abstract: every concrete encoding supplies the implementation.
  public abstract int GetMaxByteCount (int charCount);
  // Get the maximum number of characters needed to decode a
  // specified number of bytes.
  // Abstract: every concrete encoding supplies the implementation.
  public abstract int GetMaxCharCount (int byteCount);
  // Get the identifying preamble for this encoding.
  // The base implementation has none and returns a new empty array.
  public virtual byte[] GetPreamble ()
  {
      byte[] none = new byte [0];
      return none;
  }
  // Decode a buffer of bytes into a string: the slice is decoded with
  // GetChars and the resulting characters are wrapped in a new String.
  public virtual String GetString (byte[] bytes, int index, int count)
  {
      char[] decoded = GetChars (bytes, index, count);
      return new String (decoded);
  }
  // Decodes an entire byte array into a string by forwarding to
  // GetString (bytes, 0, bytes.Length). Throws ArgumentNullException
  // when bytes is null.
  public virtual String GetString (byte[] bytes)
  {
      if (bytes == null) {
          throw new ArgumentNullException ("bytes");
      }
      int length = bytes.Length;
      return GetString (bytes, 0, length);
  }
  657. #if !ECMA_COMPAT
  // Backing fields for the descriptive virtual properties (BodyName,
  // EncodingName, HeaderName, IsMailNews*, IsBrowser*, WebName).
  // NOTE(review): nothing in this part of the file assigns them; presumably
  // the concrete encodings fill them in — confirm against the subclasses.
  internal string body_name;
  internal string encoding_name;
  internal string header_name;
  internal bool is_mail_news_display;
  internal bool is_mail_news_save;
  internal bool is_browser_save = false;
  internal bool is_browser_display = false;
  internal string web_name;
  // Get the mail body name for this encoding (the body_name field).
  public virtual String BodyName {
      get { return body_name; }
  }
  // Get the code page represented by this object (the codePage field).
  public virtual int CodePage {
      get { return codePage; }
  }
  // Get the human-readable name for this encoding (the encoding_name field).
  public virtual String EncodingName {
      get { return encoding_name; }
  }
  // Get the mail agent header name for this encoding (the header_name field).
  public virtual String HeaderName {
      get { return header_name; }
  }
  // Determine if this encoding can be displayed in a Web browser
  // (the is_browser_display field, false unless assigned).
  public virtual bool IsBrowserDisplay {
      get { return is_browser_display; }
  }
  // Determine if this encoding can be saved from a Web browser
  // (the is_browser_save field, false unless assigned).
  public virtual bool IsBrowserSave {
      get { return is_browser_save; }
  }
  // Determine if this encoding can be displayed in a mail/news agent
  // (the is_mail_news_display field).
  public virtual bool IsMailNewsDisplay {
      get { return is_mail_news_display; }
  }
  // Determine if this encoding can be saved from a mail/news agent
  // (the is_mail_news_save field).
  public virtual bool IsMailNewsSave {
      get { return is_mail_news_save; }
  }
  // Get the IANA-preferred Web name for this encoding (the web_name field).
  public virtual String WebName {
      get { return web_name; }
  }
  // Get the Windows code page represented by this object.
  // We make no distinction between normal and Windows code pages in
  // this implementation; the value comes from windows_code_page.
  public virtual int WindowsCodePage {
      get { return windows_code_page; }
  }
  738. #endif // !ECMA_COMPAT
  // Storage for standard encoding objects.
  // Each field caches the instance handed out by the matching static
  // property; the getters shown in this file create the instance on first use.
  static volatile Encoding asciiEncoding;
  static volatile Encoding bigEndianEncoding;
  static volatile Encoding defaultEncoding;
  static volatile Encoding utf7Encoding;
  static volatile Encoding utf8EncodingWithMarkers;
  static volatile Encoding utf8EncodingWithoutMarkers;
  static volatile Encoding unicodeEncoding;
  static volatile Encoding isoLatin1Encoding;
  static volatile Encoding unixConsoleEncoding;
  #if NET_2_0
  static volatile Encoding utf32Encoding;
  static volatile Encoding bigEndianUTF32Encoding;
  #endif
  // Lock taken while creating the cached instances above and for the
  // whole of InvokeI18N.
  static readonly object lockobj = new object ();
  // Get the standard ASCII encoding object.
  // Created on first use under lockobj, marked read-only and cached.
  public static Encoding ASCII
  {
      get {
          if (asciiEncoding != null)
              return asciiEncoding;

          lock (lockobj) {
              // Re-check: another thread may have created it while we waited.
              if (asciiEncoding == null) {
                  asciiEncoding = new ASCIIEncoding ();
                  asciiEncoding.is_readonly = true;
              }
          }
          return asciiEncoding;
      }
  }
  // Get the standard big-endian Unicode encoding object.
  // Created on first use as UnicodeEncoding (true, true) under lockobj,
  // marked read-only and cached.
  public static Encoding BigEndianUnicode
  {
      get {
          if (bigEndianEncoding != null)
              return bigEndianEncoding;

          lock (lockobj) {
              // Re-check: another thread may have created it while we waited.
              if (bigEndianEncoding == null) {
                  bigEndianEncoding = new UnicodeEncoding (true, true);
                  bigEndianEncoding.is_readonly = true;
              }
          }
          return bigEndianEncoding;
      }
  }
  // Implemented by the runtime (InternalCall); used by the Default getter.
  // NOTE(review): judging from that caller, it either sets code_page to -1
  // and returns an encoding name, or stores a numeric value in code_page
  // (1..6 are remapped to builtin code pages) — confirm against the runtime.
  [MethodImpl (MethodImplOptions.InternalCall)]
  extern internal static string InternalCodePage (ref int code_page);
  // Get the default encoding object.
  //
  // On first use the runtime is asked (InternalCodePage) which code page
  // to use. A result of -1 means "look the encoding up by the returned
  // name"; anything else is masked, mapped from the internal numbers 1..6
  // to builtin code pages, and looked up by number. If the lookup throws
  // NotSupportedException or ArgumentException, UTF8Unmarked is used.
  public static Encoding Default
  {
      get {
          if (defaultEncoding != null)
              return defaultEncoding;

          lock (lockobj) {
              // Re-check: another thread may have created it while we waited.
              if (defaultEncoding != null)
                  return defaultEncoding;

              // See if the underlying system knows what
              // code page handler we should be using.
              int code_page = 1;
              string code_page_name = InternalCodePage (ref code_page);
              try {
                  if (code_page == -1) {
                      defaultEncoding = GetEncoding (code_page_name);
                  } else {
                      // map the codepage from internal to our numbers
                      code_page = code_page & 0x0fffffff;
                      switch (code_page) {
                      case 1:
                          code_page = ASCIIEncoding.ASCII_CODE_PAGE;
                          break;
                      case 2:
                          code_page = UTF7Encoding.UTF7_CODE_PAGE;
                          break;
                      case 3:
                          code_page = UTF8Encoding.UTF8_CODE_PAGE;
                          break;
                      case 4:
                          code_page = UnicodeEncoding.UNICODE_CODE_PAGE;
                          break;
                      case 5:
                          code_page = UnicodeEncoding.BIG_UNICODE_CODE_PAGE;
                          break;
                      case 6:
                          code_page = Latin1Encoding.ISOLATIN_CODE_PAGE;
                          break;
                      }
                      defaultEncoding = GetEncoding (code_page);
                  }
              } catch (NotSupportedException) {
                  // code_page is not supported on underlying platform
                  defaultEncoding = UTF8Unmarked;
              } catch (ArgumentException) {
                  // code_page_name is not a valid code page, or is
                  // not supported by underlying OS
                  defaultEncoding = UTF8Unmarked;
              }
              defaultEncoding.is_readonly = true;
          }
          return defaultEncoding;
      }
  }
  // Get the ISO Latin1 encoding object.
  //
  // Created lazily under "lockobj" and flagged read-only.  The
  // instance is prepared in a local and stored in the static field
  // last, so the unlocked null check cannot hand out an instance
  // that is not yet marked read-only.
  private static Encoding ISOLatin1
  {
      get {
          if (isoLatin1Encoding == null) {
              lock (lockobj) {
                  if (isoLatin1Encoding == null) {
                      Latin1Encoding enc = new Latin1Encoding ();
                      enc.is_readonly = true;
                      // Publish only once the flag is in place.
                      isoLatin1Encoding = enc;
                  }
              }
          }
          return isoLatin1Encoding;
      }
  }
  // Get the standard UTF-7 encoding object.
  //
  // The property is private when ECMA_COMPAT is defined and public
  // otherwise.  The shared instance is created lazily under
  // "lockobj", flagged read-only through a local, and stored in the
  // static field last so that the unlocked fast path never returns
  // an instance that is not yet marked read-only.
#if ECMA_COMPAT
  private
#else
  public
#endif
  static Encoding UTF7
  {
      get {
          if (utf7Encoding == null) {
              lock (lockobj) {
                  if (utf7Encoding == null) {
                      UTF7Encoding enc = new UTF7Encoding ();
                      enc.is_readonly = true;
                      // Publish only once the flag is in place.
                      utf7Encoding = enc;
                  }
              }
          }
          return utf7Encoding;
      }
  }
  // Get the standard UTF-8 encoding object.
  //
  // This is the "with markers" instance, built with
  // UTF8Encoding (true).  It is created lazily under "lockobj",
  // flagged read-only through a local, and stored in the static
  // field last so that the unlocked fast path never returns an
  // instance that is not yet marked read-only.
  public static Encoding UTF8
  {
      get {
          if (utf8EncodingWithMarkers == null) {
              lock (lockobj) {
                  if (utf8EncodingWithMarkers == null) {
                      UTF8Encoding enc = new UTF8Encoding (true);
                      enc.is_readonly = true;
                      // Publish only once the flag is in place.
                      utf8EncodingWithMarkers = enc;
                  }
              }
          }
          return utf8EncodingWithMarkers;
      }
  }
  //
  // Only internal, to be used by the class libraries: Unmarked and non-input-validating
  //
  // Built with UTF8Encoding (false, false).  Also used by the
  // Default property as its fallback.  The instance is created
  // lazily under "lockobj", flagged read-only through a local, and
  // stored in the static field last so that the unlocked fast path
  // never returns an instance that is not yet marked read-only.
  //
  internal static Encoding UTF8Unmarked {
      get {
          if (utf8EncodingWithoutMarkers == null) {
              lock (lockobj) {
                  if (utf8EncodingWithoutMarkers == null) {
                      UTF8Encoding enc = new UTF8Encoding (false, false);
                      enc.is_readonly = true;
                      // Publish only once the flag is in place.
                      utf8EncodingWithoutMarkers = enc;
                  }
              }
          }
          return utf8EncodingWithoutMarkers;
      }
  }
  // Get the standard little-endian Unicode encoding object.
  //
  // Built with UnicodeEncoding (false, true).  The instance is
  // created lazily under "lockobj", flagged read-only through a
  // local, and stored in the static field last so that the unlocked
  // fast path never returns an instance that is not yet marked
  // read-only.
  public static Encoding Unicode
  {
      get {
          if (unicodeEncoding == null) {
              lock (lockobj) {
                  if (unicodeEncoding == null) {
                      UnicodeEncoding enc = new UnicodeEncoding (false, true);
                      enc.is_readonly = true;
                      // Publish only once the flag is in place.
                      unicodeEncoding = enc;
                  }
              }
          }
          return unicodeEncoding;
      }
  }
  909. #if NET_2_0
  // Get the standard little-endian UTF-32 encoding object.
  //
  // Built with UTF32Encoding (false, true).  The instance is
  // created lazily under "lockobj", flagged read-only through a
  // local, and stored in the static field last so that the unlocked
  // fast path never returns an instance that is not yet marked
  // read-only.
  public static Encoding UTF32
  {
      get {
          if (utf32Encoding == null) {
              lock (lockobj) {
                  if (utf32Encoding == null) {
                      UTF32Encoding enc = new UTF32Encoding (false, true);
                      enc.is_readonly = true;
                      // Publish only once the flag is in place.
                      utf32Encoding = enc;
                  }
              }
          }
          return utf32Encoding;
      }
  }
  // Get the standard big-endian UTF-32 encoding object.
  //
  // Built with UTF32Encoding (true, true).  The instance is created
  // lazily under "lockobj", flagged read-only through a local, and
  // stored in the static field last so that the unlocked fast path
  // never returns an instance that is not yet marked read-only.
  internal static Encoding BigEndianUTF32
  {
      get {
          if (bigEndianUTF32Encoding == null) {
              lock (lockobj) {
                  if (bigEndianUTF32Encoding == null) {
                      UTF32Encoding enc = new UTF32Encoding (true, true);
                      enc.is_readonly = true;
                      // Publish only once the flag is in place.
                      bigEndianUTF32Encoding = enc;
                  }
              }
          }
          return bigEndianUTF32Encoding;
      }
  }
  940. #endif
  // Decoder that simply hands every request to the array-based
  // GetCharCount/GetChars methods of the Encoding it was created for.
  private sealed class ForwardingDecoder : Decoder
  {
      // The encoding that does the actual work.
      private Encoding owner;

      // Remember the target encoding (and, on NET_2_0, adopt its
      // decoder fallback).
      public ForwardingDecoder (Encoding enc)
      {
          this.owner = enc;
#if NET_2_0
          this.Fallback = this.owner.DecoderFallback;
#endif
      }

      // Number of characters the owning encoding would produce for
      // the given byte range.
      public override int GetCharCount (byte[] bytes, int index, int count)
      {
          int needed = this.owner.GetCharCount (bytes, index, count);
          return needed;
      }

      // Decode the given byte range into "chars" starting at
      // "charIndex"; returns what the owning encoding returns.
      public override int GetChars (byte[] bytes, int byteIndex,
                                    int byteCount, char[] chars,
                                    int charIndex)
      {
          int produced = this.owner.GetChars (bytes, byteIndex, byteCount,
                                              chars, charIndex);
          return produced;
      }
  } // class ForwardingDecoder
  // Forwarding encoder implementation.
  //
  // Every call is handed straight to the array-based
  // GetByteCount/GetBytes methods of the Encoding it was created
  // for; the "flush" argument is not used.
  private sealed class ForwardingEncoder : Encoder
  {
      // The encoding that does the actual work; never reassigned.
      private readonly Encoding encoding;

      // Constructor.  Remembers the target encoding and, on NET_2_0,
      // adopts its encoder fallback.
      public ForwardingEncoder (Encoding enc)
      {
          encoding = enc;
#if NET_2_0
          Fallback = encoding.EncoderFallback;
#endif
      }

      // Override inherited methods.

      // Number of bytes the owning encoding would produce for the
      // given character range.  "flush" is ignored.
      public override int GetByteCount (char[] chars, int index, int count, bool flush)
      {
          return encoding.GetByteCount (chars, index, count);
      }

      // Encode the given character range into "bytes", starting at
      // position "byteIndex" of the destination array, and return
      // what the owning encoding returns.  "flush" is ignored.
      // (The fifth parameter is a start index, not a length, which
      // is why it is named byteIndex as in Encoder.GetBytes.)
      public override int GetBytes (char[] chars, int charIndex,
                                    int charCount, byte[] bytes,
                                    int byteIndex, bool flush)
      {
          return encoding.GetBytes (chars, charIndex, charCount, bytes, byteIndex);
      }
  } // class ForwardingEncoder
  989. #if NET_2_0
  // Pointer overload: count the bytes needed to encode "count"
  // characters starting at "chars".  The characters are copied into
  // a managed array and the work is done by GetByteCount (char[]).
  //
  // Throws ArgumentNullException if "chars" is null and
  // ArgumentOutOfRangeException if "count" is negative.
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetByteCount (char *chars, int count)
  {
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (count < 0)
          throw new ArgumentOutOfRangeException ("count");

      // Snapshot the unmanaged input into a managed buffer.
      char [] managed = new char [count];
      char *src = chars;
      int filled = 0;
      while (filled < count) {
          managed [filled] = *src;
          src++;
          filled++;
      }

      return GetByteCount (managed);
  }
  // Pointer overload: count the characters produced by decoding
  // "count" bytes starting at "bytes".  The bytes are copied into a
  // managed array and the work is done by
  // GetCharCount (byte[], int, int).
  //
  // Throws ArgumentNullException if "bytes" is null and
  // ArgumentOutOfRangeException if "count" is negative.
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetCharCount (byte *bytes, int count)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (count < 0)
          throw new ArgumentOutOfRangeException ("count");

      // Snapshot the unmanaged input into a managed buffer.
      byte [] managed = new byte [count];
      byte *src = bytes;
      int filled = 0;
      while (filled < count) {
          managed [filled] = *src;
          src++;
          filled++;
      }

      return GetCharCount (managed, 0, count);
  }
  // Pointer overload: decode "byteCount" bytes starting at "bytes"
  // into the buffer at "chars", which has room for "charCount"
  // characters.  Returns the number of characters written.
  //
  // The input is copied into a managed array, decoded with
  // GetChars (byte[], int, int), and the result copied back out.
  //
  // Throws ArgumentNullException for a null pointer,
  // ArgumentOutOfRangeException for a negative count, and
  // ArgumentException if the decoded text does not fit in
  // "charCount" characters (nothing is written in that case).
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetChars (byte *bytes, int byteCount, char *chars, int charCount)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (charCount < 0)
          throw new ArgumentOutOfRangeException ("charCount");
      if (byteCount < 0)
          throw new ArgumentOutOfRangeException ("byteCount");

      // Snapshot the unmanaged input into a managed buffer.
      byte [] input = new byte [byteCount];
      byte *src = bytes;
      for (int n = 0; n < byteCount; n++, src++)
          input [n] = *src;

      char [] decoded = GetChars (input, 0, byteCount);
      int produced = decoded.Length;
      if (produced > charCount)
          throw new ArgumentException ("charCount is less than the number of characters produced", "charCount");

      // Hand the decoded characters back to the caller's buffer.
      char *dst = chars;
      for (int n = 0; n < produced; n++, dst++)
          *dst = decoded [n];

      return produced;
  }
  // Pointer overload: encode "charCount" characters starting at
  // "chars" into the buffer at "bytes", which has room for
  // "byteCount" bytes.  Returns the number of bytes written.
  //
  // The input is copied into a managed array, encoded with
  // GetBytes (char[], int, int), and the result copied back out.
  //
  // Throws ArgumentNullException for a null pointer,
  // ArgumentOutOfRangeException for a negative count, and
  // ArgumentException if the encoded data does not fit in
  // "byteCount" bytes (nothing is written in that case).
  [CLSCompliantAttribute(false)]
  [ComVisible (false)]
  public unsafe virtual int GetBytes (char *chars, int charCount, byte *bytes, int byteCount)
  {
      if (bytes == null)
          throw new ArgumentNullException ("bytes");
      if (chars == null)
          throw new ArgumentNullException ("chars");
      if (charCount < 0)
          throw new ArgumentOutOfRangeException ("charCount");
      if (byteCount < 0)
          throw new ArgumentOutOfRangeException ("byteCount");

      // Snapshot the unmanaged input into a managed buffer.
      char [] c = new char [charCount];
      for (int i = 0; i < charCount; i++)
          c [i] = chars [i];

      byte [] b = GetBytes (c, 0, charCount);
      int top = b.Length;
      if (top > byteCount)
          throw new ArgumentException ("byteCount is less than the number of bytes produced", "byteCount");

      // Hand the encoded bytes back to the caller's buffer.
      for (int i = 0; i < top; i++)
          bytes [i] = b [i];

      return top;
  }
  1062. #endif
  1063. }; // class Encoding
  1064. }; // namespace System.Text