Encoding.cs 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Globalization;
  6. using System.Threading;
  7. using System.Runtime.InteropServices;
  8. using System.Runtime.Serialization;
  9. using System.Diagnostics.CodeAnalysis;
  10. namespace System.Text
  11. {
  12. // This abstract base class represents a character encoding. The class provides
  13. // methods to convert arrays and strings of Unicode characters to and from
  14. // arrays of bytes. A number of Encoding implementations are provided in
  15. // the System.Text package, including:
  16. //
  17. // ASCIIEncoding, which encodes Unicode characters as single 7-bit
  18. // ASCII characters. This encoding only supports character values between 0x00
  19. // and 0x7F.
  20. // BaseCodePageEncoding, which encapsulates a Windows code page. Any
  21. // installed code page can be accessed through this encoding, and conversions
  22. // are performed using the WideCharToMultiByte and
  23. // MultiByteToWideChar Windows API functions.
  24. // UnicodeEncoding, which encodes each Unicode character as two
  25. // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
  26. // page 1201) encodings are recognized.
  27. // UTF7Encoding, which encodes Unicode characters using the UTF-7
  28. // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
  29. // encoding supports all Unicode character values, and can also be accessed
  30. // as code page 65000.
  31. // UTF8Encoding, which encodes Unicode characters using the UTF-8
  32. // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
  33. // encoding supports all Unicode character values, and can also be accessed
  34. // as code page 65001.
  35. // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
  36. //
  37. // In addition to directly instantiating Encoding objects, an
  38. // application can use the ForCodePage, GetASCII,
  39. // GetDefault, GetUnicode, GetUTF7, and GetUTF8
  40. // methods in this class to obtain encodings.
  41. //
  42. // Through an encoding, the GetBytes method is used to convert arrays
  43. // of characters to arrays of bytes, and the GetChars method is used to
  44. // convert arrays of bytes to arrays of characters. The GetBytes and
  45. // GetChars methods maintain no state between conversions, and are
  46. // generally intended for conversions of complete blocks of bytes and
  47. // characters in one operation. When the data to be converted is only available
  48. // in sequential blocks (such as data read from a stream) or when the amount of
  49. // data is so large that it needs to be divided into smaller blocks, an
  50. // application may choose to use a Decoder or an Encoder to
  51. // perform the conversion. Decoders and encoders allow sequential blocks of
  52. // data to be converted and they maintain the state required to support
  53. // conversions of data that spans adjacent blocks. Decoders and encoders are
  54. // obtained using the GetDecoder and GetEncoder methods.
  55. //
  56. // The core GetBytes and GetChars methods require the caller
  57. // to provide the destination buffer and ensure that the buffer is large enough
  58. // to hold the entire result of the conversion. When using these methods,
  59. // either directly on an Encoding object or on an associated
  60. // Decoder or Encoder, an application can use one of two methods
  61. // to allocate destination buffers.
  62. //
  63. // The GetByteCount and GetCharCount methods can be used to
  64. // compute the exact size of the result of a particular conversion, and an
  65. // appropriately sized buffer for that conversion can then be allocated.
  66. // The GetMaxByteCount and GetMaxCharCount methods can
  67. // be used to compute the maximum possible size of a conversion of a given
  68. // number of bytes or characters, and a buffer of that size can then be reused
  69. // for multiple conversions.
  70. //
  71. // The first method generally uses less memory, whereas the second method
  72. // generally executes faster.
  73. //
  74. public abstract class Encoding : ICloneable
  75. {
  76. // For netcore we use UTF8 as the default encoding since ANSI isn't available. The shared instance is created with encoderShouldEmitUTF8Identifier: false.
  77. private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding = new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);
  78. // Returns the default encoding. This was historically described as the system's current ANSI code page; here it always returns the shared UTF8 instance above.
  79. public static Encoding Default => s_defaultEncoding;
  80. //
  81. // MIME content flags. The following values are from mlang.idl and
  82. // should be kept in sync with those in mlang.idl. The IsBrowserDisplay, IsBrowserSave,
  83. // IsMailNewsDisplay and IsMailNewsSave properties test these bits against _dataItem.Flags.
  84. internal const int MIMECONTF_MAILNEWS = 0x00000001; // tested by IsMailNewsDisplay
  85. internal const int MIMECONTF_BROWSER = 0x00000002; // tested by IsBrowserDisplay
  86. internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100; // tested by IsMailNewsSave
  87. internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200; // tested by IsBrowserSave
  88. // Special case code pages. GetEncoding(int) maps 0 to Default and rejects 1, 2, 3 and 42 with an ArgumentException.
  89. private const int CodePageDefault = 0; // resolved to Encoding.Default by GetEncoding
  90. private const int CodePageNoOEM = 1; // CP_OEMCP: OEM code page, not supported
  91. private const int CodePageNoMac = 2; // CP_MACCP: MAC code page, not supported
  92. private const int CodePageNoThread = 3; // CP_THREAD_ACP: thread code page, not supported
  93. private const int CodePageNoSymbol = 42; // CP_SYMBOL: symbol code page, not supported
  94. private const int CodePageUnicode = 1200; // Unicode (little-endian, per the class header comment)
  95. private const int CodePageBigEndian = 1201; // Big Endian Unicode
  96. private const int CodePageWindows1252 = 1252; // Windows 1252 code page
  97. // 20936 has same code page as 10008, so we'll special case it
  98. private const int CodePageMacGB2312 = 10008;
  99. private const int CodePageGB2312 = 20936;
  100. private const int CodePageMacKorean = 10003;
  101. private const int CodePageDLLKorean = 20949;
  102. // ISO 2022 code pages
  103. private const int ISO2022JP = 50220;
  104. private const int ISO2022JPESC = 50221;
  105. private const int ISO2022JPSISO = 50222;
  106. private const int ISOKorean = 50225;
  107. private const int ISOSimplifiedCN = 50227;
  108. private const int EUCJP = 51932;
  109. private const int ChineseHZ = 52936; // HZ has ~}~{~~ sequences
  110. // 51936 is the same as 936
  111. private const int DuplicateEUCCN = 51936;
  112. private const int EUCCN = 936;
  113. private const int EUCKR = 51949;
  114. // Latin 1 & ASCII code pages (internal rather than private, unlike most constants here)
  115. internal const int CodePageASCII = 20127; // ASCII
  116. internal const int ISO_8859_1 = 28591; // Latin1
  117. // ISCII code pages (identifier spellings such as "Assemese" are kept as-is; they are names, not comments)
  118. private const int ISCIIAssemese = 57006;
  119. private const int ISCIIBengali = 57003;
  120. private const int ISCIIDevanagari = 57002;
  121. private const int ISCIIGujarathi = 57010;
  122. private const int ISCIIKannada = 57008;
  123. private const int ISCIIMalayalam = 57009;
  124. private const int ISCIIOriya = 57007;
  125. private const int ISCIIPanjabi = 57011;
  126. private const int ISCIITamil = 57004;
  127. private const int ISCIITelugu = 57005;
  128. // GB18030
  129. private const int GB18030 = 54936;
  130. // Other
  131. private const int ISO_8859_8I = 38598;
  132. private const int ISO_8859_8_Visual = 28598;
  133. // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
  134. private const int ENC50229 = 50229;
  135. // Unicode transformation format code pages; GetEncoding(int) maps these to the UTF7, UTF8, UTF32 and BigEndianUTF32 properties
  136. private const int CodePageUTF7 = 65000;
  137. private const int CodePageUTF8 = 65001;
  138. private const int CodePageUTF32 = 12000;
  139. private const int CodePageUTF32BE = 12001;
  140. internal int _codePage = 0; // code page identifier; assigned by the constructors
  141. internal CodePageDataItem _dataItem = null; // filled in on first use by GetDataItem() from EncodingTable
  142. // Because of encoders we may be read only. Instances start read-only; Clone() produces a writable copy.
  143. [OptionalField(VersionAdded = 2)]
  144. private bool _isReadOnly = true;
  145. // Encoder and decoder fallbacks; exposed through the EncoderFallback / DecoderFallback properties
  146. internal EncoderFallback encoderFallback = null;
  147. internal DecoderFallback decoderFallback = null;
  // Parameterless constructor: delegates to Encoding(int) with code page 0.
  protected Encoding()
      : this(0)
  {
  }
  // Creates an encoding for the given code page and installs the default fallbacks.
  // Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage)
  {
      // Negative code page identifiers are never valid.
      if (codePage < 0)
          throw new ArgumentOutOfRangeException(nameof(codePage));

      _codePage = codePage;

      // Virtual call: the fallbacks installed are whatever SetDefaultFallbacks assigns.
      SetDefaultFallbacks();
  }
  // Lets a derived class hand in its own encoder/decoder fallback objects at construction
  // time. An encoding object is always created read-only, so the fallbacks cannot be set
  // through the EncoderFallback/DecoderFallback properties once creation is done.
  // A null argument selects the internal best-fit fallback for that direction.
  // Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (codePage < 0)
          throw new ArgumentOutOfRangeException(nameof(codePage));

      _codePage = codePage;

      this.encoderFallback = encoderFallback != null
          ? encoderFallback
          : new InternalEncoderBestFitFallback(this);

      this.decoderFallback = decoderFallback != null
          ? decoderFallback
          : new InternalDecoderBestFitFallback(this);
  }
  // Installs the fallbacks used when the caller supplied none. This base implementation
  // selects the internal best-fit fallbacks. The method is virtual: per the original note,
  // UTF-X encodings use a replacement fallback with an "\xFFFD" string and ASCII uses a
  // "?" replacement fallback.
  internal virtual void SetDefaultFallbacks()
  {
      this.encoderFallback = new InternalEncoderBestFitFallback(this);
      this.decoderFallback = new InternalDecoderBestFitFallback(this);
  }
  // Re-encodes an entire byte array: bytes is interpreted as srcEncoding and the result is
  // a new array holding the same text in dstEncoding.
  // Throws ArgumentNullException when bytes is null; the remaining validation is done by
  // the range overload this method forwards to.
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding, byte[] bytes)
  {
      if (bytes != null)
          return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);

      throw new ArgumentNullException(nameof(bytes));
  }
  // Re-encodes a slice of a byte array. The count bytes of bytes that start at index are
  // decoded with srcEncoding and the resulting characters are encoded with dstEncoding
  // into a newly allocated array.
  // Throws ArgumentNullException when srcEncoding, dstEncoding or bytes is null
  // (srcEncoding is reported first when both encodings are null).
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes, int index, int count)
  {
      if (srcEncoding == null)
          throw new ArgumentNullException(nameof(srcEncoding), SR.ArgumentNull_Array);

      if (dstEncoding == null)
          throw new ArgumentNullException(nameof(dstEncoding), SR.ArgumentNull_Array);

      if (bytes == null)
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);

      // Decode first, then encode; index/count are validated by GetChars.
      char[] decoded = srcEncoding.GetChars(bytes, index, count);
      return dstEncoding.GetBytes(decoded);
  }
  // Registers an additional encoding provider. The argument is validated inside
  // EncodingProvider.AddProvider, not here.
  public static void RegisterProvider(EncodingProvider provider) => EncodingProvider.AddProvider(provider);
  // Returns the encoding associated with a code page identifier.
  //
  // Registered providers are consulted first, before any validation. Otherwise:
  //   - values outside 0..65535 throw ArgumentOutOfRangeException;
  //   - 1, 2, 3 and 42 (special values Win32 accepts) throw ArgumentException;
  //   - the built-in code pages map to the corresponding static property;
  //   - any other code page known to EncodingTable yields UTF8, and an unknown one
  //     throws NotSupportedException.
  //
  // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
  // add the corresponding item in EncodingTable; otherwise the lookup at the end of
  // this method fails for it.
  public static Encoding GetEncoding(int codepage)
  {
      Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage);
      if (fromProvider != null)
          return fromProvider;

      bool inRange = codepage >= 0 && codepage <= 65535;
      if (!inRange)
      {
          throw new ArgumentOutOfRangeException(
              nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
      }

      switch (codepage)
      {
          // We don't allow the following special code page values that Win32 allows.
          case CodePageNoOEM:     // 1  CP_OEMCP
          case CodePageNoMac:     // 2  CP_MACCP
          case CodePageNoThread:  // 3  CP_THREAD_ACP
          case CodePageNoSymbol:  // 42 CP_SYMBOL
              throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));

          case CodePageDefault:   // 0
              return Default;
          case CodePageUnicode:   // 1200
              return Unicode;
          case CodePageBigEndian: // 1201
              return BigEndianUnicode;
          case CodePageUTF32:     // 12000
              return UTF32;
          case CodePageUTF32BE:   // 12001
              return BigEndianUTF32;
          case CodePageASCII:     // 20127
              return ASCII;
          case ISO_8859_1:        // 28591
              return Latin1;
          case CodePageUTF7:      // 65000
              return UTF7;
          case CodePageUTF8:      // 65001
              return UTF8;
      }

      // Is it a valid code page?
      CodePageDataItem tableEntry = EncodingTable.GetCodePageDataItem(codepage);
      if (tableEntry == null)
      {
          throw new NotSupportedException(
              SR.Format(SR.NotSupported_NoCodepageData, codepage));
      }

      return UTF8;
  }
  // Returns the encoding for a code page with caller-supplied fallbacks. A provider
  // result is returned as-is. Otherwise the encoding from GetEncoding(int), which is
  // cached and read-only, is cloned and the fallbacks are assigned on the writable copy.
  public static Encoding GetEncoding(int codepage,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
      if (fromProvider != null)
          return fromProvider;

      Encoding writableCopy = (Encoding)GetEncoding(codepage).Clone();
      writableCopy.EncoderFallback = encoderFallback;
      writableCopy.DecoderFallback = decoderFallback;
      return writableCopy;
  }
  // Returns the Encoding object registered under the given name.
  //
  // Providers are asked first. If none answers, the name is translated to a code page
  // through EncodingTable and resolved by GetEncoding(int).
  //
  // NOTE: If you add a new encoding that can be requested by name, be sure to add the
  // corresponding item in EncodingTable; otherwise EncodingTable.GetCodePageFromName()
  // will throw for it.
  public static Encoding GetEncoding(string name)
  {
      return EncodingProvider.GetEncodingFromProvider(name)
          ?? GetEncoding(EncodingTable.GetCodePageFromName(name));
  }
  // Returns the Encoding object registered under the given name, using the supplied
  // encoder and decoder fallbacks.
  //
  // Providers are asked first. If none answers, the name is translated to a code page
  // through EncodingTable and resolved by the (int, fallbacks) overload.
  //
  // NOTE: If you add a new encoding that can be requested by name, be sure to add the
  // corresponding item in EncodingTable; otherwise EncodingTable.GetCodePageFromName()
  // will throw for it.
  public static Encoding GetEncoding(string name,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      return EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback)
          ?? GetEncoding(EncodingTable.GetCodePageFromName(name), encoderFallback, decoderFallback);
  }
  // Returns the EncodingInfo objects describing all of our encodings, as reported by EncodingTable.
  public static EncodingInfo[] GetEncodings() => EncodingTable.GetEncodings();
  // Returns the byte sequence that identifies this encoding. The base implementation
  // has none and returns the shared empty array.
  public virtual byte[] GetPreamble() => Array.Empty<byte>();
  // Span view of the preamble; the base implementation wraps the array returned by GetPreamble().
  public virtual ReadOnlySpan<byte> Preamble
  {
      get { return GetPreamble(); }
  }
  // Makes sure _dataItem is populated, loading it from EncodingTable for _codePage on
  // first use. Throws NotSupportedException when the table has no entry for the code page.
  private void GetDataItem()
  {
      // Already loaded: nothing to do.
      if (_dataItem != null)
          return;

      _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
      if (_dataItem != null)
          return;

      throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
  }
  // Name of this encoding for use with mail agent body tags, taken from the code page
  // data item. If the encoding may not be used, the string is empty.
  public virtual string BodyName
  {
      get
      {
          // Load the code page data on first access.
          if (_dataItem == null)
              GetDataItem();

          return _dataItem.BodyName;
      }
  }
  // Returns the human-readable description of the encoding (e.g. Hebrew (DOS)).
  //
  // Two builds exist:
  //   - PROJECTN: the name comes from a fixed set of resources; when the resource was
  //     stripped, the English name from EncodingTable is used instead.
  //   - otherwise: the name is the resource "Globalization_cp_<code page>".
#if PROJECTN
  public virtual string EncodingName
  {
      get
      {
          string encodingName = GetLocalizedEncodingNameResource(this.CodePage);
          if (encodingName == null)
          {
              throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
          }

          if (encodingName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
          {
              // On ProjectN, resource strings are stripped from retail builds and replaced by
              // their identifier names. Since this property is meant to be a localized string,
              // but we don't localize ProjectN, we specifically need to do something reasonable
              // in this case. This currently returns the English name of the encoding from a
              // static data table.
              //
              // GetCodePageDataItem can return null (GetEncoding and GetDataItem both check
              // for that), so use the null-conditional operator: a missing table entry then
              // reaches the NotSupportedException below instead of a NullReferenceException.
              encodingName = EncodingTable.GetCodePageDataItem(this.CodePage)?.EnglishName;
              if (encodingName == null)
              {
                  throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
              }
          }

          return encodingName;
      }
  }

  // Maps a built-in code page to its localized name resource; returns null for any
  // code page that has no such resource.
  private static string GetLocalizedEncodingNameResource(int codePage)
  {
      switch (codePage)
      {
          case CodePageUnicode: return SR.Globalization_cp_1200;
          case CodePageBigEndian: return SR.Globalization_cp_1201;
          case CodePageUTF32: return SR.Globalization_cp_12000;
          case CodePageUTF32BE: return SR.Globalization_cp_12001;
          case CodePageASCII: return SR.Globalization_cp_20127;
          case ISO_8859_1: return SR.Globalization_cp_28591;
          case CodePageUTF7: return SR.Globalization_cp_65000;
          case CodePageUTF8: return SR.Globalization_cp_65001;
          default: return null;
      }
  }
#else
  public virtual string EncodingName
  {
      get
      {
          // The resource key is "Globalization_cp_" followed by the decimal code page.
          return SR.GetResourceString("Globalization_cp_" + _codePage.ToString());
      }
  }
#endif
  // Name of this encoding for use with mail agent header tags, taken from the code page
  // data item. If the encoding may not be used, the string is empty.
  public virtual string HeaderName
  {
      get
      {
          // Load the code page data on first access.
          if (_dataItem == null)
              GetDataItem();

          return _dataItem.HeaderName;
      }
  }
  // IANA preferred name for this encoding, taken from the code page data item.
  public virtual string WebName
  {
      get
      {
          // Load the code page data on first access.
          if (_dataItem == null)
              GetDataItem();

          return _dataItem.WebName;
      }
  }
  // Windows code page that most closely corresponds to this encoding
  // (the UIFamilyCodePage of the code page data item).
  public virtual int WindowsCodePage
  {
      get
      {
          // Load the code page data on first access.
          if (_dataItem == null)
              GetDataItem();

          return _dataItem.UIFamilyCodePage;
      }
  }
  // True if and only if the encoding is used for display by browser clients
  // (MIMECONTF_BROWSER is set in the data item's flags).
  public virtual bool IsBrowserDisplay
  {
      get
      {
          // Load the code page data on first access.
          if (_dataItem == null)
              GetDataItem();

          var browserBits = _dataItem.Flags & MIMECONTF_BROWSER;
          return browserBits != 0;
      }
  }
  // True if and only if the encoding is used for saving by browser clients
  // (MIMECONTF_SAVABLE_BROWSER is set in the data item's flags).
  public virtual bool IsBrowserSave
  {
      get
      {
          // Load the code page data on first access.
          if (_dataItem == null)
              GetDataItem();

          var savableBrowserBits = _dataItem.Flags & MIMECONTF_SAVABLE_BROWSER;
          return savableBrowserBits != 0;
      }
  }
  // True if and only if the encoding is used for display by mail and news clients
  // (MIMECONTF_MAILNEWS is set in the data item's flags).
  public virtual bool IsMailNewsDisplay
  {
      get
      {
          // Load the code page data on first access.
          if (_dataItem == null)
              GetDataItem();

          var mailNewsBits = _dataItem.Flags & MIMECONTF_MAILNEWS;
          return mailNewsBits != 0;
      }
  }
  // True if and only if the encoding is used for saving documents by mail and
  // news clients (MIMECONTF_SAVABLE_MAILNEWS is set in the data item's flags).
  public virtual bool IsMailNewsSave
  {
      get
      {
          // Load the code page data on first access.
          if (_dataItem == null)
              GetDataItem();

          var savableMailNewsBits = _dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS;
          return savableMailNewsBits != 0;
      }
  }
  // True if and only if the encoding only uses single byte code points (i.e. ASCII, 1252, etc).
  // The base implementation always answers false; the property is virtual.
  public virtual bool IsSingleByte => false;
  // Gets or sets the encoder fallback of this encoding.
  // The setter throws InvalidOperationException on a read-only instance and
  // ArgumentNullException for a null value; the read-only check comes first.
  public EncoderFallback EncoderFallback
  {
      get { return encoderFallback; }
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          if (value == null)
          {
              throw new ArgumentNullException(nameof(value));
          }

          encoderFallback = value;
      }
  }
  // Gets or sets the decoder fallback of this encoding.
  // The setter throws InvalidOperationException on a read-only instance and
  // ArgumentNullException for a null value; the read-only check comes first.
  public DecoderFallback DecoderFallback
  {
      get { return decoderFallback; }
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          if (value == null)
          {
              throw new ArgumentNullException(nameof(value));
          }

          decoderFallback = value;
      }
  }
  // Creates a shallow (memberwise) copy of this encoding. The copy is marked writable,
  // so its fallbacks can be changed even when the source instance is read-only.
  public virtual object Clone()
  {
      Encoding copy = (Encoding)MemberwiseClone();
      copy._isReadOnly = false;
      return copy;
  }
  // True while this instance rejects changes to its fallbacks (see the
  // EncoderFallback/DecoderFallback setters).
  public bool IsReadOnly => _isReadOnly;
  // Encoding for the ASCII character set: the shared default instance held by ASCIIEncoding.
  public static Encoding ASCII
  {
      get { return ASCIIEncoding.s_default; }
  }
  // Encoding for the Latin1 character set: the shared default instance held by
  // Latin1Encoding. Private; this is for our optimizations.
  private static Encoding Latin1
  {
      get { return Latin1Encoding.s_default; }
  }
  // Returns the number of bytes required to encode every character of the given array.
  // Throws ArgumentNullException when chars is null.
  public virtual int GetByteCount(char[] chars)
  {
      if (chars != null)
          return GetByteCount(chars, 0, chars.Length);

      throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
  }
  // Returns the number of bytes required to encode the given string. The string is
  // copied into a char[] and counted through the array overload.
  // Throws ArgumentNullException when s is null.
  public virtual int GetByteCount(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] copy = s.ToCharArray();
      return GetByteCount(copy, 0, copy.Length);
  }
  560. // Returns the number of bytes required to encode the count characters of chars
  561. // that start at position index. Abstract: each concrete encoding supplies the implementation.
  562. // The char[], string and char* GetByteCount overloads of this base class all end up calling it.
  563. public abstract int GetByteCount(char[] chars, int index, int count);
  // Returns the number of bytes required to encode the count characters of s that
  // start at index. The string is pinned and counted through the pointer overload.
  // Throws ArgumentNullException when s is null, and ArgumentOutOfRangeException when
  // index or count is negative or the range extends past the end of the string.
  public int GetByteCount(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Written as a subtraction so the comparison cannot overflow.
      if (s.Length - count < index)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          fixed (char* pinned = s)
          {
              char* rangeStart = pinned + index;
              return GetByteCount(rangeStart, count);
          }
      }
  }
  // We expect this to be the workhorse for NLS encodings. Unfortunately, for existing
  // overrides it has to call the [] version: the characters are copied into a temporary
  // array first, which is really slow, so this method should be avoided if you're
  // calling a 3rd party encoding.
  // Throws ArgumentNullException when chars is null and ArgumentOutOfRangeException
  // when count is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetByteCount(char* chars, int count)
  {
      // Validate input parameters
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Copy the unmanaged characters into a managed array for the array overload.
      char[] copy = new char[count];
      for (int i = 0; i < count; i++)
      {
          copy[i] = chars[i];
      }

      return GetByteCount(copy, 0, count);
  }
  // Returns the number of bytes required to encode the characters in the span. The span
  // is pinned through a non-null pinnable reference and counted by the pointer overload.
  public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
  {
      fixed (char* pinned = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          int byteCount = GetByteCount(pinned, chars.Length);
          return byteCount;
      }
  }
  // For NLS encodings the workhorse takes an encoder (may be null). Callers must
  // validate parameters before calling this internal version, which only asserts.
  // The base implementation does not use the encoder and forwards to the pointer overload.
  internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
  {
      Debug.Assert(chars != null);
      Debug.Assert(count >= 0);

      int byteCount = GetByteCount(chars, count);
      return byteCount;
  }
  // Returns a new byte array containing the encoded representation of every character
  // of the given array. Throws ArgumentNullException when chars is null.
  public virtual byte[] GetBytes(char[] chars)
  {
      if (chars != null)
          return GetBytes(chars, 0, chars.Length);

      throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
  }
  // Returns a new byte array containing the encoded representation of the count
  // characters of chars that start at index. The array is sized with GetByteCount and
  // then filled by the five-argument GetBytes.
  public virtual byte[] GetBytes(char[] chars, int index, int count)
  {
      int needed = GetByteCount(chars, index, count);
      byte[] encoded = new byte[needed];
      GetBytes(chars, index, count, encoded, 0);
      return encoded;
  }
  644. // Encodes a range of characters in a character array into a range of bytes
  645. // in a byte array: charCount characters of chars starting at charIndex are written
  646. // to bytes starting at byteIndex. An exception occurs if the byte array is not large
  647. // enough to hold the complete encoding of the characters. The GetByteCount method can be used to determine the exact number of
  648. // bytes that will be produced for a given range of characters.
  649. // Alternatively, the GetMaxByteCount method can be used to
  650. // determine the maximum number of bytes that will be produced for a given
  651. // number of characters, regardless of the actual character values.
  652. // Abstract: each concrete encoding supplies the implementation. The int result is stored as bytesReceived by GetBytes(string), which asserts that it equals GetByteCount(s).
  653. public abstract int GetBytes(char[] chars, int charIndex, int charCount,
  654. byte[] bytes, int byteIndex);
  // Encodes the whole string and returns the bytes in a new array.
  //
  // Throws ArgumentNullException when s is null.
  public virtual byte[] GetBytes(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }

      // Size the result with GetByteCount, then encode into it.
      byte[] encoded = new byte[GetByteCount(s)];
      int written = GetBytes(s, 0, s.Length, encoded, 0);

      // Counting and encoding are expected to agree.
      Debug.Assert(encoded.Length == written);
      return encoded;
  }
  // Encodes count characters of s, starting at index, and returns the bytes
  // in a new array (Array.Empty<byte>() when nothing is produced).
  //
  // Throws ArgumentNullException when s is null, and
  // ArgumentOutOfRangeException when index or count is negative or the
  // range does not fit inside the string.
  public byte[] GetBytes(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Written as a subtraction so that index + count cannot overflow.
      if (s.Length - count < index)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          fixed (char* pinnedString = s)
          {
              char* rangeStart = pinnedString + index;

              int needed = GetByteCount(rangeStart, count);
              if (needed == 0)
              {
                  return Array.Empty<byte>();
              }

              byte[] encoded = new byte[needed];
              fixed (byte* destination = &encoded[0])
              {
                  int written = GetBytes(rangeStart, count, destination, needed);
                  Debug.Assert(needed == written);
              }

              return encoded;
          }
      }
  }
  // Encodes charCount characters of s, starting at charIndex, into bytes
  // beginning at byteIndex, and returns whatever the array-based GetBytes
  // overload returns.
  //
  // The whole string is copied into a temporary char[] so that charIndex
  // and charCount keep their meaning when they are interpreted by the
  // array-based overload that does the actual work.
  //
  // Throws ArgumentNullException when s is null. The exception carries the
  // SR.ArgumentNull_String message, matching the other string-based
  // GetBytes overloads.
  public virtual int GetBytes(string s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (s == null)
          throw new ArgumentNullException(nameof(s),
              SR.ArgumentNull_String);

      return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
  }
  // Internal encoding entry point that also receives the encoder.
  // Callers must validate their arguments before calling the internal
  // version. The base implementation ignores the encoder and forwards to
  // the public pointer-based overload.
  internal virtual unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
      => GetBytes(chars, charCount, bytes, byteCount);
  // Pointer-based encoding. NLS encodings are expected to override this as
  // their workhorse; for everything else this is a working (if slow)
  // default that round-trips through managed arrays.
  //
  // SECURITY WARNING
  //
  // A mistake here could be a security threat. This method is called
  // internally, so pointers, counts and indexes must be correct on entry.
  //
  // Internal code treated as "safe" calls into this method, yet the result
  // depends on the array-based GetBytes() overload, which a third party may
  // override and whose return value cannot be trusted. That value decides
  // how much of the temporary byte[] is copied to the byte* output, so the
  // copy is additionally capped at byteCount to avoid overrunning the
  // caller's buffer.
  [CLSCompliant(false)]
  public virtual unsafe int GetBytes(char* chars, int charCount,
      byte* bytes, int byteCount)
  {
      // Argument validation: bytes is reported before chars, and charCount
      // before byteCount.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Stage the input characters in a managed array.
      char[] source = new char[charCount];
      for (int i = 0; i < charCount; i++)
      {
          source[i] = chars[i];
      }

      // Managed scratch buffer that receives the encoded bytes.
      byte[] scratch = new byte[byteCount];

      int result = GetBytes(source, 0, charCount, scratch, 0);

      Debug.Assert(result <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");

      // Never trust result on its own: it may come from a 3rd party
      // implementation. Copy at most byteCount bytes whatever it claims.
      int copyCount = result < byteCount ? result : byteCount;

      for (int i = 0; i < copyCount; i++)
      {
          bytes[i] = scratch[i];
      }

      return copyCount;
  }
  // Encodes a span of characters into a span of bytes by pinning both
  // spans and forwarding to the pointer-based GetBytes overload.
  public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
  {
      fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
          {
              int written = GetBytes(pinnedChars, chars.Length, pinnedBytes, bytes.Length);
              return written;
          }
      }
  }
  // Counts the characters produced by decoding the whole byte array.
  //
  // Throws ArgumentNullException when bytes is null.
  public virtual int GetCharCount(byte[] bytes)
  {
      if (bytes != null)
      {
          return GetCharCount(bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  // Contract for implementers: return the number of characters produced by
  // decoding count bytes of the array, starting at index.
  public abstract int GetCharCount(byte[] bytes, int index, int count);
  // Pointer-based character counting. NLS encodings are expected to
  // override this as their workhorse; for everything else this is a working
  // (if slow) default that copies the input into a managed array and asks
  // the array-based overload.
  //
  // Throws ArgumentNullException when bytes is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetCharCount(byte* bytes, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Stage the input bytes in a managed array.
      byte[] staged = new byte[count];
      for (int i = 0; i < count; i++)
      {
          staged[i] = bytes[i];
      }

      return GetCharCount(staged, 0, count);
  }
  // Counts the characters produced by decoding a span of bytes. The span is
  // pinned and the work is handed to the pointer-based GetCharCount overload.
  public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
  {
      int length = bytes.Length;
      fixed (byte* pinned = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          int produced = GetCharCount(pinned, length);
          return produced;
      }
  }
  // Internal counting entry point that also receives the decoder.
  // Callers must validate their arguments before calling the internal
  // version. The base implementation ignores the decoder and forwards to
  // the public pointer-based overload.
  internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
      => GetCharCount(bytes, count);
  // Decodes the whole byte array and returns the characters in a new array.
  //
  // Throws ArgumentNullException when bytes is null.
  public virtual char[] GetChars(byte[] bytes)
  {
      if (bytes != null)
      {
          return GetChars(bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  // Decodes count bytes of the array, starting at index, and returns the
  // characters in a new array sized by GetCharCount for the same range.
  public virtual char[] GetChars(byte[] bytes, int index, int count)
  {
      int needed = GetCharCount(bytes, index, count);
      char[] decoded = new char[needed];

      // The array-based overload fills the freshly sized buffer from offset 0.
      GetChars(bytes, index, count, decoded, 0);
      return decoded;
  }
  // Contract for implementers: decode byteCount bytes of bytes, starting at
  // byteIndex, into chars starting at charIndex.
  //
  // An exception occurs if the character array is too small to hold the
  // complete decoding of the bytes. Callers can size the destination
  // exactly with GetCharCount, or conservatively with GetMaxCharCount,
  // which gives an upper bound for a number of bytes regardless of their
  // values.
  public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex);
  // Pointer-based decoding. NLS encodings are expected to override this as
  // their workhorse; for everything else this is a working (if slow)
  // default that round-trips through managed arrays.
  //
  // SECURITY WARNING
  //
  // A mistake here could be a security threat. This method is called
  // internally, so pointers, counts and indexes must be correct on entry.
  //
  // Internal code treated as "safe" calls into this method, yet the result
  // depends on the array-based GetChars() overload, which a third party may
  // override and whose return value cannot be trusted. That value decides
  // how much of the temporary char[] is copied to the char* output, so the
  // copy is additionally capped at charCount to avoid overrunning the
  // caller's buffer.
  [CLSCompliant(false)]
  public virtual unsafe int GetChars(byte* bytes, int byteCount,
      char* chars, int charCount)
  {
      // Argument validation: chars is reported before bytes, and byteCount
      // before charCount.
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Stage the input bytes in a managed array.
      byte[] source = new byte[byteCount];
      for (int i = 0; i < byteCount; i++)
      {
          source[i] = bytes[i];
      }

      // Managed scratch buffer that receives the decoded characters.
      char[] scratch = new char[charCount];

      int result = GetChars(source, 0, byteCount, scratch, 0);

      Debug.Assert(result <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");

      // Never trust result on its own: it may come from a 3rd party
      // implementation. Copy at most charCount chars whatever it claims.
      int copyCount = result < charCount ? result : charCount;

      for (int i = 0; i < copyCount; i++)
      {
          chars[i] = scratch[i];
      }

      return copyCount;
  }
  // Decodes a span of bytes into a span of characters by pinning both
  // spans and forwarding to the pointer-based GetChars overload.
  public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
  {
      fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
          {
              int written = GetChars(pinnedBytes, bytes.Length, pinnedChars, chars.Length);
              return written;
          }
      }
  }
  // Internal decoding entry point that also receives the decoder.
  // Callers must validate their arguments before calling the internal
  // version. The base implementation ignores the decoder and forwards to
  // the public pointer-based overload.
  internal virtual unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
      => GetChars(bytes, byteCount, chars, charCount);
  // Decodes byteCount bytes starting at the given pointer into a string,
  // delegating string construction to string.CreateStringFromEncoding.
  //
  // Throws ArgumentNullException when bytes is null and
  // ArgumentOutOfRangeException when byteCount is negative.
  [CLSCompliant(false)]
  public unsafe string GetString(byte* bytes, int byteCount)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      string decoded = string.CreateStringFromEncoding(bytes, byteCount, this);
      return decoded;
  }
  // Decodes a span of bytes into a string. The span is pinned and string
  // construction is delegated to string.CreateStringFromEncoding.
  public unsafe string GetString(ReadOnlySpan<byte> bytes)
  {
      int length = bytes.Length;
      fixed (byte* pinned = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          string decoded = string.CreateStringFromEncoding(pinned, length, this);
          return decoded;
      }
  }
  // The code page identifier of this encoding, read from _codePage.
  // Documented range: an integer between 0 and 65535 when the encoding has
  // a code page identifier, or -1 when it does not represent a code page.
  public virtual int CodePage => _codePage;
  // Convenience overload: asks whether the encoding is always normalized
  // for NormalizationForm.FormC.
  public bool IsAlwaysNormalized() => IsAlwaysNormalized(NormalizationForm.FormC);
  // Reports whether the encoding is always normalized for the given form.
  // The base implementation answers false for every form; an encoding that
  // knows better overrides this.
  public virtual bool IsAlwaysNormalized(NormalizationForm form) => false;
  // Creates a Decoder for this encoding, usable for turning a sequence of
  // bytes into a sequence of characters. Unlike the GetChars family of
  // methods, a Decoder can convert partial byte sequences into partial
  // character sequences by keeping state between conversions.
  //
  // The base implementation hands back a DefaultDecoder wrapping this
  // encoding, which simply forwards to this encoding's GetCharCount and
  // GetChars methods. Encodings that need state preserved across successive
  // conversions should override this method and return a suitable Decoder.
  public virtual Decoder GetDecoder()
  {
      Decoder forwarding = new DefaultDecoder(this);
      return forwarding;
  }
  // Creates an Encoder for this encoding, usable for turning a sequence of
  // characters into a sequence of bytes. Unlike the GetBytes family of
  // methods, an Encoder can convert partial character sequences into
  // partial byte sequences by keeping state between conversions.
  //
  // The base implementation hands back a DefaultEncoder wrapping this
  // encoding, which simply forwards to this encoding's GetByteCount and
  // GetBytes methods. Encodings that need state preserved across successive
  // conversions should override this method and return a suitable Encoder.
  public virtual Encoder GetEncoder()
  {
      Encoder forwarding = new DefaultEncoder(this);
      return forwarding;
  }
  // Contract for implementers: return the maximum number of bytes needed to
  // encode charCount characters. Callers use the value to size byte arrays
  // passed to this encoding's GetBytes, or to the GetBytes of an Encoder
  // for this encoding. Every encoding must guarantee that buffers sized
  // from this result never cause buffer overflow exceptions.
  //
  // WARNING: with anything other than the default replacement encoder
  // fallback, an actual GetBytes() call may produce more bytes than the
  // value returned here.
  public abstract int GetMaxByteCount(int charCount);
  // Contract for implementers: return the maximum number of characters
  // produced by decoding byteCount bytes. Callers use the value to size
  // character arrays passed to this encoding's GetChars, or to the GetChars
  // of a Decoder for this encoding. Every encoding must guarantee that
  // buffers sized from this result never cause buffer overflow exceptions.
  public abstract int GetMaxCharCount(int byteCount);
  // Decodes the whole byte array into a string.
  //
  // Throws ArgumentNullException when bytes is null.
  public virtual string GetString(byte[] bytes)
  {
      if (bytes != null)
      {
          return GetString(bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  // Decodes count bytes of the array, starting at index, into a string.
  // The base implementation decodes to a char[] with GetChars and wraps the
  // result in a new string.
  //
  // Internally this is overridden for performance.
  public virtual string GetString(byte[] bytes, int index, int count)
  {
      char[] decoded = GetChars(bytes, index, count);
      return new string(decoded);
  }
  // The shared encoding for the Unicode format, taken from
  // UnicodeEncoding.s_littleEndianDefault.
  //
  // It uses little endian byte order, but detects big endian input when it
  // finds a byte order mark per Unicode 2.0.
  public static Encoding Unicode
  {
      get { return UnicodeEncoding.s_littleEndianDefault; }
  }
  // The shared encoding for the Unicode format, taken from
  // UnicodeEncoding.s_bigEndianDefault.
  //
  // It uses big endian byte order, but detects little endian input when it
  // finds a byte order mark per Unicode 2.0.
  public static Encoding BigEndianUnicode
  {
      get { return UnicodeEncoding.s_bigEndianDefault; }
  }
  // The shared encoding for the UTF-7 format, taken from
  // UTF7Encoding.s_default.
  public static Encoding UTF7
  {
      get { return UTF7Encoding.s_default; }
  }
  // The shared encoding for the UTF-8 format, taken from
  // UTF8Encoding.s_default.
  public static Encoding UTF8
  {
      get { return UTF8Encoding.s_default; }
  }
  // The shared encoding for the UTF-32 format, taken from
  // UTF32Encoding.s_default.
  public static Encoding UTF32
  {
      get { return UTF32Encoding.s_default; }
  }
  // The shared encoding for the UTF-32 format in big endian byte order,
  // taken from UTF32Encoding.s_bigEndianDefault. Private to this class.
  private static Encoding BigEndianUTF32
  {
      get { return UTF32Encoding.s_bigEndianDefault; }
  }
  // Two encodings are equal when the other object is an Encoding with the
  // same _codePage and with equal encoder and decoder fallbacks. Anything
  // that is not an Encoding (including null) compares unequal.
  public override bool Equals(object value)
  {
      Encoding other = value as Encoding;
      if (other == null)
      {
          return false;
      }

      // Checked in this order, stopping at the first mismatch.
      if (_codePage != other._codePage)
      {
          return false;
      }

      if (!EncoderFallback.Equals(other.EncoderFallback))
      {
          return false;
      }

      return DecoderFallback.Equals(other.DecoderFallback);
  }
  // Hash code built from the same three parts Equals compares: the sum of
  // _codePage and the hash codes of the encoder and decoder fallbacks.
  public override int GetHashCode()
  {
      int encoderHash = this.EncoderFallback.GetHashCode();
      int decoderHash = this.DecoderFallback.GetHashCode();
      return _codePage + encoderHash + decoderHash;
  }
  // Best-fit data for the Unicode-to-bytes direction. The base encoding
  // normally has none, so an empty array is returned.
  internal virtual char[] GetBestFitUnicodeToBytesData() => Array.Empty<char>();
  // Best-fit data for the bytes-to-Unicode direction. The base encoding
  // normally has none, so an empty array is returned.
  internal virtual char[] GetBestFitBytesToUnicodeData() => Array.Empty<char>();
  // Always throws an ArgumentException for the "bytes" parameter.
  // The message names this encoding and the encoder fallback's type, which
  // helps when a user-implemented encoder fallback reports a broken maximum
  // count and the output buffer ends up too small.
  internal void ThrowBytesOverflow()
  {
      string message = SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType());
      throw new ArgumentException(message, "bytes");
  }
  // Handles running out of room in the output byte buffer.
  //
  // Throws (via the parameterless ThrowBytesOverflow) when there is no
  // encoder, when the encoder is flagged to throw on overflow, or when
  // nothing was encoded. Before throwing, an existing fallback buffer on
  // the encoder is reset. Otherwise we are in a convert operation and the
  // encoder's must-flush state is cleared instead.
  internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
  {
      bool mustThrow = encoder == null || encoder._throwOnOverflow || nothingEncoded;
      if (mustThrow)
      {
          bool hasFallbackBuffer = encoder != null && encoder.InternalHasFallbackBuffer;
          if (hasFallbackBuffer)
          {
              encoder.FallbackBuffer.InternalReset();
          }

          // The message includes the fallback type in case a user-supplied
          // encoder fallback reports a broken maximum count.
          ThrowBytesOverflow();
      }

      // No throw: remember the flushing state for the convert call.
      encoder.ClearMustFlush();
  }
  // Always throws an ArgumentException for the "chars" parameter.
  // The message names this encoding and the decoder fallback's type, which
  // helps when a user-implemented decoder fallback reports a broken
  // GetMaxCharCount and the output buffer ends up too small.
  internal void ThrowCharsOverflow()
  {
      string message = SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType());
      throw new ArgumentException(message, "chars");
  }
  // Handles running out of room in the output character buffer.
  //
  // Throws (via the parameterless ThrowCharsOverflow) when there is no
  // decoder, when the decoder is flagged to throw on overflow, or when
  // nothing was decoded. Before throwing, an existing fallback buffer on
  // the decoder is reset. Otherwise we are in a convert operation and the
  // decoder's must-flush state is cleared instead.
  internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
  {
      bool mustThrow = decoder == null || decoder._throwOnOverflow || nothingDecoded;
      if (mustThrow)
      {
          bool hasFallbackBuffer = decoder != null && decoder.InternalHasFallbackBuffer;
          if (hasFallbackBuffer)
          {
              decoder.FallbackBuffer.InternalReset();
          }

          // The message includes the fallback type in case a user-supplied
          // decoder fallback has a broken GetMaxCharCount.
          ThrowCharsOverflow();
      }

      // No throw: remember the flushing state for the convert call.
      decoder.ClearMustFlush();
  }
  // Encoder handed out by the base GetEncoder(). It keeps no conversion
  // state of its own: every call is forwarded to the wrapped Encoding and
  // the flush argument is ignored.
  internal sealed class DefaultEncoder : Encoder, IObjectReference
  {
      // The encoding all calls are forwarded to.
      private Encoding _encoding;

      public DefaultEncoder(Encoding encoding)
      {
          _encoding = encoding;
      }

      // Not supported: always throws PlatformNotSupportedException.
      public object GetRealObject(StreamingContext context)
      {
          throw new PlatformNotSupportedException();
      }

      // Number of bytes needed to encode count characters of chars starting
      // at index, as reported by the wrapped encoding. flush is ignored.
      public override int GetByteCount(char[] chars, int index, int count, bool flush)
          => _encoding.GetByteCount(chars, index, count);

      // Pointer-based variant of the count above; flush is ignored.
      public unsafe override int GetByteCount(char* chars, int count, bool flush)
          => _encoding.GetByteCount(chars, count);

      // Encodes charCount characters of chars, starting at charIndex, into
      // bytes starting at byteIndex, by calling the wrapped encoding's
      // GetBytes. flush is ignored by this default encoder.
      //
      // As documented for GetBytes, an exception occurs if the byte array
      // is too small for the complete encoding. GetByteCount gives the
      // exact size for a range of characters; the GetMaxByteCount method of
      // the Encoding that produced this encoder gives an upper bound for a
      // number of characters regardless of their values.
      public override int GetBytes(char[] chars, int charIndex, int charCount,
          byte[] bytes, int byteIndex, bool flush)
          => _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);

      // Pointer-based variant of the encode above; flush is ignored.
      public unsafe override int GetBytes(char* chars, int charCount,
          byte* bytes, int byteCount, bool flush)
          => _encoding.GetBytes(chars, charCount, bytes, byteCount);
  }
  // Decoder handed out by the base GetDecoder(). It keeps no conversion
  // state of its own: every call is forwarded to the wrapped Encoding and
  // the flush argument is ignored.
  internal sealed class DefaultDecoder : Decoder, IObjectReference
  {
      // The encoding all calls are forwarded to.
      private Encoding _encoding;

      public DefaultDecoder(Encoding encoding)
      {
          _encoding = encoding;
      }

      // Not supported: always throws PlatformNotSupportedException.
      public object GetRealObject(StreamingContext context)
      {
          throw new PlatformNotSupportedException();
      }

      // Number of characters produced by decoding count bytes of bytes
      // starting at index. Same as the flush overload with flush = false.
      public override int GetCharCount(byte[] bytes, int index, int count)
          => GetCharCount(bytes, index, count, false);

      // Asks the wrapped encoding for the character count; flush is ignored.
      public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
          => _encoding.GetCharCount(bytes, index, count);

      // Pointer-based count: just calls the encoding's version, no flush.
      public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
          => _encoding.GetCharCount(bytes, count);

      // Decodes byteCount bytes of bytes, starting at byteIndex, into chars
      // starting at charIndex. Same as the flush overload with
      // flush = false.
      //
      // As documented for GetChars, an exception occurs if the character
      // array is too small for the complete decoding. GetCharCount gives
      // the exact size for a range of bytes; the GetMaxCharCount method of
      // the Encoding that produced this decoder gives an upper bound for a
      // number of bytes regardless of their values.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
          char[] chars, int charIndex)
          => GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);

      // Decodes through the wrapped encoding's GetChars; flush is ignored.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
          char[] chars, int charIndex, bool flush)
          => _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);

      // Pointer-based decode: just calls the encoding's version.
      public unsafe override int GetChars(byte* bytes, int byteCount,
          char* chars, int charCount, bool flush)
          => _encoding.GetChars(bytes, byteCount, chars, charCount);
  }
  1237. internal class EncodingCharBuffer
  1238. {
  1239. private unsafe char* _chars;
  1240. private unsafe char* _charStart;
  1241. private unsafe char* _charEnd;
  1242. private int _charCountResult = 0;
  1243. private Encoding _enc;
  1244. private DecoderNLS _decoder;
  1245. private unsafe byte* _byteStart;
  1246. private unsafe byte* _byteEnd;
  1247. private unsafe byte* _bytes;
  1248. private DecoderFallbackBuffer _fallbackBuffer;
  1249. internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
  1250. byte* byteStart, int byteCount)
  1251. {
  1252. _enc = enc;
  1253. _decoder = decoder;
  1254. _chars = charStart;
  1255. _charStart = charStart;
  1256. _charEnd = charStart + charCount;
  1257. _byteStart = byteStart;
  1258. _bytes = byteStart;
  1259. _byteEnd = byteStart + byteCount;
  1260. if (_decoder == null)
  1261. _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
  1262. else
  1263. _fallbackBuffer = _decoder.FallbackBuffer;
  1264. // If we're getting chars or getting char count we don't expect to have
  1265. // to remember fallbacks between calls (so it should be empty)
  1266. Debug.Assert(_fallbackBuffer.Remaining == 0,
  1267. "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
  1268. _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
  1269. }
  1270. internal unsafe bool AddChar(char ch, int numBytes)
  1271. {
  1272. if (_chars != null)
  1273. {
  1274. if (_chars >= _charEnd)
  1275. {
  1276. // Throw maybe
  1277. _bytes -= numBytes; // Didn't encode these bytes
  1278. _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
  1279. return false; // No throw, but no store either
  1280. }
  1281. *(_chars++) = ch;
  1282. }
  1283. _charCountResult++;
  1284. return true;
  1285. }
  1286. internal unsafe bool AddChar(char ch)
  1287. {
  1288. return AddChar(ch, 1);
  1289. }
  1290. internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
  1291. {
  1292. // Need room for 2 chars
  1293. if (_chars >= _charEnd - 1)
  1294. {
  1295. // Throw maybe
  1296. _bytes -= numBytes; // Didn't encode these bytes
  1297. _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
  1298. return false; // No throw, but no store either
  1299. }
  1300. return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
  1301. }
  1302. internal unsafe void AdjustBytes(int count)
  1303. {
  1304. _bytes += count;
  1305. }
  1306. internal unsafe bool MoreData
  1307. {
  1308. get
  1309. {
  1310. return _bytes < _byteEnd;
  1311. }
  1312. }
  1313. // Do we have count more bytes?
  1314. internal unsafe bool EvenMoreData(int count)
  1315. {
  1316. return (_bytes <= _byteEnd - count);
  1317. }
  1318. // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
  1319. // but we'll double check just to make sure.
  1320. internal unsafe byte GetNextByte()
  1321. {
  1322. Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more date");
  1323. if (_bytes >= _byteEnd)
  1324. return 0;
  1325. return *(_bytes++);
  1326. }
  1327. internal unsafe int BytesUsed
  1328. {
  1329. get
  1330. {
  1331. return (int)(_bytes - _byteStart);
  1332. }
  1333. }
  1334. internal unsafe bool Fallback(byte fallbackByte)
  1335. {
  1336. // Build our buffer
  1337. byte[] byteBuffer = new byte[] { fallbackByte };
  1338. // Do the fallback and add the data.
  1339. return Fallback(byteBuffer);
  1340. }
  1341. internal unsafe bool Fallback(byte byte1, byte byte2)
  1342. {
  1343. // Build our buffer
  1344. byte[] byteBuffer = new byte[] { byte1, byte2 };
  1345. // Do the fallback and add the data.
  1346. return Fallback(byteBuffer);
  1347. }
  1348. internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
  1349. {
  1350. // Build our buffer
  1351. byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };
  1352. // Do the fallback and add the data.
  1353. return Fallback(byteBuffer);
  1354. }
  // Runs the decoder fallback for byteBuffer and accounts for the chars it produces.
  // Returns true when the fallback was counted/stored; returns false when the fallback
  // output could not be stored and ThrowCharsOverflow chose not to throw.
  internal unsafe bool Fallback(byte[] byteBuffer)
  {
      // No output char buffer: only accumulate the count reported by the fallback.
      if (_chars == null)
      {
          _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
          return true;
      }

      // Remember where we started so the number of chars written can be computed.
      char* charsBefore = _chars;
      if (!_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars))
      {
          // Throw maybe
          _bytes -= byteBuffer.Length;                              // Didn't use the bytes we were falling back
          _fallbackBuffer.InternalReset();                          // We didn't use this fallback.
          _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);  // Throw?
          return false;                                             // No throw, but no store either
      }

      _charCountResult += unchecked((int)(_chars - charsBefore));
      return true;
  }
  // The accumulated char count (_charCountResult).
  internal unsafe int Count
  {
      get { return _charCountResult; }
  }
  1384. }
  // Pairs an input char range with an output byte range and tracks the current position
  // in each, together with a running count of bytes added. Chars are read through
  // GetNextChar (fallback buffer first, then the char range) and bytes are emitted through
  // the AddByte overloads. When the output byte pointer is null, bytes are only counted
  // and nothing is stored.
  internal class EncodingByteBuffer
  {
      // Output byte range: current position, start, and end.
      private unsafe byte* _bytes;
      private unsafe byte* _byteStart;
      private unsafe byte* _byteEnd;

      // Input char range: current position, start, and end.
      private unsafe char* _chars;
      private unsafe char* _charStart;
      private unsafe char* _charEnd;

      // Number of bytes added so far (incremented even when nothing is stored).
      private int _byteCountResult;

      private Encoding _enc;
      private EncoderNLS _encoder;
      internal EncoderFallbackBuffer fallbackBuffer;

      // Sets up both ranges and picks the fallback buffer: a fresh one from the encoding's
      // EncoderFallback when no encoder is supplied, otherwise the encoder's own buffer.
      // Throws ArgumentException when the encoder throws on overflow and its fallback
      // buffer still holds data.
      internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
          byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
      {
          _enc = inEncoding;
          _encoder = inEncoder;

          _byteStart = inByteStart;
          _bytes = inByteStart;
          _byteEnd = inByteStart + inByteCount;

          _charStart = inCharStart;
          _chars = inCharStart;
          _charEnd = inCharStart + inCharCount;

          if (_encoder != null)
          {
              fallbackBuffer = _encoder.FallbackBuffer;

              // If we're not converting we must not have data in our fallback buffer
              bool mustBeEmpty = _encoder._throwOnOverflow && _encoder.InternalHasFallbackBuffer;
              if (mustBeEmpty && fallbackBuffer.Remaining > 0)
              {
                  throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
                      _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
              }
          }
          else
          {
              fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
          }

          fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
      }

      // Adds one byte, requiring room for it plus moreBytesExpected further bytes.
      // Returns false (after backing up via MovePrevious(true), which may throw) when there
      // is not enough room; otherwise stores the byte if there is an output buffer, bumps
      // the count, and returns true.
      internal unsafe bool AddByte(byte b, int moreBytesExpected)
      {
          Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");

          // Counting only: no output buffer to store into.
          if (_bytes == null)
          {
              _byteCountResult++;
              return true;
          }

          if (_bytes >= _byteEnd - moreBytesExpected)
          {
              // Throw maybe. Check which buffer to back up (only matters if Converting)
              MovePrevious(true);     // Throw if necessary
              return false;           // No throw, but no store either
          }

          *_bytes = b;
          _bytes++;
          _byteCountResult++;
          return true;
      }

      // Adds a single byte with no further bytes expected.
      internal unsafe bool AddByte(byte b1)
      {
          return AddByte(b1, moreBytesExpected: 0);
      }

      // Adds two bytes with no further bytes expected.
      internal unsafe bool AddByte(byte b1, byte b2)
      {
          return AddByte(b1, b2, moreBytesExpected: 0);
      }

      // Adds two bytes in order, stopping at the first one that cannot be added.
      internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
      {
          if (!AddByte(b1, 1 + moreBytesExpected))
              return false;
          return AddByte(b2, moreBytesExpected);
      }

      // Adds three bytes with no further bytes expected.
      internal unsafe bool AddByte(byte b1, byte b2, byte b3)
      {
          return AddByte(b1, b2, b3, moreBytesExpected: 0);
      }

      // Adds three bytes in order, stopping at the first one that cannot be added.
      internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
      {
          if (!AddByte(b1, 2 + moreBytesExpected))
              return false;
          if (!AddByte(b2, 1 + moreBytesExpected))
              return false;
          return AddByte(b3, moreBytesExpected);
      }

      // Adds four bytes in order, stopping at the first one that cannot be added.
      internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
      {
          if (!AddByte(b1, 3))
              return false;
          if (!AddByte(b2, 2))
              return false;
          if (!AddByte(b3, 1))
              return false;
          return AddByte(b4, 0);
      }

      // Backs up one step in the input: in the fallback buffer when it is falling back,
      // otherwise in the char range (if not already at its start). When bThrow is set,
      // ThrowBytesOverflow is then invoked and decides whether to throw.
      internal unsafe void MovePrevious(bool bThrow)
      {
          if (!fallbackBuffer.bFallingBack)
          {
              Debug.Assert(_chars > _charStart ||
                  (bThrow && _bytes == _byteStart),
                  "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
              if (_chars > _charStart)
                  _chars--;                   // don't use last char
          }
          else
          {
              fallbackBuffer.MovePrevious();  // don't use last fallback
          }

          if (bThrow)
          {
              // Throw? (and reset fallback if not converting)
              _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart);
          }
      }

      // Hands charFallback to the fallback buffer and returns its result.
      internal unsafe bool Fallback(char charFallback)
      {
          bool handled = fallbackBuffer.InternalFallback(charFallback, ref _chars);
          return handled;
      }

      // True when the fallback buffer is not empty or chars remain in the char range.
      internal unsafe bool MoreData
      {
          get
          {
              if (fallbackBuffer.Remaining > 0)
                  return true;
              return _chars < _charEnd;
          }
      }

      // Returns the next char to process: the fallback buffer's char when it yields a
      // non-zero one, otherwise the next char from the char range. Returns 0 when neither
      // source has anything.
      internal unsafe char GetNextChar()
      {
          // See if there's something in our fallback buffer
          char fromFallback = fallbackBuffer.InternalGetNextChar();
          if (fromFallback != 0)
              return fromFallback;

          // Nothing in the fallback buffer, return our normal data.
          if (_chars < _charEnd)
          {
              char fromInput = *_chars;
              _chars++;
              return fromInput;
          }

          return fromFallback;
      }

      // Distance, in chars, from the start of the char range to the current position.
      internal unsafe int CharsUsed
      {
          get
          {
              long consumed = _chars - _charStart;
              return (int)consumed;
          }
      }

      // Number of bytes added so far.
      internal unsafe int Count
      {
          get { return _byteCountResult; }
      }
  }
  1521. }
  1522. }