Encoding.cs 67 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Globalization;
  6. using System.Threading;
  7. using System.Runtime.InteropServices;
  8. using System.Runtime.Serialization;
  9. using System.Diagnostics.CodeAnalysis;
  10. namespace System.Text
  11. {
  12. // This abstract base class represents a character encoding. The class provides
  13. // methods to convert arrays and strings of Unicode characters to and from
  14. // arrays of bytes. A number of Encoding implementations are provided in
  15. // the System.Text package, including:
  16. //
  17. // ASCIIEncoding, which encodes Unicode characters as single 7-bit
  18. // ASCII characters. This encoding only supports character values between 0x00
  19. // and 0x7F.
  20. // BaseCodePageEncoding, which encapsulates a Windows code page. Any
  21. // installed code page can be accessed through this encoding, and conversions
  22. // are performed using the WideCharToMultiByte and
  23. // MultiByteToWideChar Windows API functions.
  24. // UnicodeEncoding, which encodes each Unicode character as two
  25. // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
  26. // page 1201) encodings are recognized.
  27. // UTF7Encoding, which encodes Unicode characters using the UTF-7
  28. // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
  29. // encoding supports all Unicode character values, and can also be accessed
  30. // as code page 65000.
  31. // UTF8Encoding, which encodes Unicode characters using the UTF-8
  32. // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
  33. // encoding supports all Unicode character values, and can also be accessed
  34. // as code page 65001.
  35. // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
  36. //
  37. // In addition to directly instantiating Encoding objects, an
  38. // application can use the ForCodePage, GetASCII,
  39. // GetDefault, GetUnicode, GetUTF7, and GetUTF8
  40. // methods in this class to obtain encodings.
  41. //
  42. // Through an encoding, the GetBytes method is used to convert arrays
  43. // of characters to arrays of bytes, and the GetChars method is used to
  44. // convert arrays of bytes to arrays of characters. The GetBytes and
  45. // GetChars methods maintain no state between conversions, and are
  46. // generally intended for conversions of complete blocks of bytes and
  47. // characters in one operation. When the data to be converted is only available
  48. // in sequential blocks (such as data read from a stream) or when the amount of
  49. // data is so large that it needs to be divided into smaller blocks, an
  50. // application may choose to use a Decoder or an Encoder to
  51. // perform the conversion. Decoders and encoders allow sequential blocks of
  52. // data to be converted and they maintain the state required to support
  53. // conversions of data that spans adjacent blocks. Decoders and encoders are
  54. // obtained using the GetDecoder and GetEncoder methods.
  55. //
  56. // The core GetBytes and GetChars methods require the caller
  57. // to provide the destination buffer and ensure that the buffer is large enough
  58. // to hold the entire result of the conversion. When using these methods,
  59. // either directly on an Encoding object or on an associated
  60. // Decoder or Encoder, an application can use one of two methods
  61. // to allocate destination buffers.
  62. //
  63. // The GetByteCount and GetCharCount methods can be used to
  64. // compute the exact size of the result of a particular conversion, and an
  65. // appropriately sized buffer for that conversion can then be allocated.
  // The GetMaxByteCount and GetMaxCharCount methods can be
  // used to compute the maximum possible size of a conversion of a given
  68. // number of bytes or characters, and a buffer of that size can then be reused
  69. // for multiple conversions.
  70. //
  71. // The first method generally uses less memory, whereas the second method
  72. // generally executes faster.
  73. //
  74. public abstract class Encoding : ICloneable
  75. {
  // Shared default encoding instance: a UTF-8 encoding constructed with
  // encoderShouldEmitUTF8Identifier set to false. UTF-8 is used as the default
  // on netcore because ANSI isn't available.
  private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding =
      new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);

  // Returns the shared default encoding instance held in s_defaultEncoding.
  public static Encoding Default
  {
      get { return s_defaultEncoding; }
  }
  //
  // MIME content flags. These values are taken from mlang.idl and should be
  // kept in sync with the values defined there.
  //
  internal const int MIMECONTF_MAILNEWS = 1 << 0;          // 0x00000001
  internal const int MIMECONTF_BROWSER = 1 << 1;           // 0x00000002
  internal const int MIMECONTF_SAVABLE_MAILNEWS = 1 << 8;  // 0x00000100
  internal const int MIMECONTF_SAVABLE_BROWSER = 1 << 9;   // 0x00000200

  // Special-case code pages
  private const int CodePageDefault = 0;
  private const int CodePageNoOEM = 1;           // OEM code page not supported
  private const int CodePageNoMac = 2;           // MAC code page not supported
  private const int CodePageNoThread = 3;        // Thread code page not supported
  private const int CodePageNoSymbol = 42;       // Symbol code page not supported
  private const int CodePageUnicode = 1200;      // Unicode
  private const int CodePageBigEndian = 1201;    // Big Endian Unicode
  private const int CodePageWindows1252 = 1252;  // Windows 1252 code page

  // 20936 has the same code page as 10008, so it is special cased
  private const int CodePageMacGB2312 = 10008;
  private const int CodePageGB2312 = 20936;
  private const int CodePageMacKorean = 10003;
  private const int CodePageDLLKorean = 20949;

  // ISO 2022 code pages
  private const int ISO2022JP = 50220;
  private const int ISO2022JPESC = 50221;
  private const int ISO2022JPSISO = 50222;
  private const int ISOKorean = 50225;
  private const int ISOSimplifiedCN = 50227;
  private const int EUCJP = 51932;
  private const int ChineseHZ = 52936;           // HZ has ~}~{~~ sequences

  // 51936 is the same as 936
  private const int DuplicateEUCCN = 51936;
  private const int EUCCN = 936;
  private const int EUCKR = 51949;

  // Latin 1 & ASCII code pages
  internal const int CodePageASCII = 20127;      // ASCII
  internal const int ISO_8859_1 = 28591;         // Latin1

  // ISCII code pages (listed in numeric order)
  private const int ISCIIDevanagari = 57002;
  private const int ISCIIBengali = 57003;
  private const int ISCIITamil = 57004;
  private const int ISCIITelugu = 57005;
  private const int ISCIIAssemese = 57006;
  private const int ISCIIOriya = 57007;
  private const int ISCIIKannada = 57008;
  private const int ISCIIMalayalam = 57009;
  private const int ISCIIGujarathi = 57010;
  private const int ISCIIPanjabi = 57011;

  // GB18030
  private const int GB18030 = 54936;

  // Other
  private const int ISO_8859_8I = 38598;
  private const int ISO_8859_8_Visual = 28598;

  // 50229 ("Chinese Traditional (ISO-2022)") is currently unsupported
  private const int ENC50229 = 50229;

  // Special code pages
  private const int CodePageUTF7 = 65000;
  private const int CodePageUTF8 = 65001;
  private const int CodePageUTF32 = 12000;
  private const int CodePageUTF32BE = 12001;
  // Code page number of this encoding; assigned by the constructors.
  internal int _codePage;

  // Code page data item, filled in on demand by GetDataItem().
  internal CodePageDataItem _dataItem;

  // Because of encoders we may be read only. Instances start out read-only;
  // Clone() clears this flag on the copy it returns.
  [OptionalField(VersionAdded = 2)]
  private bool _isReadOnly = true;

  // Encoder and decoder fallbacks in effect for this encoding.
  internal EncoderFallback encoderFallback;
  internal DecoderFallback decoderFallback;
  // Parameterless constructor: forwards to Encoding(int) with code page 0.
  protected Encoding() : this(codePage: 0) { }
  // Creates an encoding for the given code page using the default fallbacks.
  // Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage)
  {
      // Reject negative code pages before touching any state.
      if (codePage < 0)
          throw new ArgumentOutOfRangeException(nameof(codePage));

      _codePage = codePage;

      // SetDefaultFallbacks is virtual, so a derived override is what runs here.
      SetDefaultFallbacks();
  }
  // This constructor lets a sub-classing implementation supply its own encoder/decoder
  // fallback objects. It is needed because an encoding object is always created
  // read-only and doesn't allow its fallbacks to be set after construction.
  // A null fallback argument is replaced by the corresponding internal best-fit fallback.
  // Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (codePage < 0)
          throw new ArgumentOutOfRangeException(nameof(codePage));

      _codePage = codePage;

      if (encoderFallback != null)
          this.encoderFallback = encoderFallback;
      else
          this.encoderFallback = new InternalEncoderBestFitFallback(this);

      if (decoderFallback != null)
          this.decoderFallback = decoderFallback;
      else
          this.decoderFallback = new InternalDecoderBestFitFallback(this);
  }
  // Installs the default fallbacks for this encoding: the internal best-fit
  // encoder and decoder fallbacks. The method is virtual so other encodings can
  // choose different defaults (for UTF-X encodings a replacement fallback with
  // an "\xFFFD" string, for ASCII a "?" replacement fallback, etc.).
  internal virtual void SetDefaultFallbacks()
  {
      this.encoderFallback = new InternalEncoderBestFitFallback(this);
      this.decoderFallback = new InternalDecoderBestFitFallback(this);
  }
  // Converts an entire byte array from one encoding to another. The content of
  // bytes is interpreted as srcEncoding and re-encoded as dstEncoding; the result
  // is returned in a new byte array.
  // Throws ArgumentNullException when bytes is null.
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes));
      }

      // Delegate to the ranged overload, covering the whole array.
      return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);
  }
  // Converts a range of bytes in a byte array from one encoding to another.
  // count bytes of bytes, starting at index, are decoded with srcEncoding and the
  // resulting characters are encoded with dstEncoding; the result is returned in
  // a new byte array.
  //
  // Throws ArgumentNullException when srcEncoding, dstEncoding or bytes is null
  // (checked in that order). index/count are not validated here; they are
  // passed straight to srcEncoding.GetChars.
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes, int index, int count)
  {
      // The array-specific message resource is reserved for the array argument;
      // the encoding arguments use the exception's default message so the text
      // does not describe them as arrays.
      if (srcEncoding == null)
      {
          throw new ArgumentNullException(nameof(srcEncoding));
      }
      if (dstEncoding == null)
      {
          throw new ArgumentNullException(nameof(dstEncoding));
      }
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes),
              SR.ArgumentNull_Array);
      }

      return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
  }
  // Registers an encoding provider. The argument is not checked here; it is
  // validated inside EncodingProvider.
  public static void RegisterProvider(EncodingProvider provider) =>
      EncodingProvider.AddProvider(provider);
  // Returns the encoding for a code page number. Registered providers are asked
  // first; if none supplies an encoding, only the built-in code pages listed in
  // the switch below are available.
  //
  // Throws ArgumentException for the special Win32 values 1, 2, 3 and 42,
  // ArgumentOutOfRangeException for values outside 0..65535, and
  // NotSupportedException for any other code page.
  public static Encoding GetEncoding(int codepage)
  {
      Encoding providerEncoding = EncodingProvider.GetEncodingFromProvider(codepage);
      if (providerEncoding != null)
      {
          return providerEncoding;
      }

      switch (codepage)
      {
          // We don't allow the following special code page values that Win32 allows.
          case CodePageNoOEM:     // 1 CP_OEMCP
          case CodePageNoMac:     // 2 CP_MACCP
          case CodePageNoThread:  // 3 CP_THREAD_ACP
          case CodePageNoSymbol:  // 42 CP_SYMBOL
              throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));

          case CodePageDefault:   // 0
              return Default;
          case CodePageUnicode:   // 1200
              return Unicode;
          case CodePageBigEndian: // 1201
              return BigEndianUnicode;
          case CodePageUTF32:     // 12000
              return UTF32;
          case CodePageUTF32BE:   // 12001
              return BigEndianUTF32;
          case CodePageASCII:     // 20127
              return ASCII;
          case ISO_8859_1:        // 28591
              return Latin1;
          case CodePageUTF7:      // 65000
              return UTF7;
          case CodePageUTF8:      // 65001
              return UTF8;
      }

      bool withinRange = codepage >= 0 && codepage <= 65535;
      if (!withinRange)
      {
          throw new ArgumentOutOfRangeException(
              nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
      }

      // In range, but neither a provider nor the built-in set knows this code page.
      throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, codepage));
  }
  // Returns the encoding for a code page number, configured with the given
  // fallbacks. A provider-supplied encoding is returned as is; otherwise the
  // built-in encoding is cloned and the fallbacks are set on the clone.
  public static Encoding GetEncoding(int codepage,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      // The built-in encoding is cached and read only, so work on a clone of it.
      Encoding customized = (Encoding)GetEncoding(codepage).Clone();
      customized.EncoderFallback = encoderFallback;
      customized.DecoderFallback = decoderFallback;
      return customized;
  }
  // Returns an Encoding object for a given encoding name. Registered providers
  // are asked first; otherwise the name is mapped to a code page through
  // EncodingTable and resolved with GetEncoding(int).
  //
  // NOTE: If you add a new encoding that can be requested by name, be sure to
  // add the corresponding item in EncodingTable. Otherwise
  // EncodingTable.GetCodePageFromName() will throw for that name.
  public static Encoding GetEncoding(string name)
  {
      Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(name);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      int resolvedCodePage = EncodingTable.GetCodePageFromName(name);
      return GetEncoding(resolvedCodePage);
  }
  // Returns an Encoding object for a given encoding name, configured with the
  // given fallbacks. Registered providers are asked first; otherwise the name is
  // mapped to a code page through EncodingTable and resolved with the
  // GetEncoding(int, EncoderFallback, DecoderFallback) overload.
  //
  // NOTE: If you add a new encoding that can be requested by name, be sure to
  // add the corresponding item in EncodingTable. Otherwise
  // EncodingTable.GetCodePageFromName() will throw for that name.
  public static Encoding GetEncoding(string name,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      int resolvedCodePage = EncodingTable.GetCodePageFromName(name);
      return GetEncoding(resolvedCodePage, encoderFallback, decoderFallback);
  }
  // Returns the EncodingInfo objects describing all of our encodings, as
  // reported by EncodingTable.
  public static EncodingInfo[] GetEncodings() => EncodingTable.GetEncodings();
  // Returns the preamble bytes for this encoding. The base implementation
  // returns an empty array.
  public virtual byte[] GetPreamble() => Array.Empty<byte>();
  // Span view of the preamble; the base implementation wraps the array
  // returned by GetPreamble().
  public virtual ReadOnlySpan<byte> Preamble
  {
      get { return GetPreamble(); }
  }
  // Ensures _dataItem is populated for _codePage, looking it up in
  // EncodingTable the first time. Throws NotSupportedException when the table
  // has no entry for the code page.
  private void GetDataItem()
  {
      if (_dataItem != null)
      {
          return; // already loaded
      }

      _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
      if (_dataItem == null)
      {
          throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
      }
  }
  // Returns the name for this encoding that can be used with mail agent body tags.
  // If the encoding may not be used, the string is empty.
  // The code page data item is loaded on first use.
  public virtual string BodyName
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          return _dataItem.BodyName;
      }
  }
  // Returns the human-readable description of the encoding (e.g. Hebrew (DOS)).
  // The code page data item is loaded on first use.
  public virtual string EncodingName
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          string displayName = _dataItem.DisplayName;
          return displayName;
      }
  }
  // Returns the name for this encoding that can be used with mail agent header
  // tags. If the encoding may not be used, the string is empty.
  // The code page data item is loaded on first use.
  public virtual string HeaderName
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          return _dataItem.HeaderName;
      }
  }
  // Returns the IANA preferred name for this encoding.
  // The code page data item is loaded on first use.
  public virtual string WebName
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          return _dataItem.WebName;
      }
  }
  // Returns the windows code page that most closely corresponds to this encoding
  // (the UIFamilyCodePage of the code page data item, loaded on first use).
  public virtual int WindowsCodePage
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          return _dataItem.UIFamilyCodePage;
      }
  }
  // True if and only if the encoding is used for display by browser clients,
  // i.e. the MIMECONTF_BROWSER bit is set in the data item's flags.
  public virtual bool IsBrowserDisplay
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          var flags = _dataItem.Flags;
          return (flags & MIMECONTF_BROWSER) != 0;
      }
  }
  // True if and only if the encoding is used for saving by browser clients,
  // i.e. the MIMECONTF_SAVABLE_BROWSER bit is set in the data item's flags.
  public virtual bool IsBrowserSave
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          var flags = _dataItem.Flags;
          return (flags & MIMECONTF_SAVABLE_BROWSER) != 0;
      }
  }
  // True if and only if the encoding is used for display by mail and news clients,
  // i.e. the MIMECONTF_MAILNEWS bit is set in the data item's flags.
  public virtual bool IsMailNewsDisplay
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          var flags = _dataItem.Flags;
          return (flags & MIMECONTF_MAILNEWS) != 0;
      }
  }
  // True if and only if the encoding is used for saving documents by mail and
  // news clients, i.e. the MIMECONTF_SAVABLE_MAILNEWS bit is set in the data
  // item's flags.
  public virtual bool IsMailNewsSave
  {
      get
      {
          if (_dataItem == null)
              GetDataItem();

          var flags = _dataItem.Flags;
          return (flags & MIMECONTF_SAVABLE_MAILNEWS) != 0;
      }
  }
  // True if and only if the encoding only uses single byte code points
  // (i.e. ASCII, 1252, etc). The base implementation always reports false.
  public virtual bool IsSingleByte => false;
  // Gets or sets the encoder fallback of this encoding.
  // The setter throws InvalidOperationException on a read-only instance and
  // ArgumentNullException for a null value (checked in that order).
  public EncoderFallback EncoderFallback
  {
      get => encoderFallback;
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          encoderFallback = value ?? throw new ArgumentNullException(nameof(value));
      }
  }
  // Gets or sets the decoder fallback of this encoding.
  // The setter throws InvalidOperationException on a read-only instance and
  // ArgumentNullException for a null value (checked in that order).
  public DecoderFallback DecoderFallback
  {
      get => decoderFallback;
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          decoderFallback = value ?? throw new ArgumentNullException(nameof(value));
      }
  }
  // Returns a shallow (memberwise) copy of this encoding. The copy is marked
  // as not read-only, so its fallbacks can be changed.
  public virtual object Clone()
  {
      Encoding copy = (Encoding)MemberwiseClone();
      copy._isReadOnly = false;
      return copy;
  }
  // True while this instance is read-only (the value of _isReadOnly).
  public bool IsReadOnly => _isReadOnly;
  // Returns an encoding for the ASCII character set: the shared
  // ASCIIEncoding.s_default instance.
  public static Encoding ASCII
  {
      get { return ASCIIEncoding.s_default; }
  }
  // Returns an encoding for the Latin1 character set: the shared
  // Latin1Encoding.s_default instance.
  //
  // This is private and exists for our optimizations.
  private static Encoding Latin1
  {
      get { return Latin1Encoding.s_default; }
  }
  // Returns the number of bytes required to encode the given character array.
  // Throws ArgumentNullException when chars is null.
  public virtual int GetByteCount(char[] chars)
  {
      if (chars == null)
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

      // Count the whole array via the ranged overload.
      int length = chars.Length;
      return GetByteCount(chars, 0, length);
  }
  // Returns the number of bytes required to encode the given string. The base
  // implementation copies the string to a char array and uses the ranged overload.
  // Throws ArgumentNullException when s is null.
  public virtual int GetByteCount(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] copy = s.ToCharArray();
      return GetByteCount(copy, 0, copy.Length);
  }

  // Returns the number of bytes required to encode a range of characters in
  // a character array. Must be implemented by each concrete encoding.
  public abstract int GetByteCount(char[] chars, int index, int count);
  // Returns the number of bytes required to encode a string range: count
  // characters of s starting at index.
  //
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index or count is negative or the range does not fit inside s.
  public int GetByteCount(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (index > s.Length - count)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          // Pin the string and hand the requested window to the pointer overload.
          fixed (char* start = s)
          {
              return GetByteCount(start + index, count);
          }
      }
  }
  // We expect this to be the workhorse for NLS encodings. Unfortunately, for
  // existing overrides it has to call the char[] version, which means copying
  // the input into a temporary array first. That is slow, so this method should
  // be avoided if you're calling a 3rd party encoding.
  //
  // chars: pointer to the first character to count; must not be null.
  // count: number of characters to read from chars; must not be negative.
  // Returns the result of GetByteCount(char[], int, int) for the copied characters.
  // Throws ArgumentNullException when chars is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetByteCount(char* chars, int count)
  {
      // Validate input parameters
      if (chars == null)
          throw new ArgumentNullException(nameof(chars),
              SR.ArgumentNull_Array);
      if (count < 0)
          throw new ArgumentOutOfRangeException(nameof(count),
              SR.ArgumentOutOfRange_NeedNonNegNum);

      // Bulk-copy the caller's characters into a managed array (replaces the
      // former element-by-element loop) so the array-based overload can be used.
      char[] arrChar = new char[count];
      new ReadOnlySpan<char>(chars, count).CopyTo(arrChar);

      return GetByteCount(arrChar, 0, count);
  }
  // Returns the number of bytes required to encode the given span of characters.
  // The span is pinned and forwarded to the pointer-based overload.
  public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
  {
      int length = chars.Length;

      // GetNonNullPinnableReference is used so the pointer overload receives a
      // non-null pointer for the span.
      fixed (char* pinned = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          return GetByteCount(pinned, length);
      }
  }
  // For NLS encodings, the workhorse takes an encoder (may be null).
  // Callers must always validate parameters before calling this internal
  // version, which only asserts. The base implementation ignores the encoder
  // and forwards to the public pointer overload.
  internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
  {
      Debug.Assert(chars != null);
      Debug.Assert(count >= 0);

      int byteCount = GetByteCount(chars, count);
      return byteCount;
  }
  // Returns a byte array containing the encoded representation of the given
  // character array.
  // Throws ArgumentNullException when chars is null.
  public virtual byte[] GetBytes(char[] chars)
  {
      if (chars == null)
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

      // Encode the whole array via the ranged overload.
      int length = chars.Length;
      return GetBytes(chars, 0, length);
  }
  // Returns a byte array containing the encoded representation of a range
  // of characters in a character array. The output array is sized with
  // GetByteCount and then filled by the abstract GetBytes overload.
  public virtual byte[] GetBytes(char[] chars, int index, int count)
  {
      int needed = GetByteCount(chars, index, count);
      byte[] encoded = new byte[needed];
      GetBytes(chars, index, count, encoded, 0);
      return encoded;
  }

  // Encodes a range of characters in a character array into a range of bytes
  // in a byte array. An exception occurs if the byte array is not large
  // enough to hold the complete encoding of the characters. The
  // GetByteCount method can be used to determine the exact number of
  // bytes that will be produced for a given range of characters.
  // Alternatively, the GetMaxByteCount method can be used to
  // determine the maximum number of bytes that will be produced for a given
  // number of characters, regardless of the actual character values.
  // Must be implemented by each concrete encoding.
  public abstract int GetBytes(char[] chars, int charIndex, int charCount,
      byte[] bytes, int byteIndex);
  // Returns a byte array containing the encoded representation of the given
  // string. The output array is sized with GetByteCount(string) and filled by
  // the string/byte[] GetBytes overload.
  // Throws ArgumentNullException when s is null.
  public virtual byte[] GetBytes(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }

      int expected = GetByteCount(s);
      byte[] encoded = new byte[expected];

      int written = GetBytes(s, 0, s.Length, encoded, 0);
      // The count pass and the encode pass are expected to agree.
      Debug.Assert(expected == written);

      return encoded;
  }
  // Returns a byte array containing the encoded representation of the given
  // string range: count characters of s starting at index.
  //
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index or count is negative or the range does not fit inside s.
  public byte[] GetBytes(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (index > s.Length - count)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          fixed (char* source = s)
          {
              char* rangeStart = source + index;

              int needed = GetByteCount(rangeStart, count);
              if (needed == 0)
              {
                  // Nothing to encode; no output array is allocated.
                  return Array.Empty<byte>();
              }

              byte[] encoded = new byte[needed];
              fixed (byte* destination = &encoded[0])
              {
                  int written = GetBytes(rangeStart, count, destination, needed);
                  // The count pass and the encode pass are expected to agree.
                  Debug.Assert(needed == written);
              }

              return encoded;
          }
      }
  }
  // Encodes a range of characters from a string into a byte array, starting at
  // byteIndex. The base implementation copies the string to a char array and
  // forwards to the char[] overload.
  // Throws ArgumentNullException when s is null.
  public virtual int GetBytes(string s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] copy = s.ToCharArray();
      return GetBytes(copy, charIndex, charCount, bytes, byteIndex);
  }
  659. // This is our internal workhorse
  660. // Always validate parameters before calling internal version, which will only assert.
  // Default behaviour: the encoder argument is not used; the call is forwarded
  // unchanged to the public pointer-based overload.
  internal virtual unsafe int GetBytes(char* chars, int charCount,
                                       byte* bytes, int byteCount, EncoderNLS encoder)
      => GetBytes(chars, charCount, bytes, byteCount);
  // We expect this to be the workhorse for NLS Encodings, but for existing
  // ones we need a working (if slow) default implementation.
  668. //
  669. // WARNING WARNING WARNING
  670. //
  671. // WARNING: If this breaks it could be a security threat. Obviously we
  672. // call this internally, so you need to make sure that your pointers, counts
  673. // and indexes are correct when you call this method.
  674. //
  675. // In addition, we have internal code, which will be marked as "safe" calling
  676. // this code. However this code is dependent upon the implementation of an
  677. // external GetBytes() method, which could be overridden by a third party and
  678. // the results of which cannot be guaranteed. We use that result to copy
  679. // the byte[] to our byte* output buffer. If the result count was wrong, we
  680. // could easily overflow our output buffer. Therefore we do an extra test
  681. // when we copy the buffer so that we don't overflow byteCount either.
  [CLSCompliant(false)]
  public virtual unsafe int GetBytes(char* chars, int charCount,
                                     byte* bytes, int byteCount)
  {
      // Null pointers: when both are null, 'bytes' is the one reported.
      if (bytes == null)
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      if (chars == null)
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

      // Negative counts: when both are negative, 'charCount' is the one reported.
      if (charCount < 0)
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      if (byteCount < 0)
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);

      // Stage the input in a managed array so the array-based overload can do the work.
      char[] managedChars = new char[charCount];
      for (int i = 0; i < charCount; i++)
          managedChars[i] = chars[i];

      // Scratch output buffer, exactly as large as the caller's buffer.
      byte[] managedBytes = new byte[byteCount];

      int produced = GetBytes(managedChars, 0, charCount, managedBytes, 0);
      Debug.Assert(produced <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");

      // WARNING: 'produced' may come from a third-party override and cannot be
      // trusted. Never copy more than byteCount bytes, whatever it claims.
      int toCopy = (produced < byteCount) ? produced : byteCount;
      for (int i = 0; i < toCopy; i++)
          bytes[i] = managedBytes[i];

      return toCopy;
  }
  public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
  {
      // Pin both spans and hand their pointers and lengths to the pointer-based overload.
      // NOTE(review): GetNonNullPinnableReference presumably yields a usable reference even
      // for an empty span, so the pointer overload's null checks are not tripped - confirm.
      int charLength = chars.Length;
      int byteLength = bytes.Length;
      fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
          {
              return GetBytes(pChars, charLength, pBytes, byteLength);
          }
      }
  }
  723. // Returns the number of characters produced by decoding the given byte
  724. // array.
  725. //
  public virtual int GetCharCount(byte[] bytes)
  {
      // Count over the entire array; a null array is a caller error.
      if (bytes != null)
          return GetCharCount(bytes, 0, bytes.Length);

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  735. // Returns the number of characters produced by decoding a range of bytes
  736. // in a byte array.
  737. //
  public abstract int GetCharCount(
      byte[] bytes,
      int index,
      int count);
  // We expect this to be the workhorse for NLS Encodings, but for existing
  // ones we need a working (if slow) default implementation.
  [CLSCompliant(false)]
  public virtual unsafe int GetCharCount(byte* bytes, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Copy the unmanaged input into a managed array and let the array-based overload count it.
      byte[] managed = new byte[count];
      for (int i = 0; i < count; i++)
      {
          managed[i] = bytes[i];
      }
      return GetCharCount(managed, 0, count);
  }
  public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
  {
      // Pin the span and forward its pointer and length to the pointer-based overload.
      int length = bytes.Length;
      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          return GetCharCount(pBytes, length);
      }
  }
  764. // This is our internal workhorse
  765. // Always validate parameters before calling internal version, which will only assert.
  // Default behaviour: the decoder argument is not used; the call is forwarded
  // unchanged to the public pointer-based overload.
  internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
      => GetCharCount(bytes, count);
  770. // Returns a character array containing the decoded representation of a
  771. // given byte array.
  772. //
  public virtual char[] GetChars(byte[] bytes)
  {
      // Decode the entire array; a null array is a caller error.
      if (bytes != null)
          return GetChars(bytes, 0, bytes.Length);

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  782. // Returns a character array containing the decoded representation of a
  783. // range of bytes in a byte array.
  784. //
  public virtual char[] GetChars(byte[] bytes, int index, int count)
  {
      // First pass sizes the output; second pass fills it.
      int charCount = GetCharCount(bytes, index, count);
      char[] decoded = new char[charCount];
      GetChars(bytes, index, count, decoded, 0);
      return decoded;
  }
  791. // Decodes a range of bytes in a byte array into a range of characters in a
  792. // character array. An exception occurs if the character array is not large
  793. // enough to hold the complete decoding of the bytes. The
  794. // GetCharCount method can be used to determine the exact number of
  795. // characters that will be produced for a given range of bytes.
  796. // Alternatively, the GetMaxCharCount method can be used to
  797. // determine the maximum number of characters that will be produced for a
  798. // given number of bytes, regardless of the actual byte values.
  799. //
  public abstract int GetChars(
      byte[] bytes,
      int byteIndex,
      int byteCount,
      char[] chars,
      int charIndex);
  // We expect this to be the workhorse for NLS Encodings, but for existing
  // ones we need a working (if slow) default implementation.
  804. //
  805. // WARNING WARNING WARNING
  806. //
  807. // WARNING: If this breaks it could be a security threat. Obviously we
  808. // call this internally, so you need to make sure that your pointers, counts
  809. // and indexes are correct when you call this method.
  810. //
  811. // In addition, we have internal code, which will be marked as "safe" calling
  812. // this code. However this code is dependent upon the implementation of an
  813. // external GetChars() method, which could be overridden by a third party and
  814. // the results of which cannot be guaranteed. We use that result to copy
  815. // the char[] to our char* output buffer. If the result count was wrong, we
  816. // could easily overflow our output buffer. Therefore we do an extra test
  817. // when we copy the buffer so that we don't overflow charCount either.
  [CLSCompliant(false)]
  public virtual unsafe int GetChars(byte* bytes, int byteCount,
                                     char* chars, int charCount)
  {
      // Null pointers: when both are null, 'chars' is the one reported.
      if (chars == null)
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      if (bytes == null)
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);

      // Negative counts: when both are negative, 'byteCount' is the one reported.
      if (byteCount < 0)
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      if (charCount < 0)
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);

      // Stage the input in a managed array so the array-based overload can do the work.
      byte[] managedBytes = new byte[byteCount];
      for (int i = 0; i < byteCount; i++)
          managedBytes[i] = bytes[i];

      // Scratch output buffer, exactly as large as the caller's buffer.
      char[] managedChars = new char[charCount];

      int produced = GetChars(managedBytes, 0, byteCount, managedChars, 0);
      Debug.Assert(produced <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");

      // WARNING: 'produced' may come from a third-party override and cannot be
      // trusted. Never copy more than charCount chars, whatever it claims.
      int toCopy = (produced < charCount) ? produced : charCount;
      for (int i = 0; i < toCopy; i++)
          chars[i] = managedChars[i];

      return toCopy;
  }
  public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
  {
      // Pin both spans and hand their pointers and lengths to the pointer-based overload.
      int byteLength = bytes.Length;
      int charLength = chars.Length;
      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
          {
              return GetChars(pBytes, byteLength, pChars, charLength);
          }
      }
  }
  859. // This is our internal workhorse
  860. // Always validate parameters before calling internal version, which will only assert.
  // Default behaviour: the decoder argument is not used; the call is forwarded
  // unchanged to the public pointer-based overload.
  internal virtual unsafe int GetChars(byte* bytes, int byteCount,
                                       char* chars, int charCount, DecoderNLS decoder)
      => GetChars(bytes, byteCount, chars, charCount);
  // Decodes byteCount bytes starting at the given pointer into a new string.
  // Throws ArgumentNullException for a null pointer and
  // ArgumentOutOfRangeException for a negative count.
  [CLSCompliant(false)]
  public unsafe string GetString(byte* bytes, int byteCount)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // String construction is delegated to string.CreateStringFromEncoding with this encoding.
      return string.CreateStringFromEncoding(bytes, byteCount, this);
  }
  // Decodes the bytes of the span into a new string using this encoding.
  public unsafe string GetString(ReadOnlySpan<byte> bytes)
  {
      int length = bytes.Length;
      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          return string.CreateStringFromEncoding(pBytes, length, this);
      }
  }
  882. // Returns the code page identifier of this encoding. The returned value is
  883. // an integer between 0 and 65535 if the encoding has a code page
  884. // identifier, or -1 if the encoding does not represent a code page.
  885. //
  public virtual int CodePage => _codePage;
  // IsAlwaysNormalized
  // Returns true if the encoding is always normalized for the specified normalization form.
  // The parameterless overload checks normalization form C.
  public bool IsAlwaysNormalized()
      => this.IsAlwaysNormalized(NormalizationForm.FormC);
  // Base answer is false for every form; an encoding that knows better overrides this.
  public virtual bool IsAlwaysNormalized(NormalizationForm form)
      => false;
  904. // Returns a Decoder object for this encoding. The returned object
  905. // can be used to decode a sequence of bytes into a sequence of characters.
  906. // Contrary to the GetChars family of methods, a Decoder can
  907. // convert partial sequences of bytes into partial sequences of characters
  908. // by maintaining the appropriate state between the conversions.
  909. //
  910. // This default implementation returns a Decoder that simply
  911. // forwards calls to the GetCharCount and GetChars methods to
  912. // the corresponding methods of this encoding. Encodings that require state
  913. // to be maintained between successive conversions should override this
  914. // method and return an instance of an appropriate Decoder
  915. // implementation.
  916. //
  public virtual Decoder GetDecoder()
      => new DefaultDecoder(this);
  921. // Returns an Encoder object for this encoding. The returned object
  922. // can be used to encode a sequence of characters into a sequence of bytes.
  923. // Contrary to the GetBytes family of methods, an Encoder can
  924. // convert partial sequences of characters into partial sequences of bytes
  925. // by maintaining the appropriate state between the conversions.
  926. //
  927. // This default implementation returns an Encoder that simply
  928. // forwards calls to the GetByteCount and GetBytes methods to
  929. // the corresponding methods of this encoding. Encodings that require state
  930. // to be maintained between successive conversions should override this
  931. // method and return an instance of an appropriate Encoder
  932. // implementation.
  933. //
  public virtual Encoder GetEncoder()
      => new DefaultEncoder(this);
  938. // Returns the maximum number of bytes required to encode a given number of
  939. // characters. This method can be used to determine an appropriate buffer
  940. // size for byte arrays passed to the GetBytes method of this
  941. // encoding or the GetBytes method of an Encoder for this
  942. // encoding. All encodings must guarantee that no buffer overflow
  943. // exceptions will occur if buffers are sized according to the results of
  944. // this method.
  945. //
  946. // WARNING: If you're using something besides the default replacement encoder fallback,
  947. // then you could have more bytes than this returned from an actual call to GetBytes().
  948. //
  // Abstract: this class supplies no implementation; takes a character count and returns an int.
  public abstract int GetMaxByteCount(int charCount);
  950. // Returns the maximum number of characters produced by decoding a given
  951. // number of bytes. This method can be used to determine an appropriate
  952. // buffer size for character arrays passed to the GetChars method of
  953. // this encoding or the GetChars method of a Decoder for this
  954. // encoding. All encodings must guarantee that no buffer overflow
  955. // exceptions will occur if buffers are sized according to the results of
  956. // this method.
  957. //
  // Abstract: this class supplies no implementation; takes a byte count and returns an int.
  public abstract int GetMaxCharCount(int byteCount);
  959. // Returns a string containing the decoded representation of a given byte
  960. // array.
  961. //
  public virtual string GetString(byte[] bytes)
  {
      // Decode the entire array; a null array is a caller error.
      if (bytes != null)
          return GetString(bytes, 0, bytes.Length);

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  969. // Returns a string containing the decoded representation of a range of
  970. // bytes in a byte array.
  971. //
  972. // Internally we override this for performance
  973. //
  public virtual string GetString(byte[] bytes, int index, int count)
  {
      // Decode to a temporary char[] and build the string from it.
      char[] decoded = GetChars(bytes, index, count);
      return new string(decoded);
  }
  978. // Returns an encoding for Unicode format. The returned encoding will be
  979. // an instance of the UnicodeEncoding class.
  980. //
  981. // It will use little endian byte order, but will detect
  982. // input in big endian if it finds a byte order mark per Unicode 2.0.
  public static Encoding Unicode
  {
      get { return UnicodeEncoding.s_littleEndianDefault; }
  }
  984. // Returns an encoding for Unicode format. The returned encoding will be
  985. // an instance of the UnicodeEncoding class.
  986. //
  987. // It will use big endian byte order, but will detect
  988. // input in little endian if it finds a byte order mark per Unicode 2.0.
  public static Encoding BigEndianUnicode
  {
      get { return UnicodeEncoding.s_bigEndianDefault; }
  }
  990. // Returns an encoding for the UTF-7 format. The returned encoding will be
  991. // an instance of the UTF7Encoding class.
  public static Encoding UTF7
  {
      get { return UTF7Encoding.s_default; }
  }
  993. // Returns an encoding for the UTF-8 format. The returned encoding will be
  994. // an instance of the UTF8Encoding class.
  public static Encoding UTF8
  {
      get { return UTF8Encoding.s_default; }
  }
  996. // Returns an encoding for the UTF-32 format. The returned encoding will be
  997. // an instance of the UTF32Encoding class.
  public static Encoding UTF32
  {
      get { return UTF32Encoding.s_default; }
  }
  999. // Returns an encoding for the UTF-32 format. The returned encoding will be
  1000. // an instance of the UTF32Encoding class.
  1001. //
  1002. // It will use big endian byte order.
  private static Encoding BigEndianUTF32
  {
      get { return UTF32Encoding.s_bigEndianDefault; }
  }
  // Two encodings are equal when they share a code page and both their encoder
  // and decoder fallbacks compare equal. Anything that is not an Encoding is unequal.
  public override bool Equals(object value)
  {
      if (!(value is Encoding other))
          return false;

      // Same short-circuit order as a chained &&: code page, encoder fallback, decoder fallback.
      if (_codePage != other._codePage)
          return false;
      if (!EncoderFallback.Equals(other.EncoderFallback))
          return false;
      return DecoderFallback.Equals(other.DecoderFallback);
  }
  // Combines the code page with the hash codes of both fallbacks by plain addition.
  public override int GetHashCode()
  {
      int hash = _codePage;
      hash += this.EncoderFallback.GetHashCode();
      hash += this.DecoderFallback.GetHashCode();
      return hash;
  }
  // The base class has no best-fit data, so this returns an empty array.
  internal virtual char[] GetBestFitUnicodeToBytesData()
      => Array.Empty<char>();
  // The base class has no best-fit data, so this returns an empty array.
  internal virtual char[] GetBestFitBytesToUnicodeData()
      => Array.Empty<char>();
  // Always throws ArgumentException for the "bytes" argument. The message names
  // this encoding and the encoder fallback's type, which helps when a
  // user-supplied fallback is the cause of the overflow.
  internal void ThrowBytesOverflow()
  {
      string message = SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType());
      throw new ArgumentException(message, "bytes");
  }
  // Throws when there is no encoder, when the encoder is set to throw on
  // overflow, or when nothing was encoded. Otherwise clears the encoder's
  // must-flush state and returns.
  internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
  {
      bool mustThrow = encoder == null || encoder._throwOnOverflow || nothingEncoded;
      if (mustThrow)
      {
          // Reset an existing fallback buffer before throwing.
          if (encoder != null && encoder.InternalHasFallbackBuffer)
          {
              encoder.FallbackBuffer.InternalReset();
          }

          // The parameterless overload always throws, so control never continues past here.
          ThrowBytesOverflow();
      }

      // Not throwing: we are in a convert operation and must remember our flushing.
      encoder.ClearMustFlush();
  }
  // Always throws ArgumentException for the "chars" argument. The message names
  // this encoding and the decoder fallback's type, which helps when a
  // user-supplied fallback is the cause of the overflow.
  internal void ThrowCharsOverflow()
  {
      string message = SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType());
      throw new ArgumentException(message, "chars");
  }
  // Throws when there is no decoder, when the decoder is set to throw on
  // overflow, or when nothing was decoded. Otherwise clears the decoder's
  // must-flush state and returns.
  internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
  {
      bool mustThrow = decoder == null || decoder._throwOnOverflow || nothingDecoded;
      if (mustThrow)
      {
          // Reset an existing fallback buffer before throwing.
          if (decoder != null && decoder.InternalHasFallbackBuffer)
          {
              decoder.FallbackBuffer.InternalReset();
          }

          // The parameterless overload always throws, so control never continues past here.
          ThrowCharsOverflow();
      }

      // Not throwing: we are in a convert operation and must remember our flushing.
      decoder.ClearMustFlush();
  }
  // Encoder that keeps no state of its own: every call is forwarded to the
  // wrapped Encoding, and the 'flush' argument is not passed along.
  internal sealed class DefaultEncoder : Encoder, IObjectReference
  {
      // The encoding every call is forwarded to.
      private Encoding _encoding;

      public DefaultEncoder(Encoding encoding)
      {
          this._encoding = encoding;
      }

      // IObjectReference member; always throws PlatformNotSupportedException.
      public object GetRealObject(StreamingContext context)
      {
          throw new PlatformNotSupportedException();
      }

      // Returns the wrapped encoding's byte count for the given character range.
      public override int GetByteCount(char[] chars, int index, int count, bool flush)
          => _encoding.GetByteCount(chars, index, count);

      // Pointer-based variant of the byte count.
      public unsafe override int GetByteCount(char* chars, int count, bool flush)
          => _encoding.GetByteCount(chars, count);

      // Encodes charCount characters from chars, starting at charIndex, into bytes
      // starting at byteIndex, by calling the wrapped encoding's GetBytes.
      public override int GetBytes(char[] chars, int charIndex, int charCount,
                                   byte[] bytes, int byteIndex, bool flush)
          => _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);

      // Pointer-based variant of GetBytes.
      public unsafe override int GetBytes(char* chars, int charCount,
                                          byte* bytes, int byteCount, bool flush)
          => _encoding.GetBytes(chars, charCount, bytes, byteCount);
  }
  // Decoder that keeps no state of its own: every call is forwarded to the
  // wrapped Encoding, and the 'flush' argument is not passed along.
  internal sealed class DefaultDecoder : Decoder, IObjectReference
  {
      // The encoding every call is forwarded to.
      private Encoding _encoding;

      public DefaultDecoder(Encoding encoding)
      {
          this._encoding = encoding;
      }

      // IObjectReference member; always throws PlatformNotSupportedException.
      public object GetRealObject(StreamingContext context)
      {
          throw new PlatformNotSupportedException();
      }

      // Same as the flush-taking overload with flush = false.
      public override int GetCharCount(byte[] bytes, int index, int count)
          => GetCharCount(bytes, index, count, false);

      // Returns the wrapped encoding's character count for the given byte range.
      public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
          => _encoding.GetCharCount(bytes, index, count);

      // Pointer-based variant of the character count.
      public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
          => _encoding.GetCharCount(bytes, count);

      // Same as the flush-taking overload with flush = false.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                   char[] chars, int charIndex)
          => GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);

      // Decodes byteCount bytes from bytes, starting at byteIndex, into chars
      // starting at charIndex, by calling the wrapped encoding's GetChars.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                   char[] chars, int charIndex, bool flush)
          => _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);

      // Pointer-based variant of GetChars.
      public unsafe override int GetChars(byte* bytes, int byteCount,
                                          char* chars, int charCount, bool flush)
          => _encoding.GetChars(bytes, byteCount, chars, charCount);
  }
  // Helper used while decoding: tracks a read cursor over an input byte range
  // and a write cursor over an output char range, and counts the characters
  // produced. When the char pointer is null nothing is stored and only the
  // count is maintained.
  internal class EncodingCharBuffer
  {
      // Output cursor and bounds (_chars is null in count-only mode).
      private unsafe char* _chars;
      private unsafe char* _charStart;
      private unsafe char* _charEnd;

      // Number of characters produced so far.
      private int _charCountResult = 0;

      private Encoding _enc;
      private DecoderNLS _decoder;

      // Input bounds and cursor.
      private unsafe byte* _byteStart;
      private unsafe byte* _byteEnd;
      private unsafe byte* _bytes;

      private DecoderFallbackBuffer _fallbackBuffer;

      // Sets up the cursors over [charStart, charStart + charCount) and
      // [byteStart, byteStart + byteCount). The fallback buffer comes from the
      // decoder when one is supplied, otherwise a new one is created from the
      // encoding's DecoderFallback.
      internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
                                         byte* byteStart, int byteCount)
      {
          _enc = enc;
          _decoder = decoder;

          _chars = charStart;
          _charStart = charStart;
          _charEnd = charStart + charCount;

          _byteStart = byteStart;
          _bytes = byteStart;
          _byteEnd = byteStart + byteCount;

          _fallbackBuffer = (_decoder == null)
              ? enc.DecoderFallback.CreateFallbackBuffer()
              : _decoder.FallbackBuffer;

          // If we're getting chars or getting char count we don't expect to have
          // to remember fallbacks between calls (so it should be empty)
          Debug.Assert(_fallbackBuffer.Remaining == 0,
              "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
          _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
      }

      // Appends one character that was decoded from numBytes input bytes.
      // Returns false (after possibly throwing via ThrowCharsOverflow) when the
      // output is full; in that case the byte cursor is moved back by numBytes.
      internal unsafe bool AddChar(char ch, int numBytes)
      {
          if (_chars != null)
          {
              if (_chars >= _charEnd)
              {
                  // These bytes were not consumed after all.
                  _bytes -= numBytes;
                  _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
                  return false; // No throw, but no store either
              }

              *_chars = ch;
              _chars++;
          }

          _charCountResult++;
          return true;
      }

      // Appends one character decoded from a single byte.
      internal bool AddChar(char ch)
          => AddChar(ch, 1);

      // Appends two characters decoded from numBytes input bytes; both are
      // stored or neither is.
      internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
      {
          // Need room for 2 chars
          if (_chars >= _charEnd - 1)
          {
              // These bytes were not consumed after all.
              _bytes -= numBytes;
              _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
              return false; // No throw, but no store either
          }

          if (!AddChar(ch1, numBytes))
              return false;
          return AddChar(ch2, numBytes);
      }

      // Moves the byte cursor by count (which may be negative).
      internal unsafe void AdjustBytes(int count)
      {
          _bytes += count;
      }

      // True while the byte cursor is before the end of the input.
      internal unsafe bool MoreData => _bytes < _byteEnd;

      // Do we have count more bytes?
      internal unsafe bool EvenMoreData(int count)
      {
          return _bytes <= _byteEnd - count;
      }

      // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
      // but we'll double check just to make sure. Returns 0 when the input is exhausted.
      internal unsafe byte GetNextByte()
      {
          Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more data");
          if (_bytes >= _byteEnd)
              return 0;
          return *(_bytes++);
      }

      // Number of input bytes between the start and the current cursor.
      internal unsafe int BytesUsed => (int)(_bytes - _byteStart);

      // Falls back a single undecodable byte.
      internal bool Fallback(byte fallbackByte)
          => Fallback(new byte[] { fallbackByte });

      // Falls back a two-byte undecodable sequence.
      internal bool Fallback(byte byte1, byte byte2)
          => Fallback(new byte[] { byte1, byte2 });

      // Falls back a four-byte undecodable sequence.
      internal bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
          => Fallback(new byte[] { byte1, byte2, byte3, byte4 });

      // Runs the decoder fallback for byteBuffer. In count-only mode the
      // fallback's character count is added to the total. Otherwise the
      // fallback writes through the char cursor; if it reports failure the byte
      // cursor is moved back, the fallback buffer is reset, ThrowCharsOverflow
      // is consulted, and false is returned.
      internal unsafe bool Fallback(byte[] byteBuffer)
      {
          if (_chars == null)
          {
              _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
              return true;
          }

          char* before = _chars;
          if (!_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars))
          {
              _bytes -= byteBuffer.Length; // Didn't use how many ever bytes we're falling back
              _fallbackBuffer.InternalReset(); // We didn't use this fallback.
              _enc.ThrowCharsOverflow(_decoder, _chars == _charStart); // Throw?
              return false; // No throw, but no store either
          }

          _charCountResult += unchecked((int)(_chars - before));
          return true;
      }

      // Total number of characters produced (or counted) so far.
      internal int Count => _charCountResult;
  }
  1333. internal class EncodingByteBuffer
  1334. {
  1335. private unsafe byte* _bytes;
  1336. private unsafe byte* _byteStart;
  1337. private unsafe byte* _byteEnd;
  1338. private unsafe char* _chars;
  1339. private unsafe char* _charStart;
  1340. private unsafe char* _charEnd;
  1341. private int _byteCountResult = 0;
  1342. private Encoding _enc;
  1343. private EncoderNLS _encoder;
  1344. internal EncoderFallbackBuffer fallbackBuffer;
  // Sets up the cursors over [inCharStart, inCharStart + inCharCount) and
  // [inByteStart, inByteStart + inByteCount). The fallback buffer comes from
  // the encoder when one is supplied, otherwise a new one is created from the
  // encoding's EncoderFallback.
  internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
                                     byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
  {
      _enc = inEncoding;
      _encoder = inEncoder;

      _charStart = inCharStart;
      _chars = inCharStart;
      _charEnd = inCharStart + inCharCount;

      _bytes = inByteStart;
      _byteStart = inByteStart;
      _byteEnd = inByteStart + inByteCount;

      if (_encoder != null)
      {
          this.fallbackBuffer = _encoder.FallbackBuffer;

          // If we're not converting we must not have data in our fallback buffer
          bool leftoverFallback = _encoder._throwOnOverflow && _encoder.InternalHasFallbackBuffer &&
                                  this.fallbackBuffer.Remaining > 0;
          if (leftoverFallback)
          {
              throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
                  _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
          }
      }
      else
      {
          this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
      }

      // The last argument is true when a byte output pointer was supplied.
      fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
  }
  1369. internal unsafe bool AddByte(byte b, int moreBytesExpected)
  1370. {
  1371. Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");
  1372. if (_bytes != null)
  1373. {
  1374. if (_bytes >= _byteEnd - moreBytesExpected)
  1375. {
  1376. // Throw maybe. Check which buffer to back up (only matters if Converting)
  1377. this.MovePrevious(true); // Throw if necessary
  1378. return false; // No throw, but no store either
  1379. }
  1380. *(_bytes++) = b;
  1381. }
  1382. _byteCountResult++;
  1383. return true;
  1384. }
  1385. internal bool AddByte(byte b1)
  1386. {
  1387. return (AddByte(b1, 0));
  1388. }
  1389. internal bool AddByte(byte b1, byte b2)
  1390. {
  1391. return (AddByte(b1, b2, 0));
  1392. }
  1393. internal bool AddByte(byte b1, byte b2, int moreBytesExpected)
  1394. {
  1395. return (AddByte(b1, 1 + moreBytesExpected) && AddByte(b2, moreBytesExpected));
  1396. }
  1397. internal bool AddByte(byte b1, byte b2, byte b3)
  1398. {
  1399. return AddByte(b1, b2, b3, (int)0);
  1400. }
  1401. internal bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
  1402. {
  1403. return (AddByte(b1, 2 + moreBytesExpected) &&
  1404. AddByte(b2, 1 + moreBytesExpected) &&
  1405. AddByte(b3, moreBytesExpected));
  1406. }
  1407. internal bool AddByte(byte b1, byte b2, byte b3, byte b4)
  1408. {
  1409. return (AddByte(b1, 3) &&
  1410. AddByte(b2, 2) &&
  1411. AddByte(b3, 1) &&
  1412. AddByte(b4, 0));
  1413. }
  1414. internal unsafe void MovePrevious(bool bThrow)
  1415. {
  1416. if (fallbackBuffer.bFallingBack)
  1417. fallbackBuffer.MovePrevious(); // don't use last fallback
  1418. else
  1419. {
  1420. Debug.Assert(_chars > _charStart ||
  1421. ((bThrow == true) && (_bytes == _byteStart)),
  1422. "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
  1423. if (_chars > _charStart)
  1424. _chars--; // don't use last char
  1425. }
  1426. if (bThrow)
  1427. _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart); // Throw? (and reset fallback if not converting)
  1428. }
  1429. internal unsafe bool Fallback(char charFallback)
  1430. {
  1431. // Do the fallback
  1432. return fallbackBuffer.InternalFallback(charFallback, ref _chars);
  1433. }
  1434. internal unsafe bool MoreData
  1435. {
  1436. get
  1437. {
  1438. // See if fallbackBuffer is not empty or if there's data left in chars buffer.
  1439. return ((fallbackBuffer.Remaining > 0) || (_chars < _charEnd));
  1440. }
  1441. }
  1442. internal unsafe char GetNextChar()
  1443. {
  1444. // See if there's something in our fallback buffer
  1445. char cReturn = fallbackBuffer.InternalGetNextChar();
  1446. // Nothing in the fallback buffer, return our normal data.
  1447. if (cReturn == 0)
  1448. {
  1449. if (_chars < _charEnd)
  1450. cReturn = *(_chars++);
  1451. }
  1452. return cReturn;
  1453. }
  1454. internal unsafe int CharsUsed
  1455. {
  1456. get
  1457. {
  1458. return (int)(_chars - _charStart);
  1459. }
  1460. }
  1461. internal int Count
  1462. {
  1463. get
  1464. {
  1465. return _byteCountResult;
  1466. }
  1467. }
  1468. }
  1469. }
  1470. }