2
0

Encoding.cs 62 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Diagnostics.CodeAnalysis;
  6. using System.Runtime.InteropServices;
  7. using System.Runtime.Serialization;
  8. namespace System.Text
  9. {
  10. // This abstract base class represents a character encoding. The class provides
  11. // methods to convert arrays and strings of Unicode characters to and from
  12. // arrays of bytes. A number of Encoding implementations are provided in
  13. // the System.Text namespace, including:
  14. //
  15. // ASCIIEncoding, which encodes Unicode characters as single 7-bit
  16. // ASCII characters. This encoding only supports character values between 0x00
  17. // and 0x7F.
  18. // BaseCodePageEncoding, which encapsulates a Windows code page. Any
  19. // installed code page can be accessed through this encoding, and conversions
  20. // are performed using the WideCharToMultiByte and
  21. // MultiByteToWideChar Windows API functions.
  22. // UnicodeEncoding, which encodes each Unicode character as two
  23. // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
  24. // page 1201) encodings are recognized.
  25. // UTF7Encoding, which encodes Unicode characters using the UTF-7
  26. // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
  27. // encoding supports all Unicode character values, and can also be accessed
  28. // as code page 65000.
  29. // UTF8Encoding, which encodes Unicode characters using the UTF-8
  30. // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
  31. // encoding supports all Unicode character values, and can also be accessed
  32. // as code page 65001.
  33. // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
  34. //
  35. // In addition to directly instantiating Encoding objects, an
  36. // application can use the GetEncoding method and the ASCII,
  37. // Default, Unicode, UTF7, and UTF8 properties of this
  38. // class to obtain encodings.
  39. //
  40. // Through an encoding, the GetBytes method is used to convert arrays
  41. // of characters to arrays of bytes, and the GetChars method is used to
  42. // convert arrays of bytes to arrays of characters. The GetBytes and
  43. // GetChars methods maintain no state between conversions, and are
  44. // generally intended for conversions of complete blocks of bytes and
  45. // characters in one operation. When the data to be converted is only available
  46. // in sequential blocks (such as data read from a stream) or when the amount of
  47. // data is so large that it needs to be divided into smaller blocks, an
  48. // application may choose to use a Decoder or an Encoder to
  49. // perform the conversion. Decoders and encoders allow sequential blocks of
  50. // data to be converted and they maintain the state required to support
  51. // conversions of data that spans adjacent blocks. Decoders and encoders are
  52. // obtained using the GetDecoder and GetEncoder methods.
  53. //
  54. // The core GetBytes and GetChars methods require the caller
  55. // to provide the destination buffer and ensure that the buffer is large enough
  56. // to hold the entire result of the conversion. When using these methods,
  57. // either directly on an Encoding object or on an associated
  58. // Decoder or Encoder, an application can use one of two methods
  59. // to allocate destination buffers.
  60. //
  61. // The GetByteCount and GetCharCount methods can be used to
  62. // compute the exact size of the result of a particular conversion, and an
  63. // appropriately sized buffer for that conversion can then be allocated.
  64. // The GetMaxByteCount and GetMaxCharCount methods can
  65. // be used to compute the maximum possible size of a conversion of a given
  66. // number of bytes or characters, and a buffer of that size can then be reused
  67. // for multiple conversions.
  68. //
  69. // The first method generally uses less memory, whereas the second method
  70. // generally executes faster.
  71. //
  72. public abstract partial class Encoding : ICloneable
  73. {
  // .NET Core does not expose an ANSI code page, so the default encoding is a
  // cached UTF-8 instance created with encoderShouldEmitUTF8Identifier: false.
  private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding =
      new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);

  // Returns the default encoding: the shared UTF-8 instance above (not an
  // ANSI code page encoding).
  public static Encoding Default
  {
      get { return s_defaultEncoding; }
  }
  // MIME content flags. The values come from mlang.idl and must stay in sync
  // with that file.
  internal const int MIMECONTF_MAILNEWS = 0x001;
  internal const int MIMECONTF_BROWSER = 0x002;
  internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x100;
  internal const int MIMECONTF_SAVABLE_BROWSER = 0x200;

  // Special-case code page identifiers.
  private const int CodePageDefault = 0;
  private const int CodePageNoOEM = 1;        // OEM code page: not supported
  private const int CodePageNoMac = 2;        // MAC code page: not supported
  private const int CodePageNoThread = 3;     // Thread code page: not supported
  private const int CodePageNoSymbol = 42;    // Symbol code page: not supported
  private const int CodePageUnicode = 1200;   // Unicode (little endian)
  private const int CodePageBigEndian = 1201; // Unicode (big endian)

  // ASCII and Latin1 code pages.
  internal const int CodePageASCII = 20127;
  internal const int ISO_8859_1 = 28591;

  // UTF code pages.
  private const int CodePageUTF7 = 65000;
  private const int CodePageUTF8 = 65001;
  private const int CodePageUTF32 = 12000;
  private const int CodePageUTF32BE = 12001;
  // Code page identifier of this encoding; assigned by the constructors.
  internal int _codePage;

  // Code page metadata; stays null until GetDataItem() loads it.
  internal CodePageDataItem? _dataItem;

  // Instances start out read-only; Clone() clears this flag on the copy.
  [OptionalField(VersionAdded = 2)]
  private bool _isReadOnly = true;

  // Encoder and decoder fallbacks; assigned during construction.
  internal EncoderFallback encoderFallback = null!;
  internal DecoderFallback decoderFallback = null!;
  // Creates an encoding for code page 0 by delegating to Encoding(int).
  protected Encoding()
      : this(codePage: 0)
  {
  }
  // Creates an encoding for the given code page and installs the default
  // fallbacks. Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage)
  {
      if (codePage < 0)
          throw new ArgumentOutOfRangeException(nameof(codePage));

      _codePage = codePage;

      // Virtual call: a derived class's SetDefaultFallbacks override runs here.
      SetDefaultFallbacks();
  }
  // Lets a derived class supply its own encoder/decoder fallbacks at
  // construction time. This is needed because encoding objects are created
  // read-only and do not allow the fallbacks to be replaced afterwards.
  // A null fallback argument is replaced by the internal best-fit fallback.
  // Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage, EncoderFallback? encoderFallback, DecoderFallback? decoderFallback)
  {
      if (codePage < 0)
          throw new ArgumentOutOfRangeException(nameof(codePage));

      _codePage = codePage;

      this.encoderFallback = encoderFallback != null
          ? encoderFallback
          : new InternalEncoderBestFitFallback(this);
      this.decoderFallback = decoderFallback != null
          ? decoderFallback
          : new InternalDecoderBestFitFallback(this);
  }
  // Installs the fallbacks used when the caller supplied none. This base
  // implementation assigns the internal best-fit encoder and decoder
  // fallbacks. The method is virtual, so derived encodings can substitute
  // their own defaults.
  // NOTE(review): the earlier comment mentioned "\xFFFD" (UTF-X) and "?"
  // (ASCII) replacement fallbacks -- presumably installed by overrides; confirm.
  internal virtual void SetDefaultFallbacks()
  {
      this.encoderFallback = new InternalEncoderBestFitFallback(this);
      this.decoderFallback = new InternalDecoderBestFitFallback(this);
  }
  // Re-encodes an entire byte array: the bytes are interpreted using
  // srcEncoding and the result is returned as a new array encoded with
  // dstEncoding. Throws ArgumentNullException when bytes is null; the
  // remaining arguments are checked by the range overload this delegates to.
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding, byte[] bytes)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes));
      }

      int length = bytes.Length;
      return Convert(srcEncoding, dstEncoding, bytes, 0, length);
  }
  // Re-encodes a range of a byte array. count bytes of bytes, starting at
  // index, are decoded with srcEncoding and the resulting characters are
  // encoded with dstEncoding into a newly allocated array.
  //
  // Throws ArgumentNullException (with the matching parameter name) when
  // srcEncoding, dstEncoding or bytes is null, checked in that order.
  // index and count are passed to srcEncoding.GetChars without being
  // checked here.
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes, int index, int count)
  {
      // The encodings are not arrays, so they are reported with the default
      // null-argument message rather than the array-specific one.
      if (srcEncoding == null)
      {
          throw new ArgumentNullException(nameof(srcEncoding));
      }
      if (dstEncoding == null)
      {
          throw new ArgumentNullException(nameof(dstEncoding));
      }
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes),
              SR.ArgumentNull_Array);
      }

      char[] decoded = srcEncoding.GetChars(bytes, index, count);
      return dstEncoding.GetBytes(decoded);
  }
  // Registers an encoding provider by forwarding it to
  // EncodingProvider.AddProvider. The argument is not checked here; per the
  // original note, validation happens inside EncodingProvider.
  public static void RegisterProvider(EncodingProvider provider) =>
      EncodingProvider.AddProvider(provider);
  // Returns the encoding for a code page number. Registered providers are
  // asked first; otherwise only the built-in encodings are available.
  //
  // Throws ArgumentException for the special Win32 values (1, 2, 3, 42) that
  // are deliberately not supported, ArgumentOutOfRangeException when the
  // value is outside 0..65535, and NotSupportedException for any other code
  // page that has no built-in encoding.
  public static Encoding GetEncoding(int codepage)
  {
      Encoding? fromProvider = EncodingProvider.GetEncodingFromProvider(codepage);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      // We don't allow the following special code page values that Win32 allows.
      if (codepage == CodePageNoOEM ||      // 1  CP_OEMCP
          codepage == CodePageNoMac ||      // 2  CP_MACCP
          codepage == CodePageNoThread ||   // 3  CP_THREAD_ACP
          codepage == CodePageNoSymbol)     // 42 CP_SYMBOL
      {
          throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));
      }

      // Built-in encodings.
      switch (codepage)
      {
          case CodePageDefault:   // 0
              return Default;
          case CodePageUnicode:   // 1200
              return Unicode;
          case CodePageBigEndian: // 1201
              return BigEndianUnicode;
          case CodePageUTF32:     // 12000
              return UTF32;
          case CodePageUTF32BE:   // 12001
              return BigEndianUTF32;
          case CodePageUTF7:      // 65000
              return UTF7;
          case CodePageUTF8:      // 65001
              return UTF8;
          case CodePageASCII:     // 20127
              return ASCII;
          case ISO_8859_1:        // 28591
              return Latin1;
      }

      bool inRange = codepage >= 0 && codepage <= 65535;
      if (!inRange)
      {
          throw new ArgumentOutOfRangeException(
              nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
      }

      throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, codepage));
  }
  // Returns the encoding for a code page number, configured with the given
  // encoder and decoder fallbacks. Registered providers are asked first;
  // otherwise the built-in encoding is cloned and the fallbacks are set on
  // the clone.
  public static Encoding GetEncoding(int codepage,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding? fromProvider = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      // The built-in instance is cached and read-only, so customize a clone.
      Encoding customized = (Encoding)GetEncoding(codepage).Clone();
      customized.EncoderFallback = encoderFallback;
      customized.DecoderFallback = decoderFallback;
      return customized;
  }
  // Returns the encoding registered under the given name. Providers are asked
  // first; otherwise the name is mapped to a code page through EncodingTable
  // and resolved with GetEncoding(int).
  //
  // NOTE: If you add a new encoding that can be requested by name, be sure to
  // add the corresponding item in EncodingTable. Otherwise the call to
  // EncodingTable.GetCodePageFromName() below will throw.
  public static Encoding GetEncoding(string name)
  {
      Encoding? fromProvider = EncodingProvider.GetEncodingFromProvider(name);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      int codePage = EncodingTable.GetCodePageFromName(name);
      return GetEncoding(codePage);
  }
  // Returns the encoding registered under the given name, configured with the
  // given fallbacks. Providers are asked first; otherwise the name is mapped
  // to a code page through EncodingTable and resolved with the
  // GetEncoding(int, EncoderFallback, DecoderFallback) overload.
  //
  // NOTE: If you add a new encoding that can be requested by name, be sure to
  // add the corresponding item in EncodingTable. Otherwise the call to
  // EncodingTable.GetCodePageFromName() below will throw.
  public static Encoding GetEncoding(string name,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding? fromProvider = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      int codePage = EncodingTable.GetCodePageFromName(name);
      return GetEncoding(codePage, encoderFallback, decoderFallback);
  }
  // Returns the EncodingInfo entries supplied by EncodingTable, one per
  // encoding it describes.
  public static EncodingInfo[] GetEncodings()
  {
      return EncodingTable.GetEncodings();
  }
  // Returns this encoding's preamble as a byte array. The base implementation
  // has none and returns an empty array.
  public virtual byte[] GetPreamble()
  {
      return Array.Empty<byte>();
  }
  // Span view of the preamble; the base implementation wraps the array
  // returned by GetPreamble().
  public virtual ReadOnlySpan<byte> Preamble
  {
      get { return GetPreamble(); }
  }
  // Loads the code page metadata into _dataItem on first use. Throws
  // NotSupportedException when EncodingTable has no data for _codePage.
  private void GetDataItem()
  {
      if (_dataItem != null)
      {
          return; // already loaded
      }

      _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
      if (_dataItem == null)
      {
          throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
      }
  }
  // Name of this encoding for use with mail agent body tags; the string is
  // empty if the encoding may not be used there. Read from the code page data
  // item, which is loaded on first access.
  public virtual string BodyName
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          return _dataItem!.BodyName;
      }
  }
  // Human-readable description of the encoding, e.g. "Hebrew (DOS)". Read
  // from the code page data item, which is loaded on first access.
  public virtual string EncodingName
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          return _dataItem!.DisplayName;
      }
  }
  // Name of this encoding for use with mail agent header tags; the string is
  // empty if the encoding may not be used there. Read from the code page data
  // item, which is loaded on first access.
  public virtual string HeaderName
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          return _dataItem!.HeaderName;
      }
  }
  // IANA preferred name of this encoding. Read from the code page data item,
  // which is loaded on first access.
  public virtual string WebName
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          return _dataItem!.WebName;
      }
  }
  // Windows code page that most closely corresponds to this encoding (the
  // data item's UIFamilyCodePage). The data item is loaded on first access.
  public virtual int WindowsCodePage
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          return _dataItem!.UIFamilyCodePage;
      }
  }
  // True if and only if browser clients use this encoding for display, i.e.
  // the MIMECONTF_BROWSER bit is set in the data item's flags.
  public virtual bool IsBrowserDisplay
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          var flags = _dataItem!.Flags;
          return (flags & MIMECONTF_BROWSER) != 0;
      }
  }
  // True if and only if browser clients use this encoding for saving, i.e.
  // the MIMECONTF_SAVABLE_BROWSER bit is set in the data item's flags.
  public virtual bool IsBrowserSave
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          var flags = _dataItem!.Flags;
          return (flags & MIMECONTF_SAVABLE_BROWSER) != 0;
      }
  }
  // True if and only if mail and news clients use this encoding for display,
  // i.e. the MIMECONTF_MAILNEWS bit is set in the data item's flags.
  public virtual bool IsMailNewsDisplay
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          var flags = _dataItem!.Flags;
          return (flags & MIMECONTF_MAILNEWS) != 0;
      }
  }
  // True if and only if mail and news clients use this encoding for saving
  // documents, i.e. the MIMECONTF_SAVABLE_MAILNEWS bit is set in the data
  // item's flags.
  public virtual bool IsMailNewsSave
  {
      get
      {
          GetDataItem(); // returns immediately when _dataItem is already set
          var flags = _dataItem!.Flags;
          return (flags & MIMECONTF_SAVABLE_MAILNEWS) != 0;
      }
  }
  // True if and only if the encoding only uses single byte code points
  // (e.g. ASCII, 1252). The base implementation returns false.
  public virtual bool IsSingleByte
  {
      get { return false; }
  }
  // Gets or sets the encoder fallback of this encoding. The setter throws
  // InvalidOperationException when the instance is read-only, and
  // ArgumentNullException when the new value is null (checked in that order).
  public EncoderFallback EncoderFallback
  {
      get { return encoderFallback; }
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          encoderFallback = value ?? throw new ArgumentNullException(nameof(value));
      }
  }
  // Gets or sets the decoder fallback of this encoding. The setter throws
  // InvalidOperationException when the instance is read-only, and
  // ArgumentNullException when the new value is null (checked in that order).
  public DecoderFallback DecoderFallback
  {
      get { return decoderFallback; }
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          decoderFallback = value ?? throw new ArgumentNullException(nameof(value));
      }
  }
  // Returns a shallow (memberwise) copy of this encoding. Unlike the
  // original, the copy is writable: its read-only flag is cleared so the
  // fallbacks can be changed.
  public virtual object Clone()
  {
      var copy = (Encoding)MemberwiseClone();
      copy._isReadOnly = false;
      return copy;
  }
  // Whether this instance rejects changes to its fallbacks. Readable by
  // anyone; the setter is private protected.
  public bool IsReadOnly
  {
      get { return _isReadOnly; }
      private protected set { _isReadOnly = value; }
  }
  // Encoding for the ASCII character set: the shared instance held in
  // ASCIIEncoding.s_default.
  public static Encoding ASCII
  {
      get { return ASCIIEncoding.s_default; }
  }
  // Encoding for the Latin1 character set: the shared instance held in
  // Latin1Encoding.s_default. Kept private; it exists for internal
  // optimizations.
  private static Encoding Latin1
  {
      get { return Latin1Encoding.s_default; }
  }
  // Returns the number of bytes needed to encode every character of the
  // given array. Throws ArgumentNullException when chars is null.
  public virtual int GetByteCount(char[] chars)
  {
      if (chars == null)
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

      int length = chars.Length;
      return GetByteCount(chars, 0, length);
  }
  // Returns the number of bytes needed to encode the given string. The string
  // is copied into a char array and measured with the array overload.
  // Throws ArgumentNullException when s is null.
  public virtual int GetByteCount(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] copy = s.ToCharArray();
      return GetByteCount(copy, 0, copy.Length);
  }
  // Returns the number of bytes required to encode count characters of the
  // chars array, starting at position index. Abstract: every concrete
  // encoding supplies the implementation.
  //
  public abstract int GetByteCount(
      char[] chars,
      int index,
      int count);
  // Returns the number of bytes needed to encode count characters of s,
  // starting at position index. The string is pinned and measured through the
  // pointer overload, without copying.
  //
  // Throws ArgumentNullException when s is null and
  // ArgumentOutOfRangeException when index or count is negative or the range
  // extends past the end of the string.
  public int GetByteCount(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (index > s.Length - count)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          fixed (char* start = s)
          {
              char* first = start + index;
              return GetByteCount(first, count);
          }
      }
  }
  // Pointer-based byte count. This is meant to be the workhorse for NLS
  // encodings, but for existing overrides this default has to copy the
  // characters into a managed array and call the array overload. That is
  // slow, so avoid this method when calling a third-party encoding.
  //
  // Throws ArgumentNullException when chars is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetByteCount(char* chars, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      var source = new ReadOnlySpan<char>(chars, count);
      char[] managedCopy = source.ToArray();
      return GetByteCount(managedCopy, 0, count);
  }
  // Returns the number of bytes needed to encode the characters in the span.
  // The span is pinned through a non-null pinnable reference and measured
  // with the pointer overload.
  public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
  {
      int length = chars.Length;
      fixed (char* pinned = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          return GetByteCount(pinned, length);
      }
  }
  // Encodes every character of the given array and returns the bytes in a
  // new array. Throws ArgumentNullException when chars is null.
  public virtual byte[] GetBytes(char[] chars)
  {
      if (chars == null)
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

      int length = chars.Length;
      return GetBytes(chars, 0, length);
  }
  // Encodes count characters of chars, starting at index, and returns the
  // bytes in a new array sized by GetByteCount for the same range.
  public virtual byte[] GetBytes(char[] chars, int index, int count)
  {
      int needed = GetByteCount(chars, index, count);
      byte[] encoded = new byte[needed];
      GetBytes(chars, index, count, encoded, 0);
      return encoded;
  }
  // Encodes charCount characters of chars, starting at charIndex, into bytes,
  // starting at byteIndex. An exception occurs if the byte array is not
  // large enough to hold the complete encoding of the characters.
  //
  // Callers can size the destination exactly with GetByteCount, or use
  // GetMaxByteCount for an upper bound that depends only on the number of
  // characters, not on their values.
  //
  // Abstract: every concrete encoding supplies the implementation.
  public abstract int GetBytes(
      char[] chars,
      int charIndex,
      int charCount,
      byte[] bytes,
      int byteIndex);
  // Encodes the whole string and returns the bytes in a new array sized by
  // GetByteCount(s). Throws ArgumentNullException when s is null.
  public virtual byte[] GetBytes(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }

      int expected = GetByteCount(s);
      byte[] encoded = new byte[expected];
      int written = GetBytes(s, 0, s.Length, encoded, 0);

      // The count pass and the encode pass are expected to agree.
      Debug.Assert(expected == written);
      return encoded;
  }
  // Encodes count characters of s, starting at position index, and returns
  // the bytes in a new array. The string is pinned and encoded through the
  // pointer overloads; a range that encodes to zero bytes yields the shared
  // empty array.
  //
  // Throws ArgumentNullException when s is null and
  // ArgumentOutOfRangeException when index or count is negative or the range
  // extends past the end of the string.
  public byte[] GetBytes(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (index > s.Length - count)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          fixed (char* start = s)
          {
              char* first = start + index;

              int needed = GetByteCount(first, count);
              if (needed == 0)
              {
                  return Array.Empty<byte>();
              }

              byte[] encoded = new byte[needed];
              fixed (byte* destination = &encoded[0])
              {
                  int written = GetBytes(first, count, destination, needed);
                  Debug.Assert(needed == written);
              }

              return encoded;
          }
      }
  }
  // Encodes charCount characters of s, starting at charIndex, into bytes,
  // starting at byteIndex. The whole string is copied to a char array and
  // the array overload does the work, including the range checks.
  // Throws ArgumentNullException when s is null.
  public virtual int GetBytes(string s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] copy = s.ToCharArray();
      return GetBytes(copy, charIndex, charCount, bytes, byteIndex);
  }
  // Pointer-based encode. This is meant to be the workhorse for NLS
  // encodings; for existing encodings this default provides a working (if
  // slow) implementation that goes through managed arrays.
  //
  // WARNING WARNING WARNING
  //
  // If this breaks it could be a security threat. It is called internally, so
  // callers must make sure their pointers, counts and indexes are correct.
  //
  // Internal code marked "safe" also calls this method, yet the result comes
  // from the array-based GetBytes(), which a third party may override and
  // whose return value cannot be trusted. That value decides how much is
  // copied into the caller's byte* buffer, so the copy length is additionally
  // capped at byteCount to make sure the output buffer is never overrun.
  //
  // Throws ArgumentNullException when bytes or chars is null and
  // ArgumentOutOfRangeException when charCount or byteCount is negative.
  // Returns the number of bytes copied to the output buffer.
  [CLSCompliant(false)]
  public virtual unsafe int GetBytes(char* chars, int charCount,
      byte* bytes, int byteCount)
  {
      // bytes is reported before chars, and charCount before byteCount.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Managed copies of the input and a scratch output buffer.
      char[] managedChars = new ReadOnlySpan<char>(chars, charCount).ToArray();
      byte[] managedBytes = new byte[byteCount];

      int produced = GetBytes(managedChars, 0, charCount, managedBytes, 0);
      Debug.Assert(produced <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");

      // Never copy more than byteCount bytes, whatever the (possibly third
      // party) implementation reported.
      int toCopy = produced < byteCount ? produced : byteCount;

      var source = new ReadOnlySpan<byte>(managedBytes, 0, toCopy);
      source.CopyTo(new Span<byte>(bytes, toCopy));
      return toCopy;
  }
  // Encodes the characters in chars into the bytes span and returns the
  // value reported by the pointer overload. Both spans are pinned through
  // non-null pinnable references.
  public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
  {
      int charCount = chars.Length;
      int byteCapacity = bytes.Length;

      fixed (char* source = &MemoryMarshal.GetNonNullPinnableReference(chars))
      fixed (byte* destination = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          return GetBytes(source, charCount, destination, byteCapacity);
      }
  }
  // Returns the number of characters produced by decoding every byte of the
  // given array. Throws ArgumentNullException when bytes is null.
  public virtual int GetCharCount(byte[] bytes)
  {
      if (bytes == null)
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);

      int length = bytes.Length;
      return GetCharCount(bytes, 0, length);
  }
  // Returns the number of characters produced by decoding count bytes of the
  // bytes array, starting at position index. Abstract: every concrete
  // encoding supplies the implementation.
  //
  public abstract int GetCharCount(
      byte[] bytes,
      int index,
      int count);
  // Pointer-based char count. Intended as the workhorse for NLS encodings;
  // this default is a working (if slow) fallback for encodings that only
  // implement the array overload.
  //
  // Throws ArgumentNullException for a null pointer and
  // ArgumentOutOfRangeException for a negative count.
  [CLSCompliant(false)]
  public virtual unsafe int GetCharCount(byte* bytes, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Copy the unmanaged input into a managed array so the array overload can do the work.
      ReadOnlySpan<byte> input = new ReadOnlySpan<byte>(bytes, count);
      byte[] managedCopy = input.ToArray();
      return GetCharCount(managedCopy, 0, count);
  }
  // Span-based char count: pins the span and forwards its address and length
  // to the pointer-based GetCharCount overload.
  public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
  {
      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          int charCount = GetCharCount(pBytes, bytes.Length);
          return charCount;
      }
  }
  // Decodes the whole of the given byte array and returns the resulting
  // characters in a new array.
  //
  // Throws ArgumentNullException when bytes is null.
  public virtual char[] GetChars(byte[] bytes)
  {
      if (bytes != null)
      {
          return GetChars(bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  // Decodes `count` bytes of `bytes` starting at `index` and returns the
  // characters in a freshly allocated array sized by GetCharCount.
  // No argument validation happens here; it is left to the callees.
  public virtual char[] GetChars(byte[] bytes, int index, int count)
  {
      int charCount = GetCharCount(bytes, index, count);
      char[] decoded = new char[charCount];

      // The returned count is not needed: the array was sized exactly above.
      GetChars(bytes, index, count, decoded, 0);
      return decoded;
  }
  // Decodes byteCount bytes of `bytes`, starting at byteIndex, into `chars`
  // starting at charIndex, and returns a count (used by callers in this class
  // as the number of chars written).
  //
  // An exception occurs if the character array is too small to hold the
  // complete decoding. Use GetCharCount for the exact number of characters a
  // given byte range produces, or GetMaxCharCount for an upper bound that
  // depends only on the number of bytes.
  public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex);
  // Pointer-based decode. Intended as the workhorse for NLS encodings; this
  // default is a working (if slow) fallback built on the array overload.
  //
  // SECURITY WARNING: this method writes through a raw char* supplied by the
  // caller, so pointers and counts must be correct. The amount of data copied
  // back is derived from the result of the array-based GetChars, which a third
  // party may override and which therefore cannot be trusted. The copy is
  // capped at charCount so a bogus result can never overrun the output buffer.
  //
  // Throws ArgumentNullException for a null pointer (chars is reported before
  // bytes) and ArgumentOutOfRangeException for a negative count (byteCount is
  // reported before charCount).
  [CLSCompliant(false)]
  public virtual unsafe int GetChars(byte* bytes, int byteCount,
      char* chars, int charCount)
  {
      // Null checks, in the same priority order as the combined original test.
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      // Range checks.
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Managed copy of the input and a managed scratch buffer for the output.
      byte[] source = new ReadOnlySpan<byte>(bytes, byteCount).ToArray();
      char[] scratch = new char[charCount];

      // Let the array overload do the actual decoding.
      int produced = GetChars(source, 0, byteCount, scratch, 0);
      Debug.Assert(produced <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");

      // Never copy more than the caller's buffer can hold, whatever `produced` claims.
      int copyCount = produced < charCount ? produced : charCount;

      ReadOnlySpan<char> from = new ReadOnlySpan<char>(scratch, 0, copyCount);
      Span<char> to = new Span<char>(chars, copyCount);
      from.CopyTo(to);

      return copyCount;
  }
  // Span-based decode: pins both spans and forwards their addresses and
  // lengths to the pointer-based GetChars overload.
  public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
  {
      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
          {
              int written = GetChars(pBytes, bytes.Length, pChars, chars.Length);
              return written;
          }
      }
  }
  // Decodes byteCount bytes at `bytes` into a string using this encoding.
  //
  // Throws ArgumentNullException for a null pointer and
  // ArgumentOutOfRangeException for a negative byteCount; the string itself is
  // built by string.CreateStringFromEncoding.
  [CLSCompliant(false)]
  public unsafe string GetString(byte* bytes, int byteCount)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      string decoded = string.CreateStringFromEncoding(bytes, byteCount, this);
      return decoded;
  }
  // Span-based string decode: pins the span and passes its address and length
  // to string.CreateStringFromEncoding together with this encoding.
  public unsafe string GetString(ReadOnlySpan<byte> bytes)
  {
      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          string decoded = string.CreateStringFromEncoding(pBytes, bytes.Length, this);
          return decoded;
      }
  }
  // The code page identifier of this encoding: an integer between 0 and 65535
  // when the encoding has a code page identifier, or -1 when it does not
  // represent a code page. Backed by the _codePage field.
  public virtual int CodePage
  {
      get { return _codePage; }
  }
  // IsAlwaysNormalized
  // Parameterless convenience overload: asks the question for
  // NormalizationForm.FormC.
  public bool IsAlwaysNormalized()
  {
      return IsAlwaysNormalized(NormalizationForm.FormC);
  }
  // Reports whether this encoding is always normalized for the given form.
  // The base implementation assumes false; encodings that know otherwise
  // override it.
  public virtual bool IsAlwaysNormalized(NormalizationForm form)
  {
      return false;
  }
  // Creates a Decoder for this encoding. Unlike the GetChars family of
  // methods, a Decoder can convert partial byte sequences into partial
  // character sequences by keeping state between conversions.
  //
  // The base implementation hands back a DefaultDecoder wrapping this
  // encoding, which simply forwards to this encoding's GetCharCount and
  // GetChars. Encodings that need state carried across successive conversions
  // should override this and return a suitable Decoder implementation.
  public virtual Decoder GetDecoder()
  {
      return new DefaultDecoder(this);
  }
  // Creates an Encoder for this encoding. Unlike the GetBytes family of
  // methods, an Encoder can convert partial character sequences into partial
  // byte sequences by keeping state between conversions.
  //
  // The base implementation hands back a DefaultEncoder wrapping this
  // encoding, which simply forwards to this encoding's GetByteCount and
  // GetBytes. Encodings that need state carried across successive conversions
  // should override this and return a suitable Encoder implementation.
  public virtual Encoder GetEncoder()
  {
      return new DefaultEncoder(this);
  }
  // Upper bound on the number of bytes needed to encode charCount characters.
  // Use it to size byte buffers passed to this encoding's GetBytes or to the
  // GetBytes of an Encoder for this encoding. Every encoding must guarantee
  // that buffers sized from this result never cause a buffer overflow
  // exception.
  //
  // WARNING: with anything other than the default replacement encoder
  // fallback, an actual GetBytes() call could produce more bytes than this
  // method reports.
  public abstract int GetMaxByteCount(int charCount);
  // Upper bound on the number of characters produced by decoding byteCount
  // bytes. Use it to size char buffers passed to this encoding's GetChars or
  // to the GetChars of a Decoder for this encoding. Every encoding must
  // guarantee that buffers sized from this result never cause a buffer
  // overflow exception.
  public abstract int GetMaxCharCount(int byteCount);
  // Decodes the whole of the given byte array into a string.
  //
  // Throws ArgumentNullException when bytes is null; otherwise forwards to the
  // (bytes, index, count) overload covering the whole array.
  public virtual string GetString(byte[] bytes)
  {
      if (bytes != null)
      {
          return GetString(bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  // Decodes `count` bytes of `bytes` starting at `index` into a string, by
  // decoding to a char array first and wrapping that in a new string.
  //
  // Internally this is overridden for performance.
  public virtual string GetString(byte[] bytes, int index, int count)
  {
      char[] decoded = GetChars(bytes, index, count);
      return new string(decoded);
  }
  // Encoding for the Unicode format; the shared little endian default
  // instance of UnicodeEncoding.
  //
  // It uses little endian byte order, but will detect big endian input if it
  // finds a byte order mark per Unicode 2.0.
  public static Encoding Unicode
  {
      get { return UnicodeEncoding.s_littleEndianDefault; }
  }
  // Encoding for the Unicode format; the shared big endian default instance
  // of UnicodeEncoding.
  //
  // It uses big endian byte order, but will detect little endian input if it
  // finds a byte order mark per Unicode 2.0.
  public static Encoding BigEndianUnicode
  {
      get { return UnicodeEncoding.s_bigEndianDefault; }
  }
  // Encoding for the UTF-7 format; the shared default instance of
  // UTF7Encoding.
  public static Encoding UTF7
  {
      get { return UTF7Encoding.s_default; }
  }
  // Encoding for the UTF-8 format; the shared default instance of
  // UTF8Encoding.
  public static Encoding UTF8
  {
      get { return UTF8Encoding.s_default; }
  }
  // Encoding for the UTF-32 format; the shared default instance of
  // UTF32Encoding.
  public static Encoding UTF32
  {
      get { return UTF32Encoding.s_default; }
  }
  // Encoding for the UTF-32 format using big endian byte order; the shared
  // big endian default instance of UTF32Encoding. Private to this class.
  private static Encoding BigEndianUTF32
  {
      get { return UTF32Encoding.s_bigEndianDefault; }
  }
  // Two encodings are equal when the other object is an Encoding with the
  // same code page and with equal encoder and decoder fallbacks. The checks
  // run in that order and stop at the first mismatch.
  public override bool Equals(object? value)
  {
      if (!(value is Encoding other))
      {
          return false;
      }

      if (_codePage != other._codePage)
      {
          return false;
      }

      if (!EncoderFallback.Equals(other.EncoderFallback))
      {
          return false;
      }

      return DecoderFallback.Equals(other.DecoderFallback);
  }
  // Hash built from the same three components Equals compares: the code page
  // plus the hash codes of the encoder and decoder fallbacks.
  public override int GetHashCode()
  {
      int hash = _codePage;
      hash += this.EncoderFallback.GetHashCode();
      hash += this.DecoderFallback.GetHashCode();
      return hash;
  }
  // Best fit data for the Unicode-to-bytes direction. Normally there is
  // none, so the base implementation returns an empty array.
  internal virtual char[] GetBestFitUnicodeToBytesData()
  {
      return Array.Empty<char>();
  }
  // Best fit data for the bytes-to-Unicode direction. Normally there is
  // none, so the base implementation returns an empty array.
  internal virtual char[] GetBestFitBytesToUnicodeData()
  {
      return Array.Empty<char>();
  }
  // Throw helper for an output byte buffer that is too small. Always throws
  // ArgumentException with "bytes" as the parameter name.
  //
  // The message includes the encoding name and the encoder fallback's type,
  // because this can happen when a user-implemented encoder fallback reports
  // a broken maximum char count.
  //
  // Marked [StackTraceHidden] like the other throw helpers in this class
  // (ThrowCharsOverflow, ThrowConversionOverflow) so the helper frame does not
  // show up in the exception's stack trace.
  [DoesNotReturn]
  [StackTraceHidden]
  internal void ThrowBytesOverflow()
  {
      string message = SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType());
      throw new ArgumentException(message, "bytes");
  }
  // Handles an output byte buffer overflow during encoding.
  //
  // Throws (via the parameterless ThrowBytesOverflow) when there is no
  // encoder, when the encoder is set to throw on overflow, or when nothing at
  // all was encoded. Before throwing, an existing encoder fallback buffer is
  // reset. Otherwise we are in a convert operation: no exception is raised and
  // the encoder's must-flush flag is cleared instead.
  internal void ThrowBytesOverflow(EncoderNLS? encoder, bool nothingEncoded)
  {
      bool mustThrow = encoder == null || encoder._throwOnOverflow || nothingEncoded;

      if (!mustThrow)
      {
          // If we didn't throw, we are in convert and have to remember our flushing
          encoder!.ClearMustFlush();
          return;
      }

      if (encoder != null && encoder.InternalHasFallbackBuffer)
      {
          encoder.FallbackBuffer.InternalReset();
      }

      // The message names the fallback type in case the fallback's max char count is broken.
      ThrowBytesOverflow();
  }
  // Throw helper: always throws ArgumentException with the generic
  // conversion overflow message. Hidden from stack traces.
  [DoesNotReturn]
  [StackTraceHidden]
  internal static void ThrowConversionOverflow()
  {
      throw new ArgumentException(SR.Argument_ConversionOverflow);
  }
  // Throw helper for an output char buffer that is too small. Always throws
  // ArgumentException with "chars" as the parameter name. Hidden from stack
  // traces.
  //
  // The message includes the encoding name and the decoder fallback's type,
  // because this can happen when a user-implemented decoder fallback reports
  // a broken maximum char count.
  [DoesNotReturn]
  [StackTraceHidden]
  internal void ThrowCharsOverflow()
  {
      string message = SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType());
      throw new ArgumentException(message, "chars");
  }
  // Handles an output char buffer overflow during decoding.
  //
  // Throws (via the parameterless ThrowCharsOverflow) when there is no
  // decoder, when the decoder is set to throw on overflow, or when nothing at
  // all was decoded. Before throwing, an existing decoder fallback buffer is
  // reset. Otherwise we are in a convert operation: no exception is raised and
  // the decoder's must-flush flag is cleared instead.
  internal void ThrowCharsOverflow(DecoderNLS? decoder, bool nothingDecoded)
  {
      bool mustThrow = decoder == null || decoder._throwOnOverflow || nothingDecoded;

      if (!mustThrow)
      {
          // If we didn't throw, we are in convert and have to remember our flushing
          decoder!.ClearMustFlush();
          return;
      }

      if (decoder != null && decoder.InternalHasFallbackBuffer)
      {
          decoder.FallbackBuffer.InternalReset();
      }

      // The message names the fallback type in case the fallback's max char count is broken.
      ThrowCharsOverflow();
  }
  // Encoder returned by the base Encoding.GetEncoder(). It holds no state of
  // its own: every call is forwarded to the wrapped encoding and the `flush`
  // argument is not passed on.
  internal sealed class DefaultEncoder : Encoder, IObjectReference
  {
      // The encoding all calls are forwarded to.
      private readonly Encoding _encoding;

      public DefaultEncoder(Encoding encoding) => _encoding = encoding;

      // IObjectReference support is not available: always throws
      // PlatformNotSupportedException.
      public object GetRealObject(StreamingContext context)
      {
          throw new PlatformNotSupportedException();
      }

      // Number of bytes that encoding the given range of characters produces,
      // as reported by the wrapped encoding's GetByteCount.
      public override int GetByteCount(char[] chars, int index, int count, bool flush)
      {
          return _encoding.GetByteCount(chars, index, count);
      }

      // Pointer-based variant of GetByteCount; forwards to the wrapped encoding.
      public override unsafe int GetByteCount(char* chars, int count, bool flush)
      {
          return _encoding.GetByteCount(chars, count);
      }

      // Encodes charCount characters from `chars`, starting at charIndex, into
      // `bytes` starting at byteIndex, by forwarding to the wrapped encoding's
      // GetBytes.
      //
      // An exception occurs if the byte array is not large enough to hold the
      // complete encoding of the characters. GetByteCount gives the exact
      // number of bytes for a given range of characters; GetMaxByteCount on
      // the Encoding that produced this encoder gives an upper bound that
      // depends only on the number of characters.
      public override int GetBytes(char[] chars, int charIndex, int charCount,
          byte[] bytes, int byteIndex, bool flush)
      {
          return _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);
      }

      // Pointer-based variant of GetBytes; forwards to the wrapped encoding.
      public override unsafe int GetBytes(char* chars, int charCount,
          byte* bytes, int byteCount, bool flush)
      {
          return _encoding.GetBytes(chars, charCount, bytes, byteCount);
      }
  }
  // Decoder returned by the base Encoding.GetDecoder(). It holds no state of
  // its own: every call is forwarded to the wrapped encoding and the `flush`
  // argument is not passed on.
  internal sealed class DefaultDecoder : Decoder, IObjectReference
  {
      // The encoding all calls are forwarded to.
      private readonly Encoding _encoding;

      public DefaultDecoder(Encoding encoding) => _encoding = encoding;

      // IObjectReference support is not available: always throws
      // PlatformNotSupportedException.
      public object GetRealObject(StreamingContext context)
      {
          throw new PlatformNotSupportedException();
      }

      // Number of characters that decoding the given range of bytes produces.
      // Same as the flush-taking overload with flush = false.
      public override int GetCharCount(byte[] bytes, int index, int count)
      {
          return GetCharCount(bytes, index, count, false);
      }

      // Forwards to the wrapped encoding's GetCharCount.
      public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
      {
          return _encoding.GetCharCount(bytes, index, count);
      }

      // Pointer-based variant: by default just calls the encoding's version,
      // no flush by default.
      public override unsafe int GetCharCount(byte* bytes, int count, bool flush)
      {
          return _encoding.GetCharCount(bytes, count);
      }

      // Decodes byteCount bytes from `bytes`, starting at byteIndex, into
      // `chars` starting at charIndex. Same as the flush-taking overload with
      // flush = false.
      //
      // An exception occurs if the character array is not large enough to
      // hold the complete decoding of the bytes. GetCharCount gives the exact
      // number of characters for a given range of bytes; GetMaxCharCount on
      // the Encoding that produced this decoder gives an upper bound that
      // depends only on the number of bytes.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
          char[] chars, int charIndex)
      {
          return GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);
      }

      // Forwards to the wrapped encoding's GetChars.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
          char[] chars, int charIndex, bool flush)
      {
          return _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
      }

      // Pointer-based variant: by default just calls the encoding's version.
      public override unsafe int GetChars(byte* bytes, int byteCount,
          char* chars, int charCount, bool flush)
      {
          return _encoding.GetChars(bytes, byteCount, chars, charCount);
      }
  }
  // Decoding helper that reads from an input byte range and either stores the
  // decoded chars into an output char range or, when the output pointer is
  // null, only counts them. It also routes undecodable bytes through the
  // decoder fallback buffer and reports output overflow via
  // Encoding.ThrowCharsOverflow.
  internal class EncodingCharBuffer
  {
      private unsafe char* _chars;                 // next output position; null means "count only"
      private readonly unsafe char* _charStart;    // start of the output range
      private readonly unsafe char* _charEnd;      // one past the end of the output range
      private int _charCountResult = 0;            // chars produced (or counted) so far
      private readonly Encoding _enc;
      private readonly DecoderNLS? _decoder;
      private readonly unsafe byte* _byteStart;    // start of the input range
      private readonly unsafe byte* _byteEnd;      // one past the end of the input range
      private unsafe byte* _bytes;                 // next input position
      private readonly DecoderFallbackBuffer _fallbackBuffer;

      // Sets up the buffer over [byteStart, byteStart + byteCount) as input and
      // [charStart, charStart + charCount) as output. With no decoder a fresh
      // fallback buffer is created from the encoding's DecoderFallback;
      // otherwise the decoder's own fallback buffer is used.
      internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS? decoder, char* charStart, int charCount,
          byte* byteStart, int byteCount)
      {
          _enc = enc;
          _decoder = decoder;

          _chars = charStart;
          _charStart = charStart;
          _charEnd = charStart + charCount;

          _byteStart = byteStart;
          _bytes = byteStart;
          _byteEnd = byteStart + byteCount;

          if (_decoder == null)
              _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
          else
              _fallbackBuffer = _decoder.FallbackBuffer;

          // If we're getting chars or getting char count we don't expect to have
          // to remember fallbacks between calls (so it should be empty)
          Debug.Assert(_fallbackBuffer.Remaining == 0,
              "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
          _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
      }

      // Appends one char that was decoded from numBytes input bytes.
      // Returns false (after backing the input position up by numBytes and
      // giving ThrowCharsOverflow the chance to throw) when the output range is
      // full. In count-only mode nothing is stored and the count is bumped.
      internal unsafe bool AddChar(char ch, int numBytes)
      {
          if (_chars != null)
          {
              if (_chars >= _charEnd)
              {
                  // Throw maybe
                  _bytes -= numBytes;                                        // Didn't encode these bytes
                  _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);   // Throw?
                  return false;                                              // No throw, but no store either
              }

              *(_chars++) = ch;
          }

          _charCountResult++;
          return true;
      }

      // Appends one char decoded from a single input byte.
      internal bool AddChar(char ch) => AddChar(ch, 1);

      // Appends two chars decoded from numBytes input bytes. Room for both is
      // checked up front so that either both are added or neither is.
      internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
      {
          // Need room for 2 chars
          if (_chars >= _charEnd - 1)
          {
              // Throw maybe
              _bytes -= numBytes;                                        // Didn't encode these bytes
              _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);   // Throw?
              return false;                                              // No throw, but no store either
          }

          return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
      }

      // Moves the input position by `count` bytes.
      internal unsafe void AdjustBytes(int count)
      {
          _bytes += count;
      }

      // True while at least one input byte is left.
      internal unsafe bool MoreData => _bytes < _byteEnd;

      // Do we have count more bytes?
      internal unsafe bool EvenMoreData(int count) => _bytes <= _byteEnd - count;

      // Reads and consumes the next input byte.
      // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
      // but we'll double check just to make sure: past the end it returns 0 without advancing.
      internal unsafe byte GetNextByte()
      {
          Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more data");
          if (_bytes >= _byteEnd)
              return 0;
          return *(_bytes++);
      }

      // Number of input bytes consumed so far.
      internal unsafe int BytesUsed => (int)(_bytes - _byteStart);

      // Runs the decoder fallback for a single byte.
      internal bool Fallback(byte fallbackByte)
      {
          // Build our buffer
          byte[] byteBuffer = new byte[] { fallbackByte };

          // Do the fallback and add the data.
          return Fallback(byteBuffer);
      }

      // Runs the decoder fallback for a two-byte sequence.
      internal bool Fallback(byte byte1, byte byte2)
      {
          // Build our buffer
          byte[] byteBuffer = new byte[] { byte1, byte2 };

          // Do the fallback and add the data.
          return Fallback(byteBuffer);
      }

      // Runs the decoder fallback for a four-byte sequence.
      internal bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
      {
          // Build our buffer
          byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };

          // Do the fallback and add the data.
          return Fallback(byteBuffer);
      }

      // Runs the decoder fallback for the given bytes. When storing, the
      // fallback writes through _chars and the count grows by however far the
      // output pointer moved; when only counting, the count grows by the value
      // InternalFallback returns. Returns false when the fallback reported
      // failure and ThrowCharsOverflow chose not to throw; in that case the
      // input position is backed up by the length of byteBuffer.
      internal unsafe bool Fallback(byte[] byteBuffer)
      {
          // Do the fallback and add the data.
          if (_chars != null)
          {
              char* pTemp = _chars;
              if (!_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars))
              {
                  // Throw maybe
                  _bytes -= byteBuffer.Length;                               // Didn't use how many ever bytes we're falling back
                  _fallbackBuffer.InternalReset();                           // We didn't use this fallback.
                  _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);   // Throw?
                  return false;                                              // No throw, but no store either
              }

              _charCountResult += unchecked((int)(_chars - pTemp));
          }
          else
          {
              _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
          }

          return true;
      }

      // Number of chars produced (or counted) so far.
      internal int Count => _charCountResult;
  }
  // Encoding helper that reads from an input char range (and the encoder
  // fallback buffer) and either stores the encoded bytes into an output byte
  // range or, when the output pointer is null, only counts them. Output
  // overflow is reported via Encoding.ThrowBytesOverflow.
  internal class EncodingByteBuffer
  {
      private unsafe byte* _bytes;                 // next output position; null means "count only"
      private readonly unsafe byte* _byteStart;    // start of the output range
      private readonly unsafe byte* _byteEnd;      // one past the end of the output range
      private unsafe char* _chars;                 // next input position
      private readonly unsafe char* _charStart;    // start of the input range
      private readonly unsafe char* _charEnd;      // one past the end of the input range
      private int _byteCountResult = 0;            // bytes produced (or counted) so far
      private readonly Encoding _enc;
      private readonly EncoderNLS? _encoder;
      internal EncoderFallbackBuffer fallbackBuffer;

      // Sets up the buffer over [inCharStart, inCharStart + inCharCount) as
      // input and [inByteStart, inByteStart + inByteCount) as output. With no
      // encoder a fresh fallback buffer is created from the encoding's
      // EncoderFallback; otherwise the encoder's own fallback buffer is used,
      // and ArgumentException is thrown if that buffer still holds data while
      // the encoder is set to throw on overflow (i.e. is not converting).
      internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS? inEncoder,
          byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
      {
          _enc = inEncoding;
          _encoder = inEncoder;

          _chars = inCharStart;
          _charStart = inCharStart;
          _charEnd = inCharStart + inCharCount;

          _byteStart = inByteStart;
          _bytes = inByteStart;
          _byteEnd = inByteStart + inByteCount;

          if (_encoder != null)
          {
              this.fallbackBuffer = _encoder.FallbackBuffer;

              // If we're not converting we must not have data in our fallback buffer
              bool leftoverFallbackData =
                  _encoder._throwOnOverflow &&
                  _encoder.InternalHasFallbackBuffer &&
                  this.fallbackBuffer.Remaining > 0;

              if (leftoverFallbackData)
              {
                  throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
                      _encoder.Encoding.EncodingName, _encoder.Fallback!.GetType()));
              }
          }
          else
          {
              this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
          }

          fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
      }

      // Appends one byte, insisting that room for moreBytesExpected further
      // bytes remains after it. When there is not enough room the input is
      // backed up via MovePrevious(true), which may throw, and false is
      // returned. In count-only mode nothing is stored and the count is bumped.
      internal unsafe bool AddByte(byte b, int moreBytesExpected)
      {
          Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");

          if (_bytes != null)
          {
              bool outOfRoom = _bytes >= _byteEnd - moreBytesExpected;
              if (outOfRoom)
              {
                  // Throw maybe. Check which buffer to back up (only matters if Converting)
                  this.MovePrevious(true);    // Throw if necessary
                  return false;               // No throw, but no store either
              }

              *_bytes = b;
              _bytes++;
          }

          _byteCountResult++;
          return true;
      }

      // Appends a single byte with nothing expected after it.
      internal bool AddByte(byte b1)
      {
          return AddByte(b1, 0);
      }

      // Appends two bytes with nothing expected after them.
      internal bool AddByte(byte b1, byte b2)
      {
          return AddByte(b1, b2, 0);
      }

      // Appends two bytes, stopping at the first one that does not fit.
      internal bool AddByte(byte b1, byte b2, int moreBytesExpected)
      {
          if (!AddByte(b1, 1 + moreBytesExpected))
          {
              return false;
          }

          return AddByte(b2, moreBytesExpected);
      }

      // Appends three bytes with nothing expected after them.
      internal bool AddByte(byte b1, byte b2, byte b3)
      {
          return AddByte(b1, b2, b3, (int)0);
      }

      // Appends three bytes, stopping at the first one that does not fit.
      internal bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
      {
          if (!AddByte(b1, 2 + moreBytesExpected))
          {
              return false;
          }

          if (!AddByte(b2, 1 + moreBytesExpected))
          {
              return false;
          }

          return AddByte(b3, moreBytesExpected);
      }

      // Appends four bytes, stopping at the first one that does not fit.
      internal bool AddByte(byte b1, byte b2, byte b3, byte b4)
      {
          if (!AddByte(b1, 3))
          {
              return false;
          }

          if (!AddByte(b2, 2))
          {
              return false;
          }

          if (!AddByte(b3, 1))
          {
              return false;
          }

          return AddByte(b4, 0);
      }

      // Un-reads the most recent input: the last fallback char while the
      // fallback buffer is falling back, otherwise the last input char (if
      // any). With bThrow set, ThrowBytesOverflow then decides whether to
      // throw.
      internal unsafe void MovePrevious(bool bThrow)
      {
          if (!fallbackBuffer.bFallingBack)
          {
              Debug.Assert(_chars > _charStart ||
                  (bThrow && (_bytes == _byteStart)),
                  "[EncodingByteBuffer.MovePrevious]expected previous data or throw");

              if (_chars > _charStart)
              {
                  _chars--;   // don't use last char
              }
          }
          else
          {
              fallbackBuffer.MovePrevious();   // don't use last fallback
          }

          if (bThrow)
          {
              // Throw? (and reset fallback if not converting)
              _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart);
          }
      }

      // Hands a char that could not be encoded to the fallback buffer,
      // passing the input position by reference.
      internal unsafe bool Fallback(char charFallback)
      {
          bool handled = fallbackBuffer.InternalFallback(charFallback, ref _chars);
          return handled;
      }

      // True while the fallback buffer is not empty or input chars remain.
      internal unsafe bool MoreData
      {
          get
          {
              if (fallbackBuffer.Remaining > 0)
              {
                  return true;
              }

              return _chars < _charEnd;
          }
      }

      // Returns the next char to encode: whatever the fallback buffer hands
      // out if that is non-zero, otherwise the next input char. When both are
      // exhausted the result is the zero char.
      internal unsafe char GetNextChar()
      {
          char next = fallbackBuffer.InternalGetNextChar();
          if (next != 0)
          {
              return next;
          }

          // Nothing in the fallback buffer, return our normal data.
          if (_chars < _charEnd)
          {
              next = *_chars;
              _chars++;
          }

          return next;
      }

      // Number of input chars consumed so far.
      internal unsafe int CharsUsed
      {
          get { return (int)(_chars - _charStart); }
      }

      // Number of bytes produced (or counted) so far.
      internal int Count
      {
          get { return _byteCountResult; }
      }
  }
  1257. }
  1258. }