Encoding.cs 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Globalization;
  6. using System.Threading;
  7. using System.Runtime.InteropServices;
  8. using System.Runtime.Serialization;
  9. using System.Diagnostics.CodeAnalysis;
  10. namespace System.Text
  11. {
  12. // This abstract base class represents a character encoding. The class provides
  13. // methods to convert arrays and strings of Unicode characters to and from
  14. // arrays of bytes. A number of Encoding implementations are provided in
  15. // the System.Text package, including:
  16. //
  17. // ASCIIEncoding, which encodes Unicode characters as single 7-bit
  18. // ASCII characters. This encoding only supports character values between 0x00
  19. // and 0x7F.
  20. // BaseCodePageEncoding, which encapsulates a Windows code page. Any
  21. // installed code page can be accessed through this encoding, and conversions
  22. // are performed using the WideCharToMultiByte and
  23. // MultiByteToWideChar Windows API functions.
  24. // UnicodeEncoding, which encodes each Unicode character as two
  25. // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
  26. // page 1201) encodings are recognized.
  27. // UTF7Encoding, which encodes Unicode characters using the UTF-7
  28. // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
  29. // encoding supports all Unicode character values, and can also be accessed
  30. // as code page 65000.
  31. // UTF8Encoding, which encodes Unicode characters using the UTF-8
  32. // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
  33. // encoding supports all Unicode character values, and can also be accessed
  34. // as code page 65001.
  35. // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
  36. //
  37. // In addition to directly instantiating Encoding objects, an
  38. // application can use the GetEncoding methods and the static
  39. // ASCII, Default, Unicode, UTF7, and UTF8 properties
  40. // of this class to obtain encodings.
  41. //
  42. // Through an encoding, the GetBytes method is used to convert arrays
  43. // of characters to arrays of bytes, and the GetChars method is used to
  44. // convert arrays of bytes to arrays of characters. The GetBytes and
  45. // GetChars methods maintain no state between conversions, and are
  46. // generally intended for conversions of complete blocks of bytes and
  47. // characters in one operation. When the data to be converted is only available
  48. // in sequential blocks (such as data read from a stream) or when the amount of
  49. // data is so large that it needs to be divided into smaller blocks, an
  50. // application may choose to use a Decoder or an Encoder to
  51. // perform the conversion. Decoders and encoders allow sequential blocks of
  52. // data to be converted and they maintain the state required to support
  53. // conversions of data that spans adjacent blocks. Decoders and encoders are
  54. // obtained using the GetDecoder and GetEncoder methods.
  55. //
  56. // The core GetBytes and GetChars methods require the caller
  57. // to provide the destination buffer and ensure that the buffer is large enough
  58. // to hold the entire result of the conversion. When using these methods,
  59. // either directly on an Encoding object or on an associated
  60. // Decoder or Encoder, an application can use one of two methods
  61. // to allocate destination buffers.
  62. //
  63. // The GetByteCount and GetCharCount methods can be used to
  64. // compute the exact size of the result of a particular conversion, and an
  65. // appropriately sized buffer for that conversion can then be allocated.
  66. // The GetMaxByteCount and GetMaxCharCount methods can be
  67. // used to compute the maximum possible size of a conversion of a given
  68. // number of bytes or characters, and a buffer of that size can then be reused
  69. // for multiple conversions.
  70. //
  71. // The first method generally uses less memory, whereas the second method
  72. // generally executes faster.
  73. //
  74. public abstract class Encoding : ICloneable
  75. {
  // Backing instance for Default. On netcore UTF-8 is used as the default
  // encoding because an ANSI code page isn't available; the instance is created
  // with encoderShouldEmitUTF8Identifier set to false.
  private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding =
      new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);

  // Returns the default encoding, i.e. the shared s_defaultEncoding instance.
  public static Encoding Default
  {
      get { return s_defaultEncoding; }
  }
  80. //
  81. // The following values are from mlang.idl. These values
  82. // should be in sync with those in mlang.idl.
  83. //
  // Bit flags; the IsMailNewsDisplay, IsBrowserDisplay, IsMailNewsSave and
  // IsBrowserSave properties test them against CodePageDataItem.Flags.
  84. internal const int MIMECONTF_MAILNEWS = 0x00000001;
  85. internal const int MIMECONTF_BROWSER = 0x00000002;
  86. internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100;
  87. internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200;
  88. // Special Case Code Pages
  // GetEncoding(int) maps CodePageDefault to Default and rejects the four
  // "No*" values below with an ArgumentException.
  89. private const int CodePageDefault = 0;
  90. private const int CodePageNoOEM = 1; // OEM Code page not supported
  91. private const int CodePageNoMac = 2; // MAC code page not supported
  92. private const int CodePageNoThread = 3; // Thread code page not supported
  93. private const int CodePageNoSymbol = 42; // Symbol code page not supported
  94. private const int CodePageUnicode = 1200; // Unicode
  95. private const int CodePageBigEndian = 1201; // Big Endian Unicode
  96. private const int CodePageWindows1252 = 1252; // Windows 1252 code page
  97. // 20936 has same code page as 10008, so we'll special case it
  98. private const int CodePageMacGB2312 = 10008;
  99. private const int CodePageGB2312 = 20936;
  100. private const int CodePageMacKorean = 10003;
  101. private const int CodePageDLLKorean = 20949;
  102. // ISO 2022 Code Pages
  103. private const int ISO2022JP = 50220;
  104. private const int ISO2022JPESC = 50221;
  105. private const int ISO2022JPSISO = 50222;
  106. private const int ISOKorean = 50225;
  107. private const int ISOSimplifiedCN = 50227;
  108. private const int EUCJP = 51932;
  109. private const int ChineseHZ = 52936; // HZ has ~}~{~~ sequences
  110. // 51936 is the same as 936
  111. private const int DuplicateEUCCN = 51936;
  112. private const int EUCCN = 936;
  113. private const int EUCKR = 51949;
  114. // Latin 1 & ASCII Code Pages
  // Both are internal (not private) and are handled directly by GetEncoding(int).
  115. internal const int CodePageASCII = 20127; // ASCII
  116. internal const int ISO_8859_1 = 28591; // Latin1
  117. // ISCII
  118. private const int ISCIIAssemese = 57006;
  119. private const int ISCIIBengali = 57003;
  120. private const int ISCIIDevanagari = 57002;
  121. private const int ISCIIGujarathi = 57010;
  122. private const int ISCIIKannada = 57008;
  123. private const int ISCIIMalayalam = 57009;
  124. private const int ISCIIOriya = 57007;
  125. private const int ISCIIPanjabi = 57011;
  126. private const int ISCIITamil = 57004;
  127. private const int ISCIITelugu = 57005;
  128. // GB18030
  129. private const int GB18030 = 54936;
  130. // Other
  131. private const int ISO_8859_8I = 38598;
  132. private const int ISO_8859_8_Visual = 28598;
  133. // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
  134. private const int ENC50229 = 50229;
  135. // Special code pages
  // GetEncoding(int) maps these four to UTF7, UTF8, UTF32 and BigEndianUTF32.
  136. private const int CodePageUTF7 = 65000;
  137. private const int CodePageUTF8 = 65001;
  138. private const int CodePageUTF32 = 12000;
  139. private const int CodePageUTF32BE = 12001;
  // Code page identifier of this encoding; assigned by the constructors.
  140. internal int _codePage = 0;
  // Code page metadata; stays null until GetDataItem() loads it from EncodingTable.
  141. internal CodePageDataItem _dataItem = null;
  142. // Because of encoders we may be read only
  // Defaults to true; Clone() clears it on the copy so that the copy's
  // EncoderFallback/DecoderFallback setters can be used.
  143. [OptionalField(VersionAdded = 2)]
  144. private bool _isReadOnly = true;
  145. // Encoding (encoder) fallback
  // Both are populated by the constructors (directly or via SetDefaultFallbacks())
  // and exposed through the EncoderFallback/DecoderFallback properties.
  146. internal EncoderFallback encoderFallback = null;
  147. internal DecoderFallback decoderFallback = null;
  // Creates an encoding for code page 0 by delegating to Encoding(int).
  protected Encoding()
      : this(codePage: 0)
  {
  }
  // Creates an encoding for the given code page and installs the default
  // encoder/decoder fallbacks.
  //
  // Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage)
  {
      bool isValidCodePage = codePage >= 0;
      if (!isValidCodePage)
      {
          throw new ArgumentOutOfRangeException(nameof(codePage));
      }

      _codePage = codePage;

      // SetDefaultFallbacks is virtual, so a derived class's override is the
      // one that runs here.
      SetDefaultFallbacks();
  }
  // Lets a derived class supply its own encoder/decoder fallback objects at
  // construction time. This is needed because an encoding is created read-only,
  // which prevents the fallbacks from being set afterwards.
  //
  // A null fallback argument is replaced by the corresponding internal
  // best-fit fallback. Throws ArgumentOutOfRangeException when codePage is
  // negative.
  protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      if (codePage < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(codePage));
      }

      _codePage = codePage;

      if (encoderFallback != null)
      {
          this.encoderFallback = encoderFallback;
      }
      else
      {
          this.encoderFallback = new InternalEncoderBestFitFallback(this);
      }

      if (decoderFallback != null)
      {
          this.decoderFallback = decoderFallback;
      }
      else
      {
          this.decoderFallback = new InternalDecoderBestFitFallback(this);
      }
  }
  // Installs the fallbacks used when the caller did not supply any.
  // This base implementation selects the internal best-fit fallbacks. The
  // method is virtual so that derived encodings can choose differently (the
  // original note here: UTF-X encodings use a "\xFFFD" replacement fallback,
  // ASCII uses a "?" replacement fallback, etc.).
  internal virtual void SetDefaultFallbacks()
  {
      this.encoderFallback = new InternalEncoderBestFitFallback(this);
      this.decoderFallback = new InternalDecoderBestFitFallback(this);
  }
  // Re-encodes an entire byte array: the content of bytes is interpreted using
  // srcEncoding and a newly allocated array holding the dstEncoding form is
  // returned.
  //
  // Throws ArgumentNullException when bytes is null; all other validation is
  // left to the five-argument overload this method forwards to.
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes)
  {
      if (bytes != null)
      {
          return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes));
  }
  // Re-encodes a range of a byte array. The count bytes of bytes starting at
  // index are decoded with srcEncoding, the resulting characters are encoded
  // with dstEncoding, and the result is returned as a new byte array.
  //
  // Throws ArgumentNullException when srcEncoding, dstEncoding or bytes is
  // null (checked in that order). index/count are not validated here; they are
  // passed straight to srcEncoding.GetChars.
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes, int index, int count)
  {
      // The encodings are not arrays, so they are reported with the default
      // ArgumentNullException message rather than SR.ArgumentNull_Array.
      if (srcEncoding == null)
      {
          throw new ArgumentNullException(nameof(srcEncoding));
      }
      if (dstEncoding == null)
      {
          throw new ArgumentNullException(nameof(dstEncoding));
      }
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes),
              SR.ArgumentNull_Array);
      }

      return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
  }
  // Registers an encoding provider by handing it to EncodingProvider.AddProvider.
  // No argument checks are performed here (original note: parameters are
  // validated inside EncodingProvider).
  public static void RegisterProvider(EncodingProvider provider) =>
      EncodingProvider.AddProvider(provider);
  // Returns the encoding associated with a code page identifier.
  //
  // EncodingProvider is consulted first; when it yields nothing, the code
  // pages known to this class are matched.
  //
  // Throws:
  //   ArgumentException           - codepage is 1, 2, 3 or 42 (special values
  //                                 that Win32 allows but this method does not).
  //   ArgumentOutOfRangeException - codepage is outside 0..65535.
  //   NotSupportedException       - codepage is in range but not handled here.
  public static Encoding GetEncoding(int codepage)
  {
      Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      switch (codepage)
      {
          case CodePageDefault:       // 0
              return Default;
          case CodePageUnicode:       // 1200
              return Unicode;
          case CodePageBigEndian:     // 1201
              return BigEndianUnicode;
          case CodePageUTF32:         // 12000
              return UTF32;
          case CodePageUTF32BE:       // 12001
              return BigEndianUTF32;
          case CodePageUTF7:          // 65000
              return UTF7;
          case CodePageUTF8:          // 65001
              return UTF8;
          case CodePageASCII:         // 20127
              return ASCII;
          case ISO_8859_1:            // 28591
              return Latin1;

          // Special code page values that Win32 allows but we don't.
          case CodePageNoOEM:         // 1  CP_OEMCP
          case CodePageNoMac:         // 2  CP_MACCP
          case CodePageNoThread:      // 3  CP_THREAD_ACP
          case CodePageNoSymbol:      // 42 CP_SYMBOL
              throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));

          default:
              break;
      }

      bool inRange = codepage >= 0 && codepage <= 65535;
      if (!inRange)
      {
          throw new ArgumentOutOfRangeException(
              nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
      }

      throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, codepage));
  }
  // Returns the encoding for a code page, configured with the given fallbacks.
  //
  // EncodingProvider is consulted first. Otherwise the encoding returned by
  // GetEncoding(int) is cloned (the clone is not read-only) and the fallbacks
  // are assigned through the EncoderFallback/DecoderFallback setters, which
  // throw ArgumentNullException for a null value.
  public static Encoding GetEncoding(int codepage,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding provided = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
      if (provided != null)
      {
          return provided;
      }

      Encoding writableCopy = (Encoding)GetEncoding(codepage).Clone();
      writableCopy.EncoderFallback = encoderFallback;
      writableCopy.DecoderFallback = decoderFallback;
      return writableCopy;
  }
  // Returns the encoding registered under the given name.
  //
  // EncodingProvider is consulted first; otherwise the name is translated to a
  // code page by EncodingTable.GetCodePageFromName and resolved through
  // GetEncoding(int).
  //
  // NOTE: an encoding that should be obtainable by name needs a matching entry
  // in EncodingTable; without it EncodingTable.GetCodePageFromName() throws.
  public static Encoding GetEncoding(string name)
  {
      Encoding provided = EncodingProvider.GetEncodingFromProvider(name);
      if (provided != null)
      {
          return provided;
      }

      int codePage = EncodingTable.GetCodePageFromName(name);
      return GetEncoding(codePage);
  }
  // Returns the encoding registered under the given name, configured with the
  // given fallbacks.
  //
  // EncodingProvider is consulted first; otherwise the name is translated to a
  // code page by EncodingTable.GetCodePageFromName and resolved through
  // GetEncoding(int, EncoderFallback, DecoderFallback).
  //
  // NOTE: an encoding that should be obtainable by name needs a matching entry
  // in EncodingTable; without it EncodingTable.GetCodePageFromName() throws.
  public static Encoding GetEncoding(string name,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding provided = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
      if (provided != null)
      {
          return provided;
      }

      int codePage = EncodingTable.GetCodePageFromName(name);
      return GetEncoding(codePage, encoderFallback, decoderFallback);
  }
  // Returns the EncodingInfo objects describing our encodings, as reported by
  // EncodingTable.GetEncodings().
  public static EncodingInfo[] GetEncodings() => EncodingTable.GetEncodings();
  // Returns the preamble bytes of this encoding. The base implementation
  // returns an empty array; being virtual, derived encodings may override it.
  public virtual byte[] GetPreamble() => Array.Empty<byte>();
  // Span view of the preamble. The base implementation wraps whatever
  // GetPreamble() returns.
  public virtual ReadOnlySpan<byte> Preamble
  {
      get { return GetPreamble(); }
  }
  // Makes sure _dataItem is populated for _codePage. Does nothing when it is
  // already set; otherwise looks it up in EncodingTable.
  //
  // Throws NotSupportedException when EncodingTable has no data for the code page.
  private void GetDataItem()
  {
      if (_dataItem != null)
      {
          return;
      }

      _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
      if (_dataItem == null)
      {
          throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
      }
  }
  // Name of this encoding for use with mail agent body tags (empty when the
  // encoding may not be used there). Read from the code page data item.
  public virtual string BodyName
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          return _dataItem.BodyName;
      }
  }
  // Human-readable description of the encoding, e.g. "Hebrew (DOS)". Read from
  // the DisplayName of the code page data item.
  public virtual string EncodingName
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          return _dataItem.DisplayName;
      }
  }
  // Name of this encoding for use with mail agent header tags (empty when the
  // encoding may not be used there). Read from the code page data item.
  public virtual string HeaderName
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          return _dataItem.HeaderName;
      }
  }
  // IANA preferred name of this encoding. Read from the code page data item.
  public virtual string WebName
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          return _dataItem.WebName;
      }
  }
  // Windows code page that most closely corresponds to this encoding. Read from
  // the UIFamilyCodePage of the code page data item.
  public virtual int WindowsCodePage
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          return _dataItem.UIFamilyCodePage;
      }
  }
  // True if and only if browser clients can use this encoding for display,
  // i.e. the MIMECONTF_BROWSER bit is set in the data item's Flags.
  public virtual bool IsBrowserDisplay
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          int browserBit = _dataItem.Flags & MIMECONTF_BROWSER;
          return browserBit != 0;
      }
  }
  // True if and only if browser clients can use this encoding for saving,
  // i.e. the MIMECONTF_SAVABLE_BROWSER bit is set in the data item's Flags.
  public virtual bool IsBrowserSave
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          int savableBrowserBit = _dataItem.Flags & MIMECONTF_SAVABLE_BROWSER;
          return savableBrowserBit != 0;
      }
  }
  // True if and only if mail and news clients can use this encoding for
  // display, i.e. the MIMECONTF_MAILNEWS bit is set in the data item's Flags.
  public virtual bool IsMailNewsDisplay
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          int mailNewsBit = _dataItem.Flags & MIMECONTF_MAILNEWS;
          return mailNewsBit != 0;
      }
  }
  // True if and only if mail and news clients can use this encoding for saving
  // documents, i.e. the MIMECONTF_SAVABLE_MAILNEWS bit is set in the data
  // item's Flags.
  public virtual bool IsMailNewsSave
  {
      get
      {
          // Returns immediately when _dataItem is already loaded.
          GetDataItem();
          int savableMailNewsBit = _dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS;
          return savableMailNewsBit != 0;
      }
  }
  // True if and only if the encoding uses only single-byte code points (e.g.
  // ASCII, 1252). The base implementation always answers false; the property
  // is virtual so derived encodings can answer differently.
  public virtual bool IsSingleByte => false;
  // Gets or sets the encoder fallback of this encoding.
  //
  // The setter throws InvalidOperationException when the encoding is read-only
  // and ArgumentNullException when the new value is null (checked in that order).
  public EncoderFallback EncoderFallback
  {
      get { return encoderFallback; }
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          encoderFallback = value ?? throw new ArgumentNullException(nameof(value));
      }
  }
  // Gets or sets the decoder fallback of this encoding.
  //
  // The setter throws InvalidOperationException when the encoding is read-only
  // and ArgumentNullException when the new value is null (checked in that order).
  public DecoderFallback DecoderFallback
  {
      get { return decoderFallback; }
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          decoderFallback = value ?? throw new ArgumentNullException(nameof(value));
      }
  }
  // Creates a shallow (memberwise) copy of this encoding. Unlike the original,
  // the copy is writable: its read-only flag is cleared so the fallback
  // setters can be used on it.
  public virtual object Clone()
  {
      Encoding copy = (Encoding)MemberwiseClone();
      copy._isReadOnly = false;
      return copy;
  }
  // Whether this encoding is read-only. When true, the EncoderFallback and
  // DecoderFallback setters throw InvalidOperationException.
  public bool IsReadOnly => _isReadOnly;
  // Encoding for the ASCII character set: the shared ASCIIEncoding.s_default
  // instance.
  public static Encoding ASCII
  {
      get { return ASCIIEncoding.s_default; }
  }
  // Encoding for the Latin1 character set: the shared Latin1Encoding.s_default
  // instance. Private; it exists for our own optimizations.
  private static Encoding Latin1
  {
      get { return Latin1Encoding.s_default; }
  }
  // Number of bytes needed to encode every character of the given array.
  // Forwards to the (chars, index, count) overload with the full array range.
  //
  // Throws ArgumentNullException when chars is null.
  public virtual int GetByteCount(char[] chars)
  {
      if (chars != null)
      {
          return GetByteCount(chars, 0, chars.Length);
      }

      throw new ArgumentNullException(nameof(chars),
          SR.ArgumentNull_Array);
  }
  // Number of bytes needed to encode the given string. The string is copied
  // into a char array, which is then measured through the
  // (chars, index, count) overload.
  //
  // Throws ArgumentNullException when s is null.
  public virtual int GetByteCount(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] copy = s.ToCharArray();
      return GetByteCount(copy, 0, copy.Length);
  }
  509. // Returns the number of bytes required to encode a range of characters in
  510. // a character array.
  511. //
  // Abstract: each concrete encoding supplies the implementation. The
  // GetByteCount(char[]), GetByteCount(string) and GetByteCount(char*, int)
  // overloads of this class all forward to this method.
  512. public abstract int GetByteCount(char[] chars, int index, int count);
  // Number of bytes needed to encode count characters of s starting at index.
  // The string is pinned and the range is measured by the pointer-based
  // GetByteCount(char*, int) overload.
  //
  // Throws ArgumentNullException when s is null, and ArgumentOutOfRangeException
  // when index or count is negative or the range does not lie within s.
  public int GetByteCount(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s),
              SR.ArgumentNull_String);
      }
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (s.Length - count < index)
      {
          throw new ArgumentOutOfRangeException(nameof(index),
              SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          fixed (char* source = s)
          {
              char* rangeStart = source + index;
              return GetByteCount(rangeStart, count);
          }
      }
  }
  // Pointer-based byte count. This is expected to be the workhorse for NLS
  // encodings, but for the sake of existing overrides the base implementation
  // has to copy the input into a new char[] and call the array overload. That
  // is slow, so avoid this method when calling a 3rd party encoding.
  //
  // Throws ArgumentNullException when chars is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetByteCount(char* chars, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars),
              SR.ArgumentNull_Array);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Copy the characters into managed memory for the array overload.
      char[] managedCopy = new char[count];
      int position = 0;
      while (position < count)
      {
          managedCopy[position] = chars[position];
          position++;
      }

      return GetByteCount(managedCopy, 0, count);
  }
  // Number of bytes needed to encode the characters of the span. The span is
  // pinned (through a non-null pinnable reference) and measured by the
  // pointer-based GetByteCount(char*, int) overload.
  public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
  {
      fixed (char* pinned = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          int length = chars.Length;
          return GetByteCount(pinned, length);
      }
  }
  // Workhorse entry point for NLS encodings; takes an encoder, which may be null.
  // Callers must validate their parameters before calling this internal
  // version: it only asserts. The base implementation ignores the encoder and
  // forwards to GetByteCount(char*, int).
  internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
  {
      Debug.Assert(chars != null);
      Debug.Assert(count >= 0);

      int byteCount = GetByteCount(chars, count);
      return byteCount;
  }
  // Encodes every character of the given array and returns the bytes in a new
  // array. Forwards to the (chars, index, count) overload with the full range.
  //
  // Throws ArgumentNullException when chars is null.
  public virtual byte[] GetBytes(char[] chars)
  {
      if (chars != null)
      {
          return GetBytes(chars, 0, chars.Length);
      }

      throw new ArgumentNullException(nameof(chars),
          SR.ArgumentNull_Array);
  }
  // Encodes count characters of chars starting at index and returns the bytes
  // in a new array. The array is sized with GetByteCount and filled by the
  // five-argument GetBytes overload; no argument checks are made here.
  public virtual byte[] GetBytes(char[] chars, int index, int count)
  {
      int required = GetByteCount(chars, index, count);
      byte[] encoded = new byte[required];
      GetBytes(chars, index, count, encoded, 0);
      return encoded;
  }
  593. // Encodes a range of characters in a character array into a range of bytes
  594. // in a byte array. An exception occurs if the byte array is not large
  595. // enough to hold the complete encoding of the characters. The
  596. // GetByteCount method can be used to determine the exact number of
  597. // bytes that will be produced for a given range of characters.
  598. // Alternatively, the GetMaxByteCount method can be used to
  599. // determine the maximum number of bytes that will be produced for a given
  600. // number of characters, regardless of the actual character values.
  601. //
  // Abstract: each concrete encoding supplies the implementation. Within this
  // class, GetBytes(char[], int, int) and
  // GetBytes(string, int, int, byte[], int) forward to this method.
  602. public abstract int GetBytes(char[] chars, int charIndex, int charCount,
  603. byte[] bytes, int byteIndex);
  // Encodes the whole string and returns the bytes in a new array. The array
  // is sized with GetByteCount(string) and filled by the
  // (string, int, int, byte[], int) overload.
  //
  // Throws ArgumentNullException when s is null.
  public virtual byte[] GetBytes(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s),
              SR.ArgumentNull_String);
      }

      int expected = GetByteCount(s);
      byte[] encoded = new byte[expected];
      int written = GetBytes(s, 0, s.Length, encoded, 0);

      // The count pass and the encode pass are expected to agree.
      Debug.Assert(expected == written);
      return encoded;
  }
  // Encodes count characters of s starting at index and returns the bytes in a
  // new array (Array.Empty<byte>() when nothing is produced). The string is
  // pinned and both the count pass and the encode pass go through the
  // pointer-based overloads.
  //
  // Throws ArgumentNullException when s is null, and ArgumentOutOfRangeException
  // when index or count is negative or the range does not lie within s.
  public byte[] GetBytes(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s),
              SR.ArgumentNull_String);
      }
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (s.Length - count < index)
      {
          throw new ArgumentOutOfRangeException(nameof(index),
              SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          fixed (char* source = s)
          {
              char* rangeStart = source + index;

              int required = GetByteCount(rangeStart, count);
              if (required == 0)
              {
                  return Array.Empty<byte>();
              }

              byte[] encoded = new byte[required];

              // required > 0 here, so taking the address of element 0 is valid.
              fixed (byte* destination = &encoded[0])
              {
                  int written = GetBytes(rangeStart, count, destination, required);
                  Debug.Assert(required == written);
              }

              return encoded;
          }
      }
  }
  // Encodes charCount characters of s starting at charIndex into bytes,
  // beginning at byteIndex, and returns the value produced by the array-based
  // GetBytes overload. The whole string is first copied into a char array;
  // range validation is left to that overload.
  //
  // Throws ArgumentNullException when s is null.
  public virtual int GetBytes(string s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] allChars = s.ToCharArray();
      return GetBytes(allChars, charIndex, charCount, bytes, byteIndex);
  }
  659. // This is our internal workhorse
  660. // Always validate parameters before calling internal version, which will only assert.
  // Base implementation: the encoder argument is not consulted; the call is
  // handed to the four-argument pointer overload unchanged.
  internal virtual unsafe int GetBytes(char* chars, int charCount,
      byte* bytes, int byteCount, EncoderNLS encoder)
      => GetBytes(chars, charCount, bytes, byteCount);
  // We expect this to be the workhorse for NLS Encodings, but for existing
  // ones we need a working (if slow) default implementation.
  668. //
  669. // WARNING WARNING WARNING
  670. //
  671. // WARNING: If this breaks it could be a security threat. Obviously we
  672. // call this internally, so you need to make sure that your pointers, counts
  673. // and indexes are correct when you call this method.
  674. //
  675. // In addition, we have internal code, which will be marked as "safe" calling
  676. // this code. However this code is dependent upon the implementation of an
  677. // external GetBytes() method, which could be overridden by a third party and
  678. // the results of which cannot be guaranteed. We use that result to copy
  679. // the byte[] to our byte* output buffer. If the result count was wrong, we
  680. // could easily overflow our output buffer. Therefore we do an extra test
  681. // when we copy the buffer so that we don't overflow byteCount either.
  [CLSCompliant(false)]
  public virtual unsafe int GetBytes(char* chars, int charCount,
      byte* bytes, int byteCount)
  {
      // Null pointers are rejected; when both are null, bytes is the one reported.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      // Negative counts are rejected; when both are negative, charCount is the one reported.
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Stage the input characters in a managed array.
      char[] managedChars = new char[charCount];
      for (int i = 0; i < charCount; i++)
      {
          managedChars[i] = chars[i];
      }

      // Managed scratch buffer that receives the encoded output.
      byte[] managedBytes = new byte[byteCount];

      // Let the array-based overload do the actual encoding.
      int produced = GetBytes(managedChars, 0, charCount, managedBytes, 0);
      Debug.Assert(produced <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");

      // The reported count may come from an override we do not control, so the
      // copy length is capped at byteCount regardless of what was reported.
      int toCopy = produced < byteCount ? produced : byteCount;
      for (int i = 0; i < toCopy; i++)
      {
          bytes[i] = managedBytes[i];
      }

      return toCopy;
  }
  // Span-based entry point: pins both spans and forwards to the pointer overload.
  public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
  {
      int charLength = chars.Length;
      int byteLength = bytes.Length;

      // NOTE(review): GetNonNullPinnableReference presumably yields a usable
      // reference even for empty spans - confirm against MemoryMarshal.
      fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
          {
              return GetBytes(pChars, charLength, pBytes, byteLength);
          }
      }
  }
  723. // Returns the number of characters produced by decoding the given byte
  724. // array.
  725. //
  public virtual int GetCharCount(byte[] bytes)
  {
      // Whole-array convenience form of the range overload.
      if (bytes != null)
          return GetCharCount(bytes, 0, bytes.Length);

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  735. // Returns the number of characters produced by decoding a range of bytes
  736. // in a byte array.
  737. //
  // Abstract: declared without a body; the derived encoding supplies the counting logic.
  public abstract int GetCharCount(byte[] bytes, int index, int count);
  // We expect this to be the workhorse for NLS Encodings, but for existing
  // ones we need a working (if slow) default implementation.
  [CLSCompliant(false)]
  public virtual unsafe int GetCharCount(byte* bytes, int count)
  {
      // Argument checks: pointer first, then count.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Copy the unmanaged input into a managed array so the array-based
      // overload can do the counting.
      byte[] managedCopy = new byte[count];
      int i = 0;
      while (i < count)
      {
          managedCopy[i] = bytes[i];
          i++;
      }

      return GetCharCount(managedCopy, 0, count);
  }
  // Span-based entry point: pins the span and forwards to the pointer overload.
  public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
  {
      int length = bytes.Length;
      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          return GetCharCount(pBytes, length);
      }
  }
  764. // This is our internal workhorse
  765. // Always validate parameters before calling internal version, which will only assert.
  // Base implementation: the decoder argument is not consulted; the call is
  // handed to the two-argument pointer overload unchanged.
  internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
      => GetCharCount(bytes, count);
  770. // Returns a character array containing the decoded representation of a
  771. // given byte array.
  772. //
  public virtual char[] GetChars(byte[] bytes)
  {
      // Whole-array convenience form of the range overload.
      if (bytes != null)
          return GetChars(bytes, 0, bytes.Length);

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  782. // Returns a character array containing the decoded representation of a
  783. // range of bytes in a byte array.
  784. //
  public virtual char[] GetChars(byte[] bytes, int index, int count)
  {
      // Count first so the output array is exactly the right size, then decode
      // into it starting at offset 0. No argument checks happen here; the
      // arguments go straight to GetCharCount and GetChars.
      int charTotal = GetCharCount(bytes, index, count);
      char[] decoded = new char[charTotal];
      GetChars(bytes, index, count, decoded, 0);
      return decoded;
  }
  791. // Decodes a range of bytes in a byte array into a range of characters in a
  792. // character array. An exception occurs if the character array is not large
  793. // enough to hold the complete decoding of the bytes. The
  794. // GetCharCount method can be used to determine the exact number of
  795. // characters that will be produced for a given range of bytes.
  796. // Alternatively, the GetMaxCharCount method can be used to
  797. // determine the maximum number of characters that will be produced for a
  798. // given number of bytes, regardless of the actual byte values.
  799. //
  // Abstract: no body is provided here, so the array-to-array decoding logic
  // comes from the derived encoding.
  public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
      char[] chars, int charIndex);
  // We expect this to be the workhorse for NLS Encodings, but for existing
  // ones we need a working (if slow) default implementation.
  804. //
  805. // WARNING WARNING WARNING
  806. //
  807. // WARNING: If this breaks it could be a security threat. Obviously we
  808. // call this internally, so you need to make sure that your pointers, counts
  809. // and indexes are correct when you call this method.
  810. //
  811. // In addition, we have internal code, which will be marked as "safe" calling
  812. // this code. However this code is dependent upon the implementation of an
  813. // external GetChars() method, which could be overridden by a third party and
  814. // the results of which cannot be guaranteed. We use that result to copy
  815. // the char[] to our char* output buffer. If the result count was wrong, we
  816. // could easily overflow our output buffer. Therefore we do an extra test
  817. // when we copy the buffer so that we don't overflow charCount either.
  [CLSCompliant(false)]
  public virtual unsafe int GetChars(byte* bytes, int byteCount,
      char* chars, int charCount)
  {
      // Null pointers are rejected; when both are null, chars is the one reported.
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      // Negative counts are rejected; when both are negative, byteCount is the one reported.
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Stage the input bytes in a managed array.
      byte[] managedBytes = new byte[byteCount];
      for (int i = 0; i < byteCount; i++)
      {
          managedBytes[i] = bytes[i];
      }

      // Managed scratch buffer that receives the decoded output.
      char[] managedChars = new char[charCount];

      // Let the array-based overload do the actual decoding.
      int produced = GetChars(managedBytes, 0, byteCount, managedChars, 0);
      Debug.Assert(produced <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");

      // The reported count may come from an override we do not control, so the
      // copy length is capped at charCount regardless of what was reported.
      int toCopy = produced < charCount ? produced : charCount;
      for (int i = 0; i < toCopy; i++)
      {
          chars[i] = managedChars[i];
      }

      return toCopy;
  }
  // Span-based entry point: pins both spans and forwards to the pointer overload.
  public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
  {
      int byteLength = bytes.Length;
      int charLength = chars.Length;

      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
          {
              return GetChars(pBytes, byteLength, pChars, charLength);
          }
      }
  }
  859. // This is our internal workhorse
  860. // Always validate parameters before calling internal version, which will only assert.
  // Base implementation: the decoder argument is not consulted; the call is
  // handed to the four-argument pointer overload unchanged.
  internal virtual unsafe int GetChars(byte* bytes, int byteCount,
      char* chars, int charCount, DecoderNLS decoder)
      => GetChars(bytes, byteCount, chars, charCount);
  // Decodes byteCount bytes starting at the given pointer into a new string.
  // Throws ArgumentNullException for a null pointer and
  // ArgumentOutOfRangeException for a negative count.
  [CLSCompliant(false)]
  public unsafe string GetString(byte* bytes, int byteCount)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // String construction is delegated, with this encoding passed along.
      string decoded = string.CreateStringFromEncoding(bytes, byteCount, this);
      return decoded;
  }
  // Span-based entry point: pins the span and builds the string from the
  // pinned bytes using this encoding.
  public unsafe string GetString(ReadOnlySpan<byte> bytes)
  {
      int length = bytes.Length;
      fixed (byte* pBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          return string.CreateStringFromEncoding(pBytes, length, this);
      }
  }
  882. // Returns the code page identifier of this encoding. The returned value is
  883. // an integer between 0 and 65535 if the encoding has a code page
  884. // identifier, or -1 if the encoding does not represent a code page.
  885. //
  // Read-only view of the _codePage field.
  public virtual int CodePage => _codePage;
  893. // IsAlwaysNormalized
  894. // Returns true if the encoding is always normalized for the specified encoding form
  // Parameterless form: asks the virtual overload about NormalizationForm.FormC.
  public bool IsAlwaysNormalized() => this.IsAlwaysNormalized(NormalizationForm.FormC);
  // Base answer is always false, whatever form is asked about; being virtual,
  // a derived encoding can override it.
  public virtual bool IsAlwaysNormalized(NormalizationForm form) => false;
  904. // Returns a Decoder object for this encoding. The returned object
  905. // can be used to decode a sequence of bytes into a sequence of characters.
  906. // Contrary to the GetChars family of methods, a Decoder can
  907. // convert partial sequences of bytes into partial sequences of characters
  908. // by maintaining the appropriate state between the conversions.
  909. //
  910. // This default implementation returns a Decoder that simply
  911. // forwards calls to the GetCharCount and GetChars methods to
  912. // the corresponding methods of this encoding. Encodings that require state
  913. // to be maintained between successive conversions should override this
  914. // method and return an instance of an appropriate Decoder
  915. // implementation.
  916. //
  // Each call creates a fresh DefaultDecoder wrapped around this encoding.
  public virtual Decoder GetDecoder() => new DefaultDecoder(this);
  921. // Returns an Encoder object for this encoding. The returned object
  922. // can be used to encode a sequence of characters into a sequence of bytes.
  923. // Contrary to the GetBytes family of methods, an Encoder can
  924. // convert partial sequences of characters into partial sequences of bytes
  925. // by maintaining the appropriate state between the conversions.
  926. //
  927. // This default implementation returns an Encoder that simply
  928. // forwards calls to the GetByteCount and GetBytes methods to
  929. // the corresponding methods of this encoding. Encodings that require state
  930. // to be maintained between successive conversions should override this
  931. // method and return an instance of an appropriate Encoder
  932. // implementation.
  933. //
  // Each call creates a fresh DefaultEncoder wrapped around this encoding.
  public virtual Encoder GetEncoder() => new DefaultEncoder(this);
  938. // Returns the maximum number of bytes required to encode a given number of
  939. // characters. This method can be used to determine an appropriate buffer
  940. // size for byte arrays passed to the GetBytes method of this
  941. // encoding or the GetBytes method of an Encoder for this
  942. // encoding. All encodings must guarantee that no buffer overflow
  943. // exceptions will occur if buffers are sized according to the results of
  944. // this method.
  945. //
  946. // WARNING: If you're using something besides the default replacement encoder fallback,
  947. // then you could have more bytes than this returned from an actual call to GetBytes().
  948. //
  // Abstract: declared without a body; the derived encoding supplies the bound.
  public abstract int GetMaxByteCount(int charCount);
  950. // Returns the maximum number of characters produced by decoding a given
  951. // number of bytes. This method can be used to determine an appropriate
  952. // buffer size for character arrays passed to the GetChars method of
  953. // this encoding or the GetChars method of a Decoder for this
  954. // encoding. All encodings must guarantee that no buffer overflow
  955. // exceptions will occur if buffers are sized according to the results of
  956. // this method.
  957. //
  // Abstract: declared without a body; the derived encoding supplies the bound.
  public abstract int GetMaxCharCount(int byteCount);
  959. // Returns a string containing the decoded representation of a given byte
  960. // array.
  961. //
  public virtual string GetString(byte[] bytes)
  {
      // Whole-array convenience form of the range overload.
      if (bytes != null)
          return GetString(bytes, 0, bytes.Length);

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  969. // Returns a string containing the decoded representation of a range of
  970. // bytes in a byte array.
  971. //
  972. // Internally we override this for performance
  973. //
  public virtual string GetString(byte[] bytes, int index, int count)
  {
      // Decode to a char[] first, then build the string from that array.
      // No argument checks happen here; they are left to GetChars.
      char[] decoded = GetChars(bytes, index, count);
      return new string(decoded);
  }
  978. // Returns an encoding for Unicode format. The returned encoding will be
  979. // an instance of the UnicodeEncoding class.
  980. //
  981. // It will use little endian byte order, but will detect
  982. // input in big endian if it finds a byte order mark per Unicode 2.0.
  // Hands back the instance stored in UnicodeEncoding.s_littleEndianDefault.
  public static Encoding Unicode
  {
      get { return UnicodeEncoding.s_littleEndianDefault; }
  }
  984. // Returns an encoding for Unicode format. The returned encoding will be
  985. // an instance of the UnicodeEncoding class.
  986. //
  987. // It will use big endian byte order, but will detect
  988. // input in little endian if it finds a byte order mark per Unicode 2.0.
  // Hands back the instance stored in UnicodeEncoding.s_bigEndianDefault.
  public static Encoding BigEndianUnicode
  {
      get { return UnicodeEncoding.s_bigEndianDefault; }
  }
  990. // Returns an encoding for the UTF-7 format. The returned encoding will be
  991. // an instance of the UTF7Encoding class.
  // Hands back the instance stored in UTF7Encoding.s_default.
  public static Encoding UTF7
  {
      get { return UTF7Encoding.s_default; }
  }
  993. // Returns an encoding for the UTF-8 format. The returned encoding will be
  994. // an instance of the UTF8Encoding class.
  // Hands back the instance stored in UTF8Encoding.s_default.
  public static Encoding UTF8
  {
      get { return UTF8Encoding.s_default; }
  }
  996. // Returns an encoding for the UTF-32 format. The returned encoding will be
  997. // an instance of the UTF32Encoding class.
  // Hands back the instance stored in UTF32Encoding.s_default.
  public static Encoding UTF32
  {
      get { return UTF32Encoding.s_default; }
  }
  999. // Returns an encoding for the UTF-32 format. The returned encoding will be
  1000. // an instance of the UTF32Encoding class.
  1001. //
  1002. // It will use big endian byte order.
  // Private accessor for the instance stored in UTF32Encoding.s_bigEndianDefault.
  private static Encoding BigEndianUTF32
  {
      get { return UTF32Encoding.s_bigEndianDefault; }
  }
  // Two encodings are equal when the other object is an Encoding with the same
  // code page and with encoder and decoder fallbacks that report equality.
  public override bool Equals(object value)
  {
      Encoding other = value as Encoding;

      // Short-circuits left to right: a non-Encoding (or null) argument never
      // reaches the field and fallback comparisons.
      return (other != null) &&
          (_codePage == other._codePage) &&
          (EncoderFallback.Equals(other.EncoderFallback)) &&
          (DecoderFallback.Equals(other.DecoderFallback));
  }
  // Hash is the plain sum of the code page and the two fallbacks' hash codes,
  // i.e. it is built from the same three members that Equals compares.
  public override int GetHashCode()
  {
      int hash = _codePage;
      hash += this.EncoderFallback.GetHashCode();
      hash += this.DecoderFallback.GetHashCode();
      return hash;
  }
  // Base implementation has no best-fit data and returns an empty array.
  internal virtual char[] GetBestFitUnicodeToBytesData() => Array.Empty<char>();
  // Base implementation has no best-fit data and returns an empty array.
  internal virtual char[] GetBestFitBytesToUnicodeData() => Array.Empty<char>();
  // Always throws ArgumentException for the "bytes" parameter.
  internal void ThrowBytesOverflow()
  {
      // The message includes the encoding name and the encoder fallback's type,
      // which helps when a user-supplied fallback reports a wrong maximum size.
      string message = SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType());
      throw new ArgumentException(message, "bytes");
  }
  // Throws the byte-overflow exception unless an encoder is present, it is not
  // in throw-on-overflow mode, and something has already been encoded; in that
  // one case only the encoder's must-flush state is cleared.
  internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
  {
      bool mustThrow = encoder == null || encoder._throwOnOverflow || nothingEncoded;
      if (mustThrow)
      {
          // Reset the encoder's fallback buffer, if it has one, before throwing.
          if (encoder != null && encoder.InternalHasFallbackBuffer)
          {
              encoder.FallbackBuffer.InternalReset();
          }

          // The message names the fallback type in case a user-supplied
          // fallback is the cause of the overflow.
          ThrowBytesOverflow();
      }

      // Not thrown: we are in convert and have to remember our flushing.
      // encoder is non-null here, because a null encoder always takes the
      // throwing branch above.
      encoder.ClearMustFlush();
  }
  // Always throws ArgumentException for the "chars" parameter.
  internal void ThrowCharsOverflow()
  {
      // The message includes the encoding name and the decoder fallback's type,
      // which helps when a user-supplied fallback reports a wrong maximum size.
      string message = SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType());
      throw new ArgumentException(message, "chars");
  }
  // Throws the char-overflow exception unless a decoder is present, it is not
  // in throw-on-overflow mode, and something has already been decoded; in that
  // one case only the decoder's must-flush state is cleared.
  internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
  {
      bool mustThrow = decoder == null || decoder._throwOnOverflow || nothingDecoded;
      if (mustThrow)
      {
          // Reset the decoder's fallback buffer, if it has one, before throwing.
          if (decoder != null && decoder.InternalHasFallbackBuffer)
          {
              decoder.FallbackBuffer.InternalReset();
          }

          // The message names the fallback type in case a user-supplied
          // fallback is the cause of the overflow.
          ThrowCharsOverflow();
      }

      // Not thrown: we are in convert and have to remember our flushing.
      // decoder is non-null here, because a null decoder always takes the
      // throwing branch above.
      decoder.ClearMustFlush();
  }
  // Encoder that keeps no state of its own: every call is forwarded to the
  // wrapped Encoding, and the flush argument is ignored throughout.
  internal sealed class DefaultEncoder : Encoder, IObjectReference
  {
      // The encoding that performs all of the work.
      private Encoding _encoding;

      public DefaultEncoder(Encoding encoding)
      {
          _encoding = encoding;
      }

      // IObjectReference member; not supported here and always throws.
      public object GetRealObject(StreamingContext context)
      {
          throw new PlatformNotSupportedException();
      }

      // Byte count for a range of a char array, as reported by the wrapped
      // encoding. flush does not influence the result.
      public override int GetByteCount(char[] chars, int index, int count, bool flush)
          => _encoding.GetByteCount(chars, index, count);

      // Pointer form of the count; flush does not influence the result.
      public override unsafe int GetByteCount(char* chars, int count, bool flush)
          => _encoding.GetByteCount(chars, count);

      // Encodes charCount characters from chars (starting at charIndex) into
      // bytes (starting at byteIndex) by calling the wrapped encoding's
      // array-based GetBytes. flush is accepted but not used. Any exception
      // comes from the wrapped encoding.
      public override int GetBytes(char[] chars, int charIndex, int charCount,
          byte[] bytes, int byteIndex, bool flush)
          => _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);

      // Pointer form of the encode; flush is accepted but not used.
      public override unsafe int GetBytes(char* chars, int charCount,
          byte* bytes, int byteCount, bool flush)
          => _encoding.GetBytes(chars, charCount, bytes, byteCount);
  }
  // Decoder that keeps no state of its own: every call ends up in the wrapped
  // Encoding, and the flush argument is ignored throughout.
  internal sealed class DefaultDecoder : Decoder, IObjectReference
  {
      // The encoding that performs all of the work.
      private Encoding _encoding;

      public DefaultDecoder(Encoding encoding)
      {
          _encoding = encoding;
      }

      // IObjectReference member; not supported here and always throws.
      public object GetRealObject(StreamingContext context)
      {
          throw new PlatformNotSupportedException();
      }

      // Char count for a range of a byte array; routes through the flush
      // overload with flush = false.
      public override int GetCharCount(byte[] bytes, int index, int count)
          => GetCharCount(bytes, index, count, false);

      // Char count as reported by the wrapped encoding; flush is not used.
      public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
          => _encoding.GetCharCount(bytes, index, count);

      // Pointer form of the count; just the encoding's answer, flush is not used.
      public override unsafe int GetCharCount(byte* bytes, int count, bool flush)
          => _encoding.GetCharCount(bytes, count);

      // Decodes byteCount bytes from bytes (starting at byteIndex) into chars
      // (starting at charIndex); routes through the flush overload with
      // flush = false.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
          char[] chars, int charIndex)
          => GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);

      // Array form of the decode, performed by the wrapped encoding. flush is
      // accepted but not used. Any exception comes from the wrapped encoding.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
          char[] chars, int charIndex, bool flush)
          => _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);

      // Pointer form of the decode; just the encoding's version, flush is not used.
      public override unsafe int GetChars(byte* bytes, int byteCount,
          char* chars, int charCount, bool flush)
          => _encoding.GetChars(bytes, byteCount, chars, charCount);
  }
  // Helper that walks an input byte range and an output char range during
  // decoding. It tracks how many chars have been produced and routes bytes that
  // cannot be decoded through the decoder fallback buffer.
  //
  // When the char pointer passed to the constructor is null, nothing is stored:
  // AddChar and Fallback only advance the running count (see Count).
  internal class EncodingCharBuffer
  {
      // Output cursor, plus the fixed bounds of the output range.
      private unsafe char* _chars;
      private readonly unsafe char* _charStart;
      private readonly unsafe char* _charEnd;

      // Number of chars produced (or that would have been produced) so far.
      private int _charCountResult = 0;

      private readonly Encoding _enc;
      private readonly DecoderNLS _decoder;

      // Fixed bounds of the input range, plus the input cursor.
      private readonly unsafe byte* _byteStart;
      private readonly unsafe byte* _byteEnd;
      private unsafe byte* _bytes;

      private readonly DecoderFallbackBuffer _fallbackBuffer;

      // enc       - encoding used for overflow reporting and, when decoder is
      //             null, as the source of the fallback buffer.
      // decoder   - optional decoder; when present its fallback buffer is used.
      // charStart / charCount - output range (charStart may be null, see above).
      // byteStart / byteCount - input range.
      internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
          byte* byteStart, int byteCount)
      {
          _enc = enc;
          _decoder = decoder;

          _chars = charStart;
          _charStart = charStart;
          _charEnd = charStart + charCount;

          _byteStart = byteStart;
          _bytes = byteStart;
          _byteEnd = byteStart + byteCount;

          if (_decoder == null)
              _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
          else
              _fallbackBuffer = _decoder.FallbackBuffer;

          // If we're getting chars or getting char count we don't expect to have
          // to remember fallbacks between calls (so it should be empty)
          Debug.Assert(_fallbackBuffer.Remaining == 0,
              "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
          _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
      }

      // Appends one char that was decoded from numBytes input bytes.
      // Returns false (after backing the input cursor up by numBytes) when the
      // output range is full and ThrowCharsOverflow chose not to throw.
      internal unsafe bool AddChar(char ch, int numBytes)
      {
          if (_chars != null)
          {
              if (_chars >= _charEnd)
              {
                  // Throw maybe
                  _bytes -= numBytes;                                         // Didn't encode these bytes
                  _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
                  return false;                                               // No throw, but no store either
              }

              *(_chars++) = ch;
          }

          _charCountResult++;
          return true;
      }

      // Appends one char that was decoded from a single input byte.
      internal unsafe bool AddChar(char ch)
      {
          return AddChar(ch, 1);
      }

      // Appends two chars that were decoded together from numBytes input bytes.
      // Room for both is checked up front so that a pair is never half-written.
      internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
      {
          // Need room for 2 chars
          if (_chars >= _charEnd - 1)
          {
              // Throw maybe
              _bytes -= numBytes;                                         // Didn't encode these bytes
              _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
              return false;                                               // No throw, but no store either
          }

          return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
      }

      // Moves the input cursor by count bytes (count may be negative).
      internal unsafe void AdjustBytes(int count)
      {
          _bytes += count;
      }

      // True while at least one unread input byte remains.
      internal unsafe bool MoreData
      {
          get
          {
              return _bytes < _byteEnd;
          }
      }

      // Do we have count more bytes?
      internal unsafe bool EvenMoreData(int count)
      {
          return (_bytes <= _byteEnd - count);
      }

      // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
      // but we'll double check just to make sure. Returns 0 when the input is exhausted.
      internal unsafe byte GetNextByte()
      {
          Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more data");
          if (_bytes >= _byteEnd)
              return 0;
          return *(_bytes++);
      }

      // Number of input bytes consumed so far.
      internal unsafe int BytesUsed
      {
          get
          {
              return (int)(_bytes - _byteStart);
          }
      }

      // Falls back a single undecodable byte.
      internal unsafe bool Fallback(byte fallbackByte)
      {
          // Build our buffer
          byte[] byteBuffer = new byte[] { fallbackByte };

          // Do the fallback and add the data.
          return Fallback(byteBuffer);
      }

      // Falls back a two-byte undecodable sequence.
      internal unsafe bool Fallback(byte byte1, byte byte2)
      {
          // Build our buffer
          byte[] byteBuffer = new byte[] { byte1, byte2 };

          // Do the fallback and add the data.
          return Fallback(byteBuffer);
      }

      // Falls back a four-byte undecodable sequence.
      internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
      {
          // Build our buffer
          byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };

          // Do the fallback and add the data.
          return Fallback(byteBuffer);
      }

      // Runs the fallback for the given bytes. When storing output, the fallback
      // writes through the output cursor and the count grows by however far the
      // cursor moved; when only counting, the count grows by the fallback's own
      // result. Returns false when the fallback output did not fit and
      // ThrowCharsOverflow chose not to throw.
      internal unsafe bool Fallback(byte[] byteBuffer)
      {
          // Do the fallback and add the data.
          if (_chars != null)
          {
              char* pTemp = _chars;
              if (_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars) == false)
              {
                  // Throw maybe
                  _bytes -= byteBuffer.Length;                             // Didn't use how many ever bytes we're falling back
                  _fallbackBuffer.InternalReset();                         // We didn't use this fallback.
                  _enc.ThrowCharsOverflow(_decoder, _chars == _charStart); // Throw?
                  return false;                                            // No throw, but no store either
              }
              _charCountResult += unchecked((int)(_chars - pTemp));
          }
          else
          {
              _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
          }

          return true;
      }

      // Running total of chars produced or counted.
      internal unsafe int Count
      {
          get
          {
              return _charCountResult;
          }
      }
  }
  // Helper that walks a char input range and a byte output range in step while
  // encoding. It tracks how many bytes have been produced and routes characters
  // through an EncoderFallbackBuffer when one is in play. When the output pointer
  // is null nothing is stored; bytes are only counted.
  internal class EncodingByteBuffer
  {
      // Output range: current write position, first byte, one-past-last byte.
      private unsafe byte* _bytes;
      private unsafe byte* _byteStart;
      private unsafe byte* _byteEnd;

      // Input range: current read position, first char, one-past-last char.
      private unsafe char* _chars;
      private unsafe char* _charStart;
      private unsafe char* _charEnd;

      // Number of bytes added so far (implicitly starts at zero).
      private int _byteCountResult;

      private Encoding _enc;
      private EncoderNLS _encoder;

      internal EncoderFallbackBuffer fallbackBuffer;

      // Captures the input/output ranges and picks the fallback buffer: the
      // encoder's own buffer when an encoder is supplied, otherwise a new one
      // created from the encoding's EncoderFallback.
      //
      // Throws ArgumentException when an encoder is supplied with _throwOnOverflow
      // set and its fallback buffer still reports remaining data.
      internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
          byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
      {
          _enc = inEncoding;
          _encoder = inEncoder;

          _charStart = inCharStart;
          _chars = inCharStart;
          _charEnd = inCharStart + inCharCount;

          _byteStart = inByteStart;
          _bytes = inByteStart;
          _byteEnd = inByteStart + inByteCount;

          if (_encoder != null)
          {
              // Fetch the encoder's buffer first; the emptiness check below
              // inspects this same buffer.
              this.fallbackBuffer = _encoder.FallbackBuffer;

              // If we're not converting we must not have data in our fallback buffer
              bool leftoverFallbackData = _encoder._throwOnOverflow &&
                                          _encoder.InternalHasFallbackBuffer &&
                                          this.fallbackBuffer.Remaining > 0;
              if (leftoverFallbackData)
              {
                  throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
                      _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
              }
          }
          else
          {
              this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
          }

          // The last argument tells the fallback buffer whether bytes are really
          // being stored (non-null output) or merely counted.
          fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
      }

      // Adds one byte. moreBytesExpected is the number of further bytes the
      // caller intends to add right after this one; the byte is stored only if
      // there is room for it plus that many more. On insufficient room the
      // input is backed up via MovePrevious(true) (which may throw) and false is
      // returned. With a null output pointer the byte is only counted.
      internal unsafe bool AddByte(byte b, int moreBytesExpected)
      {
          Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");

          // Counting only: nothing to store, nothing to overflow.
          if (_bytes == null)
          {
              _byteCountResult++;
              return true;
          }

          if (_bytes >= _byteEnd - moreBytesExpected)
          {
              // Throw maybe. Check which buffer to back up (only matters if Converting)
              this.MovePrevious(true); // Throw if necessary
              return false;            // No throw, but no store either
          }

          *_bytes = b;
          _bytes++;
          _byteCountResult++;
          return true;
      }

      // Adds a single byte with no follow-up bytes expected.
      internal unsafe bool AddByte(byte b1)
      {
          return AddByte(b1, moreBytesExpected: 0);
      }

      // Adds two bytes with no follow-up bytes expected.
      internal unsafe bool AddByte(byte b1, byte b2)
      {
          return AddByte(b1, b2, moreBytesExpected: 0);
      }

      // Adds two bytes, stopping at the first one that cannot be added.
      internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
      {
          if (!AddByte(b1, moreBytesExpected: 1 + moreBytesExpected))
          {
              return false;
          }
          return AddByte(b2, moreBytesExpected: moreBytesExpected);
      }

      // Adds three bytes with no follow-up bytes expected.
      internal unsafe bool AddByte(byte b1, byte b2, byte b3)
      {
          return AddByte(b1, b2, b3, moreBytesExpected: 0);
      }

      // Adds three bytes, stopping at the first one that cannot be added.
      internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
      {
          if (!AddByte(b1, moreBytesExpected: 2 + moreBytesExpected))
          {
              return false;
          }
          if (!AddByte(b2, moreBytesExpected: 1 + moreBytesExpected))
          {
              return false;
          }
          return AddByte(b3, moreBytesExpected: moreBytesExpected);
      }

      // Adds four bytes, stopping at the first one that cannot be added.
      internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
      {
          // The first three are added expecting one more byte (3, 2, 1 remaining),
          // then the last is added expecting none.
          return AddByte(b1, b2, b3, moreBytesExpected: 1) &&
                 AddByte(b4, moreBytesExpected: 0);
      }

      // Un-consumes the most recent input: steps the fallback buffer back when it
      // reports bFallingBack, otherwise steps the char pointer back one position
      // (if it is past the start). When bThrow is set, then hands off to
      // ThrowBytesOverflow, telling it whether no bytes have been written yet.
      internal unsafe void MovePrevious(bool bThrow)
      {
          if (fallbackBuffer.bFallingBack)
          {
              fallbackBuffer.MovePrevious(); // don't use last fallback
          }
          else
          {
              Debug.Assert(_chars > _charStart || (bThrow && _bytes == _byteStart),
                  "[EncodingByteBuffer.MovePrevious]expected previous data or throw");

              if (_chars > _charStart)
              {
                  _chars--; // don't use last char
              }
          }

          if (bThrow)
          {
              // Throw? (and reset fallback if not converting)
              _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart);
          }
      }

      // Passes charFallback to the fallback buffer; the current char pointer is
      // handed over by reference, so the fallback buffer is able to change it.
      internal unsafe bool Fallback(char charFallback)
      {
          return fallbackBuffer.InternalFallback(charFallback, ref _chars);
      }

      // True while the fallback buffer has remaining data or the char pointer has
      // not reached the end of the input range.
      internal unsafe bool MoreData
      {
          get
          {
              if (fallbackBuffer.Remaining > 0)
              {
                  return true;
              }
              return _chars < _charEnd;
          }
      }

      // Returns the next character to process. The fallback buffer is asked
      // first; if it yields 0, the next input char is consumed instead. Yields 0
      // when the input range is also exhausted.
      internal unsafe char GetNextChar()
      {
          char next = fallbackBuffer.InternalGetNextChar();

          if (next == 0 && _chars < _charEnd)
          {
              next = *_chars;
              _chars++;
          }

          return next;
      }

      // Distance, in chars, from the start of the input range to the current
      // char pointer.
      internal unsafe int CharsUsed
      {
          get { return (int)(_chars - _charStart); }
      }

      // Number of bytes added so far (stored or merely counted).
      internal unsafe int Count
      {
          get { return _byteCountResult; }
      }
  }
  1470. }
  1471. }