Encoding.cs 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Diagnostics.CodeAnalysis;
  6. using System.Runtime.InteropServices;
  7. using System.Runtime.Serialization;
  8. namespace System.Text
  9. {
  10. // This abstract base class represents a character encoding. The class provides
  11. // methods to convert arrays and strings of Unicode characters to and from
  12. // arrays of bytes. A number of Encoding implementations are provided in
  13. // the System.Text package, including:
  14. //
  15. // ASCIIEncoding, which encodes Unicode characters as single 7-bit
  16. // ASCII characters. This encoding only supports character values between 0x00
  17. // and 0x7F.
  18. // BaseCodePageEncoding, which encapsulates a Windows code page. Any
  19. // installed code page can be accessed through this encoding, and conversions
  20. // are performed using the WideCharToMultiByte and
  21. // MultiByteToWideChar Windows API functions.
  22. // UnicodeEncoding, which encodes each Unicode character as two
  23. // consecutive bytes. Both little-endian (code page 1200) and big-endian (code
  24. // page 1201) encodings are recognized.
  25. // UTF7Encoding, which encodes Unicode characters using the UTF-7
  26. // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
  27. // encoding supports all Unicode character values, and can also be accessed
  28. // as code page 65000.
  29. // UTF8Encoding, which encodes Unicode characters using the UTF-8
  30. // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
  31. // encoding supports all Unicode character values, and can also be accessed
  32. // as code page 65001.
  33. // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
  34. //
  35. // In addition to directly instantiating Encoding objects, an
  36. // application can use the GetEncoding methods and the ASCII,
  37. // Default, Unicode, UTF7, and UTF8 members of this class
  38. // to obtain encodings.
  39. //
  40. // Through an encoding, the GetBytes method is used to convert arrays
  41. // of characters to arrays of bytes, and the GetChars method is used to
  42. // convert arrays of bytes to arrays of characters. The GetBytes and
  43. // GetChars methods maintain no state between conversions, and are
  44. // generally intended for conversions of complete blocks of bytes and
  45. // characters in one operation. When the data to be converted is only available
  46. // in sequential blocks (such as data read from a stream) or when the amount of
  47. // data is so large that it needs to be divided into smaller blocks, an
  48. // application may choose to use a Decoder or an Encoder to
  49. // perform the conversion. Decoders and encoders allow sequential blocks of
  50. // data to be converted and they maintain the state required to support
  51. // conversions of data that spans adjacent blocks. Decoders and encoders are
  52. // obtained using the GetDecoder and GetEncoder methods.
  53. //
  54. // The core GetBytes and GetChars methods require the caller
  55. // to provide the destination buffer and ensure that the buffer is large enough
  56. // to hold the entire result of the conversion. When using these methods,
  57. // either directly on an Encoding object or on an associated
  58. // Decoder or Encoder, an application can use one of two methods
  59. // to allocate destination buffers.
  60. //
  61. // The GetByteCount and GetCharCount methods can be used to
  62. // compute the exact size of the result of a particular conversion, and an
  63. // appropriately sized buffer for that conversion can then be allocated.
  64. // The GetMaxByteCount and GetMaxCharCount methods can be
  65. // used to compute the maximum possible size of a conversion of a given
  66. // number of bytes or characters, and a buffer of that size can then be reused
  67. // for multiple conversions.
  68. //
  69. // The first method generally uses less memory, whereas the second method
  70. // generally executes faster.
  71. //
  72. public abstract partial class Encoding : ICloneable
  73. {
  // .NET Core has no ANSI code page to fall back on, so the default encoding is a
  // UTF-8 instance constructed with encoderShouldEmitUTF8Identifier: false.
  private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding =
      new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);

  // Returns the shared default encoding instance held in s_defaultEncoding.
  public static Encoding Default
  {
      get { return s_defaultEncoding; }
  }
  // MIME content flags. These values mirror mlang.idl and must stay in sync with it.
  internal const int MIMECONTF_MAILNEWS = 0x1;
  internal const int MIMECONTF_BROWSER = 0x2;
  internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x100;
  internal const int MIMECONTF_SAVABLE_BROWSER = 0x200;

  // Special-case code pages.
  private const int CodePageDefault = 0;
  // OEM code page (not supported).
  private const int CodePageNoOEM = 1;
  // MAC code page (not supported).
  private const int CodePageNoMac = 2;
  // Thread code page (not supported).
  private const int CodePageNoThread = 3;
  // Symbol code page (not supported).
  private const int CodePageNoSymbol = 42;
  // Unicode (little endian).
  private const int CodePageUnicode = 1200;
  // Unicode (big endian).
  private const int CodePageBigEndian = 1201;
  // Windows 1252.
  private const int CodePageWindows1252 = 1252;

  // 20936 has the same code page as 10008, so it is special-cased.
  private const int CodePageMacGB2312 = 10008;
  private const int CodePageGB2312 = 20936;
  private const int CodePageMacKorean = 10003;
  private const int CodePageDLLKorean = 20949;

  // ISO 2022 code pages.
  private const int ISO2022JP = 50220;
  private const int ISO2022JPESC = 50221;
  private const int ISO2022JPSISO = 50222;
  private const int ISOKorean = 50225;
  private const int ISOSimplifiedCN = 50227;
  private const int EUCJP = 51932;
  // HZ has ~}~{~~ sequences.
  private const int ChineseHZ = 52936;

  // 51936 is the same as 936.
  private const int DuplicateEUCCN = 51936;
  private const int EUCCN = 936;
  private const int EUCKR = 51949;

  // Latin 1 and ASCII code pages.
  internal const int CodePageASCII = 20127;
  internal const int ISO_8859_1 = 28591;

  // ISCII code pages, listed in ascending numeric order.
  private const int ISCIIDevanagari = 57002;
  private const int ISCIIBengali = 57003;
  private const int ISCIITamil = 57004;
  private const int ISCIITelugu = 57005;
  private const int ISCIIAssemese = 57006;
  private const int ISCIIOriya = 57007;
  private const int ISCIIKannada = 57008;
  private const int ISCIIMalayalam = 57009;
  private const int ISCIIGujarathi = 57010;
  private const int ISCIIPanjabi = 57011;

  // GB18030.
  private const int GB18030 = 54936;

  // Other.
  private const int ISO_8859_8I = 38598;
  private const int ISO_8859_8_Visual = 28598;

  // "Chinese Traditional (ISO-2022)"; currently unsupported.
  private const int ENC50229 = 50229;

  // UTF-7, UTF-8 and UTF-32 code pages.
  private const int CodePageUTF7 = 65000;
  private const int CodePageUTF8 = 65001;
  private const int CodePageUTF32 = 12000;
  private const int CodePageUTF32BE = 12001;
  // Code page identifier of this encoding; assigned by the constructors.
  internal int _codePage;

  // Code page metadata; filled in on first use by GetDataItem().
  internal CodePageDataItem? _dataItem;

  // Instances start out read-only; Clone() clears this flag on the copy it returns.
  [OptionalField(VersionAdded = 2)]
  private bool _isReadOnly = true;

  // Fallback objects used by the encoder and the decoder. They are assigned by
  // the constructors (directly or through SetDefaultFallbacks()).
  internal EncoderFallback encoderFallback = null!;
  internal DecoderFallback decoderFallback = null!;
  // Creates an encoding for code page 0 by delegating to the single-argument constructor.
  protected Encoding()
      : this(codePage: 0)
  {
  }
  // Creates an encoding for the given code page and installs the default fallbacks.
  // Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage)
  {
      if (codePage < 0)
          throw new ArgumentOutOfRangeException(nameof(codePage));

      _codePage = codePage;

      // Virtual call: SetDefaultFallbacks() decides which fallbacks are installed.
      SetDefaultFallbacks();
  }
  // Lets subclasses supply their own encoder/decoder fallback objects at construction
  // time. This is needed because an encoding object is always created read-only and
  // does not allow its fallbacks to be set after creation.
  // A null fallback argument is replaced with the corresponding best-fit fallback.
  // Throws ArgumentOutOfRangeException when codePage is negative.
  protected Encoding(int codePage, EncoderFallback? encoderFallback, DecoderFallback? decoderFallback)
  {
      if (codePage < 0)
          throw new ArgumentOutOfRangeException(nameof(codePage));

      _codePage = codePage;

      if (encoderFallback is null)
          encoderFallback = new InternalEncoderBestFitFallback(this);
      this.encoderFallback = encoderFallback;

      if (decoderFallback is null)
          decoderFallback = new InternalDecoderBestFitFallback(this);
      this.decoderFallback = decoderFallback;
  }
  // Installs the fallbacks used when none are supplied: best-fit fallbacks bound to
  // this instance. The method is virtual so that derived encodings can substitute
  // their own defaults (for example a "\xFFFD" replacement for UTF-X encodings or
  // a "?" replacement for ASCII).
  internal virtual void SetDefaultFallbacks()
  {
      this.encoderFallback = new InternalEncoderBestFitFallback(this);
      this.decoderFallback = new InternalDecoderBestFitFallback(this);
  }
  // Re-encodes an entire byte array: the content of bytes is decoded with srcEncoding
  // and encoded again with dstEncoding, and the result is returned as a new array.
  // Throws ArgumentNullException when bytes is null; all other validation is done by
  // the range-based overload this method forwards to.
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes)
  {
      if (bytes is null)
      {
          throw new ArgumentNullException(nameof(bytes));
      }

      int length = bytes.Length;
      return Convert(srcEncoding, dstEncoding, bytes, 0, length);
  }
  // Re-encodes a slice of a byte array: count bytes starting at index are decoded
  // with srcEncoding, the resulting characters are encoded with dstEncoding, and
  // the encoded bytes are returned as a new array.
  // Throws ArgumentNullException when srcEncoding, dstEncoding or bytes is null
  // (checked in that order).
  public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
      byte[] bytes, int index, int count)
  {
      if (srcEncoding == null)
          throw new ArgumentNullException(nameof(srcEncoding), SR.ArgumentNull_Array);

      if (dstEncoding == null)
          throw new ArgumentNullException(nameof(dstEncoding), SR.ArgumentNull_Array);

      if (bytes == null)
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);

      // Decode first, then encode the intermediate characters.
      char[] decoded = srcEncoding.GetChars(bytes, index, count);
      return dstEncoding.GetBytes(decoded);
  }
  // Registers an encoding provider. The argument is validated by EncodingProvider.
  public static void RegisterProvider(EncodingProvider provider) =>
      EncodingProvider.AddProvider(provider);
  // Returns the encoding for a code page identifier.
  // Registered providers are consulted first; otherwise only the built-in code pages
  // listed below are recognized.
  // Throws ArgumentException for the special Win32 values 1, 2, 3 and 42,
  // ArgumentOutOfRangeException for values outside 0..65535, and
  // NotSupportedException for any other unrecognized code page.
  public static Encoding GetEncoding(int codepage)
  {
      Encoding? fromProvider = EncodingProvider.GetEncodingFromProvider(codepage);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      switch (codepage)
      {
          case CodePageDefault:   // 0
              return Default;
          case CodePageUnicode:   // 1200
              return Unicode;
          case CodePageBigEndian: // 1201
              return BigEndianUnicode;
          case CodePageUTF32:     // 12000
              return UTF32;
          case CodePageUTF32BE:   // 12001
              return BigEndianUTF32;
          case CodePageUTF7:      // 65000
              return UTF7;
          case CodePageUTF8:      // 65001
              return UTF8;
          case CodePageASCII:     // 20127
              return ASCII;
          case ISO_8859_1:        // 28591
              return Latin1;

          // Special code page values that Win32 allows but this method rejects.
          case CodePageNoOEM:     // 1  CP_OEMCP
          case CodePageNoMac:     // 2  CP_MACCP
          case CodePageNoThread:  // 3  CP_THREAD_ACP
          case CodePageNoSymbol:  // 42 CP_SYMBOL
              throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));

          default:
              break;
      }

      bool inRange = codepage >= 0 && codepage <= 65535;
      if (!inRange)
      {
          throw new ArgumentOutOfRangeException(
              nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
      }

      throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, codepage));
  }
  // Returns the encoding for a code page identifier, configured with the given
  // encoder and decoder fallbacks. Registered providers are consulted first;
  // otherwise the encoding from GetEncoding(codepage) is cloned and the fallbacks
  // are set on the clone.
  public static Encoding GetEncoding(int codepage,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding? fromProvider = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      // The encoding returned here is cached and read-only, so the fallbacks are
      // applied to a clone rather than to the shared instance.
      Encoding customized = (Encoding)GetEncoding(codepage).Clone();
      customized.EncoderFallback = encoderFallback;
      customized.DecoderFallback = decoderFallback;
      return customized;
  }
  // Returns the encoding registered under the given name. Registered providers are
  // consulted first; otherwise the name is mapped to a code page through
  // EncodingTable and resolved by GetEncoding(int).
  //
  // NOTE: when adding a new encoding that can be requested by name, also add the
  // corresponding item to EncodingTable. Otherwise
  // EncodingTable.GetCodePageFromName() will throw for that name.
  public static Encoding GetEncoding(string name)
  {
      Encoding? fromProvider = EncodingProvider.GetEncodingFromProvider(name);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      int codePage = EncodingTable.GetCodePageFromName(name);
      return GetEncoding(codePage);
  }
  // Returns the encoding registered under the given name, configured with the given
  // encoder and decoder fallbacks. Registered providers are consulted first;
  // otherwise the name is mapped to a code page through EncodingTable and resolved
  // by GetEncoding(int, EncoderFallback, DecoderFallback).
  //
  // NOTE: when adding a new encoding that can be requested by name, also add the
  // corresponding item to EncodingTable. Otherwise
  // EncodingTable.GetCodePageFromName() will throw for that name.
  public static Encoding GetEncoding(string name,
      EncoderFallback encoderFallback, DecoderFallback decoderFallback)
  {
      Encoding? fromProvider = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
      if (fromProvider != null)
      {
          return fromProvider;
      }

      int codePage = EncodingTable.GetCodePageFromName(name);
      return GetEncoding(codePage, encoderFallback, decoderFallback);
  }
  // Returns the EncodingInfo entries supplied by EncodingTable.
  public static EncodingInfo[] GetEncodings() => EncodingTable.GetEncodings();
  // Returns the preamble bytes for this encoding. The base implementation returns
  // an empty array.
  public virtual byte[] GetPreamble() => Array.Empty<byte>();
  // Span view over the result of GetPreamble().
  public virtual ReadOnlySpan<byte> Preamble
  {
      get { return GetPreamble(); }
  }
  // Makes sure _dataItem is populated for this encoding's code page.
  // Throws NotSupportedException when EncodingTable has no entry for _codePage.
  private void GetDataItem()
  {
      if (_dataItem != null)
      {
          return; // Already loaded.
      }

      _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
      if (_dataItem == null)
      {
          throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
      }
  }
  // Name for this encoding that can be used with mail agent body tags.
  // If the encoding may not be used, the string is empty.
  public virtual string BodyName
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return item.BodyName;
      }
  }
  // Human-readable description of the encoding, e.g. "Hebrew (DOS)".
  public virtual string EncodingName
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return item.DisplayName;
      }
  }
  // Name for this encoding that can be used with mail agent header tags.
  // If the encoding may not be used, the string is empty.
  public virtual string HeaderName
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return item.HeaderName;
      }
  }
  // IANA preferred name for this encoding.
  public virtual string WebName
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return item.WebName;
      }
  }
  // Windows code page that most closely corresponds to this encoding.
  public virtual int WindowsCodePage
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return item.UIFamilyCodePage;
      }
  }
  // True if and only if the encoding is used for display by browser clients
  // (the MIMECONTF_BROWSER flag is set in the code page data).
  public virtual bool IsBrowserDisplay
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return (item.Flags & MIMECONTF_BROWSER) != 0;
      }
  }
  // True if and only if the encoding is used for saving by browser clients
  // (the MIMECONTF_SAVABLE_BROWSER flag is set in the code page data).
  public virtual bool IsBrowserSave
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return (item.Flags & MIMECONTF_SAVABLE_BROWSER) != 0;
      }
  }
  // True if and only if the encoding is used for display by mail and news clients
  // (the MIMECONTF_MAILNEWS flag is set in the code page data).
  public virtual bool IsMailNewsDisplay
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return (item.Flags & MIMECONTF_MAILNEWS) != 0;
      }
  }
  // True if and only if the encoding is used for saving documents by mail and
  // news clients (the MIMECONTF_SAVABLE_MAILNEWS flag is set in the code page data).
  public virtual bool IsMailNewsSave
  {
      get
      {
          // Load the code page data on first use.
          if (_dataItem == null)
              GetDataItem();

          CodePageDataItem item = _dataItem!;
          return (item.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0;
      }
  }
  // True if and only if the encoding only uses single byte code points
  // (i.e. ASCII, 1252, etc). The base implementation always reports false.
  public virtual bool IsSingleByte => false;
  // Gets or sets the encoder fallback of this encoding.
  // The setter throws InvalidOperationException when the instance is read-only and
  // ArgumentNullException when the new value is null (checked in that order).
  public EncoderFallback EncoderFallback
  {
      get => encoderFallback;
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          encoderFallback = value ?? throw new ArgumentNullException(nameof(value));
      }
  }
  // Gets or sets the decoder fallback of this encoding.
  // The setter throws InvalidOperationException when the instance is read-only and
  // ArgumentNullException when the new value is null (checked in that order).
  public DecoderFallback DecoderFallback
  {
      get => decoderFallback;
      set
      {
          if (IsReadOnly)
          {
              throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
          }

          decoderFallback = value ?? throw new ArgumentNullException(nameof(value));
      }
  }
  // Returns a shallow (memberwise) copy of this encoding. Unlike the original,
  // the copy is writable: its read-only flag is cleared so that its fallbacks
  // can be changed.
  public virtual object Clone()
  {
      Encoding copy = (Encoding)MemberwiseClone();
      copy._isReadOnly = false;
      return copy;
  }
  // Whether this instance rejects changes to its fallbacks. Backed by _isReadOnly.
  public bool IsReadOnly
  {
      get => _isReadOnly;
      private protected set => _isReadOnly = value;
  }
  // Encoding for the ASCII character set: the shared ASCIIEncoding.s_default instance.
  public static Encoding ASCII
  {
      get { return ASCIIEncoding.s_default; }
  }
  // Encoding for the Latin1 character set: the shared Latin1Encoding.s_default
  // instance. Kept private; it exists for internal optimizations.
  private static Encoding Latin1
  {
      get { return Latin1Encoding.s_default; }
  }
  // Returns the number of bytes required to encode every character of the array.
  // Throws ArgumentNullException when chars is null.
  public virtual int GetByteCount(char[] chars)
  {
      if (chars is null)
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

      int length = chars.Length;
      return GetByteCount(chars, 0, length);
  }
  // Returns the number of bytes required to encode the whole string. The string
  // is copied to a char[] and counted through the array-based overload.
  // Throws ArgumentNullException when s is null.
  public virtual int GetByteCount(string s)
  {
      if (s is null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] copy = s.ToCharArray();
      return GetByteCount(copy, 0, copy.Length);
  }
  // Returns the number of bytes required to encode count characters of chars,
  // starting at position index. Implemented by each concrete encoding.
  public abstract int GetByteCount(
      char[] chars, int index, int count);
  // Returns the number of bytes required to encode count characters of s,
  // starting at position index.
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index or count is negative or the range does not fit inside s.
  public int GetByteCount(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      // Written as a subtraction so that index + count cannot overflow.
      if (index > s.Length - count)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          // Pin the string and count directly from the first requested character.
          fixed (char* pString = s)
          {
              char* pStart = pString + index;
              return GetByteCount(pStart, count);
          }
      }
  }
  // Returns the number of bytes required to encode count characters starting at
  // the chars pointer.
  //
  // We expect this to be the workhorse for NLS encodings. Unfortunately, for
  // existing overrides it has to call the char[] version, which is slow because
  // the input is first copied into a managed array. Avoid this method when
  // calling a 3rd party encoding.
  //
  // chars: pointer to the first character to measure; must not be null.
  // count: number of characters to measure; must not be negative.
  // Returns whatever GetByteCount(char[], int, int) reports for the copied data.
  // Throws ArgumentNullException when chars is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetByteCount(char* chars, int count)
  {
      // Validate input parameters
      if (chars == null)
          throw new ArgumentNullException(nameof(chars),
              SR.ArgumentNull_Array);
      if (count < 0)
          throw new ArgumentOutOfRangeException(nameof(count),
              SR.ArgumentOutOfRange_NeedNonNegNum);

      // Bulk-copy the unmanaged characters into a managed array instead of
      // copying them one element at a time.
      char[] arrChar = new ReadOnlySpan<char>(chars, count).ToArray();

      return GetByteCount(arrChar, 0, count);
  }
  // Returns the number of bytes required to encode the characters in the span.
  // The span is pinned and handed to the pointer-based overload.
  public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
  {
      int length = chars.Length;

      // GetNonNullPinnableReference supplies a non-null reference to pin.
      fixed (char* pChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          return GetByteCount(pChars, length);
      }
  }
  // Returns a new byte array holding the encoded form of every character of the array.
  // Throws ArgumentNullException when chars is null.
  public virtual byte[] GetBytes(char[] chars)
  {
      if (chars is null)
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

      int length = chars.Length;
      return GetBytes(chars, 0, length);
  }
  // Returns a new byte array holding the encoded form of count characters of
  // chars, starting at position index. The array is sized with GetByteCount and
  // then filled by the buffer-based GetBytes overload.
  public virtual byte[] GetBytes(char[] chars, int index, int count)
  {
      int needed = GetByteCount(chars, index, count);
      byte[] encoded = new byte[needed];
      GetBytes(chars, index, count, encoded, 0);
      return encoded;
  }
  // Encodes charCount characters of chars, starting at charIndex, into bytes,
  // starting at byteIndex. An exception occurs if the byte array is not large
  // enough to hold the complete encoding of the characters.
  //
  // Two ways to size the destination:
  //   - GetByteCount gives the exact number of bytes that will be produced for
  //     a given range of characters;
  //   - GetMaxByteCount gives the maximum number of bytes that will be produced
  //     for a given number of characters, regardless of the character values.
  public abstract int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex);
  // Returns a new byte array holding the encoded form of the whole string.
  // Throws ArgumentNullException when s is null.
  public virtual byte[] GetBytes(string s)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }

      // Size the result exactly, then encode into it.
      int expected = GetByteCount(s);
      byte[] encoded = new byte[expected];
      int written = GetBytes(s, 0, s.Length, encoded, 0);
      Debug.Assert(expected == written);
      return encoded;
  }
  // Returns a new byte array holding the encoded form of count characters of s,
  // starting at position index. An empty array is returned when the range
  // encodes to zero bytes.
  // Throws ArgumentNullException when s is null and ArgumentOutOfRangeException
  // when index or count is negative or the range does not fit inside s.
  public byte[] GetBytes(string s, int index, int count)
  {
      if (s == null)
      {
          throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
      }
      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }
      // Written as a subtraction so that index + count cannot overflow.
      if (index > s.Length - count)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
      }

      unsafe
      {
          fixed (char* pString = s)
          {
              char* pStart = pString + index;

              int needed = GetByteCount(pStart, count);
              if (needed == 0)
              {
                  // Nothing to encode; this also avoids taking &encoded[0] of an empty array.
                  return Array.Empty<byte>();
              }

              byte[] encoded = new byte[needed];
              fixed (byte* pEncoded = &encoded[0])
              {
                  int written = GetBytes(pStart, count, pEncoded, needed);
                  Debug.Assert(needed == written);
              }

              return encoded;
          }
      }
  }
  // Encodes charCount characters of s, starting at charIndex, into bytes,
  // starting at byteIndex, and returns the result of the char[]-based overload.
  // The whole string is copied to a char[] first; range validation is left to
  // that overload.
  // Throws ArgumentNullException when s is null.
  public virtual int GetBytes(string s, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (s is null)
      {
          throw new ArgumentNullException(nameof(s));
      }

      char[] copy = s.ToCharArray();
      return GetBytes(copy, charIndex, charCount, bytes, byteIndex);
  }
  // Pointer-based GetBytes. We expect this to be the workhorse for NLS
  // encodings, but existing encodings need a working (if slow) default
  // implementation, which is what this is: the input is copied into a
  // managed char[], the array-based GetBytes overload does the encoding
  // into a temporary byte[], and the produced bytes are copied out.
  //
  // chars/charCount describe the input, bytes/byteCount the output buffer.
  // Returns the number of bytes copied into the output buffer.
  // Throws ArgumentNullException if either pointer is null and
  // ArgumentOutOfRangeException if either count is negative.
  //
  // WARNING: If this breaks it could be a security threat. Callers must
  // make sure that pointers and counts are correct.
  //
  // The array-based GetBytes() may be overridden by a third party, so its
  // return value cannot be trusted. The copy below is therefore always
  // clamped to byteCount so the caller's buffer can never be overrun.
  [CLSCompliant(false)]
  public virtual unsafe int GetBytes(char* chars, int charCount,
                                     byte* bytes, int byteCount)
  {
      // Validate input parameters
      if (bytes == null || chars == null)
          throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
              SR.ArgumentNull_Array);
      if (charCount < 0 || byteCount < 0)
          throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
              SR.ArgumentOutOfRange_NeedNonNegNum);

      // Snapshot the input characters with one bulk copy.
      char[] arrChar = new ReadOnlySpan<char>(chars, charCount).ToArray();

      // Temporary managed buffer with exactly the caller's capacity.
      byte[] arrByte = new byte[byteCount];

      // Do the work
      int result = GetBytes(arrChar, 0, charCount, arrByte, 0);
      Debug.Assert(result <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");

      // Never copy more than byteCount bytes, whatever result claims.
      if (result < byteCount)
          byteCount = result;

      // Bulk copy; skipped when nothing was produced (or when a misbehaving
      // override reported a negative count, which is returned unchanged).
      if (byteCount > 0)
          new ReadOnlySpan<byte>(arrByte, 0, byteCount).CopyTo(new Span<byte>(bytes, byteCount));

      return byteCount;
  }
  // Span-based GetBytes: pins both spans and forwards to the pointer
  // overload, passing the span lengths as the counts. Returns that
  // overload's result.
  public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
  {
      fixed (char* source = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          fixed (byte* destination = &MemoryMarshal.GetNonNullPinnableReference(bytes))
          {
              int written = GetBytes(source, chars.Length, destination, bytes.Length);
              return written;
          }
      }
  }
  // Returns the number of characters that decoding the whole byte array
  // produces. Throws ArgumentNullException when bytes is null.
  public virtual int GetCharCount(byte[] bytes)
  {
      if (bytes != null)
      {
          return GetCharCount(bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  // Returns the number of characters produced by decoding count bytes of
  // the given array, starting at index. Every encoding must implement this.
  public abstract int GetCharCount(byte[] bytes, int index, int count);
  // Pointer-based GetCharCount. We expect this to be the workhorse for NLS
  // encodings, but existing encodings need a working (if slow) default
  // implementation: the bytes are copied into a managed array and the
  // array-based overload does the counting.
  //
  // Returns the number of characters that decoding count bytes at bytes
  // would produce. Throws ArgumentNullException if bytes is null and
  // ArgumentOutOfRangeException if count is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetCharCount(byte* bytes, int count)
  {
      // Validate input parameters
      if (bytes == null)
          throw new ArgumentNullException(nameof(bytes),
              SR.ArgumentNull_Array);
      if (count < 0)
          throw new ArgumentOutOfRangeException(nameof(count),
              SR.ArgumentOutOfRange_NeedNonNegNum);

      // One bulk copy of the input instead of a byte-by-byte loop.
      byte[] arrByte = new ReadOnlySpan<byte>(bytes, count).ToArray();

      return GetCharCount(arrByte, 0, count);
  }
  // Span-based GetCharCount: pins the span and forwards to the pointer
  // overload with the span's length.
  public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
  {
      fixed (byte* source = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          int charCount = GetCharCount(source, bytes.Length);
          return charCount;
      }
  }
  // Decodes the whole byte array and returns the resulting characters in
  // a new array. Throws ArgumentNullException when bytes is null.
  public virtual char[] GetChars(byte[] bytes)
  {
      if (bytes != null)
      {
          return GetChars(bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  // Decodes count bytes of the array, starting at index, and returns the
  // characters in a new array sized by GetCharCount for the same range.
  public virtual char[] GetChars(byte[] bytes, int index, int count)
  {
      int charCount = GetCharCount(bytes, index, count);
      char[] decoded = new char[charCount];
      GetChars(bytes, index, count, decoded, 0);
      return decoded;
  }
  // Decodes byteCount bytes of bytes, starting at byteIndex, into chars
  // beginning at charIndex. An exception occurs if the character array is
  // too small to hold the complete decoding.
  //
  // Use GetCharCount to learn the exact number of characters a given byte
  // range produces, or GetMaxCharCount for an upper bound that depends
  // only on the number of bytes and not on their values.
  public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
                               char[] chars, int charIndex);
  // Pointer-based GetChars. We expect this to be the workhorse for NLS
  // encodings, but existing encodings need a working (if slow) default
  // implementation, which is what this is: the input is copied into a
  // managed byte[], the array-based GetChars overload does the decoding
  // into a temporary char[], and the produced chars are copied out.
  //
  // bytes/byteCount describe the input, chars/charCount the output buffer.
  // Returns the number of chars copied into the output buffer.
  // Throws ArgumentNullException if either pointer is null and
  // ArgumentOutOfRangeException if either count is negative.
  //
  // WARNING: If this breaks it could be a security threat. Callers must
  // make sure that pointers and counts are correct.
  //
  // The array-based GetChars() may be overridden by a third party, so its
  // return value cannot be trusted. The copy below is therefore always
  // clamped to charCount so the caller's buffer can never be overrun.
  [CLSCompliant(false)]
  public virtual unsafe int GetChars(byte* bytes, int byteCount,
                                     char* chars, int charCount)
  {
      // Validate input parameters
      if (chars == null || bytes == null)
          throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
              SR.ArgumentNull_Array);
      if (byteCount < 0 || charCount < 0)
          throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
              SR.ArgumentOutOfRange_NeedNonNegNum);

      // Snapshot the input bytes with one bulk copy.
      byte[] arrByte = new ReadOnlySpan<byte>(bytes, byteCount).ToArray();

      // Temporary managed buffer with exactly the caller's capacity.
      char[] arrChar = new char[charCount];

      // Do the work
      int result = GetChars(arrByte, 0, byteCount, arrChar, 0);
      Debug.Assert(result <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");

      // Never copy more than charCount chars, whatever result claims.
      if (result < charCount)
          charCount = result;

      // Bulk copy; skipped when nothing was produced (or when a misbehaving
      // override reported a negative count, which is returned unchanged).
      if (charCount > 0)
          new ReadOnlySpan<char>(arrChar, 0, charCount).CopyTo(new Span<char>(chars, charCount));

      return charCount;
  }
  // Span-based GetChars: pins both spans and forwards to the pointer
  // overload, passing the span lengths as the counts. Returns that
  // overload's result.
  public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
  {
      fixed (byte* source = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          fixed (char* destination = &MemoryMarshal.GetNonNullPinnableReference(chars))
          {
              int written = GetChars(source, bytes.Length, destination, chars.Length);
              return written;
          }
      }
  }
  // Decodes byteCount bytes starting at the given pointer into a string
  // using this encoding. Throws ArgumentNullException when bytes is null
  // and ArgumentOutOfRangeException when byteCount is negative.
  [CLSCompliant(false)]
  public unsafe string GetString(byte* bytes, int byteCount)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      string decoded = string.CreateStringFromEncoding(bytes, byteCount, this);
      return decoded;
  }
  // Span-based GetString: pins the span and builds the string from its
  // bytes using this encoding.
  public unsafe string GetString(ReadOnlySpan<byte> bytes)
  {
      fixed (byte* source = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          string decoded = string.CreateStringFromEncoding(source, bytes.Length, this);
          return decoded;
      }
  }
  // The code page identifier of this encoding: an integer between 0 and
  // 65535 when the encoding has a code page identifier, or -1 when it does
  // not represent a code page.
  public virtual int CodePage => _codePage;
  // Convenience overload: asks the form-specific overload about
  // NormalizationForm.FormC.
  public bool IsAlwaysNormalized() => this.IsAlwaysNormalized(NormalizationForm.FormC);
  // Returns true if the encoding is always normalized for the given
  // normalization form. The base implementation assumes it is not;
  // encodings that know otherwise override this.
  public virtual bool IsAlwaysNormalized(NormalizationForm form) => false;
  // Returns a Decoder for this encoding. Unlike the GetChars family of
  // methods, a Decoder can convert partial byte sequences into partial
  // character sequences by keeping state between conversions.
  //
  // The default implementation hands back a DefaultDecoder, which simply
  // forwards to this encoding's GetCharCount and GetChars methods.
  // Encodings that need state carried between successive conversions
  // should override this method and return a suitable Decoder.
  public virtual Decoder GetDecoder()
  {
      Decoder decoder = new DefaultDecoder(this);
      return decoder;
  }
  // Returns an Encoder for this encoding. Unlike the GetBytes family of
  // methods, an Encoder can convert partial character sequences into
  // partial byte sequences by keeping state between conversions.
  //
  // The default implementation hands back a DefaultEncoder, which simply
  // forwards to this encoding's GetByteCount and GetBytes methods.
  // Encodings that need state carried between successive conversions
  // should override this method and return a suitable Encoder.
  public virtual Encoder GetEncoder()
  {
      Encoder encoder = new DefaultEncoder(this);
      return encoder;
  }
  // Returns the maximum number of bytes needed to encode charCount
  // characters. Use it to size byte buffers passed to this encoding's
  // GetBytes method or to the GetBytes method of one of its Encoders; all
  // encodings must guarantee that buffers sized this way never cause a
  // buffer overflow exception.
  //
  // WARNING: with anything other than the default replacement encoder
  // fallback, an actual GetBytes() call may produce more bytes than this
  // method reports.
  public abstract int GetMaxByteCount(int charCount);
  // Returns the maximum number of characters that decoding byteCount bytes
  // can produce. Use it to size character buffers passed to this
  // encoding's GetChars method or to the GetChars method of one of its
  // Decoders; all encodings must guarantee that buffers sized this way
  // never cause a buffer overflow exception.
  public abstract int GetMaxCharCount(int byteCount);
  // Decodes the whole byte array into a string. Throws
  // ArgumentNullException when bytes is null.
  public virtual string GetString(byte[] bytes)
  {
      if (bytes != null)
      {
          return GetString(bytes, 0, bytes.Length);
      }

      throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
  }
  // Decodes count bytes of the array, starting at index, into a string by
  // building it from the result of GetChars for the same range.
  //
  // Internally we override this for performance.
  public virtual string GetString(byte[] bytes, int index, int count)
  {
      char[] decoded = GetChars(bytes, index, count);
      return new string(decoded);
  }
  // The shared Unicode (UTF-16) encoding, an instance of UnicodeEncoding.
  //
  // It uses little endian byte order, but will detect big endian input if
  // it finds a byte order mark per Unicode 2.0.
  public static Encoding Unicode
  {
      get { return UnicodeEncoding.s_littleEndianDefault; }
  }
  // The shared big endian Unicode (UTF-16) encoding, an instance of
  // UnicodeEncoding.
  //
  // It uses big endian byte order, but will detect little endian input if
  // it finds a byte order mark per Unicode 2.0.
  public static Encoding BigEndianUnicode
  {
      get { return UnicodeEncoding.s_bigEndianDefault; }
  }
  // The shared UTF-7 encoding, an instance of UTF7Encoding.
  public static Encoding UTF7
  {
      get { return UTF7Encoding.s_default; }
  }
  // The shared UTF-8 encoding, an instance of UTF8Encoding.
  public static Encoding UTF8
  {
      get { return UTF8Encoding.s_default; }
  }
  // The shared UTF-32 encoding, an instance of UTF32Encoding.
  public static Encoding UTF32
  {
      get { return UTF32Encoding.s_default; }
  }
  // The shared UTF-32 encoding that uses big endian byte order, an
  // instance of UTF32Encoding. Private to this class.
  private static Encoding BigEndianUTF32
  {
      get { return UTF32Encoding.s_bigEndianDefault; }
  }
  // Two encodings are equal when the other object is an Encoding with the
  // same code page and equal encoder and decoder fallbacks. The checks run
  // in that order and stop at the first mismatch.
  public override bool Equals(object? value)
  {
      if (!(value is Encoding other))
      {
          return false;
      }
      if (_codePage != other._codePage)
      {
          return false;
      }
      if (!EncoderFallback.Equals(other.EncoderFallback))
      {
          return false;
      }

      return DecoderFallback.Equals(other.DecoderFallback);
  }
  // Hash built from the same three components that Equals compares: the
  // code page plus the hash codes of both fallbacks.
  public override int GetHashCode()
  {
      int encoderFallbackHash = this.EncoderFallback.GetHashCode();
      int decoderFallbackHash = this.DecoderFallback.GetHashCode();
      return _codePage + encoderFallbackHash + decoderFallbackHash;
  }
  // Best-fit data for the Unicode-to-bytes direction. Normally there is
  // none, so the base implementation returns an empty array.
  internal virtual char[] GetBestFitUnicodeToBytesData() => Array.Empty<char>();
  // Best-fit data for the bytes-to-Unicode direction. Normally there is
  // none, so the base implementation returns an empty array.
  internal virtual char[] GetBestFitBytesToUnicodeData() => Array.Empty<char>();
  // Always throws an ArgumentException (parameter name "bytes") reporting
  // that the output byte buffer was too small.
  //
  // The message includes the encoding name and the encoder fallback type,
  // because this can happen when a user-implemented encoder fallback has a
  // broken GetMaxCharCount.
  //
  // Marked [StackTraceHidden] like the other throw helpers in this class
  // (ThrowConversionOverflow, ThrowCharsOverflow).
  [DoesNotReturn]
  [StackTraceHidden]
  internal void ThrowBytesOverflow()
  {
      throw new ArgumentException(
          SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType()), "bytes");
  }
  // Handles a byte-buffer overflow during encoding.
  //
  // Throws (via ThrowBytesOverflow()) when there is no encoder, when the
  // encoder is flagged to throw on overflow, or when nothing was encoded;
  // a materialized fallback buffer on the encoder is reset first.
  // Otherwise we are in convert and only have to remember our flushing,
  // so the encoder's must-flush state is cleared and the method returns.
  internal void ThrowBytesOverflow(EncoderNLS? encoder, bool nothingEncoded)
  {
      bool mustThrow = encoder == null || encoder._throwOnOverflow || nothingEncoded;
      if (!mustThrow)
      {
          encoder!.ClearMustFlush();
          return;
      }

      if (encoder != null && encoder.InternalHasFallbackBuffer)
      {
          encoder.FallbackBuffer.InternalReset();
      }

      // Special message that includes the fallback type, in case the
      // user's encoder fallback has a broken GetMaxCharCount.
      ThrowBytesOverflow();
  }
  // Always throws an ArgumentException carrying the generic
  // conversion-overflow message.
  [DoesNotReturn]
  [StackTraceHidden]
  internal static void ThrowConversionOverflow()
  {
      string message = SR.Argument_ConversionOverflow;
      throw new ArgumentException(message);
  }
  // Always throws an ArgumentException (parameter name "chars") reporting
  // that the output char buffer was too small.
  //
  // The message includes the encoding name and the decoder fallback type,
  // because this can happen when a user-implemented decoder fallback has a
  // broken GetMaxCharCount.
  [DoesNotReturn]
  [StackTraceHidden]
  internal void ThrowCharsOverflow()
  {
      string message = SR.Format(
          SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType());
      throw new ArgumentException(message, "chars");
  }
  // Handles a char-buffer overflow during decoding.
  //
  // Throws (via ThrowCharsOverflow()) when there is no decoder, when the
  // decoder is flagged to throw on overflow, or when nothing was decoded;
  // a materialized fallback buffer on the decoder is reset first.
  // Otherwise we are in convert and only have to remember our flushing,
  // so the decoder's must-flush state is cleared and the method returns.
  internal void ThrowCharsOverflow(DecoderNLS? decoder, bool nothingDecoded)
  {
      bool mustThrow = decoder == null || decoder._throwOnOverflow || nothingDecoded;
      if (!mustThrow)
      {
          decoder!.ClearMustFlush();
          return;
      }

      if (decoder != null && decoder.InternalHasFallbackBuffer)
      {
          decoder.FallbackBuffer.InternalReset();
      }

      // Special message that includes the fallback type, in case the
      // user's decoder fallback has a broken GetMaxCharCount.
      ThrowCharsOverflow();
  }
  // Encoder returned by Encoding.GetEncoder(). Every member forwards to
  // the corresponding GetByteCount/GetBytes method of the wrapped
  // encoding; the flush argument is accepted but not used.
  internal sealed class DefaultEncoder : Encoder, IObjectReference
  {
      private Encoding _encoding;

      public DefaultEncoder(Encoding encoding) => _encoding = encoding;

      // Not supported: always throws PlatformNotSupportedException.
      public object GetRealObject(StreamingContext context) =>
          throw new PlatformNotSupportedException();

      // Number of bytes needed to encode count characters of chars
      // starting at index, as reported by the wrapped encoding.
      public override int GetByteCount(char[] chars, int index, int count, bool flush) =>
          _encoding.GetByteCount(chars, index, count);

      // Pointer flavour of GetByteCount.
      public unsafe override int GetByteCount(char* chars, int count, bool flush) =>
          _encoding.GetByteCount(chars, count);

      // Encodes charCount characters from chars, starting at charIndex,
      // into bytes starting at byteIndex, by delegating to the wrapped
      // encoding's GetBytes.
      //
      // An exception occurs if the byte array is too small for the
      // complete encoding. GetByteCount gives the exact size needed for a
      // given range; the encoding's GetMaxByteCount gives an upper bound
      // that depends only on the number of characters.
      public override int GetBytes(char[] chars, int charIndex, int charCount,
                                   byte[] bytes, int byteIndex, bool flush) =>
          _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);

      // Pointer flavour of GetBytes.
      public unsafe override int GetBytes(char* chars, int charCount,
                                          byte* bytes, int byteCount, bool flush) =>
          _encoding.GetBytes(chars, charCount, bytes, byteCount);
  }
  // Decoder returned by Encoding.GetDecoder(). Every member forwards to
  // the corresponding GetCharCount/GetChars method of the wrapped
  // encoding; the flush argument is accepted but not used.
  internal sealed class DefaultDecoder : Decoder, IObjectReference
  {
      private Encoding _encoding;

      public DefaultDecoder(Encoding encoding) => _encoding = encoding;

      // Not supported: always throws PlatformNotSupportedException.
      public object GetRealObject(StreamingContext context) =>
          throw new PlatformNotSupportedException();

      // Number of characters produced by decoding count bytes of bytes
      // starting at index. Same as the flush overload with flush = false.
      public override int GetCharCount(byte[] bytes, int index, int count) =>
          GetCharCount(bytes, index, count, false);

      public override int GetCharCount(byte[] bytes, int index, int count, bool flush) =>
          _encoding.GetCharCount(bytes, index, count);

      // Pointer flavour: just call the encoding's version, no flush.
      public unsafe override int GetCharCount(byte* bytes, int count, bool flush) =>
          _encoding.GetCharCount(bytes, count);

      // Decodes byteCount bytes from bytes, starting at byteIndex, into
      // chars starting at charIndex. Same as the flush overload with
      // flush = false.
      //
      // An exception occurs if the character array is too small for the
      // complete decoding. GetCharCount gives the exact size needed for a
      // given range; the encoding's GetMaxCharCount gives an upper bound
      // that depends only on the number of bytes.
      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                   char[] chars, int charIndex) =>
          GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);

      public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                   char[] chars, int charIndex, bool flush) =>
          _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);

      // Pointer flavour: just call the encoding's version.
      public unsafe override int GetChars(byte* bytes, int byteCount,
                                          char* chars, int charCount, bool flush) =>
          _encoding.GetChars(bytes, byteCount, chars, charCount);
  }
  // Cursor over an input byte range and an output char range, used while
  // decoding. When the char pointer is null nothing is stored and only
  // the running character count (Count) is advanced.
  internal class EncodingCharBuffer
  {
      private unsafe char* _chars;
      private unsafe char* _charStart;
      private unsafe char* _charEnd;
      private int _charCountResult = 0;
      private Encoding _enc;
      private DecoderNLS? _decoder;
      private unsafe byte* _byteStart;
      private unsafe byte* _byteEnd;
      private unsafe byte* _bytes;
      private DecoderFallbackBuffer _fallbackBuffer;

      // Positions both cursors at the start of their ranges and obtains
      // the fallback buffer: the decoder's when a decoder is given,
      // otherwise a new one created from the encoding's DecoderFallback.
      internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS? decoder, char* charStart, int charCount,
                                         byte* byteStart, int byteCount)
      {
          _enc = enc;
          _decoder = decoder;

          _charStart = charStart;
          _chars = charStart;
          _charEnd = charStart + charCount;

          _byteStart = byteStart;
          _bytes = byteStart;
          _byteEnd = byteStart + byteCount;

          _fallbackBuffer = _decoder == null
              ? enc.DecoderFallback.CreateFallbackBuffer()
              : _decoder.FallbackBuffer;

          // If we're getting chars or getting char count we don't expect to have
          // to remember fallbacks between calls (so it should be empty)
          Debug.Assert(_fallbackBuffer.Remaining == 0,
              "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
          _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
      }

      // Appends one char that was produced from numBytes input bytes.
      // When the output is full, the byte cursor is moved back by numBytes
      // and ThrowCharsOverflow decides whether to throw; if it does not,
      // false is returned and nothing is stored.
      internal unsafe bool AddChar(char ch, int numBytes)
      {
          if (_chars == null)
          {
              // No output buffer: count only.
              _charCountResult++;
              return true;
          }

          if (_chars < _charEnd)
          {
              *_chars = ch;
              _chars++;
              _charCountResult++;
              return true;
          }

          _bytes -= numBytes; // Didn't encode these bytes
          _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
          return false; // No throw, but no store either
      }

      // Appends one char that came from a single input byte.
      internal bool AddChar(char ch) => AddChar(ch, 1);

      // Appends two chars produced from numBytes input bytes; room for
      // both is checked up front.
      internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
      {
          bool roomForTwo = _chars < _charEnd - 1;
          if (roomForTwo)
          {
              return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
          }

          _bytes -= numBytes; // Didn't encode these bytes
          _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart); // Throw?
          return false; // No throw, but no store either
      }

      // Moves the byte cursor by count.
      internal unsafe void AdjustBytes(int count)
      {
          _bytes += count;
      }

      // True while at least one unread input byte remains.
      internal unsafe bool MoreData => _bytes < _byteEnd;

      // Do we have count more bytes?
      internal unsafe bool EvenMoreData(int count)
      {
          return _bytes <= _byteEnd - count;
      }

      // Reads the next input byte and advances the cursor. Callers should
      // already have checked MoreData/EvenMoreData; as a double check this
      // returns 0 when no input is left.
      internal unsafe byte GetNextByte()
      {
          Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more date");
          if (_bytes < _byteEnd)
          {
              byte next = *_bytes;
              _bytes++;
              return next;
          }

          return 0;
      }

      // Number of input bytes consumed so far.
      internal unsafe int BytesUsed => (int)(_bytes - _byteStart);

      // Runs the fallback for a single byte.
      internal bool Fallback(byte fallbackByte)
      {
          return Fallback(new byte[] { fallbackByte });
      }

      // Runs the fallback for a two-byte sequence.
      internal bool Fallback(byte byte1, byte byte2)
      {
          return Fallback(new byte[] { byte1, byte2 });
      }

      // Runs the fallback for a four-byte sequence.
      internal bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
      {
          return Fallback(new byte[] { byte1, byte2, byte3, byte4 });
      }

      // Runs the fallback for byteBuffer and adds what it produced to the
      // count. With an output buffer, a failed fallback moves the byte
      // cursor back by the buffer length, resets the fallback buffer and
      // lets ThrowCharsOverflow decide whether to throw; if it does not,
      // false is returned.
      internal unsafe bool Fallback(byte[] byteBuffer)
      {
          if (_chars == null)
          {
              // No output buffer: the fallback only reports a count.
              _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
              return true;
          }

          char* before = _chars;
          bool failed = _fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars) == false;
          if (!failed)
          {
              _charCountResult += unchecked((int)(_chars - before));
              return true;
          }

          _bytes -= byteBuffer.Length; // Didn't use how many ever bytes we're falling back
          _fallbackBuffer.InternalReset(); // We didn't use this fallback.
          _enc.ThrowCharsOverflow(_decoder, _chars == _charStart); // Throw?
          return false; // No throw, but no store either
      }

      // Number of characters produced (or counted) so far.
      internal int Count => _charCountResult;
  }
  // Cursor over an input char range and an output byte range, used while
  // encoding. When the byte pointer is null nothing is stored and only
  // the running byte count (Count) is advanced.
  internal class EncodingByteBuffer
  {
      private unsafe byte* _bytes;
      private unsafe byte* _byteStart;
      private unsafe byte* _byteEnd;
      private unsafe char* _chars;
      private unsafe char* _charStart;
      private unsafe char* _charEnd;
      private int _byteCountResult = 0;
      private Encoding _enc;
      private EncoderNLS? _encoder;
      internal EncoderFallbackBuffer fallbackBuffer;

      // Positions both cursors at the start of their ranges and obtains
      // the fallback buffer: the encoder's when an encoder is given,
      // otherwise a new one created from the encoding's EncoderFallback.
      // Throws ArgumentException when the encoder is flagged to throw on
      // overflow and its existing fallback buffer still holds data.
      internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS? inEncoder,
                                         byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
      {
          _enc = inEncoding;
          _encoder = inEncoder;

          _charStart = inCharStart;
          _chars = inCharStart;
          _charEnd = inCharStart + inCharCount;

          _byteStart = inByteStart;
          _bytes = inByteStart;
          _byteEnd = inByteStart + inByteCount;

          if (_encoder != null)
          {
              this.fallbackBuffer = _encoder.FallbackBuffer;

              // If we're not converting we must not have data in our fallback buffer
              bool staleFallbackData = _encoder._throwOnOverflow &&
                                       _encoder.InternalHasFallbackBuffer &&
                                       this.fallbackBuffer.Remaining > 0;
              if (staleFallbackData)
              {
                  throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
                      _encoder.Encoding.EncodingName, _encoder.Fallback!.GetType()));
              }
          }
          else
          {
              this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
          }

          fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
      }

      // Appends one byte, requiring room for it plus moreBytesExpected
      // further bytes. When there is not enough room, MovePrevious(true)
      // backs up and decides whether to throw; if it does not, false is
      // returned and nothing is stored.
      internal unsafe bool AddByte(byte b, int moreBytesExpected)
      {
          Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");

          if (_bytes == null)
          {
              // No output buffer: count only.
              _byteCountResult++;
              return true;
          }

          if (_bytes < _byteEnd - moreBytesExpected)
          {
              *_bytes = b;
              _bytes++;
              _byteCountResult++;
              return true;
          }

          // Throw maybe. Check which buffer to back up (only matters if Converting)
          this.MovePrevious(true); // Throw if necessary
          return false; // No throw, but no store either
      }

      // Appends a single byte with no further bytes expected.
      internal bool AddByte(byte b1)
      {
          return AddByte(b1, 0);
      }

      // Appends two bytes with no further bytes expected.
      internal bool AddByte(byte b1, byte b2)
      {
          return AddByte(b1, b2, 0);
      }

      // Appends two bytes, stopping at the first one that does not fit.
      internal bool AddByte(byte b1, byte b2, int moreBytesExpected)
      {
          if (!AddByte(b1, 1 + moreBytesExpected))
          {
              return false;
          }
          return AddByte(b2, moreBytesExpected);
      }

      // Appends three bytes with no further bytes expected.
      internal bool AddByte(byte b1, byte b2, byte b3)
      {
          return AddByte(b1, b2, b3, (int)0);
      }

      // Appends three bytes, stopping at the first one that does not fit.
      internal bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
      {
          if (!AddByte(b1, 2 + moreBytesExpected))
          {
              return false;
          }
          if (!AddByte(b2, 1 + moreBytesExpected))
          {
              return false;
          }
          return AddByte(b3, moreBytesExpected);
      }

      // Appends four bytes, stopping at the first one that does not fit.
      internal bool AddByte(byte b1, byte b2, byte b3, byte b4)
      {
          if (!AddByte(b1, 3))
          {
              return false;
          }
          if (!AddByte(b2, 2))
          {
              return false;
          }
          if (!AddByte(b3, 1))
          {
              return false;
          }
          return AddByte(b4, 0);
      }

      // Un-reads the most recent input: the last fallback char while the
      // fallback buffer is falling back, otherwise the last source char
      // (if any). When bThrow is set, ThrowBytesOverflow then decides
      // whether to throw.
      internal unsafe void MovePrevious(bool bThrow)
      {
          if (!fallbackBuffer.bFallingBack)
          {
              Debug.Assert(_chars > _charStart ||
                  ((bThrow == true) && (_bytes == _byteStart)),
                  "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
              if (_chars > _charStart)
              {
                  _chars--; // don't use last char
              }
          }
          else
          {
              fallbackBuffer.MovePrevious(); // don't use last fallback
          }

          if (bThrow)
          {
              _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart); // Throw? (and reset fallback if not converting)
          }
      }

      // Hands charFallback to the fallback buffer and returns its result.
      internal unsafe bool Fallback(char charFallback)
      {
          bool handled = fallbackBuffer.InternalFallback(charFallback, ref _chars);
          return handled;
      }

      // True when the fallback buffer is not empty or source chars remain.
      internal unsafe bool MoreData => (fallbackBuffer.Remaining > 0) || (_chars < _charEnd);

      // Returns the next char to encode: the fallback buffer's next char
      // when it has one, otherwise the next source char. Returns 0 when
      // neither has anything left.
      internal unsafe char GetNextChar()
      {
          char next = fallbackBuffer.InternalGetNextChar();
          if (next != 0)
          {
              return next;
          }

          // Nothing in the fallback buffer, return our normal data.
          return _chars < _charEnd ? *(_chars++) : next;
      }

      // Number of source chars consumed so far.
      internal unsafe int CharsUsed => (int)(_chars - _charStart);

      // Number of bytes produced (or counted) so far.
      internal int Count => _byteCountResult;
  }
  1452. }
  1453. }