ASCIIEncoding.cs 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System;
  5. using System.Diagnostics;
  6. using System.Runtime.InteropServices;
  7. namespace System.Text
  8. {
  9. // ASCIIEncoding
  10. //
  11. // Note that ASCIIEncoding is optimized with no best fit and ? for fallback.
  12. // It doesn't come in other flavors.
  13. //
  14. // Note: ASCIIEncoding is the only encoding that doesn't do best fit (windows has best fit).
  15. //
  16. // Note: IsAlwaysNormalized remains false because 1/2 the code points are unassigned, so they'd
  17. // use fallbacks, and we cannot guarantee that fallbacks are normalized.
  18. public class ASCIIEncoding : Encoding
  19. {
  20. // Allow for devirtualization (see https://github.com/dotnet/coreclr/pull/9230)
  /// <summary>
  /// Sealed flavor of <see cref="ASCIIEncoding"/>; being sealed allows calls made through it
  /// to be devirtualized (see https://github.com/dotnet/coreclr/pull/9230).
  /// </summary>
  internal sealed class ASCIIEncodingSealed : ASCIIEncoding
  {
  }

  /// <summary>
  /// Instance handed out by Encoding.ASCII. As a static field initializer it is not run
  /// until a static member of this class is first referenced, which gives lazy initialization.
  /// </summary>
  internal static readonly ASCIIEncodingSealed s_default = new ASCIIEncodingSealed();
  /// <summary>
  /// Creates an ASCII encoding by passing the ASCII code page identifier to the base class.
  /// </summary>
  public ASCIIEncoding()
      : base(Encoding.CodePageASCII)
  {
  }
  /// <summary>
  /// Installs the default fallbacks for this encoding: the replacement fallback is used
  /// for both the encoding and the decoding direction.
  /// </summary>
  internal sealed override void SetDefaultFallbacks()
  {
      encoderFallback = EncoderFallback.ReplacementFallback;
      decoderFallback = DecoderFallback.ReplacementFallback;
  }
  34. // WARNING: GetByteCount(string chars), GetBytes(string chars,...), and GetString(byte[] byteIndex...)
  35. // WARNING: have different variable names than EncodingNLS.cs, so this can't just be cut & pasted,
  36. // WARNING: or it'll break VB's way of calling these.
  37. //
  38. // The following methods are copied from EncodingNLS.cs.
  39. // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
  40. // These should be kept in sync for the following classes:
  41. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  42. // Returns the number of bytes required to encode a range of characters in
  43. // a character array.
  44. //
  45. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  46. // So if you fix this, fix the others. Currently those include:
  47. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  48. // parent method is safe
  /// <summary>
  /// Returns the number of bytes needed to encode <paramref name="count"/> characters of
  /// <paramref name="chars"/>, starting at <paramref name="index"/>.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range does not
  /// fit inside <paramref name="chars"/>.
  /// </exception>
  public override unsafe int GetByteCount(char[] chars, int index, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (chars.Length - index < count)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to count; returning here also avoids pinning an empty array.
      if (count == 0)
      {
          return 0;
      }

      // Pin the array and let the pointer-based overload do the work (no encoder state).
      fixed (char* pinnedChars = chars)
      {
          return GetByteCount(pinnedChars + index, count, null);
      }
  }
  65. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  66. // So if you fix this, fix the others. Currently those include:
  67. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  68. // parent method is safe
  /// <summary>
  /// Returns the number of bytes needed to encode every character of <paramref name="chars"/>.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
  public override unsafe int GetByteCount(string chars)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars));
      }

      // Pin the string and forward its full length to the pointer-based overload.
      fixed (char* pinnedChars = chars)
      {
          return GetByteCount(pinnedChars, chars.Length, null);
      }
  }
  77. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  78. // So if you fix this, fix the others. Currently those include:
  79. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Returns the number of bytes needed to encode <paramref name="count"/> characters
  /// starting at the pointer <paramref name="chars"/>.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="chars"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
  [CLSCompliant(false)]
  public override unsafe int GetByteCount(char* chars, int count)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Arguments are valid; delegate to the internal overload without an encoder.
      return GetByteCount(chars, count, encoder: null);
  }
  /// <summary>
  /// Returns the number of bytes needed to encode the characters in <paramref name="chars"/>.
  /// </summary>
  public override unsafe int GetByteCount(ReadOnlySpan<char> chars)
  {
      int length = chars.Length;

      // Pin the span's memory and delegate to the pointer-based overload (no encoder state).
      fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          return GetByteCount(pinnedChars, length, null);
      }
  }
  98. // Parent method is safe.
  99. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  100. // So if you fix this, fix the others. Currently those include:
  101. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Encodes <paramref name="charCount"/> characters of the string <paramref name="chars"/>,
  /// starting at <paramref name="charIndex"/>, into <paramref name="bytes"/> beginning at
  /// <paramref name="byteIndex"/>.
  /// </summary>
  /// <returns>The value returned by the pointer-based overload.</returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="chars"/> or <paramref name="bytes"/> is null.
  /// </exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// An index or count is negative, the character range does not fit inside
  /// <paramref name="chars"/>, or <paramref name="byteIndex"/> lies beyond the end of
  /// <paramref name="bytes"/>.
  /// </exception>
  public override unsafe int GetBytes(string chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (chars.Length - charIndex < charCount)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCount);
      }

      if (byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
      }

      // Space left in the destination from byteIndex to the end of the array.
      int bytesAvailable = bytes.Length - byteIndex;

      fixed (char* pinnedChars = chars)
      {
          fixed (byte* pinnedBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
          {
              return GetBytes(pinnedChars + charIndex, charCount, pinnedBytes + byteIndex, bytesAvailable, null);
          }
      }
  }
  117. // Encodes a range of characters in a character array into a range of bytes
  118. // in a byte array. An exception occurs if the byte array is not large
  119. // enough to hold the complete encoding of the characters. The
  120. // GetByteCount method can be used to determine the exact number of
  121. // bytes that will be produced for a given range of characters.
  122. // Alternatively, the GetMaxByteCount method can be used to
  123. // determine the maximum number of bytes that will be produced for a given
  124. // number of characters, regardless of the actual character values.
  125. //
  126. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  127. // So if you fix this, fix the others. Currently those include:
  128. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  129. // parent method is safe
  /// <summary>
  /// Encodes <paramref name="charCount"/> characters of <paramref name="chars"/>, starting at
  /// <paramref name="charIndex"/>, into <paramref name="bytes"/> beginning at
  /// <paramref name="byteIndex"/>.
  /// </summary>
  /// <returns>
  /// 0 when <paramref name="charCount"/> is 0; otherwise the value returned by the
  /// pointer-based overload.
  /// </returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="chars"/> or <paramref name="bytes"/> is null.
  /// </exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// An index or count is negative, the character range does not fit inside
  /// <paramref name="chars"/>, or <paramref name="byteIndex"/> lies beyond the end of
  /// <paramref name="bytes"/>.
  /// </exception>
  public override unsafe int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
  {
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (chars.Length - charIndex < charCount)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (byteIndex < 0 || byteIndex > bytes.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
      }

      // With no characters there is nothing to encode.
      if (charCount == 0)
      {
          return 0;
      }

      // The pointer overload takes the room remaining after byteIndex,
      // not the total size of the array.
      int bytesAvailable = bytes.Length - byteIndex;

      fixed (char* pinnedChars = chars)
      {
          fixed (byte* pinnedBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
          {
              return GetBytes(pinnedChars + charIndex, charCount, pinnedBytes + byteIndex, bytesAvailable, null);
          }
      }
  }
  151. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  152. // So if you fix this, fix the others. Currently those include:
  153. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Encodes <paramref name="charCount"/> characters starting at <paramref name="chars"/> into
  /// a buffer of <paramref name="byteCount"/> bytes starting at <paramref name="bytes"/>.
  /// </summary>
  /// <returns>The value returned by the internal encoder-aware overload.</returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="bytes"/> or <paramref name="chars"/> is null.
  /// </exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="charCount"/> or <paramref name="byteCount"/> is negative.
  /// </exception>
  [CLSCompliant(false)]
  public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
  {
      // The destination pointer is checked first, then the source pointer.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      return GetBytes(chars, charCount, bytes, byteCount, encoder: null);
  }
  /// <summary>
  /// Encodes the characters in <paramref name="chars"/> into <paramref name="bytes"/>.
  /// </summary>
  /// <returns>The value returned by the pointer-based overload.</returns>
  public override unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
  {
      int charLength = chars.Length;
      int byteLength = bytes.Length;

      // Pin both spans and delegate to the pointer-based overload (no encoder state).
      fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
          {
              return GetBytes(pinnedChars, charLength, pinnedBytes, byteLength, null);
          }
      }
  }
  172. // Returns the number of characters produced by decoding a range of bytes
  173. // in a byte array.
  174. //
  175. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  176. // So if you fix this, fix the others. Currently those include:
  177. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  178. // parent method is safe
  /// <summary>
  /// Returns the number of characters produced by decoding <paramref name="count"/> bytes of
  /// <paramref name="bytes"/>, starting at <paramref name="index"/>.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range does not
  /// fit inside <paramref name="bytes"/>.
  /// </exception>
  public override unsafe int GetCharCount(byte[] bytes, int index, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (index < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - index < count)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to count; returning here also avoids pinning an empty array.
      if (count == 0)
      {
          return 0;
      }

      // Pin the array and let the pointer-based overload do the work (no decoder state).
      fixed (byte* pinnedBytes = bytes)
      {
          return GetCharCount(pinnedBytes + index, count, null);
      }
  }
  195. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  196. // So if you fix this, fix the others. Currently those include:
  197. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Returns the number of characters produced by decoding <paramref name="count"/> bytes
  /// starting at the pointer <paramref name="bytes"/>.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
  [CLSCompliant(false)]
  public override unsafe int GetCharCount(byte* bytes, int count)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Arguments are valid; delegate to the internal overload without a decoder.
      return GetCharCount(bytes, count, decoder: null);
  }
  /// <summary>
  /// Returns the number of characters produced by decoding the bytes in <paramref name="bytes"/>.
  /// </summary>
  public override unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
  {
      int length = bytes.Length;

      // Pin the span's memory and delegate to the pointer-based overload (no decoder state).
      fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          return GetCharCount(pinnedBytes, length, null);
      }
  }
  215. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  216. // So if you fix this, fix the others. Currently those include:
  217. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  218. // parent method is safe
  /// <summary>
  /// Decodes <paramref name="byteCount"/> bytes of <paramref name="bytes"/>, starting at
  /// <paramref name="byteIndex"/>, into <paramref name="chars"/> beginning at
  /// <paramref name="charIndex"/>.
  /// </summary>
  /// <returns>
  /// 0 when <paramref name="byteCount"/> is 0; otherwise the value returned by the
  /// pointer-based overload.
  /// </returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="bytes"/> or <paramref name="chars"/> is null.
  /// </exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// An index or count is negative, the byte range does not fit inside
  /// <paramref name="bytes"/>, or <paramref name="charIndex"/> lies beyond the end of
  /// <paramref name="chars"/>.
  /// </exception>
  public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (byteIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - byteIndex < byteCount)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (charIndex < 0 || charIndex > chars.Length)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
      }

      // No input means no output; returning here also avoids pinning an empty array.
      if (byteCount == 0)
      {
          return 0;
      }

      // The pointer overload takes the room remaining after charIndex,
      // not the total size of the array.
      int charsAvailable = chars.Length - charIndex;

      fixed (byte* pinnedBytes = bytes)
      {
          fixed (char* pinnedChars = &MemoryMarshal.GetReference((Span<char>)chars))
          {
              return GetChars(pinnedBytes + byteIndex, byteCount, pinnedChars + charIndex, charsAvailable, null);
          }
      }
  }
  240. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  241. // So if you fix this, fix the others. Currently those include:
  242. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="bytes"/> into a
  /// buffer of <paramref name="charCount"/> characters starting at <paramref name="chars"/>.
  /// </summary>
  /// <returns>The value returned by the internal decoder-aware overload.</returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="bytes"/> or <paramref name="chars"/> is null.
  /// </exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="charCount"/> or <paramref name="byteCount"/> is negative.
  /// </exception>
  [CLSCompliant(false)]
  public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
  {
      // The source pointer is checked first, then the destination pointer.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      // charCount is reported before byteCount when both are negative.
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      return GetChars(bytes, byteCount, chars, charCount, decoder: null);
  }
  /// <summary>
  /// Decodes the bytes in <paramref name="bytes"/> into <paramref name="chars"/>.
  /// </summary>
  /// <returns>The value returned by the pointer-based overload.</returns>
  public override unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
  {
      int byteLength = bytes.Length;
      int charLength = chars.Length;

      // Pin both spans and delegate to the pointer-based overload (no decoder state).
      fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
          {
              return GetChars(pinnedBytes, byteLength, pinnedChars, charLength, null);
          }
      }
  }
  261. // Returns a string containing the decoded representation of a range of
  262. // bytes in a byte array.
  263. //
  264. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  265. // So if you fix this, fix the others. Currently those include:
  266. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  267. // parent method is safe
  /// <summary>
  /// Decodes <paramref name="byteCount"/> bytes of <paramref name="bytes"/>, starting at
  /// <paramref name="byteIndex"/>, into a string.
  /// </summary>
  /// <returns>
  /// <see cref="string.Empty"/> when <paramref name="byteCount"/> is 0; otherwise the string
  /// built from the selected bytes using this encoding.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="byteIndex"/> or <paramref name="byteCount"/> is negative, or the range
  /// does not fit inside <paramref name="bytes"/>.
  /// </exception>
  public override unsafe string GetString(byte[] bytes, int byteIndex, int byteCount)
  {
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (byteIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (bytes.Length - byteIndex < byteCount)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // An empty range decodes to the empty string; no need to pin anything.
      if (byteCount == 0)
      {
          return string.Empty;
      }

      fixed (byte* pinnedBytes = bytes)
      {
          return string.CreateStringFromEncoding(pinnedBytes + byteIndex, byteCount, this);
      }
  }
  283. //
  284. // End of standard methods copied from EncodingNLS.cs
  285. //
  286. // GetByteCount
  287. // Note: We start by assuming that the output will be the same as count. Having
  288. // an encoder or fallback may change that assumption
  // Counts the bytes needed to encode charCount characters starting at chars.
  //
  // encoder may be null. When it is supplied, its left-over character, its fallback and
  // (if it already has one) its fallback buffer are used; otherwise this encoding's own
  // EncoderFallback is used.
  //
  // Fast path: with a replacement fallback whose MaxCharCount is 1 the result is simply
  // charCount, plus one if the encoder carried a left-over character.
  // Slow path: every character above 0x7F is handed to the fallback buffer, and only
  // characters <= 0x7F (from the input or produced by the fallback) are counted.
  //
  // Throws ArgumentException when the encoder's existing fallback buffer still has
  // characters remaining and encoder._throwOnOverflow is set.
  289. internal sealed override unsafe int GetByteCount(char* chars, int charCount, EncoderNLS encoder)
  290. {
  291. // Only assert here: this is called by internal code that has already checked the parameters
  292. Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetByteCount]count is negative");
  293. Debug.Assert(chars != null, "[ASCIIEncoding.GetByteCount]chars is null");
  294. // Assert because this encoding should never have a null encoder fallback.
  295. Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetByteCount]Attempting to use null fallback encoder");
  296. char charLeftOver = (char)0;
  297. EncoderReplacementFallback fallback = null;
  298. // Remember where the input ends; the loop below walks chars up to this pointer
  299. char* charEnd = chars + charCount;
  300. // A fallback buffer is only needed on the slow path; it is created or fetched lazily.
  301. EncoderFallbackBuffer fallbackBuffer = null;
  302. char* charsForFallback;
  303. if (encoder != null)
  304. {
  305. charLeftOver = encoder._charLeftOver;
  306. Debug.Assert(charLeftOver == 0 || char.IsHighSurrogate(charLeftOver),
  307. "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
  308. // Null unless the encoder's fallback is a replacement fallback (decides the fast path below)
  309. fallback = encoder.Fallback as EncoderReplacementFallback;
  309. // We mustn't have left over fallback data when counting
  310. if (encoder.InternalHasFallbackBuffer)
  311. {
  312. // Reuse the encoder's existing fallback buffer so any characters it still holds are seen
  313. fallbackBuffer = encoder.FallbackBuffer;
  314. if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
  315. throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
  316. // Set our internal fallback interesting things.
  317. fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
  318. }
  319. // Verify that we have no fallbackbuffer, for ASCII its always empty, so just assert
  320. Debug.Assert(!encoder._throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
  321. encoder.FallbackBuffer.Remaining == 0,
  322. "[ASCIICodePageEncoding.GetByteCount]Expected empty fallback buffer");
  323. }
  324. else
  325. {
  326. fallback = this.EncoderFallback as EncoderReplacementFallback;
  327. }
  328. // If we are using a replacement fallback that emits a single character,
  329. // the count is trivial; otherwise we may have a complicated count.
  330. if (fallback != null && fallback.MaxCharCount == 1)
  331. {
  332. // Replacement fallback encodes surrogate pairs as two ?? (or two whatever), so return size is always
  333. // same as input size.
  334. // Note that no existing SBCS code pages map code points to supplementary characters, so this is easy.
  335. // We could however have 1 extra byte if the last call had an encoder and a funky fallback and
  336. // if we don't use the funky fallback this time.
  337. // Do we have an extra char left over from last time?
  338. if (charLeftOver > 0)
  339. charCount++;
  340. return (charCount);
  341. }
  342. // Count is more complicated if you have a funky fallback
  343. // For fallback we may need a fallback buffer, we know we're not default fallback
  344. int byteCount = 0;
  345. // We may have a left over character from last time, try and process it.
  346. if (charLeftOver > 0)
  347. {
  348. Debug.Assert(char.IsHighSurrogate(charLeftOver), "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
  349. Debug.Assert(encoder != null, "[ASCIIEncoding.GetByteCount]Expected encoder");
  350. // Since left over char was a surrogate, it'll have to be fallen back.
  351. // Get Fallback
  352. fallbackBuffer = encoder.FallbackBuffer;
  353. fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
  354. // This will fallback a pair if *chars is a low surrogate
  355. charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
  356. fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
  357. chars = charsForFallback;
  358. }
  359. // Now we may have fallback char[] already from the encoder
  360. // Go ahead and do it, including the fallback.
  361. // Each iteration first drains a pending fallback character (if any); only when there is
  361. // none (ch == 0) is the next input character consumed.
  361. char ch;
  362. while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
  363. chars < charEnd)
  364. {
  365. // First unwind any fallback
  366. if (ch == 0)
  367. {
  368. // No fallback, just get next char
  369. ch = *chars;
  370. chars++;
  371. }
  372. // Check for fallback, this'll catch surrogate pairs too.
  373. // no chars >= 0x80 are allowed.
  374. if (ch > 0x7f)
  375. {
  376. if (fallbackBuffer == null)
  377. {
  378. // Initialize the buffer
  379. if (encoder == null)
  380. fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
  381. else
  382. fallbackBuffer = encoder.FallbackBuffer;
  383. // charEnd - charCount is the original start of the input (chars has advanced since)
  383. fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, false);
  384. }
  385. // Get Fallback
  386. charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
  387. fallbackBuffer.InternalFallback(ch, ref charsForFallback);
  388. chars = charsForFallback;
  389. continue;
  390. }
  391. // We'll use this one
  392. byteCount++;
  393. }
  394. Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
  395. "[ASCIIEncoding.GetByteCount]Expected Empty fallback buffer");
  396. return byteCount;
  397. }
  // Encodes charCount characters starting at chars into at most byteCount bytes starting at
  // bytes and returns the number of bytes written (bytes - byteStart).
  //
  // encoder may be null. When it is supplied, its left-over character and fallback are
  // used, and its _charsUsed (and, where applicable, _charLeftOver) fields are updated
  // before returning.
  //
  // Fast path: a replacement fallback with MaxCharCount == 1 whose replacement character is
  // <= 0x7F; every input character then produces exactly one byte.
  // Slow path: every character above 0x7F is handed to the fallback buffer and the
  // characters it produces are encoded in turn.
  //
  // When the output buffer is too small ThrowBytesOverflow is called.
  // NOTE(review): it presumably throws only when its second argument is true or no
  // non-throwing encoder is in use, and otherwise lets encoding stop early - confirm
  // against Encoding.ThrowBytesOverflow.
  //
  // Throws ArgumentException when the encoder's existing fallback buffer still has
  // characters remaining and encoder._throwOnOverflow is set.
  398. internal sealed override unsafe int GetBytes(
  399. char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
  400. {
  401. // Only assert here: this is called by internal code that has already checked the parameters
  402. Debug.Assert(bytes != null, "[ASCIIEncoding.GetBytes]bytes is null");
  403. Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetBytes]byteCount is negative");
  404. Debug.Assert(chars != null, "[ASCIIEncoding.GetBytes]chars is null");
  405. Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetBytes]charCount is negative");
  406. // Assert because this encoding should never have a null encoder fallback.
  407. Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetBytes]Attempting to use null encoder fallback");
  408. // Get any left over characters
  409. char charLeftOver = (char)0;
  410. EncoderReplacementFallback fallback = null;
  411. // A fallback buffer is only needed on the slow path; it is created or fetched lazily.
  412. EncoderFallbackBuffer fallbackBuffer = null;
  413. char* charsForFallback;
  414. // Remember the ends and starts so we can bound the loops and compute the counts we return
  415. char* charEnd = chars + charCount;
  416. byte* byteStart = bytes;
  417. char* charStart = chars;
  418. if (encoder != null)
  419. {
  420. charLeftOver = encoder._charLeftOver;
  421. // Null unless the encoder's fallback is a replacement fallback (decides the fast path below)
  421. fallback = encoder.Fallback as EncoderReplacementFallback;
  422. // We mustn't have left over fallback data when encoding
  423. if (encoder.InternalHasFallbackBuffer)
  424. {
  425. // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
  426. fallbackBuffer = encoder.FallbackBuffer;
  427. if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
  428. throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
  429. // Set our internal fallback interesting things.
  430. fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
  431. }
  432. Debug.Assert(charLeftOver == 0 || char.IsHighSurrogate(charLeftOver),
  433. "[ASCIIEncoding.GetBytes]leftover character should be high surrogate");
  434. // Verify that we have no fallbackbuffer, for ASCII its always empty, so just assert
  435. Debug.Assert(!encoder._throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
  436. encoder.FallbackBuffer.Remaining == 0,
  437. "[ASCIICodePageEncoding.GetBytes]Expected empty fallback buffer");
  438. }
  439. else
  440. {
  441. fallback = this.EncoderFallback as EncoderReplacementFallback;
  442. }
  443. // See if we do the fast default or slightly slower fallback
  444. if (fallback != null && fallback.MaxCharCount == 1)
  445. {
  446. // Fast version
  447. char cReplacement = fallback.DefaultString[0];
  448. // Check for replacements in range, otherwise fall back to slow version.
  449. if (cReplacement <= (char)0x7f)
  450. {
  451. // We should have exactly as many output bytes as input bytes, unless there's a left
  452. // over character, in which case we may need one more.
  453. // If we had a left over character will have to add a ? (This happens if they had a funky
  454. // fallback last time, but not this time.) (We can't spit any out though
  455. // because with fallback encoder each surrogate is treated as a separate code point)
  456. if (charLeftOver > 0)
  457. {
  458. // Have to have room
  459. // Throw even if doing no throw version because this is just 1 char,
  460. // so buffer will never be big enough
  461. if (byteCount == 0)
  462. ThrowBytesOverflow(encoder, true);
  463. // This'll make sure we still have more room and also make sure our return value is correct.
  464. *(bytes++) = (byte)cReplacement;
  465. byteCount--; // We used one of the ones we were counting.
  466. }
  467. // This keeps us from overrunning our output buffer
  468. if (byteCount < charCount)
  469. {
  470. // Throw or make buffer smaller?
  471. ThrowBytesOverflow(encoder, byteCount < 1);
  472. // Just use what we can
  473. charEnd = chars + byteCount;
  474. }
  475. // We just do a quick copy: one output byte per input char, replacing anything >= 0x80
  476. while (chars < charEnd)
  477. {
  478. char ch2 = *(chars++);
  479. if (ch2 >= 0x0080) *(bytes++) = (byte)cReplacement;
  480. else *(bytes++) = unchecked((byte)(ch2));
  481. }
  482. // Clear encoder
  483. if (encoder != null)
  484. {
  485. encoder._charLeftOver = (char)0;
  486. encoder._charsUsed = (int)(chars - charStart);
  487. }
  488. return (int)(bytes - byteStart);
  489. }
  490. }
  491. // Slower version, have to do real fallback.
  492. // prepare our end
  493. byte* byteEnd = bytes + byteCount;
  494. // We may have a left over character from last time, try and process it.
  495. if (charLeftOver > 0)
  496. {
  497. // Initialize the buffer
  498. Debug.Assert(encoder != null,
  499. "[ASCIIEncoding.GetBytes]Expected non null encoder if we have surrogate left over");
  500. fallbackBuffer = encoder.FallbackBuffer;
  501. fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
  502. // Since left over char was a surrogate, it'll have to be fallen back.
  503. // Get Fallback
  504. // This will fallback a pair if *chars is a low surrogate
  505. charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
  506. fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
  507. chars = charsForFallback;
  508. }
  509. // Now we may have fallback char[] already from the encoder
  510. // Go ahead and do it, including the fallback.
  511. // Each iteration first drains a pending fallback character (if any); only when there is
  511. // none (ch == 0) is the next input character consumed.
  511. char ch;
  512. while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
  513. chars < charEnd)
  514. {
  515. // First unwind any fallback
  516. if (ch == 0)
  517. {
  518. // No fallback, just get next char
  519. ch = *chars;
  520. chars++;
  521. }
  522. // Check for fallback, this'll catch surrogate pairs too.
  523. // All characters >= 0x80 must fall back.
  524. if (ch > 0x7f)
  525. {
  526. // Initialize the buffer
  527. if (fallbackBuffer == null)
  528. {
  529. if (encoder == null)
  530. fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
  531. else
  532. fallbackBuffer = encoder.FallbackBuffer;
  533. // charEnd - charCount is the original start of the input (chars has advanced since)
  533. fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
  534. }
  535. // Get Fallback
  536. charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
  537. fallbackBuffer.InternalFallback(ch, ref charsForFallback);
  538. chars = charsForFallback;
  539. // Go ahead & continue (& do the fallback)
  540. continue;
  541. }
  542. // We'll use this one
  543. // Bounds check
  544. if (bytes >= byteEnd)
  545. {
  546. // didn't use this char, we'll throw or use buffer
  547. // Un-consume it: step the input pointer back if it came from the input,
  547. // otherwise step the fallback buffer back.
  547. if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
  548. {
  549. Debug.Assert(chars > charStart || bytes == byteStart,
  550. "[ASCIIEncoding.GetBytes]Expected chars to have advanced already.");
  551. chars--; // don't use last char
  552. }
  553. else
  554. fallbackBuffer.MovePrevious();
  555. // Are we throwing or using buffer?
  556. ThrowBytesOverflow(encoder, bytes == byteStart); // throw?
  557. break; // don't throw, stop
  558. }
  559. // Go ahead and add it
  560. *bytes = unchecked((byte)ch);
  561. bytes++;
  562. }
  563. // Need to do encoder stuff
  564. if (encoder != null)
  565. {
  566. // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
  567. if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
  568. // Clear it in case of MustFlush
  569. encoder._charLeftOver = (char)0;
  570. // Set our chars used count
  571. encoder._charsUsed = (int)(chars - charStart);
  572. }
  573. Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
  574. (encoder != null && !encoder._throwOnOverflow),
  575. "[ASCIIEncoding.GetBytes]Expected Empty fallback buffer at end");
  576. return (int)(bytes - byteStart);
  577. }
  // Internal worker: computes how many chars decoding `count` bytes starting at `bytes` yields.
  // Arguments are only asserted here, not validated.
  // Each byte below 0x80 counts as one char; each byte >= 0x80 is counted through the decoder fallback.
  internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
  {
      Debug.Assert(bytes != null, "[ASCIIEncoding.GetCharCount]bytes is null");
      Debug.Assert(count >= 0, "[ASCIIEncoding.GetCharCount]byteCount is negative");

      // Take the fallback from the decoder when one is supplied, otherwise from this encoding.
      DecoderReplacementFallback replacement;
      if (decoder != null)
      {
          replacement = decoder.Fallback as DecoderReplacementFallback;
          Debug.Assert(!decoder._throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
              decoder.FallbackBuffer.Remaining == 0,
              "[ASCIICodePageEncoding.GetCharCount]Expected empty fallback buffer");
      }
      else
      {
          replacement = this.DecoderFallback as DecoderReplacementFallback;
      }

      // Shortcut: with a replacement fallback whose MaxCharCount is 1 the answer is simply `count`.
      if (replacement != null && replacement.MaxCharCount == 1)
          return count;

      // General case: walk the input and ask the fallback buffer about every non-ASCII byte.
      int total = count;                              // start by reserving one char per byte
      byte[] oneByte = new byte[1];                   // scratch array handed to the fallback
      DecoderFallbackBuffer fallbackBuffer = null;    // obtained on the first non-ASCII byte only
      byte* cursor = bytes;
      byte* limit = bytes + count;

      while (cursor < limit)
      {
          byte current = *cursor;
          cursor++;

          if (current < 0x80)
              continue;                               // plain ASCII, already counted

          if (fallbackBuffer == null)
          {
              fallbackBuffer = decoder == null
                  ? this.DecoderFallback.CreateFallbackBuffer()
                  : decoder.FallbackBuffer;
              // `bytes` is never advanced in this method, so it still marks the start of the input.
              fallbackBuffer.InternalInitialize(bytes, null);
          }

          oneByte[0] = current;
          total--;                                    // give back the char reserved for this byte
          total += fallbackBuffer.InternalFallback(oneByte, cursor);
      }

      // Nothing may be left pending in the fallback buffer once counting is done.
      Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
          "[ASCIIEncoding.GetCharCount]Expected Empty fallback buffer");

      return total;
  }
  // Internal worker: decodes up to `byteCount` bytes from `bytes` into `chars` (capacity `charCount`)
  // and returns the number of chars written. Arguments are only asserted here, not validated.
  // When a decoder is supplied, the number of bytes consumed is stored in decoder._bytesUsed.
  internal sealed override unsafe int GetChars(
      byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
  {
      Debug.Assert(bytes != null, "[ASCIIEncoding.GetChars]bytes is null");
      Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetChars]byteCount is negative");
      Debug.Assert(chars != null, "[ASCIIEncoding.GetChars]chars is null");
      Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetChars]charCount is negative");

      // Remember where both buffers begin so the consumed/produced counts can be derived at the end.
      byte* firstByte = bytes;
      char* firstChar = chars;
      byte* byteLimit = bytes + byteCount;

      // Take the fallback from the decoder when one is supplied, otherwise from this encoding.
      DecoderReplacementFallback replacement;
      if (decoder != null)
      {
          replacement = decoder.Fallback as DecoderReplacementFallback;
          Debug.Assert(!decoder._throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
              decoder.FallbackBuffer.Remaining == 0,
              "[ASCIICodePageEncoding.GetChars]Expected empty fallback buffer");
      }
      else
      {
          replacement = this.DecoderFallback as DecoderReplacementFallback;
      }

      if (replacement != null && replacement.MaxCharCount == 1)
      {
          // Fast path: every input byte becomes exactly one output char.
          char substitute = replacement.DefaultString[0];

          if (charCount < byteCount)
          {
              // Output is too small for the whole input. ThrowCharsOverflow decides whether
              // to throw; if it returns, convert only as many bytes as there is room for.
              ThrowCharsOverflow(decoder, charCount < 1);
              byteLimit = bytes + charCount;
          }

          while (bytes < byteLimit)
          {
              byte current = *bytes++;
              // Bytes >= 0x80 are not valid ASCII and are swapped for the replacement char.
              *chars++ = current >= 0x80 ? substitute : (char)current;
          }

          if (decoder != null)
              decoder._bytesUsed = (int)(bytes - firstByte);

          return (int)(chars - firstChar);
      }

      // General path: invalid bytes are routed through a fallback buffer.
      DecoderFallbackBuffer fallbackBuffer = null;    // obtained on the first invalid byte only
      byte[] oneByte = new byte[1];                   // scratch array handed to the fallback
      char* charLimit = chars + charCount;

      while (bytes < byteLimit)
      {
          byte current = *bytes;
          bytes++;

          if (current < 0x80)
          {
              if (chars >= charLimit)
              {
                  // No room left for this char: rewind past the unread byte and stop
                  // (ThrowCharsOverflow decides whether that means throwing).
                  Debug.Assert(bytes > byteStartOrSelf(firstByte) || chars == firstChar,
                      "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (normal case)");
                  bytes--;
                  ThrowCharsOverflow(decoder, chars == firstChar);
                  break;
              }

              *chars = (char)current;
              chars++;
              continue;
          }

          // Invalid ASCII byte: make sure the fallback buffer exists, then let it emit the chars.
          if (fallbackBuffer == null)
          {
              fallbackBuffer = decoder == null
                  ? this.DecoderFallback.CreateFallbackBuffer()
                  : decoder.FallbackBuffer;
              // byteLimit is not modified on this path, so firstByte equals byteLimit - byteCount.
              fallbackBuffer.InternalInitialize(firstByte, charLimit);
          }

          oneByte[0] = current;

          // Hand the fallback a copy of the output cursor instead of `chars` itself by reference.
          char* fallbackCursor = chars;
          bool converted = fallbackBuffer.InternalFallback(oneByte, bytes, ref fallbackCursor);
          chars = fallbackCursor;

          if (converted)
              continue;

          // The fallback reported failure: this byte was not consumed, so rewind, reset the
          // buffer and stop (ThrowCharsOverflow decides whether that means throwing).
          Debug.Assert(bytes > firstByte || chars == firstChar,
              "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (fallback case)");
          bytes--;
          fallbackBuffer.InternalReset();
          ThrowCharsOverflow(decoder, chars == firstChar);
          break;
      }

      // Report how many bytes were consumed to the decoder, if there is one.
      if (decoder != null)
          decoder._bytesUsed = (int)(bytes - firstByte);

      // Nothing may be left pending in the fallback buffer once decoding is done.
      Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
          "[ASCIIEncoding.GetChars]Expected Empty fallback buffer");

      return (int)(chars - firstChar);

      // Identity helper kept local so the assert above reads against the start pointer.
      byte* byteStartOrSelf(byte* start) => start;
  }
  // Returns an upper bound on the number of bytes needed to encode `charCount` chars.
  // Throws ArgumentOutOfRangeException when charCount is negative or the bound exceeds int.MaxValue.
  public override int GetMaxByteCount(int charCount)
  {
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // One extra slot beyond charCount, computed in 64 bits so the sum cannot wrap.
      long worstCase = charCount + 1L;

      // An encoder fallback that can emit several chars per input char scales the bound.
      if (EncoderFallback.MaxCharCount > 1)
      {
          worstCase = worstCase * EncoderFallback.MaxCharCount;
      }

      if (worstCase <= int.MaxValue)
      {
          return (int)worstCase;
      }

      throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
  }
  // Returns an upper bound on the number of chars produced by decoding `byteCount` bytes.
  // Throws ArgumentOutOfRangeException when byteCount is negative or the bound exceeds int.MaxValue.
  public override int GetMaxCharCount(int byteCount)
  {
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Baseline is one char per byte, held in 64 bits so the scaling below cannot wrap.
      long worstCase = byteCount;

      // A decoder fallback that can emit several chars per byte scales the bound.
      if (DecoderFallback.MaxCharCount > 1)
      {
          worstCase = worstCase * DecoderFallback.MaxCharCount;
      }

      if (worstCase <= int.MaxValue)
      {
          return (int)worstCase;
      }

      throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
  }
  // Reports whether the encoding uses only single-byte code points (e.g. ASCII, 1252).
  // This encoding always answers true.
  public override bool IsSingleByte => true;
  // Creates a new DecoderNLS bound to this encoding instance.
  public override Decoder GetDecoder() => new DecoderNLS(this);
  // Creates a new EncoderNLS bound to this encoding instance.
  public override Encoder GetEncoder() => new EncoderNLS(this);
  796. }
  797. }