// ASCIIEncoding.cs
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Buffers;
  5. using System.Diagnostics;
  6. using System.Runtime.CompilerServices;
  7. using System.Runtime.InteropServices;
  8. namespace System.Text
  9. {
  10. // ASCIIEncoding
  11. //
  12. // Note that ASCIIEncoding is optimized with no best fit and ? for fallback.
  13. // It doesn't come in other flavors.
  14. //
  // Note: ASCIIEncoding is the only encoding that doesn't do best fit (Windows has best fit).
  16. //
  17. // Note: IsAlwaysNormalized remains false because 1/2 the code points are unassigned, so they'd
  18. // use fallbacks, and we cannot guarantee that fallbacks are normalized.
  19. public partial class ASCIIEncoding : Encoding
  20. {
  21. // This specialized sealed type has two benefits:
  22. // 1) it allows for devirtualization (see https://github.com/dotnet/coreclr/pull/9230), and
  23. // 2) it allows us to provide highly optimized implementations of certain routines because
  24. // we can make assumptions about the fallback mechanisms in use (in particular, always
  25. // replace with "?").
  26. //
  27. // (We don't take advantage of #2 yet, but we can do so in the future because the implementation
  28. // of cloning below allows us to make assumptions about the behaviors of the sealed type.)
  /// <summary>
  /// Sealed flavor of <see cref="ASCIIEncoding"/>. Its only customization is
  /// <see cref="Clone"/>, which never hands out another instance of the sealed type.
  /// </summary>
  internal sealed class ASCIIEncodingSealed : ASCIIEncoding
  {
      /// <summary>
      /// Creates a mutable copy of this encoding.
      /// </summary>
      /// <returns>A fresh, writable instance of the base <see cref="ASCIIEncoding"/> type.</returns>
      public override object Clone()
      {
          // Encoding.Clone would MemberwiseClone this object and flag the copy as mutable,
          // which would break the invariants of the sealed type. Build a plain
          // ASCIIEncoding instead and make that one writable.
          ASCIIEncoding mutableCopy = new ASCIIEncoding();
          mutableCopy.IsReadOnly = false;
          return mutableCopy;
      }
  }
  42. // Used by Encoding.ASCII for lazy initialization
  43. // The initialization code will not be run until a static member of the class is referenced
  /// <summary>
  /// Lazily created instance of the sealed type; it is constructed the first time a
  /// static member of this class is touched.
  /// </summary>
  internal static readonly ASCIIEncodingSealed s_default = new ASCIIEncodingSealed();

  /// <summary>
  /// Initializes a new <see cref="ASCIIEncoding"/> bound to the ASCII code page.
  /// </summary>
  public ASCIIEncoding()
      : base(Encoding.CodePageASCII)
  {
      // Nothing else to set up; the base constructor receives the code page.
  }
  /// <summary>
  /// Installs the default replacement fallbacks for both the decoding and the encoding direction.
  /// </summary>
  internal sealed override void SetDefaultFallbacks()
  {
      // ASCIIEncoding always starts out with the stock replacement fallbacks.
      decoderFallback = DecoderFallback.ReplacementFallback;
      encoderFallback = EncoderFallback.ReplacementFallback;
  }
  54. // WARNING: GetByteCount(string chars), GetBytes(string chars,...), and GetString(byte[] byteIndex...)
  55. // WARNING: have different variable names than EncodingNLS.cs, so this can't just be cut & pasted,
  56. // WARNING: or it'll break VB's way of calling these.
  57. //
  58. // The following methods are copied from EncodingNLS.cs.
  59. // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
  60. // These should be kept in sync for the following classes:
  61. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  62. // Returns the number of bytes required to encode a range of characters in
  63. // a character array.
  64. //
  65. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  66. // So if you fix this, fix the others. Currently those include:
  67. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  68. // parent method is safe
  /// <summary>
  /// Computes how many bytes are needed to encode <paramref name="count"/> characters of
  /// <paramref name="chars"/>, beginning at <paramref name="index"/>.
  /// </summary>
  /// <param name="chars">Source character array; null is reported through <c>ThrowHelper</c>.</param>
  /// <param name="index">Offset of the first character to measure; must not be negative.</param>
  /// <param name="count">Number of characters to measure; must not be negative and must fit in the array.</param>
  /// <returns>The result of <c>GetByteCountCommon</c> for the selected range.</returns>
  public override unsafe int GetByteCount(char[] chars, int index, int count)
  {
      // Argument checks, in the same order the exceptions are documented to surface.
      if (chars is null)
      {
          ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array);
      }

      if (index < 0 || count < 0)
      {
          // Blame 'index' first when both values are negative.
          ExceptionArgument negativeArgument = (index < 0) ? ExceptionArgument.index : ExceptionArgument.count;
          ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      int charsAvailable = chars!.Length - index;
      if (count > charsAvailable)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
      }

      fixed (char* pStart = chars)
      {
          return GetByteCountCommon(pStart + index, count);
      }
  }
  89. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  90. // So if you fix this, fix the others. Currently those include:
  91. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  92. // parent method is safe
  /// <summary>
  /// Computes how many bytes are needed to encode every character of <paramref name="chars"/>.
  /// </summary>
  /// <param name="chars">Source string; null is reported through <c>ThrowHelper</c>.</param>
  /// <returns>The result of <c>GetByteCountCommon</c> for the whole string.</returns>
  public override unsafe int GetByteCount(string chars)
  {
      // Reject a missing string before pinning it.
      if (chars is null)
      {
          ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
      }

      int length = chars!.Length;

      fixed (char* pText = chars)
      {
          return GetByteCountCommon(pText, length);
      }
  }
  105. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  106. // So if you fix this, fix the others. Currently those include:
  107. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Computes how many bytes are needed to encode <paramref name="count"/> characters
  /// read from the raw pointer <paramref name="chars"/>.
  /// </summary>
  /// <param name="chars">Pointer to the first character; a null pointer is rejected.</param>
  /// <param name="count">Number of characters to measure; must not be negative.</param>
  /// <returns>The result of <c>GetByteCountCommon</c> for the buffer.</returns>
  [CLSCompliant(false)]
  public override unsafe int GetByteCount(char* chars, int count)
  {
      // Pointer first, then length - same order as the other overloads.
      if (chars is null)
      {
          ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
      }

      if (count < 0)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      return GetByteCountCommon(chars, count);
  }
  /// <summary>
  /// Computes how many bytes are needed to encode the characters in <paramref name="chars"/>.
  /// </summary>
  /// <param name="chars">Span of characters to measure; may be empty.</param>
  /// <returns>The result of <c>GetByteCountCommon</c> for the span.</returns>
  public override unsafe int GetByteCount(ReadOnlySpan<char> chars)
  {
      int length = chars.Length;

      // The shared helper tolerates a null pointer, so an empty span needs no special case.
      fixed (char* pFirst = &MemoryMarshal.GetReference(chars))
      {
          return GetByteCountCommon(pFirst, length);
      }
  }
  /// <summary>
  /// Shared worker behind every non-EncoderNLS <c>GetByteCount</c> entry point: tries the
  /// fast path first and hands whatever is left to the fallback path.
  /// </summary>
  /// <param name="pChars">Pointer to the first character; may be null only when <paramref name="charCount"/> is zero.</param>
  /// <param name="charCount">Number of characters to measure; callers guarantee it is non-negative.</param>
  /// <returns>The fast-path count, plus the fallback-path count when the fast path stopped early.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private unsafe int GetByteCountCommon(char* pChars, int charCount)
  {
      // A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.

      Debug.Assert(charCount >= 0, "Caller shouldn't specify negative length buffer.");
      Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");

      // Fast path: measures as much of the input as it can handle.
      int fastPathCount = GetByteCountFast(pChars, charCount, EncoderFallback, out int consumed);
      if (consumed == charCount)
      {
          return fastPathCount;
      }

      // Input remains, so the fallback path must finish the job. A fallback can change the
      // required output size in unexpected ways, hence the overflow check on the sum.
      int total = fastPathCount + GetByteCountWithFallback(pChars, charCount, consumed);
      if (total < 0)
      {
          ThrowConversionOverflow();
      }

      return total;
  }
  /// <summary>
  /// Fast-path byte counting: returns the whole length when the fallback guarantees a 1:1
  /// char-to-byte mapping, otherwise the length of the leading all-ASCII run.
  /// </summary>
  /// <param name="pChars">Pointer to the first character.</param>
  /// <param name="charsLength">Number of characters available at <paramref name="pChars"/>.</param>
  /// <param name="fallback">Encoder fallback in effect; may be null.</param>
  /// <param name="charsConsumed">Receives the number of characters accounted for, equal to the return value.</param>
  /// <returns>The number of bytes accounted for by the fast path.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetByteCountCommon
  private protected sealed override unsafe int GetByteCountFast(char* pChars, int charsLength, EncoderFallback? fallback, out int charsConsumed)
  {
      // With an EncoderReplacementFallback every non-ASCII char (surrogate halves included)
      // becomes the default string. When that string is one ASCII char, each input char
      // yields exactly one byte and nothing needs to be scanned.
      bool isSingleAsciiReplacement =
          fallback is EncoderReplacementFallback replacement
          && replacement.MaxCharCount == 1
          && replacement.DefaultString[0] <= 0x7F;

      int counted = isSingleAsciiReplacement
          ? charsLength
          : (int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pChars, (uint)charsLength); // unrecognized fallback - count manually

      charsConsumed = counted;
      return counted;
  }
  171. // Parent method is safe.
  172. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  173. // So if you fix this, fix the others. Currently those include:
  174. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Encodes <paramref name="charCount"/> characters of the string <paramref name="chars"/>,
  /// starting at <paramref name="charIndex"/>, into <paramref name="bytes"/> at <paramref name="byteIndex"/>.
  /// </summary>
  /// <param name="chars">Source string; must not be null.</param>
  /// <param name="charIndex">Offset of the first character to encode; must not be negative.</param>
  /// <param name="charCount">Number of characters to encode; must not be negative and must fit in the string.</param>
  /// <param name="bytes">Destination array; must not be null.</param>
  /// <param name="byteIndex">Offset in <paramref name="bytes"/> at which writing starts; must lie within 0..Length.</param>
  /// <returns>The result of <c>GetBytesCommon</c> for the selected ranges.</returns>
  public override unsafe int GetBytes(string chars, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (chars is null || bytes is null)
      {
          // 'chars' takes the blame when both are missing.
          ExceptionArgument missingArgument = (chars is null) ? ExceptionArgument.chars : ExceptionArgument.bytes;
          ThrowHelper.ThrowArgumentNullException(missingArgument, ExceptionResource.ArgumentNull_Array);
      }

      if (charIndex < 0 || charCount < 0)
      {
          ExceptionArgument negativeArgument = (charIndex < 0) ? ExceptionArgument.charIndex : ExceptionArgument.charCount;
          ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      int charsAvailable = chars!.Length - charIndex;
      if (charCount > charsAvailable)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCount);
      }

      // The unsigned comparison also catches a negative byteIndex.
      if ((uint)byteIndex > (uint)bytes!.Length)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index);
      }

      int byteCapacity = bytes.Length - byteIndex;

      fixed (char* pSource = chars)
      {
          fixed (byte* pDestination = bytes)
          {
              return GetBytesCommon(pSource + charIndex, charCount, pDestination + byteIndex, byteCapacity);
          }
      }
  }
  205. // Encodes a range of characters in a character array into a range of bytes
  206. // in a byte array. An exception occurs if the byte array is not large
  207. // enough to hold the complete encoding of the characters. The
  208. // GetByteCount method can be used to determine the exact number of
  209. // bytes that will be produced for a given range of characters.
  210. // Alternatively, the GetMaxByteCount method can be used to
  211. // determine the maximum number of bytes that will be produced for a given
  212. // number of characters, regardless of the actual character values.
  213. //
  214. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  215. // So if you fix this, fix the others. Currently those include:
  216. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  217. // parent method is safe
  /// <summary>
  /// Encodes <paramref name="charCount"/> characters of the array <paramref name="chars"/>,
  /// starting at <paramref name="charIndex"/>, into <paramref name="bytes"/> at <paramref name="byteIndex"/>.
  /// </summary>
  /// <param name="chars">Source character array; must not be null.</param>
  /// <param name="charIndex">Offset of the first character to encode; must not be negative.</param>
  /// <param name="charCount">Number of characters to encode; must not be negative and must fit in the array.</param>
  /// <param name="bytes">Destination array; must not be null.</param>
  /// <param name="byteIndex">Offset in <paramref name="bytes"/> at which writing starts; must lie within 0..Length.</param>
  /// <returns>The result of <c>GetBytesCommon</c> for the selected ranges.</returns>
  public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
      byte[] bytes, int byteIndex)
  {
      if (chars is null || bytes is null)
      {
          // 'chars' takes the blame when both are missing.
          ExceptionArgument missingArgument = (chars is null) ? ExceptionArgument.chars : ExceptionArgument.bytes;
          ThrowHelper.ThrowArgumentNullException(missingArgument, ExceptionResource.ArgumentNull_Array);
      }

      if (charIndex < 0 || charCount < 0)
      {
          ExceptionArgument negativeArgument = (charIndex < 0) ? ExceptionArgument.charIndex : ExceptionArgument.charCount;
          ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      int charsAvailable = chars!.Length - charIndex;
      if (charCount > charsAvailable)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCount);
      }

      // The unsigned comparison also catches a negative byteIndex.
      if ((uint)byteIndex > (uint)bytes!.Length)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index);
      }

      int byteCapacity = bytes.Length - byteIndex;

      fixed (char* pSource = chars)
      {
          fixed (byte* pDestination = bytes)
          {
              return GetBytesCommon(pSource + charIndex, charCount, pDestination + byteIndex, byteCapacity);
          }
      }
  }
  248. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  249. // So if you fix this, fix the others. Currently those include:
  250. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Encodes <paramref name="charCount"/> characters from the raw buffer <paramref name="chars"/>
  /// into the raw buffer <paramref name="bytes"/> of capacity <paramref name="byteCount"/>.
  /// </summary>
  /// <param name="chars">Pointer to the first source character; must not be null.</param>
  /// <param name="charCount">Number of source characters; must not be negative.</param>
  /// <param name="bytes">Pointer to the destination buffer; must not be null.</param>
  /// <param name="byteCount">Capacity of the destination in bytes; must not be negative.</param>
  /// <returns>The result of <c>GetBytesCommon</c> for the two buffers.</returns>
  [CLSCompliant(false)]
  public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
  {
      if (chars is null || bytes is null)
      {
          // 'chars' takes the blame when both pointers are null.
          ExceptionArgument missingArgument = (chars is null) ? ExceptionArgument.chars : ExceptionArgument.bytes;
          ThrowHelper.ThrowArgumentNullException(missingArgument, ExceptionResource.ArgumentNull_Array);
      }

      if (charCount < 0 || byteCount < 0)
      {
          ExceptionArgument negativeArgument = (charCount < 0) ? ExceptionArgument.charCount : ExceptionArgument.byteCount;
          ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      return GetBytesCommon(chars, charCount, bytes, byteCount);
  }
  /// <summary>
  /// Encodes the characters in <paramref name="chars"/> into <paramref name="bytes"/>.
  /// </summary>
  /// <param name="chars">Source span; may be empty.</param>
  /// <param name="bytes">Destination span; may be empty.</param>
  /// <returns>The result of <c>GetBytesCommon</c> for the two spans.</returns>
  public override unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
  {
      int sourceLength = chars.Length;
      int destinationLength = bytes.Length;

      // Null / empty spans are fine here; the shared helper copes with them.
      fixed (char* pSource = &MemoryMarshal.GetReference(chars))
      {
          fixed (byte* pDestination = &MemoryMarshal.GetReference(bytes))
          {
              return GetBytesCommon(pSource, sourceLength, pDestination, destinationLength);
          }
      }
  }
  /// <summary>
  /// Shared worker behind every non-EncoderNLS <c>GetBytes</c> entry point: runs the fast
  /// narrowing path and only invokes the fallback path when input is left over.
  /// </summary>
  /// <param name="pChars">Pointer to the first source character; may be null only when <paramref name="charCount"/> is zero.</param>
  /// <param name="charCount">Number of source characters; callers guarantee it is non-negative.</param>
  /// <param name="pBytes">Pointer to the destination; may be null only when <paramref name="byteCount"/> is zero.</param>
  /// <param name="byteCount">Destination capacity in bytes; callers guarantee it is non-negative.</param>
  /// <returns>The fast-path byte count, or the fallback path's result when the fast path stopped early.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount)
  {
      // A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.

      Debug.Assert(charCount >= 0, "Caller shouldn't specify negative length buffer.");
      Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
      Debug.Assert(byteCount >= 0, "Caller shouldn't specify negative length buffer.");
      Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");

      // Fast path: plain narrowing of as many elements as possible.
      int written = GetBytesFast(pChars, charCount, pBytes, byteCount, out int consumed);

      if (consumed != charCount)
      {
          // The narrowing conversion could not cover the whole buffer - let the fallback finish.
          return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, consumed, written);
      }

      // Everything was converted.
      return written;
  }
  /// <summary>
  /// Fast-path encoding: narrows characters to bytes through <c>ASCIIUtility.NarrowUtf16ToAscii</c>,
  /// limited to the smaller of the two buffer lengths.
  /// </summary>
  /// <param name="pChars">Pointer to the first source character.</param>
  /// <param name="charsLength">Number of source characters.</param>
  /// <param name="pBytes">Pointer to the destination.</param>
  /// <param name="bytesLength">Destination capacity in bytes.</param>
  /// <param name="charsConsumed">Receives the number of characters converted, equal to the return value.</param>
  /// <returns>The number of bytes written.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetBytesCommon
  private protected sealed override unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed)
  {
      // Never read past the source nor write past the destination.
      uint elementLimit = (uint)Math.Min(charsLength, bytesLength);

      int narrowed = (int)ASCIIUtility.NarrowUtf16ToAscii(pChars, pBytes, elementLimit);

      charsConsumed = narrowed;
      return narrowed;
  }
  /// <summary>
  /// Fallback-path encoding. When the active fallback is an <see cref="EncoderReplacementFallback"/>
  /// that emits one ASCII character, the replacement is done inline; any input still left
  /// afterwards (or any other fallback) is handed to the base implementation.
  /// </summary>
  /// <param name="chars">Remaining source characters.</param>
  /// <param name="originalCharsLength">Length of the caller's original source buffer; forwarded to the base implementation.</param>
  /// <param name="bytes">Remaining destination space.</param>
  /// <param name="originalBytesLength">Length of the caller's original destination buffer.</param>
  /// <param name="encoder">Optional encoder whose fallback is used instead of this encoding's own.</param>
  /// <returns>The total number of bytes written, or the base implementation's result.</returns>
  private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS? encoder)
  {
      // An encoder, when supplied, carries the fallback to honor; otherwise use our own.
      EncoderFallback? activeFallback = (encoder is null) ? this.EncoderFallback : encoder.Fallback;

      // Special case: a replacement fallback that writes a single ASCII char. This is believed
      // to be common and can be handled more cheaply here than in the base implementation.
      if (activeFallback is EncoderReplacementFallback replacement
          && replacement.MaxCharCount == 1
          && replacement.DefaultString[0] <= 0x7F)
      {
          byte substitute = (byte)replacement.DefaultString[0];
          int limit = Math.Min(chars.Length, bytes.Length);

          fixed (char* pSource = &MemoryMarshal.GetReference(chars))
          {
              fixed (byte* pDestination = &MemoryMarshal.GetReference(bytes))
              {
                  int position = 0;

                  // Alternate: substitute one unconvertible char, then bulk-narrow as far as possible.
                  while (position < limit)
                  {
                      pDestination[position] = substitute;
                      position++;

                      int remaining = limit - position;
                      if (remaining > 0)
                      {
                          position += (int)ASCIIUtility.NarrowUtf16ToAscii(pSource + position, pDestination + position, (uint)remaining);
                      }

                      Debug.Assert(position <= limit, "Somehow went beyond bounds of source or destination buffer?");
                  }
              }
          }

          // Drop the part of both buffers that has been handled.
          chars = chars.Slice(limit);
          bytes = bytes.Slice(limit);
      }

      // Leftover input - either the shortcut did not apply or the destination ran out -
      // goes down the slow fallback path.
      if (!chars.IsEmpty)
      {
          return base.GetBytesWithFallback(chars, originalCharsLength, bytes, originalBytesLength, encoder);
      }

      return originalBytesLength - bytes.Length; // total number of bytes written
  }
  349. // Returns the number of characters produced by decoding a range of bytes
  350. // in a byte array.
  351. //
  352. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  353. // So if you fix this, fix the others. Currently those include:
  354. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  355. // parent method is safe
  /// <summary>
  /// Computes how many characters result from decoding <paramref name="count"/> bytes of
  /// <paramref name="bytes"/>, beginning at <paramref name="index"/>.
  /// </summary>
  /// <param name="bytes">Source byte array; null is reported through <c>ThrowHelper</c>.</param>
  /// <param name="index">Offset of the first byte to measure; must not be negative.</param>
  /// <param name="count">Number of bytes to measure; must not be negative and must fit in the array.</param>
  /// <returns>The result of <c>GetCharCountCommon</c> for the selected range.</returns>
  public override unsafe int GetCharCount(byte[] bytes, int index, int count)
  {
      if (bytes is null)
      {
          ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
      }

      if (index < 0 || count < 0)
      {
          // Blame 'index' first when both values are negative.
          ExceptionArgument negativeArgument = (index < 0) ? ExceptionArgument.index : ExceptionArgument.count;
          ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      int bytesAvailable = bytes!.Length - index;
      if (count > bytesAvailable)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
      }

      fixed (byte* pStart = bytes)
      {
          return GetCharCountCommon(pStart + index, count);
      }
  }
  376. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  377. // So if you fix this, fix the others. Currently those include:
  378. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Computes how many characters result from decoding <paramref name="count"/> bytes
  /// read from the raw pointer <paramref name="bytes"/>.
  /// </summary>
  /// <param name="bytes">Pointer to the first byte; a null pointer is rejected.</param>
  /// <param name="count">Number of bytes to measure; must not be negative.</param>
  /// <returns>The result of <c>GetCharCountCommon</c> for the buffer.</returns>
  [CLSCompliant(false)]
  public override unsafe int GetCharCount(byte* bytes, int count)
  {
      // Pointer first, then length.
      if (bytes is null)
      {
          ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
      }

      if (count < 0)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      return GetCharCountCommon(bytes, count);
  }
  /// <summary>
  /// Computes how many characters result from decoding the bytes in <paramref name="bytes"/>.
  /// </summary>
  /// <param name="bytes">Span of bytes to measure; may be empty.</param>
  /// <returns>The result of <c>GetCharCountCommon</c> for the span.</returns>
  public override unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
  {
      int length = bytes.Length;

      // The shared helper tolerates a null pointer, so an empty span needs no special case.
      fixed (byte* pFirst = &MemoryMarshal.GetReference(bytes))
      {
          return GetCharCountCommon(pFirst, length);
      }
  }
  /// <summary>
  /// Shared worker behind every non-DecoderNLS <c>GetCharCount</c> entry point: tries the
  /// fast path first and hands whatever is left to the fallback path.
  /// </summary>
  /// <param name="pBytes">Pointer to the first byte; may be null only when <paramref name="byteCount"/> is zero.</param>
  /// <param name="byteCount">Number of bytes to measure; callers guarantee it is non-negative.</param>
  /// <returns>The fast-path count, plus the fallback-path count when the fast path stopped early.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private unsafe int GetCharCountCommon(byte* pBytes, int byteCount)
  {
      // A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.

      Debug.Assert(byteCount >= 0, "Caller shouldn't specify negative length buffer.");
      Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");

      // Fast path: measures as much of the input as it can handle.
      int fastPathCount = GetCharCountFast(pBytes, byteCount, DecoderFallback, out int consumed);
      if (consumed == byteCount)
      {
          return fastPathCount;
      }

      // Input remains, so the fallback path must finish the job. A fallback can change the
      // required output size in unexpected ways, hence the overflow check on the sum.
      int total = fastPathCount + GetCharCountWithFallback(pBytes, byteCount, consumed);
      if (total < 0)
      {
          ThrowConversionOverflow();
      }

      return total;
  }
  /// <summary>
  /// Fast-path char counting: returns the whole length when the fallback guarantees a 1:1
  /// byte-to-char mapping, otherwise the length of the leading all-ASCII run.
  /// </summary>
  /// <param name="pBytes">Pointer to the first byte.</param>
  /// <param name="bytesLength">Number of bytes available at <paramref name="pBytes"/>.</param>
  /// <param name="fallback">Decoder fallback in effect; may be null.</param>
  /// <param name="bytesConsumed">Receives the number of bytes accounted for, equal to the return value.</param>
  /// <returns>The number of chars accounted for by the fast path.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharCountCommon
  private protected sealed override unsafe int GetCharCountFast(byte* pBytes, int bytesLength, DecoderFallback? fallback, out int bytesConsumed)
  {
      // With a DecoderReplacementFallback every non-ASCII byte becomes the default string.
      // When that string is a single char, each input byte yields exactly one char and
      // nothing needs to be scanned.
      bool isSingleCharReplacement =
          fallback is DecoderReplacementFallback replacement
          && replacement.MaxCharCount == 1;

      int counted = isSingleCharReplacement
          ? bytesLength
          : (int)ASCIIUtility.GetIndexOfFirstNonAsciiByte(pBytes, (uint)bytesLength); // unrecognized fallback - count manually

      bytesConsumed = counted;
      return counted;
  }
  439. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  440. // So if you fix this, fix the others. Currently those include:
  441. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  442. // parent method is safe
  /// <summary>
  /// Decodes <paramref name="byteCount"/> bytes of <paramref name="bytes"/>, starting at
  /// <paramref name="byteIndex"/>, into <paramref name="chars"/> at <paramref name="charIndex"/>.
  /// </summary>
  /// <param name="bytes">Source byte array; must not be null.</param>
  /// <param name="byteIndex">Offset of the first byte to decode; must not be negative.</param>
  /// <param name="byteCount">Number of bytes to decode; must not be negative and must fit in the array.</param>
  /// <param name="chars">Destination character array; must not be null.</param>
  /// <param name="charIndex">Offset in <paramref name="chars"/> at which writing starts; must lie within 0..Length.</param>
  /// <returns>The result of <c>GetCharsCommon</c> for the selected ranges.</returns>
  public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
      char[] chars, int charIndex)
  {
      if (bytes is null || chars is null)
      {
          // 'bytes' takes the blame when both are missing.
          ExceptionArgument missingArgument = (bytes is null) ? ExceptionArgument.bytes : ExceptionArgument.chars;
          ThrowHelper.ThrowArgumentNullException(missingArgument, ExceptionResource.ArgumentNull_Array);
      }

      if (byteIndex < 0 || byteCount < 0)
      {
          ExceptionArgument negativeArgument = (byteIndex < 0) ? ExceptionArgument.byteIndex : ExceptionArgument.byteCount;
          ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      int bytesAvailable = bytes!.Length - byteIndex;
      if (byteCount > bytesAvailable)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
      }

      // The unsigned comparison also catches a negative charIndex.
      if ((uint)charIndex > (uint)chars!.Length)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_Index);
      }

      int charCapacity = chars.Length - charIndex;

      fixed (byte* pSource = bytes)
      {
          fixed (char* pDestination = chars)
          {
              return GetCharsCommon(pSource + byteIndex, byteCount, pDestination + charIndex, charCapacity);
          }
      }
  }
  473. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  474. // So if you fix this, fix the others. Currently those include:
  475. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  /// <summary>
  /// Decodes <paramref name="byteCount"/> bytes from the raw buffer <paramref name="bytes"/>
  /// into the raw buffer <paramref name="chars"/> of capacity <paramref name="charCount"/>.
  /// </summary>
  /// <param name="bytes">Pointer to the first source byte; must not be null.</param>
  /// <param name="byteCount">Number of source bytes; must not be negative.</param>
  /// <param name="chars">Pointer to the destination buffer; must not be null.</param>
  /// <param name="charCount">Capacity of the destination in chars; must not be negative.</param>
  /// <returns>The result of <c>GetCharsCommon</c> for the two buffers.</returns>
  [CLSCompliant(false)]
  public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
  {
      if (bytes is null || chars is null)
      {
          // 'bytes' takes the blame when both pointers are null.
          ExceptionArgument missingArgument = (bytes is null) ? ExceptionArgument.bytes : ExceptionArgument.chars;
          ThrowHelper.ThrowArgumentNullException(missingArgument, ExceptionResource.ArgumentNull_Array);
      }

      if (byteCount < 0 || charCount < 0)
      {
          ExceptionArgument negativeArgument = (byteCount < 0) ? ExceptionArgument.byteCount : ExceptionArgument.charCount;
          ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      return GetCharsCommon(bytes, byteCount, chars, charCount);
  }
  /// <summary>
  /// Decodes the bytes in <paramref name="bytes"/> into <paramref name="chars"/>.
  /// </summary>
  /// <param name="bytes">Source span; may be empty.</param>
  /// <param name="chars">Destination span; may be empty.</param>
  /// <returns>The result of <c>GetCharsCommon</c> for the two spans.</returns>
  public override unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
  {
      int sourceLength = bytes.Length;
      int destinationLength = chars.Length;

      // Null pointers are acceptable to the shared helper, so empty spans need no special case.
      fixed (byte* pSource = &MemoryMarshal.GetReference(bytes))
      {
          fixed (char* pDestination = &MemoryMarshal.GetReference(chars))
          {
              return GetCharsCommon(pSource, sourceLength, pDestination, destinationLength);
          }
      }
  }
  /// <summary>
  /// Shared worker behind every non-DecoderNLS <c>GetChars</c> entry point: runs the fast
  /// conversion path and only invokes the fallback path when input is left over.
  /// </summary>
  /// <param name="pBytes">Pointer to the first source byte; may be null only when <paramref name="byteCount"/> is zero.</param>
  /// <param name="byteCount">Number of source bytes; callers guarantee it is non-negative.</param>
  /// <param name="pChars">Pointer to the destination; may be null only when <paramref name="charCount"/> is zero.</param>
  /// <param name="charCount">Destination capacity in chars; callers guarantee it is non-negative.</param>
  /// <returns>The fast-path char count, or the fallback path's result when the fast path stopped early.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private unsafe int GetCharsCommon(byte* pBytes, int byteCount, char* pChars, int charCount)
  {
      // A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.

      Debug.Assert(byteCount >= 0, "Caller shouldn't specify negative length buffer.");
      Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
      Debug.Assert(charCount >= 0, "Caller shouldn't specify negative length buffer.");
      Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");

      // Fast path: convert as many elements as possible in one go.
      int written = GetCharsFast(pBytes, byteCount, pChars, charCount, out int consumed);

      if (consumed != byteCount)
      {
          // The fast conversion could not cover the whole buffer - let the fallback finish.
          return GetCharsWithFallback(pBytes, byteCount, pChars, charCount, consumed, written);
      }

      // Everything was converted.
      return written;
  }
  /// <summary>
  /// Fast-path decoding: widens bytes to chars through <c>ASCIIUtility.WidenAsciiToUtf16</c>,
  /// limited to the smaller of the two buffer lengths.
  /// </summary>
  /// <param name="pBytes">Pointer to the first source byte.</param>
  /// <param name="bytesLength">Number of source bytes.</param>
  /// <param name="pChars">Pointer to the destination.</param>
  /// <param name="charsLength">Destination capacity in chars.</param>
  /// <param name="bytesConsumed">Receives the number of bytes converted, equal to the return value.</param>
  /// <returns>The number of chars written.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharsCommon
  private protected sealed override unsafe int GetCharsFast(byte* pBytes, int bytesLength, char* pChars, int charsLength, out int bytesConsumed)
  {
      // Never read past the source nor write past the destination.
      uint elementLimit = (uint)Math.Min(bytesLength, charsLength);

      int widened = (int)ASCIIUtility.WidenAsciiToUtf16(pBytes, pChars, elementLimit);

      bytesConsumed = widened;
      return widened;
  }
  /// <summary>
  /// Fallback-path decoding. When the active fallback is a <see cref="DecoderReplacementFallback"/>
  /// that emits a single char, the replacement is done inline; any input still left
  /// afterwards (or any other fallback) is handed to the base implementation.
  /// </summary>
  /// <param name="bytes">Remaining source bytes.</param>
  /// <param name="originalBytesLength">Length of the caller's original source buffer; forwarded to the base implementation.</param>
  /// <param name="chars">Remaining destination space.</param>
  /// <param name="originalCharsLength">Length of the caller's original destination buffer.</param>
  /// <param name="decoder">Optional decoder whose fallback is used instead of this encoding's own.</param>
  /// <returns>The total number of chars written, or the base implementation's result.</returns>
  private protected sealed override unsafe int GetCharsWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, Span<char> chars, int originalCharsLength, DecoderNLS? decoder)
  {
      // A decoder, when supplied, carries the fallback to honor; otherwise use our own.
      DecoderFallback? activeFallback = (decoder is null) ? this.DecoderFallback : decoder.Fallback;

      // Special case: a replacement fallback that writes a single BMP char. This is believed
      // to be common and can be handled more cheaply here than in the base implementation.
      if (activeFallback is DecoderReplacementFallback replacement
          && replacement.MaxCharCount == 1)
      {
          char substitute = replacement.DefaultString[0];
          int limit = Math.Min(bytes.Length, chars.Length);

          fixed (byte* pSource = &MemoryMarshal.GetReference(bytes))
          {
              fixed (char* pDestination = &MemoryMarshal.GetReference(chars))
              {
                  int position = 0;

                  // Alternate: substitute one unconvertible byte, then bulk-widen as far as possible.
                  while (position < limit)
                  {
                      pDestination[position] = substitute;
                      position++;

                      int remaining = limit - position;
                      if (remaining > 0)
                      {
                          position += (int)ASCIIUtility.WidenAsciiToUtf16(pSource + position, pDestination + position, (uint)remaining);
                      }

                      Debug.Assert(position <= limit, "Somehow went beyond bounds of source or destination buffer?");
                  }
              }
          }

          // Drop the part of both buffers that has been handled.
          bytes = bytes.Slice(limit);
          chars = chars.Slice(limit);
      }

      // Leftover input - either the shortcut did not apply or the destination ran out -
      // goes down the slow fallback path.
      if (!bytes.IsEmpty)
      {
          return base.GetCharsWithFallback(bytes, originalBytesLength, chars, originalCharsLength, decoder);
      }

      return originalCharsLength - chars.Length; // total number of chars written
  }
  573. // Returns a string containing the decoded representation of a range of
  574. // bytes in a byte array.
  575. //
  576. // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
  577. // So if you fix this, fix the others. Currently those include:
  578. // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
  579. // parent method is safe
  /// <summary>
  /// Decodes <paramref name="byteCount"/> bytes of <paramref name="bytes"/>, starting at
  /// <paramref name="byteIndex"/>, into a new string.
  /// </summary>
  /// <param name="bytes">Source byte array; null is reported through <c>ThrowHelper</c>.</param>
  /// <param name="byteIndex">Offset of the first byte to decode; must not be negative.</param>
  /// <param name="byteCount">Number of bytes to decode; must not be negative and must fit in the array.</param>
  /// <returns>
  /// <see cref="string.Empty"/> when <paramref name="byteCount"/> is zero; otherwise the
  /// string produced by <c>string.CreateStringFromEncoding</c>.
  /// </returns>
  public override unsafe string GetString(byte[] bytes, int byteIndex, int byteCount)
  {
      if (bytes is null)
      {
          ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
      }

      if (byteIndex < 0 || byteCount < 0)
      {
          // Blame 'byteIndex' first when both values are negative.
          ExceptionArgument negativeArgument = (byteIndex < 0) ? ExceptionArgument.byteIndex : ExceptionArgument.byteCount;
          ThrowHelper.ThrowArgumentOutOfRangeException(negativeArgument, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
      }

      int bytesAvailable = bytes!.Length - byteIndex;
      if (byteCount > bytesAvailable)
      {
          ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Nothing to decode - skip pinning an (possibly empty) buffer altogether.
      if (byteCount == 0)
      {
          return string.Empty;
      }

      fixed (byte* pStart = bytes)
      {
          return string.CreateStringFromEncoding(pStart + byteIndex, byteCount, this);
      }
  }
  605. //
  606. // End of standard methods copied from EncodingNLS.cs
  607. //
  608. //
  609. // Beginning of methods used by shared fallback logic.
  610. //
  // Reports the number of bytes needed to encode a single Rune.
  // Returns true with byteCount = 1 when the Rune is ASCII; otherwise returns false
  // with byteCount set to 0.
  internal sealed override bool TryGetByteCount(Rune value, out int byteCount)
  {
      bool isAscii = value.IsAscii;
      byteCount = isAscii ? 1 : 0;
      return isAscii;
  }
  // Encodes a single Rune into the first byte of the destination span.
  //   Done                - the Rune is ASCII and one byte was written.
  //   DestinationTooSmall - the Rune is ASCII but the destination span is empty.
  //   InvalidData         - the Rune is not ASCII.
  // bytesWritten is 1 on Done and 0 in every other case.
  internal sealed override OperationStatus EncodeRune(Rune value, Span<byte> bytes, out int bytesWritten)
  {
      // The ASCII check comes first, so a non-ASCII Rune reports InvalidData even
      // when the destination is also empty.
      if (!value.IsAscii)
      {
          bytesWritten = 0;
          return OperationStatus.InvalidData;
      }

      if (bytes.IsEmpty)
      {
          bytesWritten = 0;
          return OperationStatus.DestinationTooSmall;
      }

      bytes[0] = (byte)value.Value;
      bytesWritten = 1;
      return OperationStatus.Done;
  }
  // Decodes the first byte of the input as a single Rune.
  //   Done         - the first byte is <= 0x7F; value is that byte, one byte consumed.
  //   InvalidData  - the first byte is > 0x7F; value is Rune.ReplacementChar, one byte consumed.
  //   NeedMoreData - the input is empty; value is Rune.ReplacementChar, nothing consumed.
  internal sealed override OperationStatus DecodeFirstRune(ReadOnlySpan<byte> bytes, out Rune value, out int bytesConsumed)
  {
      if (bytes.IsEmpty)
      {
          // Nothing to decode.
          value = Rune.ReplacementChar;
          bytesConsumed = 0;
          return OperationStatus.NeedMoreData;
      }

      // From here on exactly one byte is consumed, valid or not.
      byte first = bytes[0];
      bytesConsumed = 1;

      if (first > 0x7F)
      {
          // Non-ASCII byte.
          value = Rune.ReplacementChar;
          return OperationStatus.InvalidData;
      }

      // ASCII byte.
      value = new Rune(first);
      return OperationStatus.Done;
  }
  674. //
  675. // End of methods used by shared fallback logic.
  676. //
  // Returns an upper bound on the number of bytes produced when encoding charCount chars.
  // Throws ArgumentOutOfRangeException when charCount is negative or when the computed
  // bound does not fit in an int.
  public override int GetMaxByteCount(int charCount)
  {
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Start from charCount + 1 (the extra slot allows for a pending high surrogate that
      // ends up being replaced), computed as a long so the arithmetic cannot wrap.
      long worstCase = (long)charCount + 1;

      // A fallback that can emit more than one char per replacement scales the bound.
      if (EncoderFallback.MaxCharCount > 1)
      {
          worstCase *= EncoderFallback.MaxCharCount;
      }

      // 1 to 1 for most characters. Only surrogates with fallbacks have less.
      if (worstCase > int.MaxValue)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
      }

      return (int)worstCase;
  }
  // Returns an upper bound on the number of chars produced when decoding byteCount bytes.
  // Throws ArgumentOutOfRangeException when byteCount is negative or when the computed
  // bound does not fit in an int.
  public override int GetMaxCharCount(int byteCount)
  {
      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // A single-byte character set keeps its length (no byte maps to a surrogate), so the
      // byte count itself is the starting bound; held in a long so the multiply cannot wrap.
      long worstCase = (long)byteCount;

      // 1 to 1 for most characters. Only surrogates with fallbacks have less; unknown
      // fallbacks could be longer, so scale by the fallback's maximum expansion.
      if (DecoderFallback.MaxCharCount > 1)
      {
          worstCase *= DecoderFallback.MaxCharCount;
      }

      if (worstCase > int.MaxValue)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
      }

      return (int)worstCase;
  }
  // True if and only if the encoding uses only single-byte code points (e.g. ASCII, 1252).
  // This encoding always reports true.
  public override bool IsSingleByte => true;
  // Creates a new DecoderNLS instance constructed over this encoding.
  public override Decoder GetDecoder() => new DecoderNLS(this);
  // Creates a new EncoderNLS instance constructed over this encoding.
  public override Encoder GetEncoder() => new EncoderNLS(this);
  721. }
  722. }