| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979 |
- // Licensed to the .NET Foundation under one or more agreements.
- // The .NET Foundation licenses this file to you under the MIT license.
- // See the LICENSE file in the project root for more information.
- using System;
- using System.Diagnostics;
- using System.Runtime.InteropServices;
- namespace System.Text
- {
- // ASCIIEncoding
- //
- // Note that ASCIIEncoding is optimized with no best fit and ? for fallback.
- // It doesn't come in other flavors.
- //
- // Note: ASCIIEncoding is the only encoding that doesn't do best fit (windows has best fit).
- //
- // Note: IsAlwaysNormalized remains false because 1/2 the code points are unassigned, so they'd
- // use fallbacks, and we cannot guarantee that fallbacks are normalized.
- public class ASCIIEncoding : Encoding
- {
- // Sealed subclass with no members of its own; exists to allow devirtualization (see https://github.com/dotnet/coreclr/pull/9230)
- internal sealed class ASCIIEncodingSealed : ASCIIEncoding { }
- // Shared instance used by Encoding.ASCII; held in a static field so that it is initialized lazily.
- // The initializer below will not run until a static member of this class is first referenced.
- internal static readonly ASCIIEncodingSealed s_default = new ASCIIEncodingSealed();
- public ASCIIEncoding() : base(Encoding.CodePageASCII) // hands the ASCII code page constant to the Encoding base constructor
- {
- } // nothing else to set up here
- internal sealed override void SetDefaultFallbacks()
- {
-     // ASCII always starts out with the stock replacement fallbacks, in both directions.
-     decoderFallback = DecoderFallback.ReplacementFallback;
-     encoderFallback = EncoderFallback.ReplacementFallback;
- }
- // WARNING: GetByteCount(string chars), GetBytes(string chars,...), and GetString(byte[] byteIndex...)
- // WARNING: have different variable names than EncodingNLS.cs, so this can't just be cut & pasted,
- // WARNING: or it'll break VB's way of calling these.
- //
- // The following methods are copied from EncodingNLS.cs.
- // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
- // These should be kept in sync for the following classes:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // Returns the number of bytes required to encode a range of characters in
- // a character array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- public override unsafe int GetByteCount(char[] chars, int index, int count)
- {
-     // Reject a missing array before looking at the range arguments.
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     // Negative values: index is reported ahead of count.
-     if (index < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // The requested range has to fit inside the array.
-     if (count > chars.Length - index)
-     {
-         throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     // Nothing to count; also avoids pinning an empty array.
-     if (count == 0)
-     {
-         return 0;
-     }
-     // Pin the array and let the pointer-based core do the work, with no encoder state.
-     fixed (char* firstChar = chars)
-     {
-         return GetByteCount(firstChar + index, count, encoder: null);
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- public override unsafe int GetByteCount(string chars)
- {
-     // A null string is the only invalid input for this overload.
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars));
-     }
-     // Pin the string and count its whole length through the pointer-based core.
-     fixed (char* firstChar = chars)
-     {
-         return GetByteCount(firstChar, chars.Length, encoder: null);
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- [CLSCompliant(false)]
- public override unsafe int GetByteCount(char* chars, int count)
- {
-     // A null pointer is rejected first, then a negative length.
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Forward to the core overload without any encoder state.
-     return GetByteCount(chars, count, encoder: null);
- }
- public override unsafe int GetByteCount(ReadOnlySpan<char> chars)
- {
-     // Pin the span's storage and forward to the pointer-based core; no encoder is involved.
-     fixed (char* firstChar = &MemoryMarshal.GetNonNullPinnableReference(chars))
-         return GetByteCount(firstChar, chars.Length, null);
- }
- // Parent method is safe.
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- public override unsafe int GetBytes(string chars, int charIndex, int charCount,
-     byte[] bytes, int byteIndex)
- {
-     // Null checks: the source string is reported ahead of the destination array.
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     // Negative values: charIndex is reported ahead of charCount.
-     if (charIndex < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // The requested range has to lie inside the string.
-     if (charCount > chars.Length - charIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCount);
-     }
-     // The write position may equal bytes.Length but must not go past it.
-     if (byteIndex < 0 || byteIndex > bytes.Length)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
-     }
-     // Everything from byteIndex to the end of the array is available for output.
-     int bytesAvailable = bytes.Length - byteIndex;
-     fixed (char* sourceStart = chars)
-     fixed (byte* destinationStart = &MemoryMarshal.GetReference((Span<byte>)bytes))
-     {
-         return GetBytes(sourceStart + charIndex, charCount, destinationStart + byteIndex, bytesAvailable, encoder: null);
-     }
- }
- // Encodes a range of characters in a character array into a range of bytes
- // in a byte array. An exception occurs if the byte array is not large
- // enough to hold the complete encoding of the characters. The
- // GetByteCount method can be used to determine the exact number of
- // bytes that will be produced for a given range of characters.
- // Alternatively, the GetMaxByteCount method can be used to
- // determine the maximum number of bytes that will be produced for a given
- // number of characters, regardless of the actual character values.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
-     byte[] bytes, int byteIndex)
- {
-     // Null checks: the source array is reported ahead of the destination array.
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     // Negative values: charIndex is reported ahead of charCount.
-     if (charIndex < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // The requested range has to lie inside the source array.
-     if (charCount > chars.Length - charIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     // The write position may equal bytes.Length but must not go past it.
-     if (byteIndex < 0 || byteIndex > bytes.Length)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
-     }
-     // Nothing to encode.
-     if (charCount == 0)
-     {
-         return 0;
-     }
-     // The byte count passed on is the room left after byteIndex, not the size of the whole array.
-     int bytesAvailable = bytes.Length - byteIndex;
-     fixed (char* sourceStart = chars)
-     fixed (byte* destinationStart = &MemoryMarshal.GetReference((Span<byte>)bytes))
-     {
-         return GetBytes(sourceStart + charIndex, charCount, destinationStart + byteIndex, bytesAvailable, encoder: null);
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- [CLSCompliant(false)]
- public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
- {
-     // Null pointers: the destination is reported ahead of the source.
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     // Negative lengths: charCount is reported ahead of byteCount.
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (byteCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Forward to the core overload without any encoder state.
-     return GetBytes(chars, charCount, bytes, byteCount, encoder: null);
- }
- public override unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
- {
-     // Pin source and destination together, then encode through the pointer-based core with no encoder.
-     fixed (char* source = &MemoryMarshal.GetNonNullPinnableReference(chars))
-     fixed (byte* destination = &MemoryMarshal.GetNonNullPinnableReference(bytes))
-         return GetBytes(source, chars.Length, destination, bytes.Length, null);
- }
- // Returns the number of characters produced by decoding a range of bytes
- // in a byte array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- public override unsafe int GetCharCount(byte[] bytes, int index, int count)
- {
-     // Reject a missing array before looking at the range arguments.
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     // Negative values: index is reported ahead of count.
-     if (index < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // The requested range has to fit inside the array.
-     if (count > bytes.Length - index)
-     {
-         throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     // Nothing to count; also avoids pinning an empty array.
-     if (count == 0)
-     {
-         return 0;
-     }
-     // Pin the array and let the pointer-based core do the work, with no decoder state.
-     fixed (byte* firstByte = bytes)
-     {
-         return GetCharCount(firstByte + index, count, decoder: null);
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- [CLSCompliant(false)]
- public override unsafe int GetCharCount(byte* bytes, int count)
- {
-     // A null pointer is rejected first, then a negative length.
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Forward to the core overload without any decoder state.
-     return GetCharCount(bytes, count, decoder: null);
- }
- public override unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
- {
-     // Pin the span's storage and forward to the pointer-based core; no decoder is involved.
-     fixed (byte* firstByte = &MemoryMarshal.GetNonNullPinnableReference(bytes))
-         return GetCharCount(firstByte, bytes.Length, null);
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
-     char[] chars, int charIndex)
- {
-     // Null checks: the source array is reported ahead of the destination array.
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     // Negative values: byteIndex is reported ahead of byteCount.
-     if (byteIndex < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (byteCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // The requested range has to lie inside the source array.
-     if (byteCount > bytes.Length - byteIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     // The write position may equal chars.Length but must not go past it.
-     if (charIndex < 0 || charIndex > chars.Length)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
-     }
-     // Nothing to decode; also avoids pinning an empty source array.
-     if (byteCount == 0)
-     {
-         return 0;
-     }
-     // The char count passed on is the room left after charIndex, not the size of the whole array.
-     int charsAvailable = chars.Length - charIndex;
-     fixed (byte* sourceStart = bytes)
-     fixed (char* destinationStart = &MemoryMarshal.GetReference((Span<char>)chars))
-     {
-         return GetChars(sourceStart + byteIndex, byteCount, destinationStart + charIndex, charsAvailable, decoder: null);
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- [CLSCompliant(false)]
- public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
- {
-     // Null pointers: the source is reported ahead of the destination.
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     // Negative lengths: charCount is reported ahead of byteCount.
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (byteCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Forward to the core overload without any decoder state.
-     return GetChars(bytes, byteCount, chars, charCount, decoder: null);
- }
- public override unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
- {
-     // Pin source and destination together, then decode through the pointer-based core with no decoder.
-     fixed (byte* source = &MemoryMarshal.GetNonNullPinnableReference(bytes))
-     fixed (char* destination = &MemoryMarshal.GetNonNullPinnableReference(chars))
-         return GetChars(source, bytes.Length, destination, chars.Length, null);
- }
- // Returns a string containing the decoded representation of a range of
- // bytes in a byte array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- public override unsafe string GetString(byte[] bytes, int byteIndex, int byteCount)
- {
-     // Reject a missing array before looking at the range arguments.
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     // Negative values: byteIndex is reported ahead of byteCount.
-     if (byteIndex < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (byteCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // The requested range has to fit inside the array.
-     if (byteCount > bytes.Length - byteIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     // An empty range decodes to the empty string without pinning anything.
-     if (byteCount == 0)
-     {
-         return string.Empty;
-     }
-     // Pin the array and build the string from the selected bytes using this encoding.
-     fixed (byte* firstByte = bytes)
-     {
-         return string.CreateStringFromEncoding(firstByte + byteIndex, byteCount, this);
-     }
- }
- //
- // End of standard methods copied from EncodingNLS.cs
- //
- // GetByteCount (pointer-based core): returns the number of bytes needed to encode charCount chars starting at chars.
- // Note: with a replacement fallback whose MaxCharCount is 1 the result is simply charCount (plus one for a
- // leftover char held by the encoder); any other fallback is counted char by char in the loop further down.
- internal sealed override unsafe int GetByteCount(char* chars, int charCount, EncoderNLS encoder)
- {
- // Only assert here: this is called by something else internal that has checked the parameters already
- Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetByteCount]count is negative");
- Debug.Assert(chars != null, "[ASCIIEncoding.GetByteCount]chars is null");
- // Assert because we shouldn't be able to have a null encoder fallback.
- Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetByteCount]Attempting to use null fallback encoder");
- char charLeftOver = (char)0;
- EncoderReplacementFallback fallback = null;
- // One past the last input char; the counting loop below stops here
- char* charEnd = chars + charCount;
- // The fallback buffer is only obtained when needed: from an encoder that already has one, for a leftover char, or for a char > 0x7f.
- EncoderFallbackBuffer fallbackBuffer = null;
- char* charsForFallback;
- if (encoder != null)
- {
- charLeftOver = encoder._charLeftOver;
- Debug.Assert(charLeftOver == 0 || char.IsHighSurrogate(charLeftOver),
- "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
- fallback = encoder.Fallback as EncoderReplacementFallback;
- // We mustn't have left over fallback data when counting
- if (encoder.InternalHasFallbackBuffer)
- {
- // Reuse the encoder's existing fallback buffer so any chars still pending in it are seen by the loop below
- fallbackBuffer = encoder.FallbackBuffer;
- if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
- throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
- // Point the fallback buffer at this input range and encoder.
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
- }
- // Verify that we have no fallback buffer content; for ASCII it's always empty, so just assert
- Debug.Assert(!encoder._throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
- encoder.FallbackBuffer.Remaining == 0,
- "[ASCIICodePageEncoding.GetByteCount]Expected empty fallback buffer");
- }
- else
- {
- fallback = this.EncoderFallback as EncoderReplacementFallback;
- }
- // Fast path: taken only when the fallback in effect is an EncoderReplacementFallback
- // with MaxCharCount == 1; anything else falls through to the complicated count below.
- if (fallback != null && fallback.MaxCharCount == 1)
- {
- // Replacement fallback encodes surrogate pairs as two ?? (or two whatever), so return size is always
- // same as input size.
- // Note that no existing SBCS code pages map code points to supplementary characters, so this is easy.
- // We could however have 1 extra byte if the last call had an encoder and a funky fallback and
- // if we don't use the funky fallback this time.
- // Do we have an extra char left over from last time?
- if (charLeftOver > 0)
- charCount++;
- return (charCount);
- }
- // Count is more complicated if you have a funky fallback
- // Slow path: count one byte per ASCII char and run every other char through the fallback buffer
- int byteCount = 0;
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- Debug.Assert(char.IsHighSurrogate(charLeftOver), "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
- Debug.Assert(encoder != null, "[ASCIIEncoding.GetByteCount]Expected encoder");
- // Since left over char was a surrogate, it'll have to be fallen back.
- // Get Fallback
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
- // This will fallback a pair if *chars is a low surrogate
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
- chars = charsForFallback;
- }
- // Now we may have fallback char[] already from the encoder
- // Go ahead and do it, including the fallback. Chars pending in the fallback buffer are taken before new input chars.
- char ch;
- while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
- chars < charEnd)
- {
- // First unwind any fallback
- if (ch == 0)
- {
- // No fallback, just get next char
- ch = *chars;
- chars++;
- }
- // Check for fallback, this'll catch surrogate pairs too.
- // no chars >= 0x80 are allowed.
- if (ch > 0x7f)
- {
- if (fallbackBuffer == null)
- {
- // Initialize the buffer
- if (encoder == null)
- fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, false); // charEnd - charCount is the original start of the input
- }
- // Get Fallback
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- fallbackBuffer.InternalFallback(ch, ref charsForFallback);
- chars = charsForFallback;
- continue;
- }
- // We'll use this one
- byteCount++;
- }
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
- "[ASCIIEncoding.GetByteCount]Expected Empty fallback buffer");
- return byteCount;
- }
- internal sealed override unsafe int GetBytes(
- char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
- {
- // Pointer-based core: encodes up to charCount chars from chars into at most byteCount bytes at bytes and returns the number of bytes written. Only asserts, because internal callers have checked the parameters already
- Debug.Assert(bytes != null, "[ASCIIEncoding.GetBytes]bytes is null");
- Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetBytes]byteCount is negative");
- Debug.Assert(chars != null, "[ASCIIEncoding.GetBytes]chars is null");
- Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetBytes]charCount is negative");
- // Assert because we shouldn't be able to have a null encoder fallback.
- Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetBytes]Attempting to use null encoder fallback");
- // Get any left over characters
- char charLeftOver = (char)0;
- EncoderReplacementFallback fallback = null;
- // The fallback buffer is only obtained when needed: from an encoder that already has one, for a leftover char, or for a char > 0x7f.
- EncoderFallbackBuffer fallbackBuffer = null;
- char* charsForFallback;
- // Remember the end of the input and the start of both buffers (the starts give the return value and encoder._charsUsed)
- char* charEnd = chars + charCount;
- byte* byteStart = bytes;
- char* charStart = chars;
- if (encoder != null)
- {
- charLeftOver = encoder._charLeftOver;
- fallback = encoder.Fallback as EncoderReplacementFallback;
- // We mustn't have left over fallback data when encoding
- if (encoder.InternalHasFallbackBuffer)
- {
- // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
- fallbackBuffer = encoder.FallbackBuffer;
- if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
- throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
- // Point the fallback buffer at this input range and encoder.
- fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
- }
- Debug.Assert(charLeftOver == 0 || char.IsHighSurrogate(charLeftOver),
- "[ASCIIEncoding.GetBytes]leftover character should be high surrogate");
- // Verify that we have no fallback buffer content; for ASCII it's always empty, so just assert
- Debug.Assert(!encoder._throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
- encoder.FallbackBuffer.Remaining == 0,
- "[ASCIICodePageEncoding.GetBytes]Expected empty fallback buffer");
- }
- else
- {
- fallback = this.EncoderFallback as EncoderReplacementFallback;
- }
- // See if we do the fast default or slightly slower fallback
- if (fallback != null && fallback.MaxCharCount == 1)
- {
- // Fast version: every char > 0x7f becomes the single replacement char
- char cReplacement = fallback.DefaultString[0];
- // Check for replacements in range, otherwise fall back to slow version.
- if (cReplacement <= (char)0x7f)
- {
- // We should have exactly as many output bytes as input chars, unless there's a left
- // over character, in which case we may need one more.
- // If we had a left over character will have to add a ? (This happens if they had a funky
- // fallback last time, but not this time.) (We can't spit any out though
- // because with fallback encoder each surrogate is treated as a separate code point)
- if (charLeftOver > 0)
- {
- // Have to have room
- // Throw even if doing no throw version because this is just 1 char,
- // so buffer will never be big enough
- if (byteCount == 0)
- ThrowBytesOverflow(encoder, true);
- // This'll make sure we still have more room and also make sure our return value is correct.
- *(bytes++) = (byte)cReplacement;
- byteCount--; // We used one of the ones we were counting.
- }
- // This keeps us from overrunning our output buffer
- if (byteCount < charCount)
- {
- // Throw or make buffer smaller? (second argument is true only when not even one byte fits)
- ThrowBytesOverflow(encoder, byteCount < 1);
- // Just use what we can
- charEnd = chars + byteCount;
- }
- // We just do a quick copy
- while (chars < charEnd)
- {
- char ch2 = *(chars++);
- if (ch2 >= 0x0080) *(bytes++) = (byte)cReplacement;
- else *(bytes++) = unchecked((byte)(ch2));
- }
- // Clear encoder state and record how many chars were consumed
- if (encoder != null)
- {
- encoder._charLeftOver = (char)0;
- encoder._charsUsed = (int)(chars - charStart);
- }
- return (int)(bytes - byteStart);
- }
- }
- // Slower version, have to do real fallback.
- // One past the last writable output byte
- byte* byteEnd = bytes + byteCount;
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- // Initialize the buffer
- Debug.Assert(encoder != null,
- "[ASCIIEncoding.GetBytes]Expected non null encoder if we have surrogate left over");
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
- // Since left over char was a surrogate, it'll have to be fallen back.
- // Get Fallback
- // This will fallback a pair if *chars is a low surrogate
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
- chars = charsForFallback;
- }
- // Now we may have fallback char[] already from the encoder
- // Go ahead and do it, including the fallback. Chars pending in the fallback buffer are taken before new input chars.
- char ch;
- while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
- chars < charEnd)
- {
- // First unwind any fallback
- if (ch == 0)
- {
- // No fallback, just get next char
- ch = *chars;
- chars++;
- }
- // Check for fallback, this'll catch surrogate pairs too.
- // All characters >= 0x80 must fall back.
- if (ch > 0x7f)
- {
- // Initialize the buffer
- if (fallbackBuffer == null)
- {
- if (encoder == null)
- fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true); // charEnd - charCount is the original start of the input
- }
- // Get Fallback
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- fallbackBuffer.InternalFallback(ch, ref charsForFallback);
- chars = charsForFallback;
- // Go ahead & continue (& do the fallback)
- continue;
- }
- // We'll use this one
- // Bounds check
- if (bytes >= byteEnd)
- {
- // didn't use this char, we'll throw or use buffer
- if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
- {
- Debug.Assert(chars > charStart || bytes == byteStart,
- "[ASCIIEncoding.GetBytes]Expected chars to have advanced already.");
- chars--; // don't use last char
- }
- else
- fallbackBuffer.MovePrevious();
- // Are we throwing or using buffer? (second argument is true only when nothing has been written yet)
- ThrowBytesOverflow(encoder, bytes == byteStart); // throw?
- break; // don't throw, stop
- }
- // Go ahead and add it
- *bytes = unchecked((byte)ch);
- bytes++;
- }
- // Need to do encoder stuff
- if (encoder != null)
- {
- // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
- if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
- // Clear it in case of MustFlush
- encoder._charLeftOver = (char)0;
- // Set our chars used count
- encoder._charsUsed = (int)(chars - charStart);
- }
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
- (encoder != null && !encoder._throwOnOverflow),
- "[ASCIIEncoding.GetBytes]Expected Empty fallback buffer at end");
- return (int)(bytes - byteStart);
- }
- // Pointer-based core: returns the number of chars produced by decoding count bytes starting at bytes.
- // This is internal and only called by code that has already validated the arguments, so they are just asserted.
- internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
- {
-     Debug.Assert(bytes != null, "[ASCIIEncoding.GetCharCount]bytes is null");
-     Debug.Assert(count >= 0, "[ASCIIEncoding.GetCharCount]byteCount is negative");
-     // ASCII doesn't do best fit, so only the decoder fallback matters: the decoder's if one was supplied,
-     // otherwise this encoding's own.
-     DecoderReplacementFallback fallback = null;
-     if (decoder == null)
-         fallback = this.DecoderFallback as DecoderReplacementFallback;
-     else
-     {
-         fallback = decoder.Fallback as DecoderReplacementFallback;
-         Debug.Assert(!decoder._throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
-             decoder.FallbackBuffer.Remaining == 0,
-             "[ASCIICodePageEncoding.GetCharCount]Expected empty fallback buffer");
-     }
-     // Fast path: a replacement fallback with MaxCharCount == 1 yields one char per input byte,
-     // so the answer is the input length and no scan is needed.
-     if (fallback != null && fallback.MaxCharCount == 1)
-     {
-         return count;
-     }
-     // Slow path: start from one char per byte and correct the total for every byte that needs fallback.
-     // Both the fallback buffer and its one-byte scratch array are created lazily, on the first byte >= 0x80,
-     // so input that is entirely ASCII never allocates.
-     DecoderFallbackBuffer fallbackBuffer = null;
-     byte[] byteBuffer = null;
-     int charCount = count;
-     byte* byteEnd = bytes + count;
-     while (bytes < byteEnd)
-     {
-         byte b = *bytes;
-         bytes++;
-         // Bytes below 0x80 are valid ASCII and already accounted for.
-         if (b >= 0x80)
-         {
-             if (fallbackBuffer == null)
-             {
-                 if (decoder == null)
-                     fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
-                 else
-                     fallbackBuffer = decoder.FallbackBuffer;
-                 // byteEnd - count is the original start of the input.
-                 fallbackBuffer.InternalInitialize(byteEnd - count, null);
-                 byteBuffer = new byte[1];
-             }
-             // Swap the one char reserved for this byte for however many chars the fallback reports.
-             byteBuffer[0] = b;
-             charCount--;
-             charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
-         }
-     }
-     // Fallback buffer must be empty
-     Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
-         "[ASCIIEncoding.GetCharCount]Expected Empty fallback buffer");
-     return charCount;
- }
/// <summary>
/// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="bytes"/> into the
/// character buffer at <paramref name="chars"/>, which has room for
/// <paramref name="charCount"/> characters. Bytes below 0x80 are widened to characters
/// unchanged; bytes of 0x80 and above are not valid ASCII and go through the decoder fallback.
/// </summary>
/// <param name="bytes">First input byte. Only asserted non-null; callers validate arguments.</param>
/// <param name="byteCount">Number of input bytes. Only asserted non-negative.</param>
/// <param name="chars">First output character slot. Only asserted non-null.</param>
/// <param name="charCount">Capacity of the output buffer in characters. Only asserted non-negative.</param>
/// <param name="decoder">
/// Optional decoder. When non-null, its fallback is used instead of this encoding's and its
/// <c>_bytesUsed</c> field is set to the number of input bytes consumed.
/// </param>
/// <returns>The number of characters written to <paramref name="chars"/>.</returns>
internal sealed override unsafe int GetChars(
    byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
{
    // Parameters were validated by the internal caller, so they are only asserted here.
    Debug.Assert(bytes != null, "[ASCIIEncoding.GetChars]bytes is null");
    Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetChars]byteCount is negative");
    Debug.Assert(chars != null, "[ASCIIEncoding.GetChars]chars is null");
    Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetChars]charCount is negative");

    // Remember where both buffers begin so consumed/produced counts can be computed at the end.
    byte* inputStart = bytes;
    byte* inputEnd = bytes + byteCount;
    char* outputStart = chars;

    // ASCII has no best-fit mapping, so the only question is which fallback handles bytes > 0x7f.
    // A non-replacement fallback leaves this null and forces the slower path below.
    DecoderReplacementFallback replacement;
    if (decoder != null)
    {
        replacement = decoder.Fallback as DecoderReplacementFallback;
        Debug.Assert(!decoder._throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
            decoder.FallbackBuffer.Remaining == 0,
            "[ASCIICodePageEncoding.GetChars]Expected empty fallback buffer");
    }
    else
    {
        replacement = this.DecoderFallback as DecoderReplacementFallback;
    }

    if (replacement != null && replacement.MaxCharCount == 1)
    {
        // Fast path: every input byte yields exactly one output character.
        char substitute = replacement.DefaultString[0];

        if (charCount < byteCount)
        {
            // Output is too small for the whole input. The helper decides whether that is
            // fatal (the flag is set when not even one character fits); if it returns,
            // decode only as many bytes as there are output slots.
            ThrowCharsOverflow(decoder, charCount < 1);
            inputEnd = bytes + charCount;
        }

        while (bytes < inputEnd)
        {
            byte current = *bytes;
            bytes++;

            // 0x80 and above is invalid ASCII and becomes the replacement character.
            *chars = current >= 0x80 ? substitute : unchecked((char)current);
            chars++;
        }

        if (decoder != null)
        {
            decoder._bytesUsed = (int)(bytes - inputStart);
        }

        return (int)(chars - outputStart);
    }

    // Slow path: invalid bytes are routed through a fallback buffer, created on first use.
    DecoderFallbackBuffer fallbackBuffer = null;
    byte[] invalidByte = new byte[1];
    char* outputEnd = chars + charCount;

    while (bytes < inputEnd)
    {
        byte current = *bytes;
        bytes++;

        if (current < 0x80)
        {
            // Plain ASCII byte: needs one free output slot.
            if (chars >= outputEnd)
            {
                Debug.Assert(bytes > inputStart || chars == outputStart,
                    "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (normal case)");
                bytes--; // this byte was not consumed
                ThrowCharsOverflow(decoder, chars == outputStart); // may or may not throw
                break; // did not throw: stop decoding
            }

            *chars = unchecked((char)current);
            chars++;
            continue;
        }

        // Invalid byte for ASCII: make sure a fallback buffer exists.
        if (fallbackBuffer == null)
        {
            fallbackBuffer = decoder == null
                ? this.DecoderFallback.CreateFallbackBuffer()
                : decoder.FallbackBuffer;
            fallbackBuffer.InternalInitialize(inputEnd - byteCount, outputEnd);
        }

        invalidByte[0] = current;

        // Hand the fallback a copy of the cursor so 'chars' itself is never passed by
        // reference and can stay enregistered.
        char* fallbackCursor = chars;
        bool fellBack = fallbackBuffer.InternalFallback(invalidByte, bytes, ref fallbackCursor);
        chars = fallbackCursor;

        if (!fellBack)
        {
            // The fallback output did not fit, so this byte counts as unread.
            Debug.Assert(bytes > inputStart || chars == outputStart,
                "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (fallback case)");
            bytes--; // this byte was not consumed
            fallbackBuffer.InternalReset(); // discard the fallback that was not applied
            ThrowCharsOverflow(decoder, chars == outputStart); // may or may not throw
            break; // did not throw: stop decoding
        }
    }

    // Report consumption back to the decoder, if one is in use.
    if (decoder != null)
    {
        decoder._bytesUsed = (int)(bytes - inputStart);
    }

    // Nothing may be left pending in the fallback buffer once GetChars finishes.
    Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
        "[ASCIIEncoding.GetChars]Expected Empty fallback buffer");

    return (int)(chars - outputStart);
}
/// <summary>
/// Computes an upper bound on the number of bytes needed to encode
/// <paramref name="charCount"/> characters.
/// </summary>
/// <param name="charCount">Number of characters to be encoded; must not be negative.</param>
/// <returns>
/// <paramref name="charCount"/> plus one, multiplied by the encoder fallback's
/// <c>MaxCharCount</c> when that value is greater than one.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="charCount"/> is negative, or the computed bound does not fit in an <see cref="int"/>.
/// </exception>
public override int GetMaxByteCount(int charCount)
{
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // One slot beyond the character count is reserved (for a high surrogate that ends up
    // replaced); the arithmetic is done in 64 bits so overflow can be detected below.
    long upperBound = (long)charCount + 1;

    // Each character may expand to as many characters as the fallback can produce.
    if (EncoderFallback.MaxCharCount > 1)
    {
        upperBound *= EncoderFallback.MaxCharCount;
    }

    // Otherwise the mapping is one byte per character.
    if (upperBound > int.MaxValue)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
    }

    return (int)upperBound;
}
/// <summary>
/// Computes an upper bound on the number of characters produced by decoding
/// <paramref name="byteCount"/> bytes.
/// </summary>
/// <param name="byteCount">Number of bytes to be decoded; must not be negative.</param>
/// <returns>
/// <paramref name="byteCount"/>, multiplied by the decoder fallback's <c>MaxCharCount</c>
/// when that value is greater than one.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="byteCount"/> is negative, or the computed bound does not fit in an <see cref="int"/>.
/// </exception>
public override int GetMaxCharCount(int byteCount)
{
    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // A single-byte encoding yields one character per byte; widen to 64 bits so the
    // multiplication below cannot overflow unnoticed.
    long upperBound = (long)byteCount;

    // A fallback that can emit several characters per invalid byte raises the bound.
    if (DecoderFallback.MaxCharCount > 1)
    {
        upperBound *= DecoderFallback.MaxCharCount;
    }

    if (upperBound > int.MaxValue)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
    }

    return (int)upperBound;
}
/// <summary>
/// Indicates whether the encoding uses only single-byte code points (as ASCII, 1252, etc. do).
/// Always <c>true</c> for this encoding.
/// </summary>
public override bool IsSingleByte => true;
/// <summary>
/// Creates a decoder for this encoding.
/// </summary>
/// <returns>A new <c>DecoderNLS</c> constructed over this encoding instance.</returns>
public override Decoder GetDecoder() => new DecoderNLS(this);
/// <summary>
/// Creates an encoder for this encoding.
/// </summary>
/// <returns>A new <c>EncoderNLS</c> constructed over this encoding instance.</returns>
public override Encoder GetEncoder() => new EncoderNLS(this);
- }
- }
|