| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978 |
- // Licensed to the .NET Foundation under one or more agreements.
- // The .NET Foundation licenses this file to you under the MIT license.
- // See the LICENSE file in the project root for more information.
- //
- // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
- //
- using System;
- using System.Diagnostics;
- using System.Runtime.InteropServices;
- namespace System.Text
- {
- public class UTF7Encoding : Encoding
- {
- // The modified base64 alphabet. A character's position in this string is the
- // 6-bit value it encodes (MakeTables builds both lookup directions from it).
- private const string base64Chars =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
- // 0123456789111111111122222222223333333333444444444455555555556666
- // 012345678901234567890123456789012345678901234567890123
- // (The two lines above are a ruler: read vertically they give each character's index.)
- // These are the characters that are always directly encoded in UTF7.
- private const string directChars =
- "\t\n\r '(),-./0123456789:?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
- // These are the characters that are directly encoded in UTF7 only when
- // the encoding was constructed with allowOptionals == true.
- private const string optionalChars =
- "!\"#$%&*;<=>@[]^_`{|}";
- // Used by Encoding.UTF7 for lazy initialization.
- // The initialization code will not be run until a static member of the class is referenced.
- internal static readonly UTF7Encoding s_default = new UTF7Encoding();
- // The base 64 characters as bytes, indexed by 6-bit value (0 ~ 63).
- private byte[] _base64Bytes;
- // The decoded value for every base64 character. This array has a size of 128 elements.
- // The index is the code point value of the base 64 character. The value is -1 if
- // the code point is not a valid base 64 character. Otherwise, the value is a value
- // from 0 ~ 63.
- private sbyte[] _base64Values;
- // The array to decide if a Unicode code point below 0x80 can be directly encoded in UTF7.
- // This array has a size of 128.
- private bool[] _directEncode;
- // True when the characters in optionalChars are also treated as directly encodable.
- private bool _allowOptionals;
- // Code page number passed to the Encoding base constructor.
- private const int UTF7_CODEPAGE = 65000;
- // Creates a UTF-7 encoding that does not directly encode the optional character set.
- public UTF7Encoding()
-     : this(allowOptionals: false)
- {
- }
- // Creates a UTF-7 encoding.
- //   allowOptionals - true to emit the characters in optionalChars as-is instead of
- //                    base64-encoding them.
- public UTF7Encoding(bool allowOptionals)
-     : base(UTF7_CODEPAGE)
- {
-     // The flag has to be stored before the tables are built: MakeTables reads it
-     // to decide whether the optional set is marked as directly encodable.
-     this._allowOptionals = allowOptionals;
-     this.MakeTables();
- }
- // Builds the three lookup tables used by the encoder and decoder:
- //   _base64Bytes  : 6-bit value  -> base64 character (as a byte)
- //   _base64Values : ASCII code   -> 6-bit value, or -1 when the code is not a base64 character
- //   _directEncode : ASCII code   -> true when the character may be written without base64
- private void MakeTables()
- {
-     byte[] alphabet = new byte[64];
-     sbyte[] reverse = new sbyte[128];
-     // Everything starts out as "not a base64 character".
-     for (int code = 0; code < reverse.Length; code++)
-     {
-         reverse[code] = -1;
-     }
-     // Fill the forward and reverse maps in one pass over the alphabet.
-     for (int sextet = 0; sextet < alphabet.Length; sextet++)
-     {
-         byte ascii = (byte)base64Chars[sextet];
-         alphabet[sextet] = ascii;
-         reverse[ascii] = (sbyte)sextet;
-     }
-     _base64Bytes = alphabet;
-     _base64Values = reverse;
-     bool[] direct = new bool[128];
-     foreach (char ch in directChars)
-     {
-         direct[ch] = true;
-     }
-     // The optional set only counts as direct when the caller asked for it.
-     if (_allowOptionals)
-     {
-         foreach (char ch in optionalChars)
-         {
-             direct[ch] = true;
-         }
-     }
-     _directEncode = direct;
- }
- // We go ahead and set this because Encoding expects it, however nothing can fall back in UTF7.
- // Installs the fallbacks this encoding uses by default.
- internal sealed override void SetDefaultFallbacks()
- {
-     // Encoding side: surrogates are encoded individually and unmatched ones are never
-     // checked for, so this fallback is never consulted; an empty replacement is installed.
-     encoderFallback = new EncoderReplacementFallback(string.Empty);
-     // Decoding side: UTF7 historically had unusual behavior for bad bytes, which
-     // DecoderUTF7Fallback reproduces.
-     decoderFallback = new DecoderUTF7Fallback();
- }
- // Two UTF7 encodings are equal when they agree on the allowOptionals setting and
- // on both fallbacks. Anything that is not a UTF7Encoding (including null) is unequal.
- public override bool Equals(object value)
- {
-     UTF7Encoding other = value as UTF7Encoding;
-     if (other == null)
-     {
-         return false;
-     }
-     if (_allowOptionals != other._allowOptionals)
-     {
-         return false;
-     }
-     return EncoderFallback.Equals(other.EncoderFallback)
-         && DecoderFallback.Equals(other.DecoderFallback);
- }
- // Compared to all the other encodings, variations of UTF7 are unlikely
- // Hash is the code page plus the hash codes of both fallbacks.
- // _allowOptionals is deliberately left out; equal instances still hash equally.
- public override int GetHashCode()
- {
-     int hash = CodePage;
-     hash += EncoderFallback.GetHashCode();
-     hash += DecoderFallback.GetHashCode();
-     return hash;
- }
- // The following methods are copied from EncodingNLS.cs.
- // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
- // These should be kept in sync for the following classes:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // Returns the number of bytes required to encode a range of characters in
- // a character array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- // Returns the number of bytes needed to encode chars[index .. index + count).
- // Throws ArgumentNullException when chars is null and ArgumentOutOfRangeException
- // when index/count are negative or do not describe a range inside chars.
- public override unsafe int GetByteCount(char[] chars, int index, int count)
- {
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     if (index < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (count > chars.Length - index)
-     {
-         throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     // Nothing to measure; also sidesteps pinning an empty array.
-     if (count == 0)
-     {
-         return 0;
-     }
-     fixed (char* pChars = chars)
-     {
-         return GetByteCount(pChars + index, count, null);
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- // Returns the number of bytes needed to encode the whole string.
- // Throws ArgumentNullException when s is null.
- public override unsafe int GetByteCount(string s)
- {
-     if (s == null)
-     {
-         throw new ArgumentNullException(nameof(s));
-     }
-     int length = s.Length;
-     fixed (char* pChars = s)
-     {
-         return GetByteCount(pChars, length, null);
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // Pointer overload: returns the number of bytes needed to encode count chars starting at chars.
- // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a negative count.
- [CLSCompliant(false)]
- public override unsafe int GetByteCount(char* chars, int count)
- {
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Stateless call: no encoder is supplied.
-     return GetByteCount(chars, count, null);
- }
- // Parent method is safe.
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // Encodes s[charIndex .. charIndex + charCount) into bytes starting at byteIndex and
- // returns the value produced by the pointer-based GetBytes.
- // Throws ArgumentNullException when s or bytes is null and ArgumentOutOfRangeException
- // when the indexes/counts are negative or fall outside their buffers.
- public override unsafe int GetBytes(string s, int charIndex, int charCount,
-     byte[] bytes, int byteIndex)
- {
-     if (s == null)
-     {
-         throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
-     }
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (charIndex < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (charCount > s.Length - charIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
-     }
-     if (byteIndex < 0 || bytes.Length < byteIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
-     }
-     // Space left in the destination from byteIndex to the end of the array.
-     int room = bytes.Length - byteIndex;
-     fixed (char* pChars = s)
-     {
-         // GetReference is used instead of pinning the array directly so that an
-         // empty destination array does not need special handling here.
-         fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
-         {
-             return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, room, null);
-         }
-     }
- }
- // Encodes a range of characters in a character array into a range of bytes
- // in a byte array. An exception occurs if the byte array is not large
- // enough to hold the complete encoding of the characters. The
- // GetByteCount method can be used to determine the exact number of
- // bytes that will be produced for a given range of characters.
- // Alternatively, the GetMaxByteCount method can be used to
- // determine the maximum number of bytes that will be produced for a given
- // number of characters, regardless of the actual character values.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- // Encodes chars[charIndex .. charIndex + charCount) into bytes starting at byteIndex and
- // returns the value produced by the pointer-based GetBytes (0 when charCount is 0).
- // Throws ArgumentNullException when chars or bytes is null and ArgumentOutOfRangeException
- // when the indexes/counts are negative or fall outside their buffers.
- public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
-     byte[] bytes, int byteIndex)
- {
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (charIndex < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (charCount > chars.Length - charIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     if (byteIndex < 0 || bytes.Length < byteIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
-     }
-     // Nothing to encode; also sidesteps pinning an empty char array.
-     if (charCount == 0)
-     {
-         return 0;
-     }
-     // This is the space available from byteIndex onward, not the size of the whole array.
-     int room = bytes.Length - byteIndex;
-     fixed (char* pChars = chars)
-     {
-         fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
-         {
-             return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, room, null);
-         }
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // Pointer overload: encodes charCount chars from chars into at most byteCount bytes at bytes.
- // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a
- // negative count.
- [CLSCompliant(false)]
- public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
- {
-     // bytes is checked before chars so the reported parameter name matches the original order.
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (byteCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Stateless call: no encoder is supplied.
-     return GetBytes(chars, charCount, bytes, byteCount, null);
- }
- // Returns the number of characters produced by decoding a range of bytes
- // in a byte array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- // Returns the number of chars produced by decoding bytes[index .. index + count).
- // Throws ArgumentNullException when bytes is null and ArgumentOutOfRangeException
- // when index/count are negative or do not describe a range inside bytes.
- public override unsafe int GetCharCount(byte[] bytes, int index, int count)
- {
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (index < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (count > bytes.Length - index)
-     {
-         throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     // Nothing to decode; also sidesteps pinning an empty array.
-     if (count == 0)
-     {
-         return 0;
-     }
-     fixed (byte* pBytes = bytes)
-     {
-         return GetCharCount(pBytes + index, count, null);
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // Pointer overload: returns the number of chars produced by decoding count bytes at bytes.
- // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a negative count.
- [CLSCompliant(false)]
- public override unsafe int GetCharCount(byte* bytes, int count)
- {
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Stateless call: no decoder is supplied.
-     return GetCharCount(bytes, count, null);
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- // Decodes bytes[byteIndex .. byteIndex + byteCount) into chars starting at charIndex and
- // returns the value produced by the pointer-based GetChars (0 when byteCount is 0).
- // Throws ArgumentNullException when bytes or chars is null and ArgumentOutOfRangeException
- // when the indexes/counts are negative or fall outside their buffers.
- public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
-     char[] chars, int charIndex)
- {
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     if (byteIndex < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (byteCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (byteCount > bytes.Length - byteIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     if (charIndex < 0 || chars.Length < charIndex)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
-     }
-     // Nothing to decode; also sidesteps pinning an empty byte array.
-     if (byteCount == 0)
-     {
-         return 0;
-     }
-     // This is the space available from charIndex onward, not the size of the whole array.
-     int room = chars.Length - charIndex;
-     fixed (byte* pBytes = bytes)
-     {
-         fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
-         {
-             return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, room, null);
-         }
-     }
- }
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // Pointer overload: decodes byteCount bytes from bytes into at most charCount chars at chars.
- // Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for a
- // negative count.
- [CLSCompliant(false)]
- public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
- {
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (chars == null)
-     {
-         throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
-     }
-     // charCount is checked before byteCount so the reported parameter name matches the original order.
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (byteCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Stateless call: no decoder is supplied.
-     return GetChars(bytes, byteCount, chars, charCount, null);
- }
- // Returns a string containing the decoded representation of a range of
- // bytes in a byte array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
- // Decodes bytes[index .. index + count) and returns the result as a string
- // (string.Empty when count is 0).
- // Throws ArgumentNullException when bytes is null and ArgumentOutOfRangeException
- // when index/count are negative or do not describe a range inside bytes.
- public override unsafe string GetString(byte[] bytes, int index, int count)
- {
-     if (bytes == null)
-     {
-         throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
-     }
-     if (index < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (count < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     if (count > bytes.Length - index)
-     {
-         throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
-     }
-     // Empty range: skip pinning altogether.
-     if (count == 0)
-     {
-         return string.Empty;
-     }
-     fixed (byte* pBytes = bytes)
-     {
-         byte* start = pBytes + index;
-         return string.CreateStringFromEncoding(start, count, this);
-     }
- }
- //
- // End of standard methods copied from EncodingNLS.cs
- //
- // Counts the bytes needed for count chars by running the encoder with no output
- // buffer (bytes == null, capacity 0). baseEncoder may be null.
- internal sealed override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
- {
-     Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
-     Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");
-     int needed = GetBytes(chars, count, null, 0, baseEncoder);
-     return needed;
- }
- // Core UTF-7 encoder. The public GetBytes overloads in this class call it with a null
- // encoder, and the internal GetByteCount calls it with bytes == null to count only.
- //
- //   chars / charCount - input UTF-16 code units.
- //   bytes / byteCount - output buffer and its capacity (bytes == null, byteCount == 0 when counting).
- //   baseEncoder       - optional UTF7Encoding.Encoder that carries bits/bitCount between calls; may be null.
- // Returns buffer.Count as reported by the EncodingByteBuffer helper.
- //
- // State: bitCount == -1 means we are outside a base64 run. bitCount >= 0 means a '+' has
- // been emitted and bitCount bits at the low end of 'bits' are still waiting to be written.
- internal sealed override unsafe int GetBytes(
- char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)
- {
- Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
- Debug.Assert(chars != null, "[UTF7Encoding.GetBytes]chars!=null");
- Debug.Assert(charCount >= 0, "[UTF7Encoding.GetBytes]charCount >=0");
- // Get encoder info (null when called statelessly)
- UTF7Encoding.Encoder encoder = (UTF7Encoding.Encoder)baseEncoder;
- // Default bits & count: start outside base64 mode with nothing pending
- int bits = 0;
- int bitCount = -1;
- // Prepare our helper, which tracks the input and output positions
- Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
- this, encoder, bytes, byteCount, chars, charCount);
- if (encoder != null)
- {
- // Resume from the state left by the previous call
- bits = encoder.bits;
- bitCount = encoder.bitCount;
- // May have had too many left over: write out every complete 6-bit group first
- while (bitCount >= 6)
- {
- bitCount -= 6;
- // If we fail we'll never really have enough room
- if (!buffer.AddByte(_base64Bytes[(bits >> bitCount) & 0x3F]))
- ThrowBytesOverflow(encoder, buffer.Count == 0);
- }
- }
- while (buffer.MoreData)
- {
- char currentChar = buffer.GetNextChar();
- // Case 1: an ASCII character that may be written as-is
- if (currentChar < 0x80 && _directEncode[currentChar])
- {
- // If we are inside a base64 run, close it before writing the character
- if (bitCount >= 0)
- {
- if (bitCount > 0)
- {
- // Try to add the next byte: the leftover bits, left-aligned in a 6-bit group.
- // Operator precedence makes the index (bits << (6 - bitCount)) & 0x3F.
- if (!buffer.AddByte(_base64Bytes[bits << 6 - bitCount & 0x3F]))
- break; // Stop here, didn't throw
- bitCount = 0;
- }
- // Need to emit '-' and our char, 2 bytes total
- if (!buffer.AddByte((byte)'-'))
- break; // Stop here, didn't throw
- bitCount = -1;
- }
- // Need to emit our char
- if (!buffer.AddByte((byte)currentChar))
- break; // Stop here, didn't throw
- }
- // Case 2: a literal '+' outside base64 mode is written as the two bytes "+-"
- else if (bitCount < 0 && currentChar == '+')
- {
- if (!buffer.AddByte((byte)'+', (byte)'-'))
- break; // Stop here, didn't throw
- }
- // Case 3: everything else goes through base64
- else
- {
- if (bitCount < 0)
- {
- // Need to emit a + and 12 bits (3 bytes)
- // Only 12 of the 16 bits will be emitted this time, the other 4 wait 'til next time
- if (!buffer.AddByte((byte)'+'))
- break; // Stop here, didn't throw
- // We're now in bit mode, but haven't stored data yet
- bitCount = 0;
- }
- // Add our bits: append the 16-bit code unit below whatever is still pending
- bits = bits << 16 | currentChar;
- bitCount += 16;
- // Write out every complete 6-bit group, most significant first
- while (bitCount >= 6)
- {
- bitCount -= 6;
- if (!buffer.AddByte(_base64Bytes[(bits >> bitCount) & 0x3F]))
- {
- bitCount += 6; // We didn't use these bits
- currentChar = buffer.GetNextChar(); // We're processing this char still, but AddByte
- // --'d it when we ran out of space
- break; // Stop here, not enough room for bytes
- }
- }
- // Still holding a full group means the inner loop stopped for lack of room
- if (bitCount >= 6)
- break; // Didn't have room to encode enough bits
- }
- }
- // Now if we have bits left over we have to encode them.
- // MustFlush may have been cleared by encoding.ThrowBytesOverflow earlier if converting
- if (bitCount >= 0 && (encoder == null || encoder.MustFlush))
- {
- // Do we have bits we have to stick in?
- if (bitCount > 0)
- {
- if (buffer.AddByte(_base64Bytes[(bits << (6 - bitCount)) & 0x3F]))
- {
- // Emitted spare bits, 0 bits left
- bitCount = 0;
- }
- }
- // If converting and failed bitCount above, then we'll fail this too
- if (buffer.AddByte((byte)'-'))
- {
- // Turned off bit mode
- bits = 0;
- bitCount = -1;
- }
- else
- // If not successful, convert will maintain state for next time, also
- // AddByte will have decremented our char count, however we need it to remain the same
- buffer.GetNextChar();
- }
- // Do we have an encoder we're allowed to use?
- // bytes == null if counting, so don't use encoder then
- if (bytes != null && encoder != null)
- {
- // We already cleared bits & bitcount for mustflush case
- encoder.bits = bits;
- encoder.bitCount = bitCount;
- encoder._charsUsed = buffer.CharsUsed;
- }
- return buffer.Count;
- }
- // Counts the chars produced from count bytes by running the decoder with no output
- // buffer (chars == null, capacity 0). baseDecoder may be null.
- internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
- {
-     Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
-     Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");
-     int produced = GetChars(bytes, count, null, 0, baseDecoder);
-     return produced;
- }
- // Core UTF-7 decoder. The public GetChars overloads in this class call it with a null
- // decoder, and the internal GetCharCount calls it with chars == null to count only.
- //
- //   bytes / byteCount - input bytes.
- //   chars / charCount - output buffer and its capacity (chars == null, charCount == 0 when counting).
- //   baseDecoder       - optional UTF7Encoding.Decoder that carries bits/bitCount/firstByte between calls; may be null.
- // Returns buffer.Count as reported by the EncodingCharBuffer helper.
- //
- // State: bitCount == -1 means we are outside a base64 run. bitCount >= 0 means a '+' has
- // been seen and bitCount bits are accumulated at the low end of 'bits'. firstByte is true
- // only immediately after that '+', so that "+-" can be recognized as a literal '+'.
- internal sealed override unsafe int GetChars(
- byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)
- {
- Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
- Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
- Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");
- // Might use a decoder (null when called statelessly)
- UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder)baseDecoder;
- // Get our output buffer info.
- Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
- this, decoder, chars, charCount, bytes, byteCount);
- // Get decoder info: defaults first, then resume from the decoder if there is one
- int bits = 0;
- int bitCount = -1;
- bool firstByte = false;
- if (decoder != null)
- {
- bits = decoder.bits;
- bitCount = decoder.bitCount;
- firstByte = decoder.firstByte;
- Debug.Assert(firstByte == false || decoder.bitCount <= 0,
- "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
- }
- // We may have had bits in the decoder that we couldn't output last time, so do so now
- if (bitCount >= 16)
- {
- // Check our decoder buffer
- if (!buffer.AddChar((char)((bits >> (bitCount - 16)) & 0xFFFF)))
- ThrowCharsOverflow(decoder, true); // Always throw, they need at least 1 char even in Convert
- // Used this one, clean up extra bits
- bitCount -= 16;
- }
- // Loop through the input
- while (buffer.MoreData)
- {
- byte currentByte = buffer.GetNextByte();
- // The char to emit for this byte, when there is one
- int c;
- if (bitCount >= 0)
- {
- //
- // Modified base 64 encoding.
- //
- sbyte v;
- if (currentByte < 0x80 && ((v = _base64Values[currentByte]) >= 0))
- {
- // A valid base64 character: append its 6 bits
- firstByte = false;
- bits = (bits << 6) | ((byte)v);
- bitCount += 6;
- if (bitCount >= 16)
- {
- // A full UTF-16 code unit is available: take the top 16 accumulated bits
- c = (bits >> (bitCount - 16)) & 0xFFFF;
- bitCount -= 16;
- }
- // If not enough bits just continue
- else continue;
- }
- else
- {
- // If it wasn't a base 64 byte, everything's going to turn off base 64 mode
- bitCount = -1;
- if (currentByte != '-')
- {
- // >= 0x80 (because of 1st if statement)
- // We need this check since the _base64Values[b] check below need b <= 0x7f.
- // This is not a valid base 64 byte. Terminate the shifted-sequence and
- // emit this byte.
- // not in base 64 table
- // According to the RFC 1642 and the example code of UTF-7
- // in Unicode 2.0, we should just zero-extend the invalid UTF7 byte
- // Chars won't be updated unless this works, try to fallback
- if (!buffer.Fallback(currentByte))
- break; // Stop here, didn't throw
- // Used that byte, we're done with it
- continue;
- }
- //
- // The encoding for '+' is "+-".
- //
- if (firstByte) c = '+';
- // We just turn it off if not emitting a +, so we're done.
- else continue;
- }
- //
- // End of modified base 64 encoding block.
- //
- }
- else if (currentByte == '+')
- {
- //
- // Found the start of a modified base 64 encoding block or a plus sign.
- //
- bitCount = 0;
- firstByte = true;
- continue;
- }
- else
- {
- // Normal character
- if (currentByte >= 0x80)
- {
- // Try to fallback
- if (!buffer.Fallback(currentByte))
- break; // Stop here, didn't throw
- // Done falling back
- continue;
- }
- // Use the normal character
- c = currentByte;
- }
- // Every path that reaches here assigned c a masked 16-bit value, '+', or a byte,
- // so this check always passes.
- if (c >= 0)
- {
- // Check our buffer
- if (!buffer.AddChar((char)c))
- {
- // No room. If it was a plain char we'll try again later.
- // Note, we'll consume this byte and stick it in decoder, even if we can't output it
- if (bitCount >= 0) // Can we remember this byte (char)
- {
- buffer.AdjustBytes(+1); // Need to re-add the byte that AddChar subtracted when it failed
- bitCount += 16; // We'll still need that char we have in our bits
- }
- break; // didn't throw, stop
- }
- }
- }
- // Stick stuff in the decoder if we can (chars == null if counting, so don't store decoder)
- if (chars != null && decoder != null)
- {
- // MustFlush? (Could've been cleared by ThrowCharsOverflow if Convert & didn't reach end of buffer)
- if (decoder.MustFlush)
- {
- // RFC doesn't specify what would happen if we have non-0 leftover bits, we just drop them
- decoder.bits = 0;
- decoder.bitCount = -1;
- decoder.firstByte = false;
- }
- else
- {
- decoder.bits = bits;
- decoder.bitCount = bitCount;
- decoder.firstByte = firstByte;
- }
- decoder._bytesUsed = buffer.BytesUsed;
- }
- // else ignore any hanging bits.
- // Return our count
- return buffer.Count;
- }
- // Returns a new stateful decoder bound to this encoding.
- public override System.Text.Decoder GetDecoder()
- {
-     System.Text.Decoder decoder = new UTF7Encoding.Decoder(this);
-     return decoder;
- }
- // Returns a new stateful encoder bound to this encoding.
- public override System.Text.Encoder GetEncoder()
- {
-     System.Text.Encoder encoder = new UTF7Encoding.Encoder(this);
-     return encoder;
- }
- // Returns an upper bound on the bytes needed to encode charCount chars: 3 * charCount + 2.
- // Throws ArgumentOutOfRangeException when charCount is negative or the bound does not fit in an int.
- public override int GetMaxByteCount(int charCount)
- {
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount),
-             SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Where the bound comes from:
-     //  * If no char can be direct-encoded, each byte carries 6 bits of a 16-bit char and
-     //    two more bytes are needed for the shift-in ('+') and shift-out ('-') marks:
-     //        2 + Math.Ceiling((double)charCount * 16 / 6)  <=  2 + 3 * charCount
-     //  * The longest case is alternating encoded, direct, encoded data, which costs
-     //    5 + 1 + 5... bytes per char.
-     //  * UTF7 has no left-over surrogates, but with no input we may still have to emit
-     //    a '-' to leave base64 mode when MustFlush is true.
-     // It's easiest to think of it as 2 bytes to turn base64 mode on/off plus 3 bytes per
-     // char: 3 bytes is 18 bits, more than a char needs, and for a direct-encoded char
-     // 3 bytes are enough to leave base64 mode and enter it again.
-     // UTF7 encodes surrogates individually and does not care about mismatches, so every
-     // code point is encodable in UTF7.
-     // The arithmetic is done in 64 bits so the overflow check below is meaningful.
-     long worstCase = 3L * charCount + 2;
-     if (worstCase > int.MaxValue)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
-     }
-     return (int)worstCase;
- }
- // Returns an upper bound on the chars produced by decoding byteCount bytes.
- // Throws ArgumentOutOfRangeException when byteCount is negative.
- public override int GetMaxCharCount(int byteCount)
- {
-     if (byteCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(byteCount),
-             SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // Worst case is one char per byte. Extra bits are ignored (per spec), so there is
-     // no "unknown" expansion in this direction. The result is never below 1, to leave
-     // room for left-over bits when a decoder is being flushed.
-     return byteCount == 0 ? 1 : byteCount;
- }
- // Of all the amazing things... This MUST be Decoder so that our com name
- // for System.Text.Decoder doesn't change
- // Stateful UTF-7 decoder. The class name must remain "Decoder" (see the note preceding
- // this class about the COM name of System.Text.Decoder).
- private sealed class Decoder : DecoderNLS
- {
-     // Bits accumulated from base64 input that have not yet been emitted as chars.
-     internal int bits;
-     // Number of valid bits in 'bits'; -1 when not inside a base64 run.
-     internal int bitCount;
-     // True right after a '+' has been read, before any further byte.
-     internal bool firstByte;
-     public Decoder(UTF7Encoding encoding)
-         : base(encoding)
-     {
-         // Nothing to do here: per the original note, the base constructor calls Reset.
-     }
-     // Returns to the initial state (outside base64 mode) and resets the fallback buffer if one exists.
-     public override void Reset()
-     {
-         bits = 0;
-         bitCount = -1;
-         firstByte = false;
-         _fallbackBuffer?.Reset();
-     }
-     // True while we are inside a base64 run.
-     // NOTE: this relies on seeing the closing '-', which some encoders might not write;
-     // without it we never consider the input finished.
-     internal override bool HasState => bitCount != -1;
- }
- // Of all the amazing things... This MUST be Encoder so that our com name
- // for System.Text.Encoder doesn't change
- // Stateful UTF-7 encoder. The class name must remain "Encoder" (see the note preceding
- // this class about the COM name of System.Text.Encoder).
- private sealed class Encoder : EncoderNLS
- {
-     // Bits of input chars that have not yet been written out as base64.
-     internal int bits;
-     // Number of valid bits in 'bits'; -1 when not inside a base64 run.
-     internal int bitCount;
-     public Encoder(UTF7Encoding encoding)
-         : base(encoding)
-     {
-         // Nothing to do here: per the original note, the base constructor calls Reset.
-     }
-     // Returns to the initial state (outside base64 mode) and resets the fallback buffer if one exists.
-     public override void Reset()
-     {
-         bitCount = -1;
-         bits = 0;
-         _fallbackBuffer?.Reset();
-     }
-     // True when bits are pending or a base64 run is still open.
-     internal override bool HasState => bits != 0 || bitCount != -1;
- }
- // Preexisting UTF7 behavior for bad bytes was just to spit out the byte as the next char
- // and turn off base64 mode if it was in that mode. We still exit the mode, but now we fallback.
- // Decoder fallback used by UTF7Encoding: hands each bad byte to a DecoderUTF7FallbackBuffer.
- private sealed class DecoderUTF7Fallback : DecoderFallback
- {
-     // This fallback carries no configuration.
-     public DecoderUTF7Fallback()
-     {
-     }
-     public override DecoderFallbackBuffer CreateFallbackBuffer()
-     {
-         DecoderFallbackBuffer created = new DecoderUTF7FallbackBuffer(this);
-         return created;
-     }
-     // Maximum number of characters this fallback can return: one char per bad byte.
-     public override int MaxCharCount => 1;
-     // All instances are interchangeable, so any other DecoderUTF7Fallback is equal.
-     public override bool Equals(object value)
-     {
-         return value is DecoderUTF7Fallback;
-     }
-     // Constant hash, consistent with Equals treating every instance as equal.
-     public override int GetHashCode()
-     {
-         return 984;
-     }
- }
- // Fallback buffer that replays a single bad byte as a char of the same value.
- // A zero byte cannot be replayed, because (char)0 is what GetNextChar returns when empty.
- private sealed class DecoderUTF7FallbackBuffer : DecoderFallbackBuffer
- {
-     // The char to hand back for the current fallback.
-     private char cFallback = '\0';
-     // Chars still to be returned; negative when no fallback is in progress.
-     private int iCount = -1;
-     // Total chars in the current fallback (set to 1 once Fallback succeeds).
-     private int iSize;
-     // The fallback argument is not used.
-     public DecoderUTF7FallbackBuffer(DecoderUTF7Fallback fallback)
-     {
-     }
-     // Starts a fallback for one unknown byte. Returns false when that byte is 0.
-     public override bool Fallback(byte[] bytesUnknown, int index)
-     {
-         // We expect no previous fallback in our buffer
-         Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.Fallback] Can't have recursive fallbacks");
-         Debug.Assert(bytesUnknown.Length == 1, "[DecoderUTF7FallbackBuffer.Fallback] Only possible fallback case should be 1 unknown byte");
-         cFallback = (char)bytesUnknown[0];
-         bool usable = cFallback != 0;
-         if (usable)
-         {
-             iSize = 1;
-             iCount = 1;
-         }
-         return usable;
-     }
-     // Returns the pending char, or (char)0 once nothing is left. iCount is decremented
-     // on every call. Consequence: a 0 byte in a UTF7 stream is never emitted.
-     public override char GetNextChar()
-     {
-         int before = iCount;
-         iCount = before - 1;
-         if (before > 0)
-         {
-             return cFallback;
-         }
-         return (char)0;
-     }
-     // Steps back one char when a fallback is in progress; returns true if that was allowed.
-     public override bool MovePrevious()
-     {
-         if (iCount >= 0)
-         {
-             iCount++;
-         }
-         bool moved = iCount >= 0 && iCount <= iSize;
-         return moved;
-     }
-     // Number of chars left in this fallback (never negative).
-     public override int Remaining
-     {
-         get
-         {
-             if (iCount > 0)
-             {
-                 return iCount;
-             }
-             return 0;
-         }
-     }
-     // Clears the buffer.
-     public override unsafe void Reset()
-     {
-         iCount = -1;
-         byteStart = null;
-     }
-     // Counting-only fallback: nothing is copied. Both the array and the pointer are
-     // accepted because there may be extra bytes (hence the array) and the index may be
-     // needed (hence the byte*). Returns 0 for a zero byte and 1 otherwise.
-     // Throws ArgumentException unless exactly one byte is supplied.
-     internal unsafe override int InternalFallback(byte[] bytes, byte* pBytes)
-     {
-         // We expect no previous fallback in our buffer
-         Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.InternalFallback] Can't have recursive fallbacks");
-         if (bytes.Length != 1)
-         {
-             throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
-         }
-         if (bytes[0] == 0)
-         {
-             return 0;
-         }
-         return 1;
-     }
- }
- }
- }
|