c
/
mono
mirror da https://github.com/mono/mono.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
							// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Buffers;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace System.Text
{
    // An Encoder is used to encode a sequence of blocks of characters into
    // a sequence of blocks of bytes. Following instantiation of an encoder,
    // sequential blocks of characters are converted into blocks of bytes through
    // calls to the GetBytes method. The encoder maintains state between the
    // conversions, allowing it to correctly encode character sequences that span
    // adjacent blocks.
    //
    // Instances of specific implementations of the Encoder abstract base
    // class are typically obtained through calls to the GetEncoder method
    // of Encoding objects.
    //

    internal class EncoderNLS : Encoder
    {
        // Need a place for the last left over character, most of our encodings use this
        internal char _charLeftOver;
        private Encoding _encoding;
        private bool _mustFlush;
        internal bool _throwOnOverflow;
        internal int _charsUsed;

        internal EncoderNLS(Encoding encoding)
        {
            _encoding = encoding;
            _fallback = _encoding.EncoderFallback;
            this.Reset();
        }

        public override void Reset()
        {
            _charLeftOver = (char)0;
            if (_fallbackBuffer != null)
                _fallbackBuffer.Reset();
        }

        public override unsafe int GetByteCount(char[] chars, int index, int count, bool flush)
        {
            // Validate input parameters
            if (chars == null)
                throw new ArgumentNullException(nameof(chars),
                      SR.ArgumentNull_Array);

            if (index < 0 || count < 0)
                throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)),
                      SR.ArgumentOutOfRange_NeedNonNegNum);

            if (chars.Length - index < count)
                throw new ArgumentOutOfRangeException(nameof(chars),
                      SR.ArgumentOutOfRange_IndexCountBuffer);

            // Just call the pointer version
            int result = -1;
            fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
            {
                result = GetByteCount(pChars + index, count, flush);
            }
            return result;
        }

        public unsafe override int GetByteCount(char* chars, int count, bool flush)
        {
            // Validate input parameters
            if (chars == null)
                throw new ArgumentNullException(nameof(chars),
                      SR.ArgumentNull_Array);

            if (count < 0)
                throw new ArgumentOutOfRangeException(nameof(count),
                      SR.ArgumentOutOfRange_NeedNonNegNum);

            _mustFlush = flush;
            _throwOnOverflow = true;
            Debug.Assert(_encoding != null);
            return _encoding.GetByteCount(chars, count, this);
        }

        public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                              byte[] bytes, int byteIndex, bool flush)
        {
            // Validate parameters
            if (chars == null || bytes == null)
                throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)),
                      SR.ArgumentNull_Array);

            if (charIndex < 0 || charCount < 0)
                throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
                      SR.ArgumentOutOfRange_NeedNonNegNum);

            if (chars.Length - charIndex < charCount)
                throw new ArgumentOutOfRangeException(nameof(chars),
                      SR.ArgumentOutOfRange_IndexCountBuffer);

            if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException(nameof(byteIndex),
                     SR.ArgumentOutOfRange_Index);

            int byteCount = bytes.Length - byteIndex;

            // Just call pointer version
            fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
            fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))

                // Remember that charCount is # to decode, not size of array.
                return GetBytes(pChars + charIndex, charCount,
                                pBytes + byteIndex, byteCount, flush);
        }

        public unsafe override int GetBytes(char* chars, int charCount, byte* bytes, int byteCount, bool flush)
        {
            // Validate parameters
            if (chars == null || bytes == null)
                throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)),
                      SR.ArgumentNull_Array);

            if (byteCount < 0 || charCount < 0)
                throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
                      SR.ArgumentOutOfRange_NeedNonNegNum);

            _mustFlush = flush;
            _throwOnOverflow = true;
            Debug.Assert(_encoding != null);
            return _encoding.GetBytes(chars, charCount, bytes, byteCount, this);
        }

        // This method is used when your output buffer might not be large enough for the entire result.
        // Just call the pointer version.  (This gets bytes)
        public override unsafe void Convert(char[] chars, int charIndex, int charCount,
                                              byte[] bytes, int byteIndex, int byteCount, bool flush,
                                              out int charsUsed, out int bytesUsed, out bool completed)
        {
            // Validate parameters
            if (chars == null || bytes == null)
                throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)),
                      SR.ArgumentNull_Array);

            if (charIndex < 0 || charCount < 0)
                throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
                      SR.ArgumentOutOfRange_NeedNonNegNum);

            if (byteIndex < 0 || byteCount < 0)
                throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
                      SR.ArgumentOutOfRange_NeedNonNegNum);

            if (chars.Length - charIndex < charCount)
                throw new ArgumentOutOfRangeException(nameof(chars),
                      SR.ArgumentOutOfRange_IndexCountBuffer);

            if (bytes.Length - byteIndex < byteCount)
                throw new ArgumentOutOfRangeException(nameof(bytes),
                      SR.ArgumentOutOfRange_IndexCountBuffer);

            // Just call the pointer version (can't do this for non-msft encoders)
            fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
            {
                fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
                {
                    Convert(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, flush,
                        out charsUsed, out bytesUsed, out completed);
                }
            }
        }

        // This is the version that uses pointers.  We call the base encoding worker function
        // after setting our appropriate internal variables.  This is getting bytes
        public override unsafe void Convert(char* chars, int charCount,
                                              byte* bytes, int byteCount, bool flush,
                                              out int charsUsed, out int bytesUsed, out bool completed)
        {
            // Validate input parameters
            if (bytes == null || chars == null)
                throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
                    SR.ArgumentNull_Array);
            if (charCount < 0 || byteCount < 0)
                throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
                    SR.ArgumentOutOfRange_NeedNonNegNum);

            // We don't want to throw
            _mustFlush = flush;
            _throwOnOverflow = false;
            _charsUsed = 0;

            // Do conversion
            Debug.Assert(_encoding != null);
            bytesUsed = _encoding.GetBytes(chars, charCount, bytes, byteCount, this);
            charsUsed = _charsUsed;

            // Per MSDN, "The completed output parameter indicates whether all the data in the input
            // buffer was converted and stored in the output buffer." That means we've successfully
            // consumed all the input _and_ there's no pending state or fallback data remaining to be output.

            completed = (charsUsed == charCount)
                && !this.HasState
                && (_fallbackBuffer is null || _fallbackBuffer.Remaining == 0);

            // Our data thingys are now full, we can return
        }

        public Encoding Encoding
        {
            get
            {
                Debug.Assert(_encoding != null);
                return _encoding;
            }
        }

        public bool MustFlush
        {
            get
            {
                return _mustFlush;
            }
        }

        /// <summary>
        /// States whether a call to <see cref="Encoding.GetBytes(char*, int, byte*, int, EncoderNLS)"/> must first drain data on this <see cref="EncoderNLS"/> instance.
        /// </summary>
        internal bool HasLeftoverData => _charLeftOver != default || (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0);

        // Anything left in our encoder?
        internal virtual bool HasState
        {
            get
            {
                return (_charLeftOver != (char)0);
            }
        }

        // Allow encoding to clear our must flush instead of throwing (in ThrowBytesOverflow)
        internal void ClearMustFlush()
        {
            _mustFlush = false;
        }

        internal int DrainLeftoverDataForGetByteCount(ReadOnlySpan<char> chars, out int charsConsumed)
        {
            // Quick check: we _should not_ have leftover fallback data from a previous invocation,
            // as we'd end up consuming any such data and would corrupt whatever Convert call happens
            // to be in progress.

            if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
            {
                throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, Encoding.EncodingName, _fallbackBuffer.GetType()));
            }

            // If we have a leftover high surrogate from a previous operation, consume it now.
            // We won't clear the _charLeftOver field since GetByteCount is supposed to be
            // a non-mutating operation, and we need the field to retain its value for the
            // next call to Convert.

            charsConsumed = 0; // could be incorrect, will fix up later in the method

            if (_charLeftOver == default)
            {
                return 0; // no leftover high surrogate char - short-circuit and finish
            }
            else
            {
                char secondChar = default;

                if (chars.IsEmpty)
                {
                    // If the input buffer is empty and we're not being asked to flush, no-op and return
                    // success to our caller. If we're being asked to flush, the leftover high surrogate from
                    // the previous operation will go through the fallback mechanism by itself.

                    if (!MustFlush)
                    {
                        return 0; // no-op = success
                    }
                }
                else
                {
                    secondChar = chars[0];
                }

                // If we have to fallback the chars we're reading immediately below, populate the
                // fallback buffer with the invalid data. We'll just fall through to the "consume
                // fallback buffer" logic at the end of the method.

                bool didFallback;

                if (Rune.TryCreate(_charLeftOver, secondChar, out Rune rune))
                {
                    charsConsumed = 1; // consumed the leftover high surrogate + the first char in the input buffer

                    Debug.Assert(_encoding != null);
                    if (_encoding.TryGetByteCount(rune, out int byteCount))
                    {
                        Debug.Assert(byteCount >= 0, "Encoding shouldn't have returned a negative byte count.");
                        return byteCount;
                    }
                    else
                    {
                        // The fallback mechanism relies on a negative index to convey "the start of the invalid
                        // sequence was some number of chars back before the current buffer." In this block and
                        // in the block immediately thereafter, we know we have a single leftover high surrogate
                        // character from a previous operation, so we provide an index of -1 to convey that the
                        // char immediately before the current buffer was the start of the invalid sequence.

                        didFallback = FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1);
                    }
                }
                else
                {
                    didFallback = FallbackBuffer.Fallback(_charLeftOver, index: -1);
                }

                // Now tally the number of bytes that would've been emitted as part of fallback.
                Debug.Assert(_fallbackBuffer != null);
                return _fallbackBuffer.DrainRemainingDataForGetByteCount();
            }
        }

        internal bool TryDrainLeftoverDataForGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int charsConsumed, out int bytesWritten)
        {
            // We may have a leftover high surrogate data from a previous invocation, or we may have leftover
            // data in the fallback buffer, or we may have neither, but we will never have both. Check for these
            // conditions and handle them now.

            charsConsumed = 0; // could be incorrect, will fix up later in the method
            bytesWritten = 0; // could be incorrect, will fix up later in the method

            if (_charLeftOver != default)
            {
                char secondChar = default;

                if (chars.IsEmpty)
                {
                    // If the input buffer is empty and we're not being asked to flush, no-op and return
                    // success to our caller. If we're being asked to flush, the leftover high surrogate from
                    // the previous operation will go through the fallback mechanism by itself.

                    if (!MustFlush)
                    {
                        charsConsumed = 0;
                        bytesWritten = 0;
                        return true; // no-op = success
                    }
                }
                else
                {
                    secondChar = chars[0];
                }

                // If we have to fallback the chars we're reading immediately below, populate the
                // fallback buffer with the invalid data. We'll just fall through to the "consume
                // fallback buffer" logic at the end of the method.

                if (Rune.TryCreate(_charLeftOver, secondChar, out Rune rune))
                {
                    charsConsumed = 1; // at the very least, we consumed 1 char from the input
                    Debug.Assert(_encoding != null);
                    switch (_encoding.EncodeRune(rune, bytes, out bytesWritten))
                    {
                        case OperationStatus.Done:
                            _charLeftOver = default; // we just consumed this char
                            return true; // that's all - we've handled the leftover data

                        case OperationStatus.DestinationTooSmall:
                            _charLeftOver = default; // we just consumed this char
                            _encoding.ThrowBytesOverflow(this, nothingEncoded: true); // will throw
                            break;

                        case OperationStatus.InvalidData:
                            FallbackBuffer.Fallback(_charLeftOver, secondChar, index: -1); // see comment in DrainLeftoverDataForGetByteCount
                            break;

                        default:
                            Debug.Fail("Unknown return value.");
                            break;
                    }
                }
                else
                {
                    FallbackBuffer.Fallback(_charLeftOver, index: -1); // see comment in DrainLeftoverDataForGetByteCount
                }
            }

            // Now check the fallback buffer for any remaining data.

            if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
            {
                return _fallbackBuffer.TryDrainRemainingDataForGetBytes(bytes, out bytesWritten);
            }

            // And we're done!

            return true; // success
        }
    }
}