| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211 |
- // Licensed to the .NET Foundation under one or more agreements.
- // The .NET Foundation licenses this file to you under the MIT license.
- // See the LICENSE file in the project root for more information.
- //
- // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
- //
- using System;
- using System.Diagnostics;
- using System.Globalization;
- using System.Runtime.InteropServices;
- namespace System.Text
- {
- // Encodes text into and out of UTF-32. UTF-32 is a way of writing
- // Unicode characters with a single storage unit (32 bits) per character.
- //
- // The UTF-32 byte order mark is simply the Unicode byte order mark
- // (0x00FEFF) written in UTF-32 (0x0000FEFF or 0xFFFE0000). The byte order
- // mark is used mostly to distinguish UTF-32 text from other encodings, and doesn't
- // switch the byte orderings.
- public sealed class UTF32Encoding : Encoding
- {
- /*
- words bits UTF-32 representation
- ----- ---- -----------------------------------
- 1 16 00000000 00000000 xxxxxxxx xxxxxxxx
- 2 21 00000000 000xxxxx hhhhhhll llllllll
- ----- ---- -----------------------------------
- Surrogate:
- Real Unicode value = (HighSurrogate - 0xD800) * 0x400 + (LowSurrogate - 0xDC00) + 0x10000
- */
// Instances handed out by Encoding.UTF32 (little-endian) and Encoding.BigEndianUTF32.
// Both emit a byte order mark. Because they are static fields, they are not created
// until a static member of this class is first referenced.
internal static readonly UTF32Encoding s_default =
    new UTF32Encoding(bigEndian: false, byteOrderMark: true);
internal static readonly UTF32Encoding s_bigEndianDefault =
    new UTF32Encoding(bigEndian: true, byteOrderMark: true);

// Per-instance options; each starts out false and is assigned by the constructors.
private readonly bool _emitUTF32ByteOrderMark;  // from the byteOrderMark constructor argument
private readonly bool _isThrowException;        // from the throwOnInvalidCharacters constructor argument
private readonly bool _bigEndian;               // from the bigEndian constructor argument
// Creates a little-endian UTF-32 encoding that emits a byte order mark.
public UTF32Encoding()
    : this(bigEndian: false, byteOrderMark: true)
{
}
// Creates a UTF-32 encoding with the requested byte order and byte order mark setting.
// The base Encoding is constructed with 12001 for big-endian and 12000 for little-endian.
public UTF32Encoding(bool bigEndian, bool byteOrderMark)
    : base(bigEndian ? 12001 : 12000)
{
    _emitUTF32ByteOrderMark = byteOrderMark;
    _bigEndian = bigEndian;
}
// Creates a UTF-32 encoding and additionally selects whether invalid data throws
// (throwOnInvalidCharacters == true) or is replaced.
public UTF32Encoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidCharacters)
    : this(bigEndian, byteOrderMark)
{
    _isThrowException = throwOnInvalidCharacters;

    // The fallbacks were already set up while the flag was still false, so they
    // only need to be redone when exceptions were requested.
    if (throwOnInvalidCharacters)
    {
        SetDefaultFallbacks();
    }
}
// Installs the encoder and decoder fallbacks for this instance: the exception
// fallbacks when _isThrowException is set, otherwise replacement fallbacks that
// substitute the single character "\xFFFD".
internal override void SetDefaultFallbacks()
{
    if (!_isThrowException)
    {
        encoderFallback = new EncoderReplacementFallback("\xFFFD");
        decoderFallback = new DecoderReplacementFallback("\xFFFD");
        return;
    }

    encoderFallback = EncoderFallback.ExceptionFallback;
    decoderFallback = DecoderFallback.ExceptionFallback;
}
- // The following methods are copied from EncodingNLS.cs.
- // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
- // These should be kept in sync for the following classes:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // Returns the number of bytes required to encode a range of characters in
- // a character array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
// Returns the number of bytes needed to encode `count` characters of `chars`
// beginning at `index`. Arguments are validated here; the counting is delegated to
// the pointer-based overload with no encoder state.
//
// Throws ArgumentNullException when chars is null and ArgumentOutOfRangeException
// when index/count are negative or do not describe a range inside chars.
public override unsafe int GetByteCount(char[] chars, int index, int count)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > chars.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Nothing to count; this also avoids pinning an empty array.
    if (count == 0)
    {
        return 0;
    }

    fixed (char* pinned = chars)
    {
        return GetByteCount(pinned + index, count, null);
    }
}
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
// Returns the number of bytes needed to encode every character of `s`.
// Throws ArgumentNullException when s is null.
public override unsafe int GetByteCount(string s)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    int length = s.Length;

    fixed (char* first = s)
    {
        // No encoder: count the string as one complete, self-contained block.
        return GetByteCount(first, length, null);
    }
}
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Returns the number of bytes needed to encode `count` characters starting at `chars`.
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for
// a negative count.
[CLSCompliant(false)]
public override unsafe int GetByteCount(char* chars, int count)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Count without any encoder state.
    return GetByteCount(chars, count, null);
}
- // Parent method is safe.
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Encodes `charCount` characters of `s`, starting at `charIndex`, into `bytes`
// beginning at `byteIndex`, and returns the number of bytes written.
//
// Throws ArgumentNullException when s or bytes is null and ArgumentOutOfRangeException
// when an index/count is negative or lies outside its buffer.
public override unsafe int GetBytes(string s, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount > s.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // Space that remains in the destination after byteIndex.
    int available = bytes.Length - byteIndex;

    fixed (char* source = s)
    fixed (byte* destination = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        return GetBytes(source + charIndex, charCount, destination + byteIndex, available, null);
    }
}
- // Encodes a range of characters in a character array into a range of bytes
- // in a byte array. An exception occurs if the byte array is not large
- // enough to hold the complete encoding of the characters. The
- // GetByteCount method can be used to determine the exact number of
- // bytes that will be produced for a given range of characters.
- // Alternatively, the GetMaxByteCount method can be used to
- // determine the maximum number of bytes that will be produced for a given
- // number of characters, regardless of the actual character values.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
// Encodes `charCount` characters of `chars`, starting at `charIndex`, into `bytes`
// beginning at `byteIndex`, and returns the number of bytes written.
//
// Throws ArgumentNullException when chars or bytes is null and
// ArgumentOutOfRangeException when an index/count is negative or lies outside its buffer.
public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                    byte[] bytes, int byteIndex)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (charIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (charCount > chars.Length - charIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (byteIndex < 0 || byteIndex > bytes.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
    }

    // Nothing to encode; this also avoids pinning an empty array.
    if (charCount == 0)
    {
        return 0;
    }

    // The pointer overload takes the space available for output, not the array size.
    int available = bytes.Length - byteIndex;

    fixed (char* source = chars)
    fixed (byte* destination = &MemoryMarshal.GetReference((Span<byte>)bytes))
    {
        return GetBytes(source + charIndex, charCount, destination + byteIndex, available, null);
    }
}
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Encodes `charCount` characters at `chars` into at most `byteCount` bytes at `bytes`
// and returns the number of bytes written.
//
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for
// a negative count.
[CLSCompliant(false)]
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
    if (bytes == null || chars == null)
    {
        // bytes is reported first when both pointers are null.
        string missing = bytes == null ? nameof(bytes) : nameof(chars);
        throw new ArgumentNullException(missing, SR.ArgumentNull_Array);
    }

    if (charCount < 0 || byteCount < 0)
    {
        string negative = charCount < 0 ? nameof(charCount) : nameof(byteCount);
        throw new ArgumentOutOfRangeException(negative, SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Encode without any encoder state.
    return GetBytes(chars, charCount, bytes, byteCount, null);
}
- // Returns the number of characters produced by decoding a range of bytes
- // in a byte array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
// Returns the number of characters produced by decoding `count` bytes of `bytes`
// beginning at `index`.
//
// Throws ArgumentNullException when bytes is null and ArgumentOutOfRangeException
// when index/count are negative or do not describe a range inside bytes.
public override unsafe int GetCharCount(byte[] bytes, int index, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > bytes.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Nothing to decode; this also avoids pinning an empty array.
    if (count == 0)
    {
        return 0;
    }

    fixed (byte* pinned = bytes)
    {
        return GetCharCount(pinned + index, count, null);
    }
}
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Returns the number of characters produced by decoding `count` bytes starting at `bytes`.
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for
// a negative count.
[CLSCompliant(false)]
public override unsafe int GetCharCount(byte* bytes, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Count without any decoder state.
    return GetCharCount(bytes, count, null);
}
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
// Decodes `byteCount` bytes of `bytes`, starting at `byteIndex`, into `chars`
// beginning at `charIndex`, and returns the number of characters written.
//
// Throws ArgumentNullException when bytes or chars is null and
// ArgumentOutOfRangeException when an index/count is negative or lies outside its buffer.
public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                    char[] chars, int charIndex)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (byteIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (byteCount > bytes.Length - byteIndex)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    if (charIndex < 0 || charIndex > chars.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
    }

    // Nothing to decode; this also avoids pinning an empty array.
    if (byteCount == 0)
    {
        return 0;
    }

    // The pointer overload takes the space available for output, not the array size.
    int available = chars.Length - charIndex;

    fixed (byte* source = bytes)
    fixed (char* destination = &MemoryMarshal.GetReference((Span<char>)chars))
    {
        return GetChars(source + byteIndex, byteCount, destination + charIndex, available, null);
    }
}
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// Decodes `byteCount` bytes at `bytes` into at most `charCount` characters at `chars`
// and returns the number of characters written.
//
// Throws ArgumentNullException for a null pointer and ArgumentOutOfRangeException for
// a negative count.
[CLSCompliant(false)]
public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
{
    if (bytes == null || chars == null)
    {
        // bytes is reported first when both pointers are null.
        string missing = bytes == null ? nameof(bytes) : nameof(chars);
        throw new ArgumentNullException(missing, SR.ArgumentNull_Array);
    }

    if (charCount < 0 || byteCount < 0)
    {
        string negative = charCount < 0 ? nameof(charCount) : nameof(byteCount);
        throw new ArgumentOutOfRangeException(negative, SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Decode without any decoder state.
    return GetChars(bytes, byteCount, chars, charCount, null);
}
- // Returns a string containing the decoded representation of a range of
- // bytes in a byte array.
- //
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
// Decodes `count` bytes of `bytes`, beginning at `index`, into a new string.
//
// Throws ArgumentNullException when bytes is null and ArgumentOutOfRangeException
// when index/count are negative or do not describe a range inside bytes.
public override unsafe string GetString(byte[] bytes, int index, int count)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count > bytes.Length - index)
    {
        throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
    }

    // Empty input decodes to the empty string; this also avoids pinning an empty array.
    if (count == 0)
    {
        return string.Empty;
    }

    fixed (byte* pinned = bytes)
    {
        return string.CreateStringFromEncoding(pinned + index, count, this);
    }
}
- //
- // End of standard methods copied from EncodingNLS.cs
- //
// Counts the bytes that encoding `count` UTF-16 chars starting at `chars` would produce.
//
// Every ordinary char and every valid surrogate pair contributes 4 bytes. An unpaired
// surrogate is handed to the encoder fallback, and whatever chars the fallback supplies
// are then counted as if they had appeared in the input.
//
// encoder may be null. When it is not, its left-over high surrogate is the starting
// state, and a trailing high surrogate is only treated as an error if encoder.MustFlush
// is set. Throws ArgumentException if the encoder's fallback buffer still holds data,
// and ArgumentOutOfRangeException if the running total has gone negative.
internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    Debug.Assert(chars != null, "[UTF32Encoding.GetByteCount]chars!=null");
    Debug.Assert(count >= 0, "[UTF32Encoding.GetByteCount]count >=0");

    char* firstChar = chars;
    char* charEnd = chars + count;
    int total = 0;

    // High surrogate still waiting for its low surrogate ('\0' when there is none).
    char pendingHigh = '\0';

    EncoderFallbackBuffer fallback;
    char* cursor;

    if (encoder == null)
    {
        fallback = this.encoderFallback.CreateFallbackBuffer();
    }
    else
    {
        pendingHigh = encoder._charLeftOver;
        fallback = encoder.FallbackBuffer;

        // Counting must not begin while an earlier fallback is still being drained.
        if (fallback.Remaining > 0)
        {
            throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
        }
    }

    // Give the fallback buffer the bounds of the input it may be asked about.
    fallback.InternalInitialize(firstChar, charEnd, encoder, false);

    for (;;)
    {
        char current;

        // Fallback output is consumed first; only when it is empty do we read input.
        while ((current = fallback.InternalGetNextChar()) != 0 || chars < charEnd)
        {
            if (current == 0)
            {
                current = *chars++;
            }

            if (pendingHigh != '\0')
            {
                if (char.IsLowSurrogate(current))
                {
                    // Completed pair: one UTF-32 unit.
                    pendingHigh = '\0';
                    total += 4;
                }
                else
                {
                    // The high surrogate has no partner. Un-read the current char so it
                    // is examined again, and fall back the high surrogate on its own.
                    Debug.Assert(chars > firstChar,
                        "[UTF32Encoding.GetByteCount]Expected chars to have advanced if no low surrogate");
                    chars--;

                    cursor = chars;
                    fallback.InternalFallback(pendingHigh, ref cursor);
                    chars = cursor;

                    pendingHigh = '\0';
                }
            }
            else if (char.IsHighSurrogate(current))
            {
                // Remember it; the next char decides whether it is valid.
                pendingHigh = current;
            }
            else if (char.IsLowSurrogate(current))
            {
                // Low surrogate with no preceding high surrogate.
                cursor = chars;
                fallback.InternalFallback(current, ref cursor);
                chars = cursor;
            }
            else
            {
                // Ordinary char: one UTF-32 unit.
                total += 4;
            }
        }

        // Input is exhausted. A high surrogate left hanging is an error only when
        // there is no encoder to carry it over, or the encoder is flushing.
        bool flushing = encoder == null || encoder.MustFlush;
        if (!flushing || pendingHigh == '\0')
        {
            break;
        }

        cursor = chars;
        fallback.InternalFallback(pendingHigh, ref cursor);
        chars = cursor;
        pendingHigh = '\0';

        // Go round again to count whatever the fallback produced.
    }

    // A negative total means the int wrapped around.
    if (total < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);
    }

    // Shouldn't have anything in fallback buffer for GetByteCount
    // (don't have to check _throwOnOverflow for count)
    Debug.Assert(fallback.Remaining == 0,
        "[UTF32Encoding.GetByteCount]Expected empty fallback buffer at end");

    return total;
}
// Encodes `charCount` UTF-16 chars starting at `chars` as UTF-32 into `bytes`, writing at
// most `byteCount` bytes, and returns the number of bytes written.
//
// Every ordinary char and every valid surrogate pair becomes one 4-byte unit, in
// big-endian or little-endian order according to _bigEndian. Unpaired surrogates are
// handed to the encoder fallback, whose output is then encoded in their place.
//
// When fewer than 4 bytes of room remain, the char (or pair) that did not fit is
// un-read, ThrowBytesOverflow is called (it is told whether nothing has been written
// yet), and if that does not throw, encoding stops there.
//
// encoder may be null. When it is not, its left-over high surrogate is the starting
// state, and on return its _charLeftOver and _charsUsed are updated. Throws
// ArgumentException if encoder._throwOnOverflow is set while its fallback buffer still
// holds data.
internal override unsafe int GetBytes(char* chars, int charCount,
                                      byte* bytes, int byteCount, EncoderNLS encoder)
{
    Debug.Assert(chars != null, "[UTF32Encoding.GetBytes]chars!=null");
    Debug.Assert(bytes != null, "[UTF32Encoding.GetBytes]bytes!=null");
    Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetBytes]byteCount >=0");
    Debug.Assert(charCount >= 0, "[UTF32Encoding.GetBytes]charCount >=0");

    char* charStart = chars;
    char* charEnd = chars + charCount;
    byte* byteStart = bytes;
    byte* byteEnd = bytes + byteCount;

    // High surrogate still waiting for its low surrogate ('\0' when there is none).
    char highSurrogate = '\0';

    // For fallback we may need a fallback buffer
    EncoderFallbackBuffer fallbackBuffer = null;
    char* charsForFallback;

    if (encoder != null)
    {
        highSurrogate = encoder._charLeftOver;
        fallbackBuffer = encoder.FallbackBuffer;

        // We mustn't have left over fallback data when not converting
        if (encoder._throwOnOverflow && fallbackBuffer.Remaining > 0)
        {
            throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
        }
    }
    else
    {
        fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
    }

    // Give the fallback buffer the bounds of the input it may be asked about.
    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);

    char ch;

TryAgain:
    // Fallback output is consumed first; only when it is empty do we read input.
    while (((ch = fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
    {
        if (ch == 0)
        {
            // No fallback char pending, take the next input char.
            ch = *chars;
            chars++;
        }

        // Do we need a low surrogate?
        if (highSurrogate != '\0')
        {
            // The previous char was a high surrogate, so a low surrogate is expected here.
            if (char.IsLowSurrogate(ch))
            {
                // Combine the pair into a single code point.
                uint iTemp = GetSurrogate(highSurrogate, ch);
                highSurrogate = '\0';

                // One surrogate pair is written as a single 4-byte UTF-32 unit.
                if (bytes + 3 >= byteEnd)
                {
                    // Don't have 4 bytes of room: give both halves of the pair back.
                    if (fallbackBuffer.bFallingBack)
                    {
                        fallbackBuffer.MovePrevious(); // Aren't using these 2 fallback chars
                        fallbackBuffer.MovePrevious();
                    }
                    else
                    {
                        // If we don't have enough room, then either we should've advanced a while
                        // or we should have bytes==byteStart and throw below
                        Debug.Assert(chars > charStart + 1 || bytes == byteStart,
                            "[UTF32Encoding.GetBytes]Expected chars to have advanced when no room to add surrogate pair");
                        chars -= 2; // Aren't using those 2 chars
                    }

                    ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
                    highSurrogate = (char)0; // Nothing left over (we backed up to start of pair if supplementary)
                    break;
                }

                if (_bigEndian)
                {
                    *(bytes++) = (byte)(0x00);
                    *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
                    *(bytes++) = (byte)(iTemp >> 8);  // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp);       // Implies & 0xFF
                }
                else
                {
                    *(bytes++) = (byte)(iTemp);       // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp >> 8);  // Implies & 0xFF
                    *(bytes++) = (byte)(iTemp >> 16); // Implies & 0xFF, which isn't needed cause high are all 0
                    *(bytes++) = (byte)(0x00);
                }

                continue;
            }

            // We are missing our low surrogate: un-read the current char so it is examined
            // again, and fall back the high surrogate on its own.
            // The high surrogate may have come from the encoder, but nothing else did.
            Debug.Assert(chars > charStart,
                "[UTF32Encoding.GetBytes]Expected chars to have advanced if no low surrogate");
            chars--;

            // Do the fallback
            charsForFallback = chars;
            fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
            chars = charsForFallback;

            // The old high surrogate has now been handled by the fallback.
            highSurrogate = '\0';
            continue;
        }

        // Do we have another high surrogate? If so remember it for the next char.
        if (char.IsHighSurrogate(ch))
        {
            highSurrogate = ch;
            continue;
        }

        // Check for illegal characters (low surrogate with no preceding high surrogate)
        if (char.IsLowSurrogate(ch))
        {
            charsForFallback = chars;
            fallbackBuffer.InternalFallback(ch, ref charsForFallback);
            chars = charsForFallback;

            // Try again with fallback buffer
            continue;
        }

        // Ordinary char: needs one 4-byte unit.
        if (bytes + 3 >= byteEnd)
        {
            // Don't have 4 bytes of room: give the char back.
            if (fallbackBuffer.bFallingBack)
            {
                fallbackBuffer.MovePrevious(); // Aren't using this fallback char
            }
            else
            {
                // Must've advanced already
                Debug.Assert(chars > charStart,
                    "[UTF32Encoding.GetBytes]Expected chars to have advanced if normal character");
                chars--; // Aren't using this char
            }

            ThrowBytesOverflow(encoder, bytes == byteStart); // Throw maybe (if no bytes written)
            break; // Didn't throw, stop
        }

        if (_bigEndian)
        {
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
            *(bytes++) = (byte)(ch);            // Implies & 0xFF
        }
        else
        {
            *(bytes++) = (byte)(ch);            // Implies & 0xFF
            *(bytes++) = (byte)((uint)ch >> 8); // Implies & 0xFF
            *(bytes++) = (byte)(0x00);
            *(bytes++) = (byte)(0x00);
        }
    }

    // Input is exhausted. A high surrogate left hanging is an error only when there is
    // no encoder to carry it over, or the encoder is flushing.
    if ((encoder == null || encoder.MustFlush) && highSurrogate > 0)
    {
        // We have to do the fallback for the lonely high surrogate
        charsForFallback = chars;
        fallbackBuffer.InternalFallback(highSurrogate, ref charsForFallback);
        chars = charsForFallback;
        highSurrogate = (char)0;

        // Go round again to encode whatever the fallback produced.
        goto TryAgain;
    }

    // Fix our encoder if we have one
    Debug.Assert(highSurrogate == 0 || (encoder != null && !encoder.MustFlush),
        "[UTF32Encoding.GetBytes]Expected encoder to be flushed.");

    if (encoder != null)
    {
        // Remember our left over surrogate (or 0 if flushing)
        encoder._charLeftOver = highSurrogate;

        // Number of input chars consumed
        encoder._charsUsed = (int)(chars - charStart);
    }

    // Number of bytes written
    return (int)(bytes - byteStart);
}
// Counts the UTF-16 chars that decoding `count` bytes starting at `bytes` would produce.
//
// Bytes are assembled four at a time into one 32-bit value, in big-endian or
// little-endian order according to _bigEndian. A value below 0x10000 counts as one
// char, a value from 0x10000 through 0x10FFFF as two (a surrogate pair). A value above
// 0x10FFFF or inside 0xD800-0xDFFF is invalid: its four bytes are handed to the decoder
// fallback, which reports how many chars it would emit.
//
// baseDecoder may be null. When it is not, it must be a UTF32Decoder; its partially read
// value is the starting state, and left-over bytes at the end are only treated as an
// error if it has MustFlush set. Throws ArgumentOutOfRangeException if the running
// total has gone negative.
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    Debug.Assert(bytes != null, "[UTF32Encoding.GetCharCount]bytes!=null");
    Debug.Assert(count >= 0, "[UTF32Encoding.GetCharCount]count >=0");

    UTF32Decoder decoder = (UTF32Decoder)baseDecoder;

    // None so far!
    int charCount = 0;
    byte* end = bytes + count;
    byte* byteStart = bytes;

    // Bytes of the current unit read so far, and the value assembled from them.
    int readCount = 0;
    uint iChar = 0;

    // For fallback we may need a fallback buffer
    DecoderFallbackBuffer fallbackBuffer = null;

    // Pick up any partially read unit from the decoder
    if (decoder != null)
    {
        readCount = decoder.readByteCount;
        iChar = (uint)decoder.iChar;
        fallbackBuffer = decoder.FallbackBuffer;

        // Shouldn't have anything in fallback buffer for GetCharCount
        // (don't have to check _throwOnOverflow for chars or count)
        Debug.Assert(fallbackBuffer.Remaining == 0,
            "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at start");
    }
    else
    {
        fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
    }

    // Tell the fallback buffer where the input starts; there is no output buffer when counting.
    fallbackBuffer.InternalInitialize(byteStart, null);

    // Consume the input one byte at a time, completing a unit every 4 bytes.
    // Stop early if the count has already gone negative.
    while (bytes < end && charCount >= 0)
    {
        // Merge the next byte into the value being assembled
        if (_bigEndian)
        {
            // Scoot left and add it to the bottom
            iChar <<= 8;
            iChar += *(bytes++);
        }
        else
        {
            // Scoot right and add it to the top
            iChar >>= 8;
            iChar += (uint)(*(bytes++)) << 24;
        }

        readCount++;

        // See if we have all the bytes yet
        if (readCount < 4)
        {
            continue;
        }

        // Have the bytes
        readCount = 0;

        // Reject values beyond U+10FFFF and values in the surrogate range
        if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
        {
            // Rebuild the 4 offending bytes in their original input order for the fallback
            byte[] fallbackBytes;
            if (_bigEndian)
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar >> 24)), unchecked((byte)(iChar >> 16)),
                    unchecked((byte)(iChar >> 8)), unchecked((byte)(iChar)) };
            }
            else
            {
                fallbackBytes = new byte[] {
                    unchecked((byte)(iChar)), unchecked((byte)(iChar >> 8)),
                    unchecked((byte)(iChar >> 16)), unchecked((byte)(iChar >> 24)) };
            }

            charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);

            // Ignore the illegal character
            iChar = 0;
            continue;
        }

        // Ok, we have something we can add to our output
        if (iChar >= 0x10000)
        {
            // Needs a surrogate pair: one extra char
            charCount++;
        }

        // The rest of the surrogate pair, or the ordinary char
        charCount++;

        // Start the next unit from scratch
        iChar = 0;
    }

    // An incomplete unit at the end is an error only when there is no decoder to
    // carry it over, or the decoder is flushing.
    if (readCount > 0 && (decoder == null || decoder.MustFlush))
    {
        // Rebuild the left-over bytes in their original input order for the fallback
        byte[] fallbackBytes = new byte[readCount];
        if (_bigEndian)
        {
            // Most recent byte is at the bottom of iChar
            while (readCount > 0)
            {
                fallbackBytes[--readCount] = unchecked((byte)iChar);
                iChar >>= 8;
            }
        }
        else
        {
            // Most recent byte is at the top of iChar
            while (readCount > 0)
            {
                fallbackBytes[--readCount] = unchecked((byte)(iChar >> 24));
                iChar <<= 8;
            }
        }

        charCount += fallbackBuffer.InternalFallback(fallbackBytes, bytes);
    }

    // A negative total means the int wrapped around. This is a char count, so report it
    // with the char-count overflow message rather than the byte-count one.
    if (charCount < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetCharCountOverflow);
    }

    // Shouldn't have anything in fallback buffer for GetCharCount
    // (don't have to check _throwOnOverflow for chars or count)
    Debug.Assert(fallbackBuffer.Remaining == 0,
        "[UTF32Encoding.GetCharCount]Expected empty fallback buffer at end");

    // Return our count
    return charCount;
}
- // Decodes UTF-32 code units from `bytes` into UTF-16 chars written to `chars`.
- //
- //   bytes, byteCount   input buffer and the number of bytes available in it
- //   chars, charCount   output buffer and the number of chars it can hold
- //   baseDecoder        a UTF32Decoder carrying a partially read code unit between calls,
- //                      or null for a stateless conversion
- //
- // Returns the number of chars written to `chars`.
- //
- // Values above 0x10FFFF or inside 0xD800-0xDFFF are handed to the decoder fallback instead of
- // being emitted. When the output buffer cannot take the next result the input position is moved
- // back by 4 bytes and ThrowCharsOverflow is called (per the inline notes it throws when nothing
- // was output, otherwise decoding simply stops). With a decoder present, the pending bits, the
- // pending byte count and the number of bytes consumed are stored back on it before returning.
- internal override unsafe int GetChars(byte* bytes, int byteCount,
-     char* chars, int charCount, DecoderNLS baseDecoder)
- {
-     Debug.Assert(chars != null, "[UTF32Encoding.GetChars]chars!=null");
-     Debug.Assert(bytes != null, "[UTF32Encoding.GetChars]bytes!=null");
-     Debug.Assert(byteCount >= 0, "[UTF32Encoding.GetChars]byteCount >=0");
-     Debug.Assert(charCount >= 0, "[UTF32Encoding.GetChars]charCount >=0");
-     UTF32Decoder decoder = (UTF32Decoder)baseDecoder;
-     // Remember both buffer boundaries; `bytes` and `chars` are advanced as cursors below.
-     char* charStart = chars;
-     char* charEnd = chars + charCount;
-     byte* byteStart = bytes;
-     byte* byteEnd = bytes + byteCount;
-     // Partially assembled code unit: number of bytes gathered so far and their packed bits.
-     int readCount = 0;
-     uint iChar = 0;
-     // For fallback we may need a fallback buffer
-     DecoderFallbackBuffer fallbackBuffer = null;
-     char* charsForFallback;
-     // Resume from the decoder's saved state if there is one (it is not cleared yet)
-     if (decoder != null)
-     {
-         readCount = decoder.readByteCount;
-         iChar = (uint)decoder.iChar;
-         fallbackBuffer = baseDecoder.FallbackBuffer;
-         // Shouldn't have anything in fallback buffer for GetChars
-         // (don't have to check _throwOnOverflow for chars)
-         Debug.Assert(fallbackBuffer.Remaining == 0,
-             "[UTF32Encoding.GetChars]Expected empty fallback buffer at start");
-     }
-     else
-     {
-         fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
-     }
-     // Tell the fallback buffer where the input starts and where the output ends.
-     fallbackBuffer.InternalInitialize(bytes, chars + charCount);
-     // Loop through our input, accumulating 4 bytes per code unit
-     while (bytes < byteEnd)
-     {
-         // Get our next byte
-         if (_bigEndian)
-         {
-             // Scoot left and add it to the bottom
-             iChar <<= 8;
-             iChar += *(bytes++);
-         }
-         else
-         {
-             // Scoot right and add it to the top
-             iChar >>= 8;
-             iChar += (uint)(*(bytes++)) << 24;
-         }
-         readCount++;
-         // See if we have all the bytes yet
-         if (readCount < 4)
-             continue;
-         // Have the bytes
-         readCount = 0;
-         // See if it is a value we are allowed to decode
-         if (iChar > 0x10FFFF || (iChar >= 0xD800 && iChar <= 0xDFFF))
-         {
-             // Need to fall back these 4 bytes; rebuild them in their original stream order
-             byte[] fallbackBytes;
-             if (_bigEndian)
-             {
-                 fallbackBytes = new byte[] {
-                     unchecked((byte)(iChar>>24)), unchecked((byte)(iChar>>16)),
-                     unchecked((byte)(iChar>>8)), unchecked((byte)(iChar)) };
-             }
-             else
-             {
-                 fallbackBytes = new byte[] {
-                     unchecked((byte)(iChar)), unchecked((byte)(iChar>>8)),
-                     unchecked((byte)(iChar>>16)), unchecked((byte)(iChar>>24)) };
-             }
-             // Chars won't be updated unless this works.
-             charsForFallback = chars;
-             bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
-             chars = charsForFallback;
-             if (!fallbackResult)
-             {
-                 // Couldn't fallback, throw or wait til next time
-                 // We either read enough bytes for bytes-=4 to work, or we're
-                 // going to throw in ThrowCharsOverflow because chars == charStart
-                 Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
-                     "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (bad surrogate)");
-                 bytes -= 4; // get back to where we were
-                 iChar = 0; // Remembering nothing
-                 fallbackBuffer.InternalReset();
-                 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
-                 break; // Stop here, didn't throw
-             }
-             // Ignore the illegal character
-             iChar = 0;
-             continue;
-         }
-         // Ok, we have something we can add to our output
-         if (iChar >= 0x10000)
-         {
-             // Surrogates take 2
-             if (chars >= charEnd - 1)
-             {
-                 // Throwing or stopping
-                 // We either read enough bytes for bytes-=4 to work, or we're
-                 // going to throw in ThrowCharsOverflow because chars == charStart
-                 Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
-                     "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (surrogate)");
-                 bytes -= 4; // get back to where we were
-                 iChar = 0; // Remembering nothing
-                 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
-                 break; // Stop here, didn't throw
-             }
-             // Emit the high half now; the low half is written by the shared store below
-             *(chars++) = GetHighSurrogate(iChar);
-             iChar = GetLowSurrogate(iChar);
-         }
-         // Bounds check for normal character
-         else if (chars >= charEnd)
-         {
-             // Throwing or stopping
-             // We either read enough bytes for bytes-=4 to work, or we're
-             // going to throw in ThrowCharsOverflow because chars == charStart
-             Debug.Assert(bytes >= byteStart + 4 || chars == charStart,
-                 "[UTF32Encoding.GetChars]Expected to have consumed bytes or throw (normal char)");
-             bytes -= 4; // get back to where we were
-             iChar = 0; // Remembering nothing
-             ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
-             break; // Stop here, didn't throw
-         }
-         // Add the rest of the surrogate or our normal character
-         *(chars++) = (char)iChar;
-         // iChar is back to 0
-         iChar = 0;
-     }
-     // See if we have something left over that has to be decoded
-     if (readCount > 0 && (decoder == null || decoder.MustFlush))
-     {
-         // Oops, there's something left over with no place to go.
-         byte[] fallbackBytes = new byte[readCount];
-         int tempCount = readCount;
-         // Unpack from a scratch copy. iChar has to stay intact: if the fallback below cannot
-         // be written and no exception is thrown, iChar is saved back into the decoder together
-         // with the unchanged readCount, and the next call must see the real pending bytes.
-         uint pendingBits = iChar;
-         if (_bigEndian)
-         {
-             while (tempCount > 0)
-             {
-                 fallbackBytes[--tempCount] = unchecked((byte)pendingBits);
-                 pendingBits >>= 8;
-             }
-         }
-         else
-         {
-             while (tempCount > 0)
-             {
-                 fallbackBytes[--tempCount] = unchecked((byte)(pendingBits >> 24));
-                 pendingBits <<= 8;
-             }
-         }
-         charsForFallback = chars;
-         bool fallbackResult = fallbackBuffer.InternalFallback(fallbackBytes, bytes, ref charsForFallback);
-         chars = charsForFallback;
-         if (!fallbackResult)
-         {
-             // Couldn't fallback.
-             fallbackBuffer.InternalReset();
-             ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
-             // Stop here, didn't throw; the pending bytes stay in iChar/readCount for next time
-         }
-         else
-         {
-             // Don't clear our decoder unless we could fall it back.
-             // If we caught the if above, then we're a convert() and will catch this next time.
-             readCount = 0;
-             iChar = 0;
-         }
-     }
-     // Remember any left over stuff, clearing buffer as well for MustFlush
-     if (decoder != null)
-     {
-         decoder.iChar = (int)iChar;
-         decoder.readByteCount = readCount;
-         decoder._bytesUsed = (int)(bytes - byteStart);
-     }
-     // Shouldn't have anything in fallback buffer for GetChars
-     // (don't have to check _throwOnOverflow for chars)
-     Debug.Assert(fallbackBuffer.Remaining == 0,
-         "[UTF32Encoding.GetChars]Expected empty fallback buffer at end");
-     // Return our count
-     return (int)(chars - charStart);
- }
- // Combines the two halves of a UTF-16 surrogate pair into a single code point value.
- // No validation is done here; the arithmetic assumes cHigh and cLow really are a
- // high/low surrogate.
- private uint GetSurrogate(char cHigh, char cLow)
- {
-     uint highOffset = (uint)cHigh - 0xD800;
-     uint lowOffset = (uint)cLow - 0xDC00;
-     // Each high surrogate step covers 0x400 low values; the pair range starts at 0x10000.
-     return (highOffset * 0x400) + lowOffset + 0x10000;
- }
- // Computes the first (high) UTF-16 surrogate for a value of 0x10000 or above.
- private char GetHighSurrogate(uint iChar)
- {
-     // Quotient of the offset past 0x10000 by 0x400 selects the high surrogate.
-     uint highIndex = (iChar - 0x10000) / 0x400;
-     return (char)(highIndex + 0xD800);
- }
- // Computes the second (low) UTF-16 surrogate for a value of 0x10000 or above.
- private char GetLowSurrogate(uint iChar)
- {
-     // Remainder of the offset past 0x10000 by 0x400 selects the low surrogate.
-     uint lowIndex = (iChar - 0x10000) % 0x400;
-     return (char)(lowIndex + 0xDC00);
- }
- // Creates a new UTF32Decoder bound to this encoding instance.
- public override Decoder GetDecoder() => new UTF32Decoder(this);
- // Creates a new EncoderNLS bound to this encoding instance.
- public override Encoder GetEncoder() => new EncoderNLS(this);
- // Upper bound on the number of bytes needed to encode `charCount` chars.
- // Throws ArgumentOutOfRangeException when charCount is negative or the bound does not fit
- // in an int.
- public override int GetMaxByteCount(int charCount)
- {
-     if (charCount < 0)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount),
-             SR.ArgumentOutOfRange_NeedNonNegNum);
-     }
-     // One extra char in case a high surrogate is left over from an earlier call.
-     // Worked out in 64 bits so the multiplications below cannot wrap before the range check.
-     long worstCase = (long)charCount + 1;
-     // Every char may be replaced by up to MaxCharCount fallback chars.
-     if (EncoderFallback.MaxCharCount > 1)
-     {
-         worstCase *= EncoderFallback.MaxCharCount;
-     }
-     // 4 bytes per char
-     worstCase *= 4;
-     if (worstCase > int.MaxValue)
-     {
-         throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
-     }
-     return (int)worstCase;
- }
- // Upper bound on the number of chars produced by decoding `byteCount` bytes.
- // Throws ArgumentOutOfRangeException when byteCount is negative or the bound does not fit
- // in an int.
- public override int GetMaxCharCount(int byteCount)
- {
-     if (byteCount < 0)
-         throw new ArgumentOutOfRangeException(nameof(byteCount),
-             SR.ArgumentOutOfRange_NeedNonNegNum);
-     // A supplementary character becomes 2 surrogate characters, so 4 input bytes becomes 2 chars,
-     // plus we may have 1 surrogate char left over if the decoder has 3 bytes in it already for a non-bmp char.
-     // Have to add another one because 1/2 == 0, but 3 bytes left over could be 2 char surrogate pair.
-     // Kept in a long: the multiplication by the fallback size below can exceed the int range,
-     // and the overflow check at the end is only meaningful if the value has not already wrapped.
-     long charCount = ((long)byteCount / 2) + 2;
-     // Also consider fallback because our input bytes could be out of range of unicode.
-     // Since fallback would fallback 4 bytes at a time, we'll only fall back 1/2 of MaxCharCount.
-     if (DecoderFallback.MaxCharCount > 2)
-     {
-         // Multiply by the fallback size
-         charCount *= DecoderFallback.MaxCharCount;
-         // We were already figuring 2 chars per 4 bytes, but fallback will be different #
-         charCount /= 2;
-     }
-     if (charCount > 0x7fffffff)
-         throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
-     return (int)charCount;
- }
- // Returns the byte order mark for this encoding, or an empty array when the encoding
- // was configured not to emit one.
- public override byte[] GetPreamble()
- {
-     if (!_emitUTF32ByteOrderMark)
-     {
-         return Array.Empty<byte>();
-     }
-     // A fresh array is allocated on every call so callers cannot modify a shared copy.
-     return _bigEndian
-         ? new byte[4] { 0x00, 0x00, 0xFE, 0xFF }
-         : new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };
- }
- // Span view of the byte order mark; empty when no mark is emitted.
- public override ReadOnlySpan<byte> Preamble
- {
-     get
-     {
-         // in case a derived UTF32Encoding overrode GetPreamble
-         if (GetType() != typeof(UTF32Encoding))
-         {
-             return new ReadOnlySpan<byte>(GetPreamble());
-         }
-         if (!_emitUTF32ByteOrderMark)
-         {
-             return default;
-         }
-         // uses C# compiler's optimization for static byte[] data
-         if (_bigEndian)
-         {
-             return (ReadOnlySpan<byte>)new byte[4] { 0x00, 0x00, 0xFE, 0xFF };
-         }
-         return (ReadOnlySpan<byte>)new byte[4] { 0xFF, 0xFE, 0x00, 0x00 };
-     }
- }
- // Two UTF32Encoding instances are equal when they agree on byte order mark emission,
- // endianness and both fallbacks. Anything that is not a UTF32Encoding is never equal.
- public override bool Equals(object value)
- {
-     if (!(value is UTF32Encoding that))
-     {
-         return false;
-     }
-     return _emitUTF32ByteOrderMark == that._emitUTF32ByteOrderMark
-         && _bigEndian == that._bigEndian
-         && EncoderFallback.Equals(that.EncoderFallback)
-         && DecoderFallback.Equals(that.DecoderFallback);
- }
- // Sums the fallback hash codes, the code page and one small constant per option flag.
- public override int GetHashCode()
- {
-     //Not great distribution, but this is relatively unlikely to be used as the key in a hashtable.
-     int hash = this.EncoderFallback.GetHashCode();
-     hash += this.DecoderFallback.GetHashCode();
-     hash += CodePage;
-     if (_emitUTF32ByteOrderMark)
-     {
-         hash += 4;
-     }
-     if (_bigEndian)
-     {
-         hash += 8;
-     }
-     return hash;
- }
- // Decoder that carries a partially read 4-byte code unit from one GetChars call to the next.
- private sealed class UTF32Decoder : DecoderNLS
- {
-     // Packed bits of the bytes read so far for the unfinished code unit.
-     internal int iChar;
-     // Number of bytes of the unfinished code unit currently held in iChar.
-     internal int readByteCount;
-     public UTF32Decoder(UTF32Encoding encoding)
-         : base(encoding)
-     {
-         // base calls reset
-     }
-     // Drops any pending bytes and resets the fallback buffer if one exists.
-     public override void Reset()
-     {
-         iChar = 0;
-         readByteCount = 0;
-         _fallbackBuffer?.Reset();
-     }
-     // Anything left in our decoder?
-     // readByteCount is the flag here; iChar == 0 on its own doesn't mean much.
-     internal override bool HasState => readByteCount != 0;
- }
- }
- }
|