// DecoderFallback.cs
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

  4. using System.Diagnostics;
  5. using System.Globalization;
  6. using System.Threading;
  7. namespace System.Text
  8. {
  9. public abstract class DecoderFallback
  10. {
  11. private static DecoderFallback s_replacementFallback; // Default fallback, uses no best fit & "?"
  12. private static DecoderFallback s_exceptionFallback;
  13. public static DecoderFallback ReplacementFallback =>
  14. s_replacementFallback ?? Interlocked.CompareExchange(ref s_replacementFallback, new DecoderReplacementFallback(), null) ?? s_replacementFallback;
  15. public static DecoderFallback ExceptionFallback =>
  16. s_exceptionFallback ?? Interlocked.CompareExchange<DecoderFallback>(ref s_exceptionFallback, new DecoderExceptionFallback(), null) ?? s_exceptionFallback;
  17. // Fallback
  18. //
  19. // Return the appropriate unicode string alternative to the character that need to fall back.
  20. // Most implementations will be:
  21. // return new MyCustomDecoderFallbackBuffer(this);
  22. public abstract DecoderFallbackBuffer CreateFallbackBuffer();
  23. // Maximum number of characters that this instance of this fallback could return
  24. public abstract int MaxCharCount { get; }
  25. }
  26. public abstract class DecoderFallbackBuffer
  27. {
  28. // Most implementations will probably need an implementation-specific constructor
  29. // internal methods that cannot be overridden that let us do our fallback thing
  30. // These wrap the internal methods so that we can check for people doing stuff that's incorrect
  31. public abstract bool Fallback(byte[] bytesUnknown, int index);
  32. // Get next character
  33. public abstract char GetNextChar();
  34. // Back up a character
  35. public abstract bool MovePrevious();
  36. // How many chars left in this fallback?
  37. public abstract int Remaining { get; }
  38. // Clear the buffer
  39. public virtual void Reset()
  40. {
  41. while (GetNextChar() != (char)0) ;
  42. }
  43. // Internal items to help us figure out what we're doing as far as error messages, etc.
  44. // These help us with our performance and messages internally
  45. internal unsafe byte* byteStart;
  46. internal unsafe char* charEnd;
  47. // Internal Reset
  48. internal unsafe void InternalReset()
  49. {
  50. byteStart = null;
  51. Reset();
  52. }
  53. // Set the above values
  54. // This can't be part of the constructor because DecoderFallbacks would have to know how to implement these.
  55. internal unsafe void InternalInitialize(byte* byteStart, char* charEnd)
  56. {
  57. this.byteStart = byteStart;
  58. this.charEnd = charEnd;
  59. }
  60. // Fallback the current byte by sticking it into the remaining char buffer.
  61. // This can only be called by our encodings (other have to use the public fallback methods), so
  62. // we can use our DecoderNLS here too (except we don't).
  63. // Returns true if we are successful, false if we can't fallback the character (no buffer space)
  64. // So caller needs to throw buffer space if return false.
  65. // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
  66. // array, and we might need the index, hence the byte*
  67. // Don't touch ref chars unless we succeed
  68. internal unsafe virtual bool InternalFallback(byte[] bytes, byte* pBytes, ref char* chars)
  69. {
  70. Debug.Assert(byteStart != null, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");
  71. // See if there's a fallback character and we have an output buffer then copy our string.
  72. if (this.Fallback(bytes, (int)(pBytes - byteStart - bytes.Length)))
  73. {
  74. // Copy the chars to our output
  75. char ch;
  76. char* charTemp = chars;
  77. bool bHighSurrogate = false;
  78. while ((ch = GetNextChar()) != 0)
  79. {
  80. // Make sure no mixed up surrogates
  81. if (char.IsSurrogate(ch))
  82. {
  83. if (char.IsHighSurrogate(ch))
  84. {
  85. // High Surrogate
  86. if (bHighSurrogate)
  87. throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
  88. bHighSurrogate = true;
  89. }
  90. else
  91. {
  92. // Low surrogate
  93. if (bHighSurrogate == false)
  94. throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
  95. bHighSurrogate = false;
  96. }
  97. }
  98. if (charTemp >= charEnd)
  99. {
  100. // No buffer space
  101. return false;
  102. }
  103. *(charTemp++) = ch;
  104. }
  105. // Need to make sure that bHighSurrogate isn't true
  106. if (bHighSurrogate)
  107. throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
  108. // Now we aren't going to be false, so its OK to update chars
  109. chars = charTemp;
  110. }
  111. return true;
  112. }
  113. // This version just counts the fallback and doesn't actually copy anything.
  114. internal unsafe virtual int InternalFallback(byte[] bytes, byte* pBytes)
  115. // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
  116. // array, and we might need the index, hence the byte*
  117. {
  118. Debug.Assert(byteStart != null, "[DecoderFallback.InternalFallback]Used InternalFallback without calling InternalInitialize");
  119. // See if there's a fallback character and we have an output buffer then copy our string.
  120. if (this.Fallback(bytes, (int)(pBytes - byteStart - bytes.Length)))
  121. {
  122. int count = 0;
  123. char ch;
  124. bool bHighSurrogate = false;
  125. while ((ch = GetNextChar()) != 0)
  126. {
  127. // Make sure no mixed up surrogates
  128. if (char.IsSurrogate(ch))
  129. {
  130. if (char.IsHighSurrogate(ch))
  131. {
  132. // High Surrogate
  133. if (bHighSurrogate)
  134. throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
  135. bHighSurrogate = true;
  136. }
  137. else
  138. {
  139. // Low surrogate
  140. if (bHighSurrogate == false)
  141. throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
  142. bHighSurrogate = false;
  143. }
  144. }
  145. count++;
  146. }
  147. // Need to make sure that bHighSurrogate isn't true
  148. if (bHighSurrogate)
  149. throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
  150. return count;
  151. }
  152. // If no fallback return 0
  153. return 0;
  154. }
  155. // private helper methods
  156. internal void ThrowLastBytesRecursive(byte[] bytesUnknown)
  157. {
  158. // Create a string representation of our bytes.
  159. StringBuilder strBytes = new StringBuilder(bytesUnknown.Length * 3);
  160. int i;
  161. for (i = 0; i < bytesUnknown.Length && i < 20; i++)
  162. {
  163. if (strBytes.Length > 0)
  164. strBytes.Append(' ');
  165. strBytes.AppendFormat(CultureInfo.InvariantCulture, "\\x{0:X2}", bytesUnknown[i]);
  166. }
  167. // In case the string's really long
  168. if (i == 20)
  169. strBytes.Append(" ...");
  170. // Throw it, using our complete bytes
  171. throw new ArgumentException(
  172. SR.Format(SR.Argument_RecursiveFallbackBytes,
  173. strBytes.ToString()), nameof(bytesUnknown));
  174. }
  175. }
  176. }