EncoderFallback.cs 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Threading;
  6. namespace System.Text
  7. {
  // Base type for encoder fallback strategies. It exposes two shared,
  // lazily created fallback instances and defines the contract concrete
  // fallbacks implement: creating a fallback buffer and reporting the
  // maximum number of characters a fallback may produce.
  public abstract class EncoderFallback
  {
      // Cached shared instances. Both start out null and are published at most
      // once through Interlocked.CompareExchange in the getters below.
      // (Original note on the replacement fallback: default fallback, uses no
      // best fit & "?".)
      private static EncoderFallback s_replacementFallback;
      private static EncoderFallback s_exceptionFallback;

      // Shared EncoderReplacementFallback instance, created on first access.
      public static EncoderFallback ReplacementFallback
      {
          get
          {
              EncoderFallback cached = s_replacementFallback;
              if (cached != null)
              {
                  return cached;
              }

              // Store a new instance only if the field is still null; if another
              // caller got there first, that caller's instance is kept and the
              // one created here is discarded.
              Interlocked.CompareExchange<EncoderFallback>(
                  ref s_replacementFallback,
                  new EncoderReplacementFallback(),
                  null);

              // Re-read the field so the published instance is what gets returned.
              return s_replacementFallback;
          }
      }

      // Shared EncoderExceptionFallback instance, created on first access.
      public static EncoderFallback ExceptionFallback
      {
          get
          {
              EncoderFallback cached = s_exceptionFallback;
              if (cached != null)
              {
                  return cached;
              }

              // Same publish-if-still-null approach as ReplacementFallback.
              Interlocked.CompareExchange<EncoderFallback>(
                  ref s_exceptionFallback,
                  new EncoderExceptionFallback(),
                  null);

              return s_exceptionFallback;
          }
      }

      // Creates the buffer that supplies the alternative characters for input
      // that needs to fall back. Most implementations will simply be:
      //      return new MyCustomEncoderFallbackBuffer(this);
      public abstract EncoderFallbackBuffer CreateFallbackBuffer();

      // Maximum number of characters that this instance of this fallback could return.
      public abstract int MaxCharCount { get; }
  }
  // Base type for the buffers produced by EncoderFallback.CreateFallbackBuffer.
  // Derived types implement the public abstract members; the internal members
  // further down carry the bookkeeping (input pointers, encoder, recursion
  // tracking) used when a fallback is driven through InternalFallback.
  public abstract class EncoderFallbackBuffer
  {
      // Most implementations will probably need an implementation-specific constructor.

      // Fall back a single character located at 'index'. InternalFallback stores
      // the returned value in bFallingBack.
      public abstract bool Fallback(char charUnknown, int index);

      // Fall back a surrogate pair; 'index' is the position of the high surrogate
      // as computed by InternalFallback.
      public abstract bool Fallback(char charUnknownHigh, char charUnknownLow, int index);

      // Get the next fallback character. Reset and InternalGetNextChar treat a
      // return value of (char)0 as "nothing left".
      public abstract char GetNextChar();

      // Back up a character.
      public abstract bool MovePrevious();

      // How many chars are left in this fallback?
      public abstract int Remaining { get; }

      // Clear the buffer by pulling characters until GetNextChar yields (char)0.
      // (Original note: not sure if this should be public or not.)
      public virtual void Reset()
      {
          char drained;
          do
          {
              drained = GetNextChar();
          }
          while (drained != (char)0);
      }

      // Internal state used for index computation, error reporting and for
      // handing a trailing high surrogate back to the encoder.
      internal unsafe char* charStart;    // start of the caller's input; null after InternalReset
      internal unsafe char* charEnd;      // end of the caller's input
      internal EncoderNLS encoder;        // may be null; checked before use
      internal bool setEncoder;           // when true, InternalFallback may write encoder._charLeftOver
      internal bool bUsedEncoder;         // set once encoder._charLeftOver has been written
      internal bool bFallingBack;         // result of the most recent Fallback / GetNextChar step
      internal int iRecursionCount;       // incremented on fallbacks requested while already falling back
      private const int iMaxRecursion = 250;

      // Internal Reset
      // For example, what if someone fails a conversion and wants to reset one of our fallback buffers?
      internal unsafe void InternalReset()
      {
          iRecursionCount = 0;
          bFallingBack = false;
          charStart = null;

          // Let the (possibly overridden) public Reset drain the buffer last.
          Reset();
      }

      // Records the input window, the encoder and whether it may be modified, and
      // clears the per-conversion state. This can't be part of the constructor
      // because EncoderFallbacks would have to know how to implement these.
      internal unsafe void InternalInitialize(char* charStart, char* charEnd, EncoderNLS encoder, bool setEncoder)
      {
          this.charStart = charStart;
          this.charEnd = charEnd;

          this.encoder = encoder;
          this.setEncoder = setEncoder;

          this.bUsedEncoder = false;
          this.bFallingBack = false;
          this.iRecursionCount = 0;
      }

      // Wraps GetNextChar and keeps bFallingBack / iRecursionCount in sync:
      // a zero result ends the current fallback and clears the recursion counter.
      internal char InternalGetNextChar()
      {
          char next = GetNextChar();

          if (next == 0)
          {
              bFallingBack = false;
              iRecursionCount = 0;
          }
          else
          {
              bFallingBack = true;
          }

          return next;
      }

      // Fallback the current character using the remaining buffer and encoder if necessary.
      // This can only be called by our encodings (others have to use the public fallback
      // methods), so we can use our EncoderNLS here too.
      // setEncoder is true if we're calling from a GetBytes method, false if we're calling
      // from a GetByteCount.
      //
      // Note that this could also change the contents of this.encoder, which is the same
      // object that the caller is using, so the caller could mess up the encoder for us
      // if they aren't careful.
      //
      // 'chars' points just past 'ch'; it is advanced by one more char when a low
      // surrogate following 'ch' is consumed as part of a pair. The return value is
      // the result of the Fallback call, or false when a trailing high surrogate is
      // left for the encoder instead of being fallen back.
      internal unsafe virtual bool InternalFallback(char ch, ref char* chars)
      {
          // Shouldn't have null charStart
          Debug.Assert(charStart != null,
              "[EncoderFallback.InternalFallbackBuffer]Fallback buffer is not initialized");

          // chars was pre-incremented to point at the next char, hence the -1.
          int index = (int)(chars - charStart) - 1;

          if (char.IsHighSurrogate(ch))
          {
              bool inputExhausted = chars >= this.charEnd;

              if (!inputExhausted)
              {
                  // There is at least one more char; it might be the matching low surrogate.
                  char following = *chars;
                  if (char.IsLowSurrogate(following))
                  {
                      // Fail if we are already falling back and have recursed too deeply.
                      if (IsRecursionLimitExceeded())
                      {
                          ThrowLastCharRecursive(char.ConvertToUtf32(ch, following));
                      }

                      // Consume the low surrogate and fall back the pair as a unit.
                      chars++;
                      bFallingBack = Fallback(ch, following, index);
                      return bFallingBack;
                  }

                  // Not a low surrogate: fall through and fall back the lone high surrogate.
              }
              else if (this.encoder != null && !this.encoder.MustFlush)
              {
                  // Nothing left in the input and the encoder is not flushing, so there
                  // is nothing to fall back; stash the high surrogate in the encoder
                  // when we are allowed to modify it.
                  if (this.setEncoder)
                  {
                      bUsedEncoder = true;
                      this.encoder._charLeftOver = ch;
                  }

                  bFallingBack = false;
                  return false;
              }

              // Input exhausted but no encoder / must flush: fall back the lone high surrogate.
          }

          // Fail if we are already falling back and have recursed too deeply.
          if (IsRecursionLimitExceeded())
          {
              ThrowLastCharRecursive((int)ch);
          }

          // Fall back our single char.
          bFallingBack = Fallback(ch, index);
          return bFallingBack;
      }

      // Returns true when a fallback is requested while one is already in progress
      // and the recursion counter has gone past iMaxRecursion. The counter is
      // incremented only when bFallingBack is set, as a side effect of the check.
      private bool IsRecursionLimitExceeded()
      {
          return bFallingBack && iRecursionCount++ > iMaxRecursion;
      }

      // Throws an ArgumentException (parameter name "chars") whose message is built
      // from SR.Argument_RecursiveFallback and the offending character value.
      internal void ThrowLastCharRecursive(int charRecursive)
      {
          // Throw it, using our complete character
          string message = SR.Format(SR.Argument_RecursiveFallback, charRecursive);
          throw new ArgumentException(message, "chars");
      }
  }
  165. }