EncoderReplacementFallback.cs 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. namespace System.Text
  6. {
  7. public sealed class EncoderReplacementFallback : EncoderFallback
  8. {
  9. // Our variables
  10. private readonly string _strDefault;
  11. // Construction. Default replacement fallback uses no best fit and ? replacement string
  12. public EncoderReplacementFallback() : this("?")
  13. {
  14. }
  15. public EncoderReplacementFallback(string replacement)
  16. {
  17. // Must not be null
  18. if (replacement == null)
  19. throw new ArgumentNullException(nameof(replacement));
  20. // Make sure it doesn't have bad surrogate pairs
  21. bool bFoundHigh = false;
  22. for (int i = 0; i < replacement.Length; i++)
  23. {
  24. // Found a surrogate?
  25. if (char.IsSurrogate(replacement, i))
  26. {
  27. // High or Low?
  28. if (char.IsHighSurrogate(replacement, i))
  29. {
  30. // if already had a high one, stop
  31. if (bFoundHigh)
  32. break; // break & throw at the bFoundHIgh below
  33. bFoundHigh = true;
  34. }
  35. else
  36. {
  37. // Low, did we have a high?
  38. if (!bFoundHigh)
  39. {
  40. // Didn't have one, make if fail when we stop
  41. bFoundHigh = true;
  42. break;
  43. }
  44. // Clear flag
  45. bFoundHigh = false;
  46. }
  47. }
  48. // If last was high we're in trouble (not surrogate so not low surrogate, so break)
  49. else if (bFoundHigh)
  50. break;
  51. }
  52. if (bFoundHigh)
  53. throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequenceNoIndex, nameof(replacement)));
  54. _strDefault = replacement;
  55. }
  56. public string DefaultString => _strDefault;
  57. public override EncoderFallbackBuffer CreateFallbackBuffer() =>
  58. new EncoderReplacementFallbackBuffer(this);
  59. // Maximum number of characters that this instance of this fallback could return
  60. public override int MaxCharCount => _strDefault.Length;
  61. public override bool Equals(object? value) =>
  62. value is EncoderReplacementFallback that &&
  63. _strDefault == that._strDefault;
  64. public override int GetHashCode() => _strDefault.GetHashCode();
  65. }
  66. public sealed class EncoderReplacementFallbackBuffer : EncoderFallbackBuffer
  67. {
  68. // Store our default string
  69. private readonly string _strDefault;
  70. private int _fallbackCount = -1;
  71. private int _fallbackIndex = -1;
  72. // Construction
  73. public EncoderReplacementFallbackBuffer(EncoderReplacementFallback fallback)
  74. {
  75. // 2X in case we're a surrogate pair
  76. _strDefault = fallback.DefaultString + fallback.DefaultString;
  77. }
  78. // Fallback Methods
  79. public override bool Fallback(char charUnknown, int index)
  80. {
  81. // If we had a buffer already we're being recursive, throw, it's probably at the suspect
  82. // character in our array.
  83. if (_fallbackCount >= 1)
  84. {
  85. // If we're recursive we may still have something in our buffer that makes this a surrogate
  86. if (char.IsHighSurrogate(charUnknown) && _fallbackCount >= 0 &&
  87. char.IsLowSurrogate(_strDefault[_fallbackIndex + 1]))
  88. ThrowLastCharRecursive(char.ConvertToUtf32(charUnknown, _strDefault[_fallbackIndex + 1]));
  89. // Nope, just one character
  90. ThrowLastCharRecursive(unchecked((int)charUnknown));
  91. }
  92. // Go ahead and get our fallback
  93. // Divide by 2 because we aren't a surrogate pair
  94. _fallbackCount = _strDefault.Length / 2;
  95. _fallbackIndex = -1;
  96. return _fallbackCount != 0;
  97. }
  98. public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index)
  99. {
  100. // Double check input surrogate pair
  101. if (!char.IsHighSurrogate(charUnknownHigh))
  102. throw new ArgumentOutOfRangeException(nameof(charUnknownHigh),
  103. SR.Format(SR.ArgumentOutOfRange_Range, 0xD800, 0xDBFF));
  104. if (!char.IsLowSurrogate(charUnknownLow))
  105. throw new ArgumentOutOfRangeException(nameof(charUnknownLow),
  106. SR.Format(SR.ArgumentOutOfRange_Range, 0xDC00, 0xDFFF));
  107. // If we had a buffer already we're being recursive, throw, it's probably at the suspect
  108. // character in our array.
  109. if (_fallbackCount >= 1)
  110. ThrowLastCharRecursive(char.ConvertToUtf32(charUnknownHigh, charUnknownLow));
  111. // Go ahead and get our fallback
  112. _fallbackCount = _strDefault.Length;
  113. _fallbackIndex = -1;
  114. return _fallbackCount != 0;
  115. }
  116. public override char GetNextChar()
  117. {
  118. // We want it to get < 0 because == 0 means that the current/last character is a fallback
  119. // and we need to detect recursion. We could have a flag but we already have this counter.
  120. _fallbackCount--;
  121. _fallbackIndex++;
  122. // Do we have anything left? 0 is now last fallback char, negative is nothing left
  123. if (_fallbackCount < 0)
  124. return '\0';
  125. // Need to get it out of the buffer.
  126. // Make sure it didn't wrap from the fast count-- path
  127. if (_fallbackCount == int.MaxValue)
  128. {
  129. _fallbackCount = -1;
  130. return '\0';
  131. }
  132. // Now make sure its in the expected range
  133. Debug.Assert(_fallbackIndex < _strDefault.Length && _fallbackIndex >= 0,
  134. "Index exceeds buffer range");
  135. return _strDefault[_fallbackIndex];
  136. }
  137. public override bool MovePrevious()
  138. {
  139. // Back up one, only if we just processed the last character (or earlier)
  140. if (_fallbackCount >= -1 && _fallbackIndex >= 0)
  141. {
  142. _fallbackIndex--;
  143. _fallbackCount++;
  144. return true;
  145. }
  146. // Return false 'cause we couldn't do it.
  147. return false;
  148. }
  149. // How many characters left to output?
  150. public override int Remaining =>
  151. // Our count is 0 for 1 character left.
  152. (_fallbackCount < 0) ? 0 : _fallbackCount;
  153. // Clear the buffer
  154. public override unsafe void Reset()
  155. {
  156. _fallbackCount = -1;
  157. _fallbackIndex = 0;
  158. charStart = null;
  159. bFallingBack = false;
  160. }
  161. }
  162. }