DecoderBestFitFallback.cs 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. //
  5. // This is used internally to provide best-fit behavior that matches the original Windows best-fit behavior.
  6. //
  7. using System.Diagnostics;
  8. using System.Threading;
  9. namespace System.Text
  10. {
  // Decoder fallback that hands out InternalDecoderBestFitFallbackBuffer instances.
  // Every fallback produces exactly one character (see MaxCharCount).
  internal sealed class InternalDecoderBestFitFallback : DecoderFallback
  {
      // Encoding this fallback was created for; its CodePage defines equality and the hash code.
      internal Encoding _encoding;

      // Best-fit lookup table. It starts out null; the fallback buffer's constructor fills it in
      // from _encoding the first time a buffer is created for this fallback.
      internal char[]? _arrayBestFit = null;

      // Character used when the table has no best-fit entry for the unknown bytes.
      internal char _cReplacement = '?';

      internal InternalDecoderBestFitFallback(Encoding encoding)
      {
          // Only remember the encoding here; the table itself is fetched lazily.
          _encoding = encoding;
      }

      // Maximum number of characters that this instance of this fallback could return
      public override int MaxCharCount
      {
          get { return 1; }
      }

      public override DecoderFallbackBuffer CreateFallbackBuffer()
      {
          return new InternalDecoderBestFitFallbackBuffer(this);
      }

      // Two best-fit fallbacks are equal when their encodings report the same code page.
      public override bool Equals(object? value)
      {
          if (value is InternalDecoderBestFitFallback other)
          {
              return _encoding.CodePage == other._encoding.CodePage;
          }

          return false;
      }

      public override int GetHashCode()
      {
          return _encoding.CodePage;
      }
  }
  // Fallback buffer for InternalDecoderBestFitFallback. Each call to Fallback queues exactly one
  // character: the best-fit mapping for the unknown bytes when the table has one, otherwise the
  // owning fallback's replacement character.
  internal sealed class InternalDecoderBestFitFallbackBuffer : DecoderFallbackBuffer
  {
      // Character queued by the most recent Fallback call.
      private char _cBestFit = '\0';
      // Characters still to be returned; negative once the buffer has been drained (or never filled).
      private int _iCount = -1;
      // Number of characters queued by the most recent Fallback call.
      private int _iSize;
      private readonly InternalDecoderBestFitFallback _oFallback;

      // Private object for locking instead of locking on a public type for SQL reliability work.
      private static object? s_InternalSyncObject;

      private static object InternalSyncObject
      {
          get
          {
              if (s_InternalSyncObject == null)
              {
                  // Publish a new lock object only if nobody else has published one yet.
                  object o = new object();
                  Interlocked.CompareExchange<object?>(ref s_InternalSyncObject, o, null);
              }

              return s_InternalSyncObject;
          }
      }

      // Constructor: remembers the owning fallback and makes sure its best-fit table is loaded.
      public InternalDecoderBestFitFallbackBuffer(InternalDecoderBestFitFallback fallback)
      {
          _oFallback = fallback;

          if (_oFallback._arrayBestFit == null)
          {
              // Lock so we don't confuse ourselves.
              lock (InternalSyncObject)
              {
                  // Double check before we do it again.
                  _oFallback._arrayBestFit ??= fallback._encoding.GetBestFitBytesToUnicodeData();
              }
          }
      }

      // Queues the single fallback character for bytesUnknown. Always returns true because a
      // character (best fit or replacement) is always available. The index argument is not used.
      public override bool Fallback(byte[] bytesUnknown, int index)
      {
          // We expect no previous fallback in our buffer
          Debug.Assert(_iCount < 1, "[InternalDecoderBestFitFallbackBuffer.Fallback] Calling fallback without a previously empty buffer");

          _cBestFit = TryBestFit(bytesUnknown);
          if (_cBestFit == '\0')
          {
              // No best-fit entry; fall back to the replacement character.
              _cBestFit = _oFallback._cReplacement;
          }

          _iCount = _iSize = 1;

          return true;
      }

      // Returns the queued fallback character, or '\0' when nothing is left.
      public override char GetNextChar()
      {
          // We want it to get < 0 because == 0 means that the current/last character is a fallback
          // and we need to detect recursion. We could have a flag but we already have this counter.
          _iCount--;

          // Do we have anything left? 0 is now last fallback char, negative is nothing left
          if (_iCount < 0)
          {
              return '\0';
          }

          // Make sure it didn't wrap from the fast count-- path
          if (_iCount == int.MaxValue)
          {
              _iCount = -1;
              return '\0';
          }

          // Return the best fit character
          return _cBestFit;
      }

      // Steps back one position so the last character can be returned again.
      public override bool MovePrevious()
      {
          // Only back up while the buffer is still in use; a drained buffer (negative count) stays drained.
          if (_iCount >= 0)
          {
              _iCount++;
          }

          // Return true if we could do it.
          return _iCount >= 0 && _iCount <= _iSize;
      }

      // How many characters left to output?
      public override int Remaining => (_iCount > 0) ? _iCount : 0;

      // Clear the buffer
      public override unsafe void Reset()
      {
          _iCount = -1;
          byteStart = null;
      }

      // This version just counts the fallback and doesn't actually copy anything.
      // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
      // array, and we might need the index, hence the byte*
      internal override unsafe int InternalFallback(byte[] bytes, byte* pBytes)
      {
          // return our replacement string Length (always 1 for InternalDecoderBestFitFallback, either
          // a best fit char or ?
          return 1;
      }

      // Looks up the best-fit character for a 1- or 2-byte sequence.
      // The table is laid out as (key, replacement) pairs at even/odd indices, with keys ascending
      // (the binary search below relies on that ordering). Returns '\0' when there is no entry.
      private char TryBestFit(byte[] bytesCheck)
      {
          Debug.Assert(_oFallback._arrayBestFit != null);
          char[] table = _oFallback._arrayBestFit;

          Debug.Assert((table.Length & 1) == 0,
              "[InternalDecoderBestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");

          // low is beginning of array, high is 1 AFTER the last complete pair. Rounding down to an
          // even count keeps every [index + 1] read below in range even if the table is malformed.
          int lowBound = 0;
          int highBound = table.Length & ~1;
          int index;
          char cCheck;

          // Check trivial case first (no best fit)
          if (highBound == 0)
          {
              return '\0';
          }

          // Only 1- and 2-byte sequences can be looked up
          if (bytesCheck.Length == 0 || bytesCheck.Length > 2)
          {
              return '\0';
          }

          if (bytesCheck.Length == 1)
          {
              cCheck = unchecked((char)bytesCheck[0]);
          }
          else
          {
              // Two bytes: first byte is the high half of the key
              cCheck = unchecked((char)((bytesCheck[0] << 8) + bytesCheck[1]));
          }

          // Check trivial out of range case (first key .. last key)
          if (cCheck < table[0] || cCheck > table[highBound - 2])
          {
              return '\0';
          }

          // Binary search the array
          int iDiff;
          while ((iDiff = (highBound - lowBound)) > 6)
          {
              // Look in the middle, which is complicated by the fact that we have 2 #s for each pair,
              // so we don't want index to be odd because it must be pair aligned.
              // Clear only the lowest bit: masking with 0xFFFE would also drop bits above 15 and
              // break the search for tables with 65536 or more entries.
              // Also note that index can never == highBound (because diff is rounded down)
              index = ((iDiff / 2) + lowBound) & ~1;

              char cTest = table[index];
              if (cTest == cCheck)
              {
                  // We found it
                  Debug.Assert(index + 1 < table.Length,
                      "[InternalDecoderBestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");
                  return table[index + 1];
              }
              else if (cTest < cCheck)
              {
                  // We weren't high enough
                  lowBound = index;
              }
              else
              {
                  // We weren't low enough
                  highBound = index;
              }
          }

          // Few enough pairs left: scan them linearly
          for (index = lowBound; index < highBound; index += 2)
          {
              if (table[index] == cCheck)
              {
                  // We found it
                  Debug.Assert(index + 1 < table.Length,
                      "[InternalDecoderBestFitFallbackBuffer.TryBestFit]Expected replacement character at end of array");
                  return table[index + 1];
              }
          }

          // Char wasn't in our table
          return '\0';
      }
  }
  184. }