Decoder.cs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Text;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Runtime.InteropServices;
  8. namespace System.Text
  9. {
  10. // A Decoder is used to decode a sequence of blocks of bytes into a
  11. // sequence of blocks of characters. Following instantiation of a decoder,
  12. // sequential blocks of bytes are converted into blocks of characters through
  13. // calls to the GetChars method. The decoder maintains state between the
  14. // conversions, allowing it to correctly decode byte sequences that span
  15. // adjacent blocks.
  16. //
  17. // Instances of specific implementations of the Decoder abstract base
  18. // class are typically obtained through calls to the GetDecoder method
  19. // of Encoding objects.
  20. //
  21. public abstract class Decoder
  22. {
  23. internal DecoderFallback _fallback = null;
  24. internal DecoderFallbackBuffer _fallbackBuffer = null;
  25. protected Decoder()
  26. {
  27. // We don't call default reset because default reset probably isn't good if we aren't initialized.
  28. }
  29. public DecoderFallback Fallback
  30. {
  31. get
  32. {
  33. return _fallback;
  34. }
  35. set
  36. {
  37. if (value == null)
  38. throw new ArgumentNullException(nameof(value));
  39. // Can't change fallback if buffer is wrong
  40. if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
  41. throw new ArgumentException(
  42. SR.Argument_FallbackBufferNotEmpty, nameof(value));
  43. _fallback = value;
  44. _fallbackBuffer = null;
  45. }
  46. }
  47. // Note: we don't test for threading here because async access to Encoders and Decoders
  48. // doesn't work anyway.
  49. public DecoderFallbackBuffer FallbackBuffer
  50. {
  51. get
  52. {
  53. if (_fallbackBuffer == null)
  54. {
  55. if (_fallback != null)
  56. _fallbackBuffer = _fallback.CreateFallbackBuffer();
  57. else
  58. _fallbackBuffer = DecoderFallback.ReplacementFallback.CreateFallbackBuffer();
  59. }
  60. return _fallbackBuffer;
  61. }
  62. }
  63. internal bool InternalHasFallbackBuffer
  64. {
  65. get
  66. {
  67. return _fallbackBuffer != null;
  68. }
  69. }
  70. // Reset the Decoder
  71. //
  72. // Normally if we call GetChars() and an error is thrown we don't change the state of the Decoder. This
  73. // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
  74. //
  75. // If the caller doesn't want to try again after GetChars() throws an error, then they need to call Reset().
  76. //
  77. // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
  78. // We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
  79. public virtual void Reset()
  80. {
  81. byte[] byteTemp = Array.Empty<byte>();
  82. char[] charTemp = new char[GetCharCount(byteTemp, 0, 0, true)];
  83. GetChars(byteTemp, 0, 0, charTemp, 0, true);
  84. _fallbackBuffer?.Reset();
  85. }
  86. // Returns the number of characters the next call to GetChars will
  87. // produce if presented with the given range of bytes. The returned value
  88. // takes into account the state in which the decoder was left following the
  89. // last call to GetChars. The state of the decoder is not affected
  90. // by a call to this method.
  91. //
  92. public abstract int GetCharCount(byte[] bytes, int index, int count);
  93. public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
  94. {
  95. return GetCharCount(bytes, index, count);
  96. }
        // We expect this to be the workhorse for NLS Encodings, but for existing
        // ones we need a working (if slow) default implementation.
  99. [CLSCompliant(false)]
  100. public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
  101. {
  102. // Validate input parameters
  103. if (bytes == null)
  104. throw new ArgumentNullException(nameof(bytes),
  105. SR.ArgumentNull_Array);
  106. if (count < 0)
  107. throw new ArgumentOutOfRangeException(nameof(count),
  108. SR.ArgumentOutOfRange_NeedNonNegNum);
  109. byte[] arrbyte = new byte[count];
  110. int index;
  111. for (index = 0; index < count; index++)
  112. arrbyte[index] = bytes[index];
  113. return GetCharCount(arrbyte, 0, count);
  114. }
  115. public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes, bool flush)
  116. {
  117. fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
  118. {
  119. return GetCharCount(bytesPtr, bytes.Length, flush);
  120. }
  121. }
  122. // Decodes a range of bytes in a byte array into a range of characters
  123. // in a character array. The method decodes byteCount bytes from
  124. // bytes starting at index byteIndex, storing the resulting
  125. // characters in chars starting at index charIndex. The
  126. // decoding takes into account the state in which the decoder was left
  127. // following the last call to this method.
  128. //
  129. // An exception occurs if the character array is not large enough to
  130. // hold the complete decoding of the bytes. The GetCharCount method
  131. // can be used to determine the exact number of characters that will be
  132. // produced for a given range of bytes. Alternatively, the
  133. // GetMaxCharCount method of the Encoding that produced this
  134. // decoder can be used to determine the maximum number of characters that
  135. // will be produced for a given number of bytes, regardless of the actual
  136. // byte values.
  137. //
  138. public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
  139. char[] chars, int charIndex);
  140. public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
  141. char[] chars, int charIndex, bool flush)
  142. {
  143. return GetChars(bytes, byteIndex, byteCount, chars, charIndex);
  144. }
        // We expect this to be the workhorse for NLS Encodings, but for existing
        // ones we need a working (if slow) default implementation.
  147. //
  148. // WARNING WARNING WARNING
  149. //
  150. // WARNING: If this breaks it could be a security threat. Obviously we
  151. // call this internally, so you need to make sure that your pointers, counts
  152. // and indexes are correct when you call this method.
  153. //
  154. // In addition, we have internal code, which will be marked as "safe" calling
  155. // this code. However this code is dependent upon the implementation of an
  156. // external GetChars() method, which could be overridden by a third party and
  157. // the results of which cannot be guaranteed. We use that result to copy
  158. // the char[] to our char* output buffer. If the result count was wrong, we
  159. // could easily overflow our output buffer. Therefore we do an extra test
  160. // when we copy the buffer so that we don't overflow charCount either.
  161. [CLSCompliant(false)]
  162. public virtual unsafe int GetChars(byte* bytes, int byteCount,
  163. char* chars, int charCount, bool flush)
  164. {
  165. // Validate input parameters
  166. if (chars == null || bytes == null)
  167. throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
  168. SR.ArgumentNull_Array);
  169. if (byteCount < 0 || charCount < 0)
  170. throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
  171. SR.ArgumentOutOfRange_NeedNonNegNum);
  172. // Get the byte array to convert
  173. byte[] arrByte = new byte[byteCount];
  174. int index;
  175. for (index = 0; index < byteCount; index++)
  176. arrByte[index] = bytes[index];
  177. // Get the char array to fill
  178. char[] arrChar = new char[charCount];
  179. // Do the work
  180. int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);
  181. Debug.Assert(result <= charCount, "Returned more chars than we have space for");
  182. // Copy the char array
  183. // WARNING: We MUST make sure that we don't copy too many chars. We can't
  184. // rely on result because it could be a 3rd party implementation. We need
  185. // to make sure we never copy more than charCount chars no matter the value
  186. // of result
  187. if (result < charCount)
  188. charCount = result;
  189. // We check both result and charCount so that we don't accidentally overrun
  190. // our pointer buffer just because of an issue in GetChars
  191. for (index = 0; index < charCount; index++)
  192. chars[index] = arrChar[index];
  193. return charCount;
  194. }
  195. public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush)
  196. {
  197. fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
  198. fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
  199. {
  200. return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length, flush);
  201. }
  202. }
        // This method is used when the output buffer might not be large enough.
        // It will decode until it runs out of bytes, and then it will return
        // true if the entire input was converted. In either case it
        // will also return the number of converted bytes and output characters used.
        // It will only throw a buffer overflow exception if the entire length of chars[] is
        // too small to store the next char (like 0, or maybe 1 or 4 for some encodings).
        // We're done processing this buffer only if completed returns true.
  210. //
  211. // Might consider checking Max...Count to avoid the extra counting step.
  212. //
        // Note that if not all of the input bytes are consumed, then we'll do a /2, which means
        // that it's likely that we didn't consume as many bytes as we could have. For some
        // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
  216. public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
  217. char[] chars, int charIndex, int charCount, bool flush,
  218. out int bytesUsed, out int charsUsed, out bool completed)
  219. {
  220. // Validate parameters
  221. if (bytes == null || chars == null)
  222. throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
  223. SR.ArgumentNull_Array);
  224. if (byteIndex < 0 || byteCount < 0)
  225. throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
  226. SR.ArgumentOutOfRange_NeedNonNegNum);
  227. if (charIndex < 0 || charCount < 0)
  228. throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
  229. SR.ArgumentOutOfRange_NeedNonNegNum);
  230. if (bytes.Length - byteIndex < byteCount)
  231. throw new ArgumentOutOfRangeException(nameof(bytes),
  232. SR.ArgumentOutOfRange_IndexCountBuffer);
  233. if (chars.Length - charIndex < charCount)
  234. throw new ArgumentOutOfRangeException(nameof(chars),
  235. SR.ArgumentOutOfRange_IndexCountBuffer);
  236. bytesUsed = byteCount;
  237. // Its easy to do if it won't overrun our buffer.
  238. while (bytesUsed > 0)
  239. {
  240. if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
  241. {
  242. charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
  243. completed = (bytesUsed == byteCount &&
  244. (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
  245. return;
  246. }
  247. // Try again with 1/2 the count, won't flush then 'cause won't read it all
  248. flush = false;
  249. bytesUsed /= 2;
  250. }
  251. // Oops, we didn't have anything, we'll have to throw an overflow
  252. throw new ArgumentException(SR.Argument_ConversionOverflow);
  253. }
  254. // This is the version that uses *.
  255. // We're done processing this buffer only if completed returns true.
  256. //
  257. // Might consider checking Max...Count to avoid the extra counting step.
  258. //
        // Note that if not all of the input bytes are consumed, then we'll do a /2, which means
        // that it's likely that we didn't consume as many bytes as we could have. For some
        // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
  262. [CLSCompliant(false)]
  263. public virtual unsafe void Convert(byte* bytes, int byteCount,
  264. char* chars, int charCount, bool flush,
  265. out int bytesUsed, out int charsUsed, out bool completed)
  266. {
  267. // Validate input parameters
  268. if (chars == null || bytes == null)
  269. throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
  270. SR.ArgumentNull_Array);
  271. if (byteCount < 0 || charCount < 0)
  272. throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
  273. SR.ArgumentOutOfRange_NeedNonNegNum);
  274. // Get ready to do it
  275. bytesUsed = byteCount;
  276. // Its easy to do if it won't overrun our buffer.
  277. while (bytesUsed > 0)
  278. {
  279. if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
  280. {
  281. charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
  282. completed = (bytesUsed == byteCount &&
  283. (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
  284. return;
  285. }
  286. // Try again with 1/2 the count, won't flush then 'cause won't read it all
  287. flush = false;
  288. bytesUsed /= 2;
  289. }
  290. // Oops, we didn't have anything, we'll have to throw an overflow
  291. throw new ArgumentException(SR.Argument_ConversionOverflow);
  292. }
  293. public virtual unsafe void Convert(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush, out int bytesUsed, out int charsUsed, out bool completed)
  294. {
  295. fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
  296. fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
  297. {
  298. Convert(bytesPtr, bytes.Length, charsPtr, chars.Length, flush, out bytesUsed, out charsUsed, out completed);
  299. }
  300. }
  301. }
  302. }