Encoder.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Text;
  5. using System;
  6. using System.Diagnostics;
  7. using System.Runtime.InteropServices;
  8. namespace System.Text
  9. {
  10. // An Encoder is used to encode a sequence of blocks of characters into
  11. // a sequence of blocks of bytes. Following instantiation of an encoder,
  12. // sequential blocks of characters are converted into blocks of bytes through
  13. // calls to the GetBytes method. The encoder maintains state between the
  14. // conversions, allowing it to correctly encode character sequences that span
  15. // adjacent blocks.
  16. //
  17. // Instances of specific implementations of the Encoder abstract base
  18. // class are typically obtained through calls to the GetEncoder method
  19. // of Encoding objects.
  20. //
  21. public abstract class Encoder
  22. {
  // Fallback object configured for this encoder. Starts out null; the Fallback
  // property setter assigns it.
  internal EncoderFallback _fallback;

  // Fallback buffer, created lazily by the FallbackBuffer property and discarded
  // (set back to null) whenever the Fallback property is assigned.
  internal EncoderFallbackBuffer _fallbackBuffer;
  // Constructs the encoder without touching any encoder state.
  protected Encoder()
  {
      // Intentionally empty: the default Reset() is not called from here because
      // it probably is not appropriate while the instance is still uninitialized.
  }
  // Gets or sets the fallback object for this encoder.
  //
  // The setter throws ArgumentNullException for a null value, and ArgumentException
  // when a fallback buffer already exists and still reports Remaining > 0.
  // A successful set also discards the current fallback buffer.
  public EncoderFallback Fallback
  {
      get => _fallback;
      set
      {
          if (value is null)
          {
              throw new ArgumentNullException(nameof(value));
          }

          // The fallback may only be replaced while the existing buffer (if any) is empty.
          bool bufferHasPendingData = _fallbackBuffer != null && _fallbackBuffer.Remaining > 0;
          if (bufferHasPendingData)
          {
              throw new ArgumentException(SR.Argument_FallbackBufferNotEmpty, nameof(value));
          }

          _fallback = value;

          // Forget the old buffer so the next FallbackBuffer access creates one
          // from the newly assigned fallback.
          _fallbackBuffer = null;
      }
  }
  47. // Note: we don't test for threading here because async access to Encoders and Decoders
  48. // doesn't work anyway.
  // Gets the fallback buffer for this encoder, creating it on first access.
  // The buffer comes from the configured fallback, or from
  // EncoderFallback.ReplacementFallback when no fallback has been assigned.
  public EncoderFallbackBuffer FallbackBuffer
  {
      get
      {
          if (_fallbackBuffer == null)
          {
              EncoderFallback source = _fallback ?? EncoderFallback.ReplacementFallback;
              _fallbackBuffer = source.CreateFallbackBuffer();
          }

          return _fallbackBuffer;
      }
  }
  // True once a fallback buffer has been created; reading this never creates one.
  internal bool InternalHasFallbackBuffer => _fallbackBuffer != null;
  70. // Reset the Encoder
  71. //
  72. // Normally if we call GetBytes() and an error is thrown we don't change the state of the encoder. This
  73. // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
  74. //
  75. // If the caller doesn't want to try again after GetBytes() throws an error, then they need to call Reset().
  76. //
  77. // Virtual implementation has to call GetBytes with flush and a big enough buffer to clear a 0 char string
  78. // We avoid GetMaxByteCount() because a) we can't call the base encoder and b) it might be really big.
  // Default Reset: flushes the encoder with zero input chars and then resets the
  // fallback buffer if one has been created.
  public virtual void Reset()
  {
      // Array.Empty avoids allocating a new zero-length array on every call.
      char[] noChars = Array.Empty<char>();

      // Size the scratch output from GetByteCount (with flush) so the flushing
      // GetBytes call below gets exactly the room it reported needing.
      byte[] scratch = new byte[GetByteCount(noChars, 0, 0, true)];
      GetBytes(noChars, 0, 0, scratch, 0, true);

      // The buffer only exists if it was requested at some point; skip it otherwise.
      _fallbackBuffer?.Reset();
  }
  87. // Returns the number of bytes the next call to GetBytes will
  88. // produce if presented with the given range of characters and the given
  89. // value of the flush parameter. The returned value takes into
  90. // account the state in which the encoder was left following the last call
  91. // to GetBytes. The state of the encoder is not affected by a call
  92. // to this method.
  93. //
  // Implemented by derived encoders. Reports how many bytes the next GetBytes call
  // would produce for count chars of chars starting at index, with the given flush
  // value and the state left by the previous GetBytes call. Must not change the
  // encoder's state.
  public abstract int GetByteCount(
      char[] chars,
      int index,
      int count,
      bool flush);
  95. // We expect this to be the workhorse for NLS encodings
  96. // unfortunately for existing overrides, it has to call the [] version,
  97. // which is really slow, so avoid this method if you might be calling external encodings.
  // Pointer overload of GetByteCount. This base implementation copies the input
  // into a managed array and forwards to the abstract char[] overload.
  //
  // Throws ArgumentNullException when chars is null and
  // ArgumentOutOfRangeException when count is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetByteCount(char* chars, int count, bool flush)
  {
      // Validate input parameters
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (count < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Bulk-copy the caller's chars into a managed array in one call instead of
      // copying element by element.
      char[] arrChar = new ReadOnlySpan<char>(chars, count).ToArray();

      return GetByteCount(arrChar, 0, count, flush);
  }
  // Span overload of GetByteCount: pins the span and forwards to the pointer overload.
  public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars, bool flush)
  {
      int charCount = chars.Length;

      // NOTE(review): GetNonNullPinnableReference presumably yields a non-null
      // reference even for an empty span; the pointer overload rejects a null
      // pointer, so this matters -- confirm against MemoryMarshal.
      fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      {
          return GetByteCount(pinnedChars, charCount, flush);
      }
  }
  121. // Encodes a range of characters in a character array into a range of bytes
  122. // in a byte array. The method encodes charCount characters from
  123. // chars starting at index charIndex, storing the resulting
  124. // bytes in bytes starting at index byteIndex. The encoding
  125. // takes into account the state in which the encoder was left following the
  126. // last call to this method. The flush parameter indicates whether
  127. // the encoder should flush any shift-states and partial characters at the
  128. // end of the conversion. To ensure correct termination of a sequence of
  129. // blocks of encoded bytes, the last call to GetBytes should specify
  130. // a value of true for the flush parameter.
  131. //
  132. // An exception occurs if the byte array is not large enough to hold the
  133. // complete encoding of the characters. The GetByteCount method can
  134. // be used to determine the exact number of bytes that will be produced for
  135. // a given range of characters. Alternatively, the GetMaxByteCount
  136. // method of the Encoding that produced this encoder can be used to
  137. // determine the maximum number of bytes that will be produced for a given
  138. // number of characters, regardless of the actual character values.
  139. //
  // Implemented by derived encoders. Encodes charCount chars of chars starting at
  // charIndex into bytes starting at byteIndex and returns the number of bytes
  // written. flush tells the encoder whether to emit any pending shift-states and
  // partial characters at the end of this call.
  public abstract int GetBytes(
      char[] chars,
      int charIndex,
      int charCount,
      byte[] bytes,
      int byteIndex,
      bool flush);
  142. // We expect this to be the workhorse for NLS Encodings, but for existing
  143. // ones we need a working (if slow) default implementation.
  144. //
  145. // WARNING WARNING WARNING
  146. //
  147. // WARNING: If this breaks it could be a security threat. Obviously we
  148. // call this internally, so you need to make sure that your pointers, counts
  149. // and indexes are correct when you call this method.
  150. //
  151. // In addition, we have internal code, which will be marked as "safe" calling
  152. // this code. However this code is dependent upon the implementation of an
  153. // external GetBytes() method, which could be overridden by a third party and
  154. // the results of which cannot be guaranteed. We use that result to copy
  155. // the byte[] to our byte* output buffer. If the result count was wrong, we
  156. // could easily overflow our output buffer. Therefore we do an extra test
  157. // when we copy the buffer so that we don't overflow byteCount either.
  // Pointer overload of GetBytes. This base implementation copies the input into a
  // managed array, encodes through the abstract array overload into a temporary
  // byte[] of byteCount bytes, and copies the produced bytes to the caller's buffer.
  //
  // Returns the number of bytes copied to bytes: the count reported by the array
  // overload, capped at byteCount.
  //
  // Throws ArgumentNullException when bytes or chars is null and
  // ArgumentOutOfRangeException when charCount or byteCount is negative.
  [CLSCompliant(false)]
  public virtual unsafe int GetBytes(char* chars, int charCount,
      byte* bytes, int byteCount, bool flush)
  {
      // Validate input parameters (bytes is reported first when both are null)
      if (bytes == null || chars == null)
      {
          throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
              SR.ArgumentNull_Array);
      }

      if (charCount < 0 || byteCount < 0)
      {
          throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
              SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Get the char array to convert (single bulk copy instead of a per-element loop)
      char[] arrChar = new ReadOnlySpan<char>(chars, charCount).ToArray();

      // Get the byte array to fill
      byte[] arrByte = new byte[byteCount];

      // Do the work
      int result = GetBytes(arrChar, 0, charCount, arrByte, 0, flush);

      Debug.Assert(result <= byteCount, "Returned more bytes than we have space for");

      // WARNING: We MUST make sure that we don't copy too many bytes. We can't
      // rely on result because it could come from a 3rd party override, so the
      // amount copied is capped at the caller-supplied byteCount.
      if (result < byteCount)
      {
          byteCount = result;
      }

      // An override could even report a negative count; nothing is copied then and
      // the value is still returned unchanged. arrByte was allocated with the
      // original byteCount, so the slice below is always in range.
      if (byteCount > 0)
      {
          new ReadOnlySpan<byte>(arrByte, 0, byteCount).CopyTo(new Span<byte>(bytes, byteCount));
      }

      return byteCount;
  }
  // Span overload of GetBytes: pins both spans and forwards to the pointer overload,
  // using each span's Length as the corresponding count.
  public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, bool flush)
  {
      int charCount = chars.Length;
      int byteCount = bytes.Length;

      // NOTE(review): GetNonNullPinnableReference presumably yields non-null
      // references even for empty spans; the pointer overload rejects null
      // pointers -- confirm against MemoryMarshal.
      fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          return GetBytes(pinnedChars, charCount, pinnedBytes, byteCount, flush);
      }
  }
  199. // This method is used to avoid running out of output buffer space.
  200. // It will encode until it runs out of chars, and then it will return
  201. // true if the entire input was converted. In either case it
  202. // will also return the number of converted chars and output bytes used.
  203. // It will only throw a buffer overflow exception if the entire length of bytes[] is
  204. // too small to store the next byte. (like 0 or maybe 1 or 4 for some encodings)
  205. // We're done processing this buffer only if completed returns true.
  206. //
  207. // Might consider checking Max...Count to avoid the extra counting step.
  208. //
  209. // Note that if all of the input chars are not consumed, then we'll do a /2, which means
  210. // that its likely that we didn't consume as many chars as we could have. For some
  211. // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
  // Array overload of Convert.
  //
  // Tries to encode all charCount chars; whenever GetByteCount reports more than
  // byteCount bytes, the attempt is halved and flush is cleared. On success,
  // charsUsed and bytesUsed report what was consumed and produced, and completed is
  // true only if every requested char was taken and no fallback buffer has data
  // remaining.
  //
  // Throws ArgumentNullException or ArgumentOutOfRangeException for invalid
  // arguments, and ArgumentException when no attempt fits. The loop does not run
  // at all when charCount is 0, so that case also ends in the ArgumentException.
  public virtual void Convert(
      char[] chars, int charIndex, int charCount,
      byte[] bytes, int byteIndex, int byteCount,
      bool flush,
      out int charsUsed, out int bytesUsed, out bool completed)
  {
      // Null checks: chars is reported first when both arrays are null.
      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      // Sign checks, in the order: charIndex, charCount, byteIndex, byteCount.
      if (charIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteIndex < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      // Both windows must lie inside their arrays.
      if (chars.Length - charIndex < charCount)
      {
          throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      if (bytes.Length - byteIndex < byteCount)
      {
          throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
      }

      // Only the array overloads are used here: the pointer overloads may be
      // untrusted overrides and should not be mixed into this path.
      for (charsUsed = charCount; charsUsed > 0; charsUsed /= 2)
      {
          int bytesNeeded = GetByteCount(chars, charIndex, charsUsed, flush);
          if (bytesNeeded > byteCount)
          {
              // Too big: retry with half the chars. A partial read must not flush.
              flush = false;
              continue;
          }

          bytesUsed = GetBytes(chars, charIndex, charsUsed, bytes, byteIndex, flush);

          // Short-circuit order is deliberate: the fallback buffer is consulted
          // only when the whole input was consumed.
          completed = charsUsed == charCount
              && (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0);
          return;
      }

      // No attempt fit into the output window.
      throw new ArgumentException(SR.Argument_ConversionOverflow);
  }
  252. // Same thing, but using pointers
  253. //
  254. // Might consider checking Max...Count to avoid the extra counting step.
  255. //
  256. // Note that if all of the input chars are not consumed, then we'll do a /2, which means
  257. // that its likely that we didn't consume as many chars as we could have. For some
  258. // applications this could be slow. (Like trying to exactly fill an output buffer from a bigger stream)
  // Pointer overload of Convert.
  //
  // Tries to encode all charCount chars; whenever GetByteCount reports more than
  // byteCount bytes, the attempt is halved and flush is cleared. On success,
  // charsUsed and bytesUsed report what was consumed and produced, and completed is
  // true only if every requested char was taken and no fallback buffer has data
  // remaining.
  //
  // Throws ArgumentNullException when bytes or chars is null,
  // ArgumentOutOfRangeException when charCount or byteCount is negative, and
  // ArgumentException when no attempt fits. The loop does not run at all when
  // charCount is 0, so that case also ends in the ArgumentException.
  [CLSCompliant(false)]
  public virtual unsafe void Convert(
      char* chars, int charCount,
      byte* bytes, int byteCount,
      bool flush,
      out int charsUsed, out int bytesUsed, out bool completed)
  {
      // Null checks: bytes is reported first when both pointers are null.
      if (bytes == null)
      {
          throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
      }

      if (chars == null)
      {
          throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
      }

      // Sign checks: charCount is reported first when both counts are negative.
      if (charCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      if (byteCount < 0)
      {
          throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
      }

      for (charsUsed = charCount; charsUsed > 0; charsUsed /= 2)
      {
          int bytesNeeded = GetByteCount(chars, charsUsed, flush);
          if (bytesNeeded > byteCount)
          {
              // Too big: retry with half the chars. A partial read must not flush.
              flush = false;
              continue;
          }

          bytesUsed = GetBytes(chars, charsUsed, bytes, byteCount, flush);

          // Short-circuit order is deliberate: the fallback buffer is consulted
          // only when the whole input was consumed.
          completed = charsUsed == charCount
              && (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0);
          return;
      }

      // No attempt fit into the output buffer.
      throw new ArgumentException(SR.Argument_ConversionOverflow);
  }
  // Span overload of Convert: pins both spans and forwards to the pointer overload,
  // using each span's Length as the corresponding count. The out values are exactly
  // those produced by the pointer overload.
  public virtual unsafe void Convert(
      ReadOnlySpan<char> chars,
      Span<byte> bytes,
      bool flush,
      out int charsUsed,
      out int bytesUsed,
      out bool completed)
  {
      int charCount = chars.Length;
      int byteCount = bytes.Length;

      // NOTE(review): GetNonNullPinnableReference presumably yields non-null
      // references even for empty spans; the pointer overload rejects null
      // pointers -- confirm against MemoryMarshal.
      fixed (char* pinnedChars = &MemoryMarshal.GetNonNullPinnableReference(chars))
      fixed (byte* pinnedBytes = &MemoryMarshal.GetNonNullPinnableReference(bytes))
      {
          Convert(pinnedChars, charCount, pinnedBytes, byteCount, flush,
              out charsUsed, out bytesUsed, out completed);
      }
  }
  298. }
  299. }