Encoding.Internal.cs 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Buffers;
  5. using System.Diagnostics;
  6. using System.Runtime.CompilerServices;
  7. using System.Runtime.InteropServices;
  8. using Internal.Runtime.CompilerServices;
  9. namespace System.Text
  10. {
  11. public partial class Encoding
  12. {
  /*
   * This file contains infrastructure code that supports a simplified way of writing
   * internally-implemented Encoding types. In this system, the individual Encoding types
   * are no longer responsible for handling anything related to the EncoderNLS / DecoderNLS
   * infrastructure, nor are they responsible for implementing anything related to fallback
   * buffers logic.
   *
   * Instead, subclassed types are responsible only for transcoding of individual scalar values
   * to and from the encoding's byte representation (see the two methods immediately below).
   * They can optionally implement fast-path logic to perform bulk transcoding up until the
   * first segment of data that cannot be transcoded. They can special-case certain fallback
   * mechanisms if desired.
   *
   * Most of the fast-path code is written using raw pointers as the exchange types, just as
   * in the standard Encoding infrastructure. Since the fallback logic is more complex, most
   * of it is written using type-safe constructs like Span<T>, with some amount of glue to
   * allow it to work correctly with pointer-based fast-path code.
   *
   * A typical call graph for GetBytes is represented below, using ASCIIEncoding as an example.
   *
   * ASCIIEncoding.GetBytes(...) [non-EncoderNLS path, public virtual override]
   * `- <parameter validation>
   *  - ASCIIEncoding.GetBytesCommon [private helper method per derived type, inlined]
   *    `- ASCIIEncoding.GetBytesFast [overridden fast-path implementation, inlined]
   *     - <if all data transcoded, return immediately>
   *     - <if all data not transcoded...>
   *       `- Encoding.GetBytesWithFallback [non-virtual stub method to call main GetBytesWithFallback worker]
   *          `- Encoding.GetBytesWithFallback [virtual method whose base implementation contains slow fallback logic]
   *             `- <may be overridden to provide optimized fallback logic>
   *              - <create EncoderFallbackBuffer instance>
   *              - <perform the following in a loop:>
   *                `- <invoke fast-path logic via virtual method dispatch on derived type>
   *                 - <read next "bad" scalar value from source>
   *                 - <run this bad value through the fallback buffer>
   *                 - <drain the fallback buffer to the destination>
   *                 - <loop until source is fully consumed or destination is full>
   *              - <signal full or partial success to EncoderNLS instance / throw if necessary>
   *
   * The call graph for GetBytes(..., EncoderNLS) is similar:
   *
   * Encoding.GetBytes(..., EncoderNLS) [base implementation]
   * `- <if no leftover data from previous invocation, invoke fast-path>
   *  - <if fast-path invocation above completed, return immediately>
   *  - <if not all data transcoded, or if there was leftover data from previous invocation...>
   *    `- Encoding.GetBytesWithFallback [non-virtual stub method]
   *       `- <drain any leftover data from previous invocation>
   *        - <invoke fast-path again>
   *        - <if all data transcoded, return immediately>
   *        - <if all data not transcoded...>
   *          `- Encoding.GetBytesWithFallback [virtual method as described above]
   *
   * There are different considerations in each call graph for things like error handling,
   * since the error conditions will be different depending on whether or not an EncoderNLS
   * instance is available and what values its properties have.
   */
  68. /*
  69. * THESE TWO METHODS MUST BE OVERRIDDEN BY A SUBCLASSED TYPE
  70. */
  /// <summary>
  /// Placeholder for the byte-to-scalar primitive that each internally-implemented
  /// encoding must supply. The base version performs no decoding: it fails a debug
  /// assertion and then throws.
  /// </summary>
  internal virtual OperationStatus DecodeFirstRune(ReadOnlySpan<byte> bytes, out Rune value, out int bytesConsumed)
  {
      // Reaching this body means a derived type forgot to override the method.
      Debug.Fail("This should be overridden by a subclassed type.");
      throw NotImplemented.ByDesign;
  }
  /// <summary>
  /// Placeholder for the scalar-to-byte primitive that each internally-implemented
  /// encoding must supply. The base version performs no encoding: it fails a debug
  /// assertion and then throws.
  /// </summary>
  internal virtual OperationStatus EncodeRune(Rune value, Span<byte> bytes, out int bytesWritten)
  {
      // Reaching this body means a derived type forgot to override the method.
      Debug.Fail("This should be overridden by a subclassed type.");
      throw NotImplemented.ByDesign;
  }
  81. /*
  82. * ALL OTHER LOGIC CAN BE IMPLEMENTED IN TERMS OF THE TWO METHODS ABOVE.
  83. * FOR IMPROVED PERFORMANCE, SUBCLASSED TYPES MAY WANT TO OVERRIDE ONE OR MORE VIRTUAL METHODS BELOW.
  84. */
  85. /*
  86. * GETBYTECOUNT FAMILY OF FUNCTIONS
  87. */
  /// <summary>
  /// Determines the byte count of a single <see cref="Rune"/> under the current <see cref="Encoding"/>.
  /// </summary>
  /// <returns>
  /// <see langword="false"/> if the <see cref="Rune"/> cannot be represented in the
  /// current <see cref="Encoding"/>; otherwise <see langword="true"/>, with the count
  /// reported through <paramref name="byteCount"/>.
  /// </returns>
  internal virtual bool TryGetByteCount(Rune value, out int byteCount)
  {
      // No shipping base implementation exists: any production-quality type is expected
      // to override this method. The disabled block below is a slow, non-shipping
      // reference implementation kept only for convenience.
#if false
      Span<byte> scratch = stackalloc byte[4]; // max 4 bytes per input scalar
      OperationStatus status = EncodeRune(value, scratch, out byteCount);
      Debug.Assert(status == OperationStatus.Done || status == OperationStatus.InvalidData, "Unexpected return value.");
      return (status == OperationStatus.Done);
#else
      Debug.Fail("This should be overridden by a subclassed type.");
      throw NotImplemented.ByDesign;
#endif
  }
  /// <summary>
  /// Entry point from <see cref="EncoderNLS.GetByteCount"/>. Tries the fast path when the
  /// encoder holds no leftover data, and otherwise (or when the fast path stops early)
  /// continues through the drain + fallback path.
  /// </summary>
  internal virtual unsafe int GetByteCount(char* pChars, int charCount, EncoderNLS? encoder)
  {
      Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
      Debug.Assert(charCount >= 0, "Caller should've checked this condition.");
      Debug.Assert(pChars != null || charCount == 0, "Cannot provide a null pointer and a non-zero count.");

      // Staying on the fast path requires two things: nothing left over on the encoder,
      // and a single fast-path call that consumes the whole source buffer. When either
      // condition fails we pay for spans, draining the EncoderNLS instance, and fallback.
      int runningByteCount = 0;
      int charsAlreadyConsumed = 0;

      if (!encoder.HasLeftoverData)
      {
          runningByteCount = GetByteCountFast(pChars, charCount, encoder.Fallback, out charsAlreadyConsumed);
          if (charsAlreadyConsumed == charCount)
          {
              return runningByteCount;
          }
      }

      // Either the encoder had leftover data or part of the input is still unconsumed:
      // hand the remainder to the draining + fallback machinery and add its count.
      int slowPathByteCount = GetByteCountWithFallback(pChars, charCount, charsAlreadyConsumed, encoder);
      runningByteCount += slowPathByteCount;

      // A negative sum means the addition wrapped past int.MaxValue.
      if (runningByteCount < 0)
      {
          ThrowConversionOverflow();
      }

      return runningByteCount;
  }
  /// <summary>
  /// Counts the number of <see langword="byte"/>s that would result from transcoding the source
  /// data, exiting when the source buffer is consumed or when the first unreadable data is encountered.
  /// The implementation may inspect <paramref name="fallback"/> to short-circuit any counting
  /// operation, but it should not attempt to call <see cref="EncoderFallback.CreateFallbackBuffer"/>.
  /// </summary>
  /// <returns>
  /// Via <paramref name="charsConsumed"/>, the number of elements from <paramref name="pChars"/> which
  /// were consumed; and returns the transcoded byte count up to this point.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the byte count would be greater than <see cref="int.MaxValue"/>.
  /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
  /// </exception>
  /// <remarks>
  /// The implementation should not attempt to perform any sort of fallback behavior.
  /// If custom fallback behavior is necessary, override <see cref="GetByteCountWithFallback"/>.
  /// </remarks>
  private protected virtual unsafe int GetByteCountFast(char* pChars, int charsLength, EncoderFallback? fallback, out int charsConsumed)
  {
      // Any production-quality type would override this method and provide a real
      // implementation, so we won't provide a base implementation. However, a
      // non-shipping slow reference implementation is provided below for convenience.
#if false
      ReadOnlySpan<char> chars = new ReadOnlySpan<char>(pChars, charsLength);
      int totalByteCount = 0;

      while (!chars.IsEmpty)
      {
          // Uses Rune.DecodeFromUtf16, the same decoding API the live fallback loop in this file calls.
          if (Rune.DecodeFromUtf16(chars, out Rune scalarValue, out int charsConsumedThisIteration) != OperationStatus.Done
              || !TryGetByteCount(scalarValue, out int byteCountThisIteration))
          {
              // Invalid UTF-16 data, or not convertible to target encoding
              break;
          }

          chars = chars.Slice(charsConsumedThisIteration);

          totalByteCount += byteCountThisIteration;
          if (totalByteCount < 0)
          {
              // The running total wrapped past int.MaxValue.
              ThrowConversionOverflow();
          }
      }

      charsConsumed = charsLength - chars.Length; // number of chars consumed across all loop iterations above
      return totalByteCount;
#else
      Debug.Fail("This should be overridden by a subclassed type.");
      throw NotImplemented.ByDesign;
#endif
  }
  /// <summary>
  /// Counts the bytes that would result from transcoding the provided chars when no
  /// <see cref="EncoderNLS"/> is involved. <paramref name="pCharsOriginal"/> and
  /// <paramref name="originalCharCount"/> describe the original input, while
  /// <paramref name="charsConsumedSoFar"/> marks where in that buffer the fallback loop should start.
  /// </summary>
  /// <returns>
  /// The byte count resulting from transcoding the input data.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the resulting byte count is greater than <see cref="int.MaxValue"/>.
  /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
  /// </exception>
  [MethodImpl(MethodImplOptions.NoInlining)] // don't stack spill spans into our caller
  private protected unsafe int GetByteCountWithFallback(char* pCharsOriginal, int originalCharCount, int charsConsumedSoFar)
  {
      // This stub is deliberately marked "no-inlining" so that we don't stack-spill spans
      // into our immediate caller. Spilling would enlarge the method prolog of what is
      // supposed to be a very fast path.
      Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar < originalCharCount, "Invalid arguments provided to method.");

      // Wrap the raw buffer and skip over the portion that has already been counted.
      ReadOnlySpan<char> remainingChars = new ReadOnlySpan<char>(pCharsOriginal, originalCharCount).Slice(charsConsumedSoFar);

      return GetByteCountWithFallback(remainingChars, originalCharCount, encoder: null);
  }
  /// <summary>
  /// Gets the number of <see langword="byte"/>s that would result from transcoding the provided
  /// input data when an <see cref="EncoderNLS"/> is associated with the operation.
  /// <paramref name="pOriginalChars"/> and <paramref name="originalCharCount"/> describe the original
  /// input; <paramref name="charsConsumedSoFar"/> marks where in that buffer this method starts.
  /// It first (non-destructively) accounts for leftover data held by the <see cref="EncoderNLS"/>
  /// instance, then invokes the <see cref="GetByteCountFast"/> virtual method, and finally calls
  /// <see cref="GetByteCountWithFallback(ReadOnlySpan{char}, int, EncoderNLS)"/> if input remains.
  /// </summary>
  /// <returns>
  /// The total number of bytes that would result from transcoding the remaining portion of the source buffer.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the return value would exceed <see cref="int.MaxValue"/>.
  /// (The implementation should call <see cref="ThrowConversionOverflow"/>.)
  /// </exception>
  private unsafe int GetByteCountWithFallback(char* pOriginalChars, int originalCharCount, int charsConsumedSoFar, EncoderNLS encoder)
  {
      Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
      Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar <= originalCharCount, "Caller should've checked this condition.");

      ReadOnlySpan<char> remaining = new ReadOnlySpan<char>(pOriginalChars, originalCharCount).Slice(charsConsumedSoFar);

      // Step 1: account for whatever the encoder instance is still holding on to.
      int byteTally = encoder.DrainLeftoverDataForGetByteCount(remaining, out int consumed);
      remaining = remaining.Slice(consumed);

      // Step 2: run the "fast path" (no fallback) implementation over what is left.
      // Unsafe.AsPointer is acceptable here because the span was built from a raw pointer.
      byteTally += GetByteCountFast(
          pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(remaining)),
          charsLength: remaining.Length,
          fallback: encoder.Fallback,
          charsConsumed: out consumed);

      if (byteTally < 0)
      {
          ThrowConversionOverflow();
      }

      remaining = remaining.Slice(consumed);

      // Step 3: nothing left means we're done; otherwise take the fallback path.
      if (remaining.IsEmpty)
      {
          return byteTally;
      }

      byteTally += GetByteCountWithFallback(remaining, originalCharCount, encoder);

      if (byteTally < 0)
      {
          ThrowConversionOverflow();
      }

      return byteTally;
  }
  /// <summary>
  /// Counts the number of bytes that would result from transcoding the provided chars,
  /// running data the fast path cannot handle through an <see cref="EncoderFallbackBuffer"/>.
  /// </summary>
  /// <returns>
  /// The byte count resulting from transcoding the input data.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the resulting byte count is greater than <see cref="int.MaxValue"/>.
  /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
  /// </exception>
  private protected virtual unsafe int GetByteCountWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, EncoderNLS? encoder)
  {
      Debug.Assert(!chars.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
      Debug.Assert(originalCharsLength >= 0, "Caller provided invalid parameter.");

      // The loop below passes pointers obtained through Unsafe.AsPointer to the fast path,
      // so make sure the underlying data is pinned for the duration.
      fixed (char* _pChars_Unused = &MemoryMarshal.GetReference(chars))
      {
          EncoderFallbackBuffer fallbackBuffer = EncoderFallbackBuffer.CreateAndInitialize(this, encoder, originalCharsLength);
          int byteTally = 0;

          do
          {
              // Data is still pending, so the previous fast-path call stopped early. Either
              // (a) the source holds invalid / incomplete UTF-16 data, or (b) the encoding
              // can't translate the scalar value at the front of the buffer.
              OperationStatus decodeStatus = Rune.DecodeFromUtf16(chars, out _, out int consumed);

              if (decodeStatus == OperationStatus.NeedMoreData && encoder != null && !encoder.MustFlush)
              {
                  // A standalone high surrogate sits at the end of the buffer and the active
                  // EncoderNLS instance isn't asking us to flush. GetBytes would have consumed
                  // this char by storing it in EncoderNLS._charLeftOver, so we "consume" it by
                  // ignoring it; the next call to GetBytes will pick it up correctly.
                  break;
              }

              // Invalid UTF-16 data, a high surrogate that must be flushed (and is therefore
              // treated as invalid), or valid UTF-16 data this encoder doesn't support:
              // in every case the data goes through the fallback mechanism.
              int stepByteCount = fallbackBuffer.InternalFallbackGetByteCount(chars, out consumed);

              Debug.Assert(stepByteCount >= 0, "Fallback shouldn't have returned a negative value.");
              Debug.Assert(consumed >= 0, "Fallback shouldn't have returned a negative value.");

              byteTally += stepByteCount;
              if (byteTally < 0)
              {
                  ThrowConversionOverflow();
              }

              chars = chars.Slice(consumed);

              if (!chars.IsEmpty)
              {
                  // More input remains: let the fast path run until it hits the next piece of
                  // data needing fallback. Overflow is re-checked on every addition because
                  // fallbacks can change the total byte count in unexpected ways.
                  stepByteCount = GetByteCountFast(
                      pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars)),
                      charsLength: chars.Length,
                      fallback: null, // already tried this earlier and we still fell down the common path, so skip from now on
                      charsConsumed: out consumed);

                  Debug.Assert(stepByteCount >= 0, "Workhorse shouldn't have returned a negative value.");
                  Debug.Assert(consumed >= 0, "Workhorse shouldn't have returned a negative value.");

                  byteTally += stepByteCount;
                  if (byteTally < 0)
                  {
                      ThrowConversionOverflow();
                  }

                  chars = chars.Slice(consumed);
              }
          } while (!chars.IsEmpty);

          Debug.Assert(fallbackBuffer.Remaining == 0, "There should be no data in the fallback buffer after GetByteCount.");

          return byteTally;
      }
  }
  334. /*
  335. * GETBYTES FAMILY OF FUNCTIONS
  336. */
  /// <summary>
  /// Entry point from <see cref="EncoderNLS.GetBytes"/> and <see cref="EncoderNLS.Convert"/>.
  /// Tries the fast path when the encoder holds no leftover data, and otherwise (or when the
  /// fast path stops early) continues through the drain + fallback path.
  /// </summary>
  internal virtual unsafe int GetBytes(char* pChars, int charCount, byte* pBytes, int byteCount, EncoderNLS? encoder)
  {
      Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
      Debug.Assert(charCount >= 0, "Caller should've checked this condition.");
      Debug.Assert(pChars != null || charCount == 0, "Cannot provide a null pointer and a non-zero count.");
      Debug.Assert(byteCount >= 0, "Caller should've checked this condition.");
      Debug.Assert(pBytes != null || byteCount == 0, "Cannot provide a null pointer and a non-zero count.");

      // Staying on the fast path requires two things: nothing left over on the encoder,
      // and a single fast-path call that transcodes the whole source buffer. When either
      // condition fails we pay for spans, draining the EncoderNLS instance, and fallback.
      int fastBytesWritten = 0;
      int fastCharsConsumed = 0;

      if (!encoder.HasLeftoverData)
      {
          fastBytesWritten = GetBytesFast(pChars, charCount, pBytes, byteCount, out fastCharsConsumed);

          if (fastCharsConsumed == charCount)
          {
              // Everything was transcoded in one go; record that on the encoder and finish.
              encoder._charsUsed = charCount;
              return fastBytesWritten;
          }
      }

      // Either the encoder had leftover data or part of the input is still unconsumed:
      // continue with the draining + fallback machinery from wherever the fast path stopped.
      return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, fastCharsConsumed, fastBytesWritten, encoder);
  }
  /// <summary>
  /// Transcodes <see langword="char"/>s to <see langword="byte"/>s, exiting when the source or destination
  /// buffer is consumed or when the first unreadable data is encountered.
  /// </summary>
  /// <returns>
  /// Via <paramref name="charsConsumed"/>, the number of elements from <paramref name="pChars"/> which
  /// were consumed; and returns the number of elements written to <paramref name="pBytes"/>.
  /// </returns>
  /// <remarks>
  /// The implementation should not attempt to perform any sort of fallback behavior.
  /// If custom fallback behavior is necessary, override <see cref="GetBytesWithFallback"/>.
  /// </remarks>
  private protected virtual unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed)
  {
      // Any production-quality type would override this method and provide a real
      // implementation, so we won't provide a base implementation. However, a
      // non-shipping slow reference implementation is provided below for convenience.
#if false
      ReadOnlySpan<char> chars = new ReadOnlySpan<char>(pChars, charsLength);
      Span<byte> bytes = new Span<byte>(pBytes, bytesLength);

      while (!chars.IsEmpty)
      {
          // Uses Rune.DecodeFromUtf16, the same decoding API the live fallback loop in this file calls.
          if (Rune.DecodeFromUtf16(chars, out Rune scalarValue, out int charsConsumedJustNow) != OperationStatus.Done
              || EncodeRune(scalarValue, bytes, out int bytesWrittenJustNow) != OperationStatus.Done)
          {
              // Invalid UTF-16 data, or not convertible to target encoding, or destination buffer too small to contain encoded value
              break;
          }

          chars = chars.Slice(charsConsumedJustNow);
          bytes = bytes.Slice(bytesWrittenJustNow);
      }

      charsConsumed = charsLength - chars.Length; // number of chars consumed across all loop iterations above
      return bytesLength - bytes.Length; // number of bytes written across all loop iterations above
#else
      Debug.Fail("This should be overridden by a subclassed type.");
      throw NotImplemented.ByDesign;
#endif
  }
  /// <summary>
  /// Transcodes chars to bytes when no <see cref="EncoderNLS"/> is involved. The first four arguments
  /// describe the original input, while <paramref name="charsConsumedSoFar"/> and
  /// <paramref name="bytesWrittenSoFar"/> mark where in the provided buffers the fallback loop
  /// should begin operating. The method forwards to the <see cref="GetBytesWithFallback"/>
  /// virtual method as overridden by the specific type, and failing that goes down the shared fallback path.
  /// </summary>
  /// <returns>
  /// The total number of bytes written to <paramref name="pOriginalBytes"/>, including <paramref name="bytesWrittenSoFar"/>.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the destination buffer is not large enough to hold the entirety of the transcoded data.
  /// </exception>
  [MethodImpl(MethodImplOptions.NoInlining)]
  private protected unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCount, byte* pOriginalBytes, int originalByteCount, int charsConsumedSoFar, int bytesWrittenSoFar)
  {
      // This stub is deliberately marked "no-inlining" so that we don't stack-spill spans
      // into our immediate caller. Spilling would enlarge the method prolog of what is
      // supposed to be a very fast path.
      Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar < originalCharCount, "Invalid arguments provided to method.");
      Debug.Assert(0 <= bytesWrittenSoFar && bytesWrittenSoFar <= originalByteCount, "Invalid arguments provided to method.");

      // Wrap both raw buffers and skip over the portions that were already processed.
      ReadOnlySpan<char> remainingChars = new ReadOnlySpan<char>(pOriginalChars, originalCharCount).Slice(charsConsumedSoFar);
      Span<byte> remainingBytes = new Span<byte>(pOriginalBytes, originalByteCount).Slice(bytesWrittenSoFar);

      return GetBytesWithFallback(remainingChars, originalCharCount, remainingBytes, originalByteCount, encoder: null);
  }
  /// <summary>
  /// Transcodes chars to bytes when an <see cref="EncoderNLS"/> is associated with the operation.
  /// The first four arguments describe the original input, while <paramref name="charsConsumedSoFar"/>
  /// and <paramref name="bytesWrittenSoFar"/> mark where in the provided buffers this method starts.
  /// It drains any leftover data in the <see cref="EncoderNLS"/> instance, then invokes the
  /// <see cref="GetBytesFast"/> virtual method, and finally calls
  /// <see cref="GetBytesWithFallback(ReadOnlySpan{char}, int, Span{byte}, int, EncoderNLS)"/> if input remains.
  /// </summary>
  /// <returns>
  /// The total number of bytes written to <paramref name="pOriginalBytes"/>, including <paramref name="bytesWrittenSoFar"/>.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the destination buffer is too small to make any forward progress at all, or if the destination buffer is
  /// too small to contain the entirety of the transcoded data and the <see cref="EncoderNLS"/> instance disallows
  /// partial transcoding.
  /// </exception>
  private unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCount, byte* pOriginalBytes, int originalByteCount, int charsConsumedSoFar, int bytesWrittenSoFar, EncoderNLS encoder)
  {
      Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
      Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar <= originalCharCount, "Caller should've checked this condition.");
      Debug.Assert(0 <= bytesWrittenSoFar && bytesWrittenSoFar <= originalByteCount, "Caller should've checked this condition.");

      ReadOnlySpan<char> remainingChars = new ReadOnlySpan<char>(pOriginalChars, originalCharCount).Slice(charsConsumedSoFar);
      Span<byte> remainingBytes = new Span<byte>(pOriginalBytes, originalByteCount).Slice(bytesWrittenSoFar);

      // Step 1: drain whatever the encoder instance is still holding on to. Even an
      // unfinished drain may have made some progress, so both spans are always advanced.
      bool drained = encoder.TryDrainLeftoverDataForGetBytes(remainingChars, remainingBytes, out int charsStep, out int bytesStep);
      remainingChars = remainingChars.Slice(charsStep);
      remainingBytes = remainingBytes.Slice(bytesStep);

      if (drained)
      {
          // Step 2: run the "fast path" (no fallback) implementation over what is left.
          // Unsafe.AsPointer is acceptable here because the spans were built from raw pointers.
          bytesStep = GetBytesFast(
              pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(remainingChars)),
              charsLength: remainingChars.Length,
              pBytes: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(remainingBytes)),
              bytesLength: remainingBytes.Length,
              charsConsumed: out charsStep);

          remainingChars = remainingChars.Slice(charsStep);
          remainingBytes = remainingBytes.Slice(bytesStep);

          // Step 3: source data still pending means the fallback path has to take over.
          if (!remainingChars.IsEmpty)
          {
              // Optimistically report that everything is being used; the fallback
              // worker overwrites this field if that turns out not to be the case.
              encoder._charsUsed = originalCharCount;
              return GetBytesWithFallback(remainingChars, originalCharCount, remainingBytes, originalByteCount, encoder);
          }
      }
      else
      {
          // The drain could not complete; continuing to the workhorse methods is pointless.
          ThrowBytesOverflow(encoder, nothingEncoded: remainingBytes.Length == originalByteCount); // might not throw if we wrote at least one byte
      }

      encoder._charsUsed = originalCharCount - remainingChars.Length; // total number of characters consumed up until now
      return originalByteCount - remainingBytes.Length; // total number of bytes written up until now
  }
  /// <summary>
  /// Slow-path chars-to-bytes transcoding loop. It alternates between the fallback mechanism
  /// (<see cref="Encoding.EncoderFallback"/>, or <see cref="Encoder.Fallback"/> when an
  /// <see cref="EncoderNLS"/> is supplied) and the fast-path workhorse until the source buffer
  /// is depleted or no further progress can be made.
  /// </summary>
  /// <returns>
  /// The total number of bytes written so far, measured against <paramref name="originalBytesLength"/>
  /// (i.e., <paramref name="originalBytesLength"/> minus the space still unused in <paramref name="bytes"/>).
  /// </returns>
  /// <remarks>
  /// A derived class should override this method if it can offer a more optimized fallback
  /// implementation, deferring to this base implementation when needed. This method calls
  /// <see cref="ThrowBytesOverflow"/> if source data or fallback data is left over once the loop ends.
  /// </remarks>
  private protected virtual unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS? encoder)
  {
      Debug.Assert(!chars.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
      Debug.Assert(originalCharsLength >= 0, "Caller provided invalid parameter.");
      Debug.Assert(originalBytesLength >= 0, "Caller provided invalid parameter.");

      // The loop hands raw pointers (obtained through Unsafe.AsPointer) to the fast path,
      // so both buffers are pinned for the whole duration of the loop.
      fixed (char* _pChars_Unused = &MemoryMarshal.GetReference(chars))
      fixed (byte* _pBytes_Unused = &MemoryMarshal.GetReference(bytes))
      {
          EncoderFallbackBuffer fallbackBuffer = EncoderFallbackBuffer.CreateAndInitialize(this, encoder, originalCharsLength);

          while (true)
          {
              // We only get here because the preceding fast path stopped short. Work out why:
              // (a) invalid or incomplete UTF-16 in the source, (b) a scalar value the encoding
              // can't represent, or (c) the destination buffer simply ran out of room.
              OperationStatus decodeStatus = Rune.DecodeFromUtf16(chars, out Rune scalar, out int charsTaken);

              if (decodeStatus == OperationStatus.NeedMoreData)
              {
                  Debug.Assert(charsTaken == chars.Length, "If returning NeedMoreData, should out the entire buffer length as chars consumed.");

                  if (encoder != null && !encoder.MustFlush)
                  {
                      // A non-flushing encoder keeps the dangling high surrogate for the next call.
                      encoder._charLeftOver = chars[0];
                      chars = ReadOnlySpan<char>.Empty;
                      break;
                  }

                  // No encoder, or the encoder must flush: the incomplete data is handled
                  // exactly like invalid data (see comment in GetByteCountWithFallback).
              }
              else if (decodeStatus != OperationStatus.InvalidData)
              {
                  if (EncodeRune(scalar, bytes, out _) == OperationStatus.DestinationTooSmall)
                  {
                      // Valid UTF-16 input, but there's no room left in the destination buffer.
                      break;
                  }

                  // Valid UTF-16 input, but the encoding doesn't support this scalar value.
              }

              // The input can't be transcoded directly by this encoding; send it through the fallback.
              bool fallbackCompleted = fallbackBuffer.TryInternalFallbackGetBytes(chars, bytes, out charsTaken, out int bytesEmitted);

              // Whether or not the fallback completed, account for the chars it consumed
              // and for any bytes it managed to write.
              chars = chars.Slice(charsTaken);
              bytes = bytes.Slice(bytesEmitted);

              if (!fallbackCompleted)
              {
                  break; // fallback has pending state - it'll get written out on the next GetBytes call
              }

              if (chars.IsEmpty)
              {
                  break; // source fully consumed
              }

              // More input remains: let the fast path run up to the next spot that needs a fallback.
              char* pRemainingChars = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars));
              byte* pRemainingBytes = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes));

              bytesEmitted = GetBytesFast(
                  pChars: pRemainingChars,
                  charsLength: chars.Length,
                  pBytes: pRemainingBytes,
                  bytesLength: bytes.Length,
                  charsConsumed: out charsTaken);

              Debug.Assert(bytesEmitted >= 0, "Workhorse shouldn't have returned a negative value.");
              Debug.Assert(charsTaken >= 0, "Workhorse shouldn't have returned a negative value.");

              chars = chars.Slice(charsTaken);
              bytes = bytes.Slice(bytesEmitted);

              if (chars.IsEmpty)
              {
                  break;
              }
          }

          // The source or the destination buffer has been depleted. If the source is fully
          // consumed and nothing is pending in the fallback buffer, transcoding is complete.
          // Otherwise we ran out of space in the destination buffer. That is unrecoverable when
          // no EncoderNLS is in use (only EncoderNLS can handle partial success), and with an
          // EncoderNLS it is recoverable only if that instance allows partial completion.
          bool inputOrFallbackDataPending = !chars.IsEmpty || fallbackBuffer.Remaining > 0;
          if (inputOrFallbackDataPending)
          {
              // This also throws when no progress at all was possible because the output buffer
              // couldn't hold the result of even a single scalar conversion or fallback.
              ThrowBytesOverflow(encoder, nothingEncoded: bytes.Length == originalBytesLength);
          }

          // With an active EncoderNLS, record the total number of chars consumed.
          if (encoder != null)
          {
              Debug.Assert(originalCharsLength >= chars.Length, "About to report a negative number of chars used?");
              encoder._charsUsed = originalCharsLength - chars.Length;
          }

          Debug.Assert(fallbackBuffer.Remaining == 0 || encoder != null, "Shouldn't have any leftover data in fallback buffer unless an EncoderNLS is in use.");

          return originalBytesLength - bytes.Length;
      }
  }
  590. /*
  591. * GETCHARCOUNT FAMILY OF FUNCTIONS
  592. */
  /// <summary>
  /// Entry point from <see cref="DecoderNLS.GetCharCount"/>. Counts the chars that decoding
  /// <paramref name="byteCount"/> bytes starting at <paramref name="pBytes"/> would produce.
  /// </summary>
  internal virtual unsafe int GetCharCount(byte* pBytes, int byteCount, DecoderNLS? decoder)
  {
      Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
      Debug.Assert(byteCount >= 0, "Caller should've checked this condition.");
      Debug.Assert(pBytes != null || byteCount == 0, "Cannot provide a null pointer and a non-zero count.");

      // The fast path applies only when the decoder has nothing left over to drain AND a single
      // fast-path call consumes the whole input. In every other case we take the slow path,
      // which creates spans, drains the DecoderNLS instance, and runs the fallback logic.

      Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "Fallback buffer can't hold data between GetChars invocations.");

      int charTally;

      if (decoder.HasLeftoverData)
      {
          // Leftover data present: nothing has been counted or consumed yet, so the
          // draining + fallback path starts from the beginning of the buffer.
          charTally = GetCharCountWithFallback(pBytes, byteCount, 0, decoder);
      }
      else
      {
          int fastPathCount = GetCharCountFast(pBytes, byteCount, decoder.Fallback, out int fastPathBytesConsumed);
          if (fastPathBytesConsumed == byteCount)
          {
              return fastPathCount; // entire input handled by the fast path
          }

          // The fast path stopped early; the fallback path picks up where it left off.
          charTally = fastPathCount + GetCharCountWithFallback(pBytes, byteCount, fastPathBytesConsumed, decoder);
      }

      // A negative tally means the running sum wrapped past int.MaxValue.
      if (charTally < 0)
      {
          ThrowConversionOverflow();
      }

      return charTally;
  }
  /// <summary>
  /// Fast-path counting routine: tallies the number of <see langword="char"/>s that transcoding
  /// the source data would produce, stopping when the source buffer is consumed or at the first
  /// unreadable data. The implementation may inspect <paramref name="fallback"/> to short-circuit
  /// the counting operation, but it should not attempt to call
  /// <see cref="DecoderFallback.CreateFallbackBuffer"/>.
  /// </summary>
  /// <returns>
  /// The transcoded char count up to the stopping point; and, via <paramref name="bytesConsumed"/>,
  /// the number of elements from <paramref name="pBytes"/> which were consumed.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the char count would be greater than <see cref="int.MaxValue"/>.
  /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
  /// </exception>
  /// <remarks>
  /// The implementation should not attempt to perform any sort of fallback behavior.
  /// If custom fallback behavior is necessary, override <see cref="GetCharCountWithFallback"/>.
  /// </remarks>
  private protected virtual unsafe int GetCharCountFast(byte* pBytes, int bytesLength, DecoderFallback? fallback, out int bytesConsumed)
  {
      // No base implementation is provided: any production-quality type is expected to
      // override this method. A slow, non-shipping reference implementation is kept in the
      // disabled region below for convenience.

#if false
      ReadOnlySpan<byte> remaining = new ReadOnlySpan<byte>(pBytes, bytesLength);
      int charTally = 0;

      while (!remaining.IsEmpty)
      {
          // Only Done lets us keep going. Any other status is left for the fallback mechanism.
          OperationStatus status = DecodeFirstRune(remaining, out Rune rune, out int consumedNow);
          if (status != OperationStatus.Done)
          {
              break;
          }

          charTally += rune.Utf16SequenceLength;
          if (charTally < 0)
          {
              ThrowConversionOverflow();
          }

          remaining = remaining.Slice(consumedNow);
      }

      bytesConsumed = bytesLength - remaining.Length; // number of bytes consumed across all loop iterations above
      return charTally;
#else
      Debug.Fail("This should be overridden by a subclassed type.");
      throw NotImplemented.ByDesign;
#endif
  }
  /// <summary>
  /// Counts the chars that would result from transcoding the provided bytes when there is
  /// no associated <see cref="DecoderNLS"/>. The first two arguments describe the original
  /// input as it was before this method was invoked; <paramref name="bytesConsumedSoFar"/>
  /// marks the position in that buffer where the fallback loop should begin operating.
  /// </summary>
  /// <returns>
  /// The char count resulting from transcoding the input data.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the resulting char count is greater than <see cref="int.MaxValue"/>.
  /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
  /// </exception>
  [MethodImpl(MethodImplOptions.NoInlining)] // don't stack spill spans into our caller
  private protected unsafe int GetCharCountWithFallback(byte* pBytesOriginal, int originalByteCount, int bytesConsumedSoFar)
  {
      // Deliberately a non-inlined stub: the span is materialized here so that it isn't
      // stack-spilled into the immediate caller, which would enlarge the method prolog of
      // what's supposed to be a very fast path.

      Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Invalid arguments provided to method.");

      ReadOnlySpan<byte> unconsumedBytes = new ReadOnlySpan<byte>(pBytesOriginal, originalByteCount).Slice(bytesConsumedSoFar);

      return GetCharCountWithFallback(unconsumedBytes, originalByteCount, decoder: null);
  }
  /// <summary>
  /// Gets the number of <see langword="char"/>s that would result from transcoding the provided
  /// input data, with an associated <see cref="DecoderNLS"/>. The first two arguments describe the
  /// original input as it was before this method was invoked; <paramref name="bytesConsumedSoFar"/>
  /// marks the position in the source buffer where the fallback loop should begin operating.
  /// The method first consumes (non-destructively) any leftover data in the <see cref="DecoderNLS"/>
  /// instance, then invokes the <see cref="GetCharCountFast"/> virtual method, and finally calls
  /// <see cref="GetCharCountWithFallback(ReadOnlySpan{byte}, int, DecoderNLS)"/> if input remains.
  /// </summary>
  /// <returns>
  /// The total number of chars that would result from transcoding the remaining portion of the source buffer.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the return value would exceed <see cref="int.MaxValue"/>.
  /// (The implementation should call <see cref="ThrowConversionOverflow"/>.)
  /// </exception>
  private unsafe int GetCharCountWithFallback(byte* pOriginalBytes, int originalByteCount, int bytesConsumedSoFar, DecoderNLS decoder)
  {
      Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
      Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar <= originalByteCount, "Caller should've checked this condition.");

      ReadOnlySpan<byte> remaining = new ReadOnlySpan<byte>(pOriginalBytes, originalByteCount).Slice(bytesConsumedSoFar);
      int charTally = 0;

      // Step 1: account for any data already held by the decoder instance.
      if (decoder.HasLeftoverData)
      {
          charTally = decoder.DrainLeftoverDataForGetCharCount(remaining, out int drainedByteCount);
          remaining = remaining.Slice(drainedByteCount);
      }

      // Step 2: the "fast path" (no fallback) implementation.
      // Unsafe.AsPointer is fine here because the span wraps pinned data (a raw pointer).
      byte* pRemaining = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(remaining));

      charTally += GetCharCountFast(
          pBytes: pRemaining,
          bytesLength: remaining.Length,
          fallback: decoder.Fallback,
          bytesConsumed: out int fastPathBytesConsumed);

      if (charTally < 0)
      {
          ThrowConversionOverflow();
      }

      remaining = remaining.Slice(fastPathBytesConsumed);

      // Step 3: nothing left in the source buffer means we're finished.
      if (remaining.IsEmpty)
      {
          return charTally;
      }

      // Otherwise the rest of the input goes down the fallback path.
      charTally += GetCharCountWithFallback(remaining, originalByteCount, decoder);

      if (charTally < 0)
      {
          ThrowConversionOverflow();
      }

      return charTally;
  }
  /// <summary>
  /// Slow-path counting loop: counts the chars that would result from transcoding the provided
  /// bytes, running undecodable data through a <see cref="DecoderFallbackBuffer"/> as needed.
  /// </summary>
  /// <returns>
  /// The char count resulting from transcoding the input data.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the resulting char count is greater than <see cref="int.MaxValue"/>.
  /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
  /// </exception>
  private unsafe int GetCharCountWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, DecoderNLS? decoder)
  {
      Debug.Assert(!bytes.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
      Debug.Assert(originalBytesLength >= 0, "Caller provided invalid parameter.");

      // The loop hands a raw pointer (obtained through Unsafe.AsPointer) to the fast path,
      // so the buffer is pinned for the whole duration of the loop.
      fixed (byte* _pBytes_Unused = &MemoryMarshal.GetReference(bytes))
      {
          DecoderFallbackBuffer fallbackBuffer = DecoderFallbackBuffer.CreateAndInitialize(this, decoder, originalBytesLength);
          int charTally = 0;

          while (true)
          {
              // We only get here because the preceding fast path stopped short. There are two
              // possible reasons: (a) the source contained invalid data, or (b) it ended with
              // incomplete data.
              OperationStatus decodeStatus = DecodeFirstRune(bytes, out _, out int bytesTaken);

              bool incompleteTailCanWait = decodeStatus == OperationStatus.NeedMoreData
                  && decoder != null
                  && !decoder.MustFlush;

              if (incompleteTailCanWait)
              {
                  // Incomplete data at the end of the buffer, and the active DecoderNLS instance
                  // isn't asking us to flush. A call to GetChars would've consumed this data by
                  // storing it in the DecoderNLS instance, so we "consume" it here by ignoring it.
                  // The next call to GetChars will pick it up correctly.
                  break;
              }

              // Either the data is invalid, or it is incomplete and must be flushed (and is
              // therefore treated as invalid). Both cases go through the fallback mechanism.
              int charsThisStep = fallbackBuffer.InternalFallbackGetCharCount(bytes, bytesTaken);
              Debug.Assert(charsThisStep >= 0, "Fallback shouldn't have returned a negative value.");

              charTally += charsThisStep;
              if (charTally < 0)
              {
                  ThrowConversionOverflow();
              }

              bytes = bytes.Slice(bytesTaken);
              if (bytes.IsEmpty)
              {
                  break; // source fully consumed
              }

              // More input remains: let the fast path run up to the next spot that needs a
              // fallback. The tally is re-checked for integer overflow after every addition
              // because fallbacks can change the total char count in unexpected ways.
              byte* pRemaining = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes));

              charsThisStep = GetCharCountFast(
                  pBytes: pRemaining,
                  bytesLength: bytes.Length,
                  fallback: null, // wasn't able to be short-circuited by our caller; don't bother trying again
                  bytesConsumed: out bytesTaken);

              Debug.Assert(charsThisStep >= 0, "Workhorse shouldn't have returned a negative value.");
              Debug.Assert(bytesTaken >= 0, "Workhorse shouldn't have returned a negative value.");

              charTally += charsThisStep;
              if (charTally < 0)
              {
                  ThrowConversionOverflow();
              }

              bytes = bytes.Slice(bytesTaken);
              if (bytes.IsEmpty)
              {
                  break;
              }
          }

          Debug.Assert(fallbackBuffer.Remaining == 0, "There should be no data in the fallback buffer after GetCharCount.");

          return charTally;
      }
  }
  820. /*
  821. * GETCHARS FAMILY OF FUNCTIONS
  822. */
  /// <summary>
  /// Entry point from <see cref="DecoderNLS.GetChars"/> and <see cref="DecoderNLS.Convert"/>.
  /// Decodes <paramref name="byteCount"/> bytes from <paramref name="pBytes"/> into the
  /// <paramref name="charCount"/>-element buffer at <paramref name="pChars"/>.
  /// </summary>
  internal virtual unsafe int GetChars(byte* pBytes, int byteCount, char* pChars, int charCount, DecoderNLS? decoder)
  {
      Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
      Debug.Assert(byteCount >= 0, "Caller should've checked this condition.");
      Debug.Assert(pBytes != null || byteCount == 0, "Cannot provide a null pointer and a non-zero count.");
      Debug.Assert(charCount >= 0, "Caller should've checked this condition.");
      Debug.Assert(pChars != null || charCount == 0, "Cannot provide a null pointer and a non-zero count.");

      // The fast path applies only when the decoder has nothing left over to drain AND a single
      // fast-path call transcodes the whole input. In every other case we take the slow path,
      // which creates spans, drains the DecoderNLS instance, and runs the fallback logic.

      if (decoder.HasLeftoverData)
      {
          // Leftover data present: nothing has been consumed or written yet.
          return GetCharsWithFallback(pBytes, byteCount, pChars, charCount, 0, 0, decoder);
      }

      int fastPathCharsWritten = GetCharsFast(pBytes, byteCount, pChars, charCount, out int fastPathBytesConsumed);

      if (fastPathBytesConsumed != byteCount)
      {
          // The fast path couldn't consume the entire input; the draining + fallback
          // path continues from where it stopped.
          return GetCharsWithFallback(pBytes, byteCount, pChars, charCount, fastPathBytesConsumed, fastPathCharsWritten, decoder);
      }

      // Everything was consumed by the fast path; report that back through the decoder.
      decoder._bytesUsed = byteCount;
      return fastPathCharsWritten;
  }
  /// <summary>
  /// Fast-path transcoding routine: converts <see langword="byte"/>s to <see langword="char"/>s,
  /// stopping when the source or destination buffer is consumed or at the first unreadable data.
  /// </summary>
  /// <returns>
  /// The number of elements written to <paramref name="pChars"/>; and, via
  /// <paramref name="bytesConsumed"/>, the number of elements from <paramref name="pBytes"/>
  /// which were consumed.
  /// </returns>
  /// <remarks>
  /// The implementation should not attempt to perform any sort of fallback behavior.
  /// If custom fallback behavior is necessary, override <see cref="GetCharsWithFallback"/>.
  /// </remarks>
  private protected virtual unsafe int GetCharsFast(byte* pBytes, int bytesLength, char* pChars, int charsLength, out int bytesConsumed)
  {
      // No base implementation is provided: any production-quality type is expected to
      // override this method. A slow, non-shipping reference implementation is kept in the
      // disabled region below for convenience.

#if false
      ReadOnlySpan<byte> source = new ReadOnlySpan<byte>(pBytes, bytesLength);
      Span<char> destination = new Span<char>(pChars, charsLength);

      while (!source.IsEmpty)
      {
          if (DecodeFirstRune(source, out Rune rune, out int consumedNow) != OperationStatus.Done)
          {
              break; // invalid or incomplete binary data
          }

          if (!rune.TryEncode(destination, out int writtenNow))
          {
              break; // destination buffer too small to contain decoded value
          }

          source = source.Slice(consumedNow);
          destination = destination.Slice(writtenNow);
      }

      bytesConsumed = bytesLength - source.Length; // number of bytes consumed across all loop iterations above
      return charsLength - destination.Length; // number of chars written across all loop iterations above
#else
      Debug.Fail("This should be overridden by a subclassed type.");
      throw NotImplemented.ByDesign;
#endif
  }
  /// <summary>
  /// Transcodes bytes to chars when there is no associated <see cref="DecoderNLS"/>. The first four
  /// arguments describe the original input as it was before this method was invoked;
  /// <paramref name="bytesConsumedSoFar"/> and <paramref name="charsWrittenSoFar"/> mark the positions
  /// in the provided buffers where the fallback loop should begin operating. The method calls the
  /// <see cref="GetCharsWithFallback"/> virtual method as overridden by the specific type, and failing
  /// that goes down the shared fallback path.
  /// </summary>
  /// <returns>
  /// The total number of chars written to <paramref name="pOriginalChars"/>, including <paramref name="charsWrittenSoFar"/>.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the destination buffer is not large enough to hold the entirety of the transcoded data.
  /// </exception>
  [MethodImpl(MethodImplOptions.NoInlining)]
  private protected unsafe int GetCharsWithFallback(byte* pOriginalBytes, int originalByteCount, char* pOriginalChars, int originalCharCount, int bytesConsumedSoFar, int charsWrittenSoFar)
  {
      // Deliberately a non-inlined stub: the spans are materialized here so that they aren't
      // stack-spilled into the immediate caller, which would enlarge the method prolog of
      // what's supposed to be a very fast path.

      Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Invalid arguments provided to method.");
      Debug.Assert(0 <= charsWrittenSoFar && charsWrittenSoFar <= originalCharCount, "Invalid arguments provided to method.");

      ReadOnlySpan<byte> unconsumedBytes = new ReadOnlySpan<byte>(pOriginalBytes, originalByteCount).Slice(bytesConsumedSoFar);
      Span<char> unwrittenChars = new Span<char>(pOriginalChars, originalCharCount).Slice(charsWrittenSoFar);

      return GetCharsWithFallback(unconsumedBytes, originalByteCount, unwrittenChars, originalCharCount, decoder: null);
  }
  /// <summary>
  /// Transcodes bytes to chars, with an associated <see cref="DecoderNLS"/>. The first four arguments
  /// describe the original input as it was before this method was invoked;
  /// <paramref name="bytesConsumedSoFar"/> and <paramref name="charsWrittenSoFar"/> mark the positions
  /// in the provided buffers where the fallback loop should begin operating. The method first drains
  /// any leftover data in the <see cref="DecoderNLS"/> instance, then invokes the
  /// <see cref="GetCharsFast"/> virtual method, and finally calls
  /// <see cref="GetCharsWithFallback(ReadOnlySpan{byte}, int, Span{char}, int, DecoderNLS)"/> if input remains.
  /// </summary>
  /// <returns>
  /// The total number of chars written to <paramref name="pOriginalChars"/>, including <paramref name="charsWrittenSoFar"/>.
  /// </returns>
  /// <exception cref="ArgumentException">
  /// If the destination buffer is too small to make any forward progress at all, or if the destination buffer is
  /// too small to contain the entirety of the transcoded data and the <see cref="DecoderNLS"/> instance disallows
  /// partial transcoding.
  /// </exception>
  private protected unsafe int GetCharsWithFallback(byte* pOriginalBytes, int originalByteCount, char* pOriginalChars, int originalCharCount, int bytesConsumedSoFar, int charsWrittenSoFar, DecoderNLS decoder)
  {
      Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
      Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar <= originalByteCount, "Caller should've checked this condition.");
      Debug.Assert(0 <= charsWrittenSoFar && charsWrittenSoFar <= originalCharCount, "Caller should've checked this condition.");

      ReadOnlySpan<byte> source = new ReadOnlySpan<byte>(pOriginalBytes, originalByteCount).Slice(bytesConsumedSoFar);
      Span<char> destination = new Span<char>(pOriginalChars, originalCharCount).Slice(charsWrittenSoFar);

      // Step 1: drain any data that already exists on the decoder instance.
      //
      // As with GetBytes, the DecoderNLS instance may be holding leftover data. Unlike GetBytes,
      // the bytes -> chars conversion doesn't allow leftover data in the fallback buffer, so the
      // drain either succeeds fully or fails; there is no partial-success condition as with the
      // chars -> bytes conversion. The drain method throws if the destination buffer lacks space.
      if (decoder.HasLeftoverData)
      {
          int drainedCharsWritten = decoder.DrainLeftoverDataForGetChars(source, destination, out int drainedBytesConsumed);
          source = source.Slice(drainedBytesConsumed);
          destination = destination.Slice(drainedCharsWritten);
      }

      Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "Should be no remaining fallback data at this point.");

      // Step 2: the "fast path" (no fallback buffer) implementation.
      // Unsafe.AsPointer is fine here because the spans wrap pinned data (raw pointers).
      byte* pSource = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
      char* pDestination = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(destination));

      int fastPathCharsWritten = GetCharsFast(
          pBytes: pSource,
          bytesLength: source.Length,
          pChars: pDestination,
          charsLength: destination.Length,
          bytesConsumed: out int fastPathBytesConsumed);

      source = source.Slice(fastPathBytesConsumed);
      destination = destination.Slice(fastPathCharsWritten);

      // Optimistically report that the entire input was used; the span-based
      // GetCharsWithFallback overload will overwrite this field if necessary.
      decoder._bytesUsed = originalByteCount;

      // Step 3: if input remains, hand it to the fallback loop; otherwise report the
      // total number of chars written.
      return source.IsEmpty
          ? originalCharCount - destination.Length
          : GetCharsWithFallback(source, originalByteCount, destination, originalCharCount, decoder);
  }
  980. /// <summary>
  981. /// Transcodes bytes to chars, using <see cref="Encoding.DecoderFallback"/> or <see cref="Decoder.Fallback"/> if needed.
  982. /// </summary>
  983. /// <returns>
  984. /// The total number of chars written to <paramref name="chars"/> (based on <paramref name="originalCharsLength"/>).
  985. /// </returns>
  986. /// <remarks>
  987. /// The derived class should override this method if it might be able to provide a more optimized fallback
  988. /// implementation, deferring to the base implementation if needed. This method calls <see cref="ThrowCharsOverflow"/>
  989. /// if necessary.
  990. /// </remarks>
  private protected virtual unsafe int GetCharsWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, Span<char> chars, int originalCharsLength, DecoderNLS? decoder)
  {
      Debug.Assert(!bytes.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
      Debug.Assert(originalBytesLength >= 0, "Caller provided invalid parameter.");
      Debug.Assert(originalCharsLength >= 0, "Caller provided invalid parameter.");

      // The loop below passes raw pointers (obtained via Unsafe.AsPointer) to GetCharsFast,
      // so both buffers stay pinned for the whole body of this method.
      fixed (byte* _pBytes_Unused = &MemoryMarshal.GetReference(bytes))
      fixed (char* _pChars_Unused = &MemoryMarshal.GetReference(chars))
      {
          DecoderFallbackBuffer fallbackBuffer = DecoderFallbackBuffer.CreateAndInitialize(this, decoder, originalBytesLength);

          do
          {
              // Source data is still pending, which means the preceding fast-path stopped short.
              // Either the next bytes are invalid, or they are an incomplete sequence.
              int fallbackCharsWritten;

              switch (DecodeFirstRune(bytes, out _, out int fallbackBytesConsumed))
              {
                  case OperationStatus.NeedMoreData:
                      Debug.Assert(fallbackBytesConsumed == bytes.Length, "If returning NeedMoreData, should out the entire buffer length as bytes consumed.");

                      if (decoder != null && !decoder.MustFlush)
                      {
                          // A non-flushing decoder is present: stash the partial sequence on it,
                          // treat the source as fully consumed, and wrap up.
                          decoder.SetLeftoverData(bytes);
                          bytes = ReadOnlySpan<byte>.Empty;
                          goto Finish;
                      }

                      // No decoder, or the decoder must flush: handle the incomplete sequence
                      // as invalid data (see comment in GetCharCountWithFallback).
                      goto case OperationStatus.InvalidData;

                  case OperationStatus.InvalidData:
                      if (!fallbackBuffer.TryInternalFallbackGetChars(bytes, fallbackBytesConsumed, chars, out fallbackCharsWritten))
                      {
                          // The fallback produced output that does not fit in the destination.
                          // Leave the offending bytes unconsumed and let the epilogue deal with it.
                          goto Finish;
                      }

                      // The bad bytes went through the fallback and its output was written.
                      Debug.Assert(fallbackCharsWritten >= 0, "Fallback shouldn't have returned a negative value.");
                      break;

                  default:
                      // The input itself is fine, so the destination must have been too small.
                      goto Finish;
              }

              // Step both buffers past what the fallback just handled.
              bytes = bytes.Slice(fallbackBytesConsumed);
              chars = chars.Slice(fallbackCharsWritten);

              if (bytes.IsEmpty)
              {
                  break; // source exhausted; nothing left for the fast-path
              }

              // More input remains: let the fast-path run until it hits the next spot
              // that needs the fallback (or until it cannot make progress).
              byte* pRemainingBytes = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes));
              char* pRemainingChars = (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars));

              int fastCharsWritten = GetCharsFast(
                  pBytes: pRemainingBytes,
                  bytesLength: bytes.Length,
                  pChars: pRemainingChars,
                  charsLength: chars.Length,
                  bytesConsumed: out int fastBytesConsumed);

              Debug.Assert(fastCharsWritten >= 0, "Workhorse shouldn't have returned a negative value.");
              Debug.Assert(fastBytesConsumed >= 0, "Workhorse shouldn't have returned a negative value.");

              bytes = bytes.Slice(fastBytesConsumed);
              chars = chars.Slice(fastCharsWritten);
          } while (!bytes.IsEmpty);

      Finish:

          // Control arrives here once the source or the destination has been depleted. The main
          // comment at the end of GetBytesWithFallback explains the logic below; the key difference
          // is that GetChars does not permit leftover data in the fallback buffer between calls.
          Debug.Assert(fallbackBuffer.Remaining == 0);

          if (!bytes.IsEmpty)
          {
              // Unconsumed input remains. The helper is told whether no chars at all were written,
              // i.e. the output buffer could not hold even one scalar conversion or fallback;
              // in that situation it throws.
              bool nothingWritten = chars.Length == originalCharsLength;
              ThrowCharsOverflow(decoder, nothingDecoded: nothingWritten);
          }

          // When a DecoderNLS instance is in play, record how many bytes this call consumed.
          if (decoder != null)
          {
              Debug.Assert(originalBytesLength >= bytes.Length, "About to report a negative number of bytes used?");
              decoder._bytesUsed = originalBytesLength - bytes.Length;
          }

          // Total chars written = original destination capacity minus what is still free.
          return originalCharsLength - chars.Length;
      }
  }
  1076. }
  1077. }