Convert.Base64.cs 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Runtime.CompilerServices;
  6. using System.Runtime.InteropServices;
  7. using Internal.Runtime.CompilerServices;
  8. namespace System
  9. {
  10. public static partial class Convert
  11. {
  /// <summary>
  /// Fast-path decoder that turns a span of UTF-16 encoded base 64 text into binary data.
  /// Input is decoded in groups of four chars; decoding stops at the first group that cannot be decoded,
  /// so on failure <paramref name="consumed"/> is always a multiple of 4. That invariant lets the caller
  /// resume from <paramref name="consumed"/> with a slower, whitespace-tolerant algorithm.
  /// </summary>
  /// <param name="utf16">The input span which contains UTF-16 encoded text in base 64 that needs to be decoded.</param>
  /// <param name="bytes">The output span which receives the decoded binary data.</param>
  /// <param name="consumed">The number of input chars consumed during the operation. This can be used to slice the input for subsequent calls, if necessary.</param>
  /// <param name="written">The number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary.</param>
  /// <returns>
  /// <c>true</c> if the entire input span was successfully decoded (an empty input is trivially successful);
  /// <c>false</c> if only a part of it was. Failure causes include embedded or trailing whitespace, other
  /// illegal Base64 characters, trailing characters after an encoding pad ('='), an input span whose length
  /// is not divisible by 4, or a destination span that is too small.
  /// </returns>
  /// <remarks>
  /// This is a cut down version of the implementation of Base64.DecodeFromUtf8(), modified to accept UTF-16 chars
  /// and act as a fast-path helper for the Convert routines when the input string contains no whitespace.
  /// </remarks>
  private static bool TryDecodeFromUtf16(ReadOnlySpan<char> utf16, Span<byte> bytes, out int consumed, out int written)
  {
      ref char source = ref MemoryMarshal.GetReference(utf16);
      ref byte destination = ref MemoryMarshal.GetReference(bytes);

      // Only whole groups of four chars are decoded; a leftover remainder is reported as a failure at the very end.
      int wholeGroupsLength = utf16.Length & ~0x3;
      int capacity = bytes.Length;

      int charsRead = 0;
      int bytesStored = 0;

      if (utf16.Length == 0)
      {
          goto Succeeded;
      }

      ref sbyte map = ref s_decodingMap[0];

      // The final group may contain padding characters, so the bulk loop stops short of it and it is decoded separately.
      const int FinalGroupLength = 4;
      int finalGroupStart = wholeGroupsLength - FinalGroupLength;

      // When the destination cannot hold three bytes per group, decode only as many groups as fit.
      // (capacity / 3) * 4 cannot overflow: in that case capacity is below (wholeGroupsLength >> 2) * 3,
      // which is at most int.MaxValue / 4 * 3 (i.e. 1610612733).
      int bulkEnd = capacity >= (wholeGroupsLength >> 2) * 3
          ? finalGroupStart
          : (capacity / 3) * 4;

      for (; charsRead < bulkEnd; charsRead += 4)
      {
          int triple = Decode(ref Unsafe.Add(ref source, charsRead), ref map);
          if (triple < 0)
          {
              goto Failed;
          }

          WriteThreeLowOrderBytes(ref Unsafe.Add(ref destination, bytesStored), triple);
          bytesStored += 3;
      }

      // bulkEnd != finalGroupStart: the destination was too small, so the bulk loop stopped early.
      // charsRead == wholeGroupsLength: only possible when both are 0, i.e. the input has fewer than four chars
      // and there is no complete final group to decode.
      if (bulkEnd != finalGroupStart || charsRead == wholeGroupsLength)
      {
          goto Failed;
      }

      int c0 = Unsafe.Add(ref source, finalGroupStart);
      int c1 = Unsafe.Add(ref source, finalGroupStart + 1);
      int c2 = Unsafe.Add(ref source, finalGroupStart + 2);
      int c3 = Unsafe.Add(ref source, finalGroupStart + 3);

      // Chars above 0xFF would index past the 256-entry map and can never be Base64 characters.
      if ((c0 | c1 | c2 | c3) > byte.MaxValue)
      {
          goto Failed;
      }

      int s0 = Unsafe.Add(ref map, c0);
      int s1 = Unsafe.Add(ref map, c1);
      int packed = (s0 << 18) | (s1 << 12);
      int tailByteCount;

      if (c3 != EncodingPad)
      {
          // No padding: a full group yielding three bytes.
          int s2 = Unsafe.Add(ref map, c2);
          int s3 = Unsafe.Add(ref map, c3);
          packed |= (s2 << 6) | s3;
          tailByteCount = 3;
      }
      else if (c2 != EncodingPad)
      {
          // One pad char: two bytes.
          int s2 = Unsafe.Add(ref map, c2);
          packed |= s2 << 6;
          tailByteCount = 2;
      }
      else
      {
          // Two pad chars: a single byte.
          tailByteCount = 1;
      }

      // An invalid char maps to -1, which leaves the sign bit set in the packed value.
      if (packed < 0 || bytesStored > capacity - tailByteCount)
      {
          goto Failed;
      }

      ref byte tail = ref Unsafe.Add(ref destination, bytesStored);
      if (tailByteCount == 3)
      {
          WriteThreeLowOrderBytes(ref tail, packed);
      }
      else
      {
          tail = (byte)(packed >> 16);
          if (tailByteCount == 2)
          {
              Unsafe.Add(ref tail, 1) = (byte)(packed >> 8);
          }
      }

      bytesStored += tailByteCount;
      charsRead += 4;

      // One to three chars left over after the last whole group: the input length was not a multiple of 4.
      if (wholeGroupsLength != utf16.Length)
      {
          goto Failed;
      }

  Succeeded:
      consumed = charsRead;
      written = bytesStored;
      return true;

  Failed:
      consumed = charsRead;
      written = bytesStored;
      Debug.Assert((consumed % 4) == 0);
      return false;
  }
  /// <summary>
  /// Decodes the four chars starting at <paramref name="encodedChars"/> into a single value whose
  /// low-order 24 bits hold the three decoded bytes.
  /// </summary>
  /// <param name="encodedChars">Reference to the first of four consecutive chars to decode.</param>
  /// <param name="decodingMap">Reference to the first entry of a table, indexed by char code 0..255, giving each char's 6-bit value or a negative number.</param>
  /// <returns>
  /// The packed 24-bit value, or a negative number when a char lies outside 00..ff or maps to a negative table entry.
  /// </returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static int Decode(ref char encodedChars, ref sbyte decodingMap)
  {
      int c0 = Unsafe.Add(ref encodedChars, 0);
      int c1 = Unsafe.Add(ref encodedChars, 1);
      int c2 = Unsafe.Add(ref encodedChars, 2);
      int c3 = Unsafe.Add(ref encodedChars, 3);

      // One or more chars falls outside the 00..ff range. This cannot be a valid Base64 character,
      // and it must not be used to index the table.
      if ((c0 | c1 | c2 | c3) > byte.MaxValue)
      {
          return -1;
      }

      int s0 = Unsafe.Add(ref decodingMap, c0);
      int s1 = Unsafe.Add(ref decodingMap, c1);
      int s2 = Unsafe.Add(ref decodingMap, c2);
      int s3 = Unsafe.Add(ref decodingMap, c3);

      // A negative table entry is sign-extended, so it forces the combined result negative.
      return (s0 << 18) | (s1 << 12) | (s2 << 6) | s3;
  }
  /// <summary>
  /// Stores the low-order 24 bits of <paramref name="value"/> as three bytes, most significant first.
  /// </summary>
  /// <param name="destination">Reference to the first of three consecutive bytes to overwrite.</param>
  /// <param name="value">The value to store; bits 24 and above are ignored.</param>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static void WriteThreeLowOrderBytes(ref byte destination, int value)
  {
      byte high = (byte)(value >> 16);
      byte middle = (byte)(value >> 8);
      byte low = (byte)value;

      Unsafe.Add(ref destination, 0) = high;
      Unsafe.Add(ref destination, 1) = middle;
      Unsafe.Add(ref destination, 2) = low;
  }
  /// <summary>
  /// Decoding table indexed by character code (0..255). A Base64 alphabet character maps to its
  /// 6-bit value (0..63); every other code, including the pad character '=' at index 61, maps to -1.
  /// </summary>
  /// <remarks>
  /// Pre-computed using a custom string (s_characters) and GenerateDecodingMapAndVerify (found in tests).
  /// </remarks>
  private static readonly sbyte[] s_decodingMap =
  {
      // 0x00-0x1F: no Base64 characters
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

      // 0x20-0x2F: '+' (index 43) -> 62, '/' (index 47) -> 63
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,

      // 0x30-0x3F: '0'-'9' (index 48-57) -> 52-61; '=' (index 61) stays -1
      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,

      // 0x40-0x5F: 'A'-'Z' (index 65-90) -> 0-25
      -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

      // 0x60-0x7F: 'a'-'z' (index 97-122) -> 26-51
      -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,

      // 0x80-0xFF: codes above 122 ('z') are invalid and cannot be decoded, so the rest of the map is -1
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  };

  /// <summary>The Base64 padding character '=', as a byte.</summary>
  private const byte EncodingPad = (byte)'=';
  181. }
  182. }