// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using Internal.Runtime.CompilerServices;
namespace System
{
public static partial class Convert
{
/// <summary>
/// Decodes a span of UTF-16 encoded base 64 text into binary data.
/// If the input length is not a multiple of 4, or the input contains illegal characters, this decodes as much as it can,
/// always stopping on a 4-character boundary of the input.
/// This invariant allows the parse to be continued with a slower, whitespace-tolerant algorithm.
/// </summary>
/// <param name="utf16">The input span which contains UTF-16 encoded text in base 64 that needs to be decoded.</param>
/// <param name="bytes">The output span which receives the result of the operation, i.e. the decoded binary data.</param>
/// <param name="consumed">The number of input characters consumed during the operation (always a multiple of 4 on failure). This can be used to slice the input for subsequent calls, if necessary.</param>
/// <param name="written">The number of bytes written into the output span. This can be used to slice the output for subsequent calls, if necessary.</param>
/// <returns>
/// - true  - The entire input span was successfully parsed.
/// - false - Only a part of the input span was successfully parsed. Failure causes may include embedded or trailing whitespace,
///           other illegal Base64 characters, trailing characters after an encoding pad ('='), an input span whose length is not divisible by 4
///           or a destination span that's too small. <paramref name="consumed"/> and <paramref name="written"/> are set so that
///           parsing can continue with a slower whitespace-tolerant algorithm.
/// </returns>
/// <remarks>
/// Note: This is a cut down version of the implementation of Base64.DecodeFromUtf8(), modified to accept UTF-16 chars and act as a fast-path
/// helper for the Convert routines when the input string contains no whitespace.
/// </remarks>
private static bool TryDecodeFromUtf16(ReadOnlySpan<char> utf16, Span<byte> bytes, out int consumed, out int written)
{
    ref char srcChars = ref MemoryMarshal.GetReference(utf16);
    ref byte destBytes = ref MemoryMarshal.GetReference(bytes);

    // Only whole 4-character groups are decoded; up to 3 trailing chars are never touched here.
    int srcLength = utf16.Length & ~0x3;
    int destLength = bytes.Length;

    int sourceIndex = 0;
    int destIndex = 0;

    // Empty input is trivially a complete (and successful) parse.
    if (utf16.Length == 0)
    {
        goto DoneExit;
    }

    ref sbyte decodingMap = ref s_decodingMap[0];

    // The last group could contain padding characters, so it is processed separately after the main loop.
    const int skipLastChunk = 4;

    int maxSrcLength;
    if (destLength >= (srcLength >> 2) * 3)
    {
        // The destination can hold every full group: the loop handles all but the final group.
        maxSrcLength = srcLength - skipLastChunk;
    }
    else
    {
        // The destination is the limiting factor: only decode as many groups as fit in it.
        // This should never overflow since destLength here is less than int.MaxValue / 4 * 3 (i.e. 1610612733)
        // Therefore, (destLength / 3) * 4 will always be less than 2147483641
        maxSrcLength = (destLength / 3) * 4;
    }

    while (sourceIndex < maxSrcLength)
    {
        int result = Decode(ref Unsafe.Add(ref srcChars, sourceIndex), ref decodingMap);
        if (result < 0)
        {
            goto InvalidExit;
        }

        WriteThreeLowOrderBytes(ref Unsafe.Add(ref destBytes, destIndex), result);
        destIndex += 3;
        sourceIndex += 4;
    }

    // The loop bound was capped by the destination size and stopped short of the final group.
    if (maxSrcLength != srcLength - skipLastChunk)
    {
        goto InvalidExit;
    }

    // At this point sourceIndex == srcLength only when srcLength == 0, i.e. the input has fewer than
    // 4 chars and there is no complete final group to decode.
    if (sourceIndex == srcLength)
    {
        goto InvalidExit;
    }

    // Decode the final group, which is the only place padding ('=') is accepted.
    int i0 = Unsafe.Add(ref srcChars, srcLength - 4);
    int i1 = Unsafe.Add(ref srcChars, srcLength - 3);
    int i2 = Unsafe.Add(ref srcChars, srcLength - 2);
    int i3 = Unsafe.Add(ref srcChars, srcLength - 1);

    // Any char outside 00..ff cannot be a Base64 character and must not be used to index the 256-entry map.
    if (((i0 | i1 | i2 | i3) & 0xffffff00) != 0)
    {
        goto InvalidExit;
    }

    i0 = Unsafe.Add(ref decodingMap, i0);
    i1 = Unsafe.Add(ref decodingMap, i1);

    i0 <<= 18;
    i1 <<= 12;

    i0 |= i1;

    if (i3 != EncodingPad)
    {
        // No padding: the group yields three bytes.
        i2 = Unsafe.Add(ref decodingMap, i2);
        i3 = Unsafe.Add(ref decodingMap, i3);

        i2 <<= 6;

        i0 |= i3;
        i0 |= i2;

        // An invalid char maps to -1, which makes the combined value negative.
        if (i0 < 0)
        {
            goto InvalidExit;
        }

        if (destIndex > destLength - 3)
        {
            goto InvalidExit;
        }

        WriteThreeLowOrderBytes(ref Unsafe.Add(ref destBytes, destIndex), i0);
        destIndex += 3;
    }
    else if (i2 != EncodingPad)
    {
        // One padding char: the group yields two bytes.
        i2 = Unsafe.Add(ref decodingMap, i2);

        i2 <<= 6;

        i0 |= i2;

        if (i0 < 0)
        {
            goto InvalidExit;
        }

        if (destIndex > destLength - 2)
        {
            goto InvalidExit;
        }

        Unsafe.Add(ref destBytes, destIndex) = (byte)(i0 >> 16);
        Unsafe.Add(ref destBytes, destIndex + 1) = (byte)(i0 >> 8);
        destIndex += 2;
    }
    else
    {
        // Two padding chars: the group yields a single byte.
        if (i0 < 0)
        {
            goto InvalidExit;
        }

        if (destIndex > destLength - 1)
        {
            goto InvalidExit;
        }

        Unsafe.Add(ref destBytes, destIndex) = (byte)(i0 >> 16);
        destIndex += 1;
    }

    sourceIndex += 4;

    // Leftover chars beyond the last full group mean the input length was not a multiple of 4.
    if (srcLength != utf16.Length)
    {
        goto InvalidExit;
    }

DoneExit:
    consumed = sourceIndex;
    written = destIndex;
    return true;

InvalidExit:
    consumed = sourceIndex;
    written = destIndex;
    Debug.Assert((consumed % 4) == 0);
    return false;
}
// Decodes the four chars starting at encodedChars into a 24-bit value using the lookup table starting at
// decodingMap. Returns -1 when any char is above 0xFF; otherwise returns the combined table values, which
// is negative whenever at least one of the looked-up entries is negative.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int Decode(ref char encodedChars, ref sbyte decodingMap)
{
    int c0 = encodedChars;
    int c1 = Unsafe.Add(ref encodedChars, 1);
    int c2 = Unsafe.Add(ref encodedChars, 2);
    int c3 = Unsafe.Add(ref encodedChars, 3);

    // A char above 0xFF can never be a valid Base64 character, and must not be used as a table index.
    int combinedChars = c0 | c1 | c2 | c3;
    if (combinedChars > byte.MaxValue)
    {
        return -1;
    }

    // Table entries are sign-extended, so a negative entry keeps the final result negative.
    int sextet0 = Unsafe.Add(ref decodingMap, c0);
    int sextet1 = Unsafe.Add(ref decodingMap, c1);
    int sextet2 = Unsafe.Add(ref decodingMap, c2);
    int sextet3 = Unsafe.Add(ref decodingMap, c3);

    return (sextet0 << 18) | (sextet1 << 12) | (sextet2 << 6) | sextet3;
}
// Stores the low 24 bits of value into three consecutive bytes starting at destination,
// most significant of the three first.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void WriteThreeLowOrderBytes(ref byte destination, int value)
{
    byte high = (byte)(value >> 16);
    byte middle = (byte)(value >> 8);
    byte low = (byte)value;

    destination = high;
    Unsafe.Add(ref destination, 1) = middle;
    Unsafe.Add(ref destination, 2) = low;
}
// Lookup table indexed by character code (0..255): each entry is the 6-bit value of that Base64 character,
// or -1 when the character is not part of the Base64 alphabet.
// Pre-computing this table using a custom string(s_characters) and GenerateDecodingMapAndVerify (found in tests)
private static readonly sbyte[] s_decodingMap = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, //62 is placed at index 43 (for +), 63 at index 47 (for /)
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index 48-57 (for 0-9); index 61 (for =) is -1, the pad char has no entry in this table
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, //0-25 are placed at index 65-90 (for A-Z)
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, //26-51 are placed at index 97-122 (for a-z)
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Values over 122 ('z') are invalid and cannot be decoded
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Hence, padding the map with -1, which indicates invalid input
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
// The Base64 padding character; compared against the raw (not yet table-mapped) trailing chars of the final group.
private const byte EncodingPad = (byte)'='; // '=', for padding
}
}