// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics.X86; namespace System.Text { internal static partial class ASCIIUtility { /// /// A mask which selects only the high bit of each byte of the given . /// private const uint UInt32HighBitsOnlyMask = 0x80808080u; /// /// A mask which selects only the high bit of each byte of the given . /// private const ulong UInt64HighBitsOnlyMask = 0x80808080_80808080ul; /// /// Returns iff all bytes in are ASCII. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static bool AllBytesInUInt32AreAscii(uint value) { // If the high bit of any byte is set, that byte is non-ASCII. return (value & UInt32HighBitsOnlyMask) == 0; } /// /// Given a DWORD which represents a four-byte buffer read in machine endianness, and which /// the caller has asserted contains a non-ASCII byte *somewhere* in the data, counts the /// number of consecutive ASCII bytes starting from the beginning of the buffer. Returns /// a value 0 - 3, inclusive. (The caller is responsible for ensuring that the buffer doesn't /// contain all-ASCII data.) /// [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static uint CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(uint value) { Debug.Assert(!AllBytesInUInt32AreAscii(value), "Caller shouldn't provide an all-ASCII value."); // Use BMI1 directly rather than going through BitOperations. We only see a perf gain here // if we're able to emit a real tzcnt instruction; the software fallback used by BitOperations // is too slow for our purposes since we can provide our own faster, specialized software fallback. if (Bmi1.IsSupported) { Debug.Assert(BitConverter.IsLittleEndian); return Bmi1.TrailingZeroCount(value & UInt32HighBitsOnlyMask) >> 3; } // Couldn't emit tzcnt, use specialized software fallback. // The 'allBytesUpToNowAreAscii' DWORD uses bit twiddling to hold a 1 or a 0 depending // on whether all processed bytes were ASCII. Then we accumulate all of the // results to calculate how many consecutive ASCII bytes are present. value = ~value; if (BitConverter.IsLittleEndian) { // Read first byte value >>= 7; uint allBytesUpToNowAreAscii = value & 1; uint numAsciiBytes = allBytesUpToNowAreAscii; // Read second byte value >>= 8; allBytesUpToNowAreAscii &= value; numAsciiBytes += allBytesUpToNowAreAscii; // Read third byte value >>= 8; allBytesUpToNowAreAscii &= value; numAsciiBytes += allBytesUpToNowAreAscii; return numAsciiBytes; } else { // BinaryPrimitives.ReverseEndianness is only implemented as an intrinsic on // little-endian platforms, so using it in this big-endian path would be too // expensive. Instead we'll just change how we perform the shifts. // Read first byte value = BitOperations.RotateLeft(value, 1); uint allBytesUpToNowAreAscii = value & 1; uint numAsciiBytes = allBytesUpToNowAreAscii; // Read second byte value = BitOperations.RotateLeft(value, 8); allBytesUpToNowAreAscii &= value; numAsciiBytes += allBytesUpToNowAreAscii; // Read third byte value = BitOperations.RotateLeft(value, 8); allBytesUpToNowAreAscii &= value; numAsciiBytes += allBytesUpToNowAreAscii; return numAsciiBytes; } } } }