ASCIIUtility.Helpers.cs 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Numerics;
  6. using System.Runtime.CompilerServices;
  7. using System.Runtime.Intrinsics.X86;
  8. namespace System.Text
  9. {
  internal static partial class ASCIIUtility
  {
      /// <summary>
      /// Bit mask that keeps only the most significant bit of each of the four bytes
      /// packed into a <see cref="uint"/>.
      /// </summary>
      private const uint UInt32HighBitsOnlyMask = 0x80808080u;

      /// <summary>
      /// Bit mask that keeps only the most significant bit of each of the eight bytes
      /// packed into a <see cref="ulong"/>.
      /// </summary>
      private const ulong UInt64HighBitsOnlyMask = 0x80808080_80808080ul;

      /// <summary>
      /// Determines whether every one of the four bytes packed into <paramref name="value"/>
      /// is ASCII, i.e. has its high bit clear.
      /// </summary>
      /// <param name="value">Four bytes packed into a single DWORD.</param>
      /// <returns>
      /// <see langword="true"/> if no byte has its high bit set; otherwise <see langword="false"/>.
      /// </returns>
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      internal static bool AllBytesInUInt32AreAscii(uint value)
          => (value & UInt32HighBitsOnlyMask) == 0; // a set high bit marks a non-ASCII byte

      /// <summary>
      /// Counts how many ASCII bytes appear consecutively at the start of a four-byte buffer
      /// that was read as a DWORD in machine endianness.
      /// </summary>
      /// <param name="value">
      /// The DWORD to inspect. The caller must guarantee that at least one of its bytes is
      /// non-ASCII; this is only checked by a debug assertion.
      /// </param>
      /// <returns>
      /// The number of leading ASCII bytes, a value in the range 0 - 3 inclusive.
      /// </returns>
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      internal static uint CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(uint value)
      {
          Debug.Assert(!AllBytesInUInt32AreAscii(value), "Caller shouldn't provide an all-ASCII value.");

          // BMI1 is used directly instead of BitOperations: the gain only materializes when a
          // real tzcnt instruction is emitted, and the generic software fallback in BitOperations
          // is slower than the specialized one further down.
          if (Bmi1.IsSupported)
          {
              Debug.Assert(BitConverter.IsLittleEndian);

              // The lowest set marker bit sits at bit 7, 15, 23 or 31; dividing the bit index
              // by 8 turns it into the index of the first non-ASCII byte.
              uint nonAsciiMarkers = value & UInt32HighBitsOnlyMask;
              return Bmi1.TrailingZeroCount(nonAsciiMarkers) >> 3;
          }

          // No tzcnt available: specialized software fallback.
          // After inversion, a set high bit means "this byte is ASCII". Each 'through*' flag is
          // 1 only while every byte seen so far has been ASCII, so summing the three flags gives
          // the length of the leading ASCII run.
          uint asciiMarkers = ~value;

          if (!BitConverter.IsLittleEndian)
          {
              // BinaryPrimitives.ReverseEndianness is only an intrinsic on little-endian
              // platforms, so byte-swapping here would be too costly. Instead, rotate each
              // byte's high bit (most significant byte first) down into bit 0.
              uint throughFirstBE = BitOperations.RotateLeft(asciiMarkers, 1) & 1;
              uint throughSecondBE = throughFirstBE & BitOperations.RotateLeft(asciiMarkers, 9);
              uint throughThirdBE = throughSecondBE & BitOperations.RotateLeft(asciiMarkers, 17);

              return throughFirstBE + throughSecondBE + throughThirdBE;
          }

          // Little-endian: the first byte in memory is the least significant one, so shift
          // each byte's high bit (bits 7, 15, 23) down into bit 0.
          uint throughFirst = (asciiMarkers >> 7) & 1;
          uint throughSecond = throughFirst & (asciiMarkers >> 15);
          uint throughThird = throughSecond & (asciiMarkers >> 23);

          return throughFirst + throughSecond + throughThird;
      }
  }
  90. }