Character.cs 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. using System.Diagnostics;
  2. using System.Runtime.CompilerServices;
  3. namespace Jint.Extensions
  4. {
  /// <summary>
  /// Extension methods that classify a single <see cref="char"/> as an ASCII
  /// octal, decimal or hexadecimal digit.
  /// </summary>
  internal static class Character
  {
      /// <summary>
      /// Tests whether <paramref name="c"/> lies in the inclusive range
      /// [<paramref name="min"/>, <paramref name="max"/>] using a single unsigned comparison.
      /// </summary>
      /// <param name="c">The character to test.</param>
      /// <param name="min">Inclusive lower bound; must not exceed <paramref name="max"/>.</param>
      /// <param name="max">Inclusive upper bound.</param>
      /// <returns><c>true</c> when <c>min &lt;= c &lt;= max</c>; otherwise <c>false</c>.</returns>
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      private static bool IsInRange(this char c, ushort min, ushort max)
      {
          Debug.Assert(min <= max);

          // When c < min the unsigned subtraction wraps around to a very large value,
          // which then fails the comparison. The wraparound is intentional, so it is
          // marked unchecked to keep the result independent of the project's
          // overflow-checking setting.
          unchecked
          {
              return c - (uint) min <= max - (uint) min;
          }
      }

      /// <summary>Returns <c>true</c> when <paramref name="c"/> is one of '0'..'7'.</summary>
      /// <param name="c">The character to test.</param>
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      public static bool IsOctalDigit(this char c) => c.IsInRange('0', '7');

      /// <summary>Returns <c>true</c> when <paramref name="c"/> is one of '0'..'9'.</summary>
      /// <param name="c">The character to test.</param>
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      public static bool IsDecimalDigit(this char c) => c.IsInRange('0', '9');

      /// <summary>
      /// Returns <c>true</c> when <paramref name="c"/> is an ASCII hexadecimal digit
      /// ('0'..'9', 'A'..'F' or 'a'..'f').
      /// </summary>
      /// <param name="c">The character to test.</param>
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      public static bool IsHexDigit(this char c)
      {
          // NOTE (from the original author): on 32-bit architectures this is not optimal and a
          // lookup table is supposed to be faster. To keep things simple the same method is used
          // regardless of CPU architecture; if performance needs to be improved further, the
          // lookup approach can be ported from Esprima.HexConverter.
          //
          // The test below is a 64-bit bitmap lookup: it needs no table in memory and no
          // per-range comparisons.
          //
          // The bitmap (0xFFC07E0000007E00 in hex) has a 1 for every valid hex character
          // ("0123456789ABCDEFabcdef"). The character with offset k = ch - '0' is stored at
          // bit (63 - k), i.e. counting down from the most significant bit. Subtracting '0'
          // (48) is what makes the whole '0'..'f' span fit into 64 bits.
          const ulong hexDigitBitmap = 18428868213665201664UL;

          // Everything below relies on silent wraparound and on truncating / sign-reinterpreting
          // casts, so it is explicitly unchecked: in a checked context these operations would
          // throw OverflowException instead of producing the intended bit patterns.
          unchecked
          {
              // Subtract '0' from the input as a 32-bit unsigned value. If this wraps around
              // (c < '0'), it does so before the zero-extension to 64 bits, so the upper 32 bits
              // of `offset` are always 0.
              ulong offset = (uint) c - '0';

              // Move the bitmap bit that belongs to `offset` into the top (sign) position.
              // The shift count is taken modulo 64, so offsets >= 64 can alias a set bit;
              // the range mask below filters those false positives out.
              ulong shifted = hexDigitBitmap << (int) offset;

              // offset - 64 wraps to a value with the top bit set if and only if offset is in
              // [0, 64), i.e. c is in ['0', '0' + 64).
              ulong inRangeMask = offset - 64;

              // The sign bit survives the AND only when the bitmap bit was set AND the offset
              // was in range; reading the result as signed turns that into "less than zero".
              // NOTE(review): the redundant-looking `? true : false` is kept from the original;
              // presumably a JIT code-generation hint - confirm before simplifying.
              return (long) (shifted & inRangeMask) < 0 ? true : false;
          }
      }
  }
  43. }