using System.Buffers;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using Jint.Runtime;

namespace Jint.Native.Json
{
    /// <summary>
    /// Hand-written tokenizer and recursive-descent parser that converts JSON text
    /// into <see cref="JsValue"/> instances. Syntax errors are reported through
    /// <c>ExceptionHelper.ThrowSyntaxError</c> with the offending source position.
    /// </summary>
    public sealed class JsonParser
    {
        private readonly Engine _engine;
        private readonly int _maxDepth;

        /// <summary>
        /// Creates a new parser using the recursion depth specified in
        /// <c>engine.Options.Json.MaxParseDepth</c>.
        /// </summary>
        /// <param name="engine">Engine whose realm is used for created values and errors.</param>
        public JsonParser(Engine engine)
            : this(engine, engine.Options.Json.MaxParseDepth)
        {
        }

        /// <summary>
        /// Creates a new parser with an explicit nesting limit.
        /// </summary>
        /// <param name="engine">Engine whose realm is used for created values and errors.</param>
        /// <param name="maxDepth">Maximum nesting depth of arrays/objects; must not be negative.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxDepth"/> is negative.</exception>
        public JsonParser(Engine engine, int maxDepth)
        {
            if (maxDepth < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(maxDepth), "Max depth must be greater or equal to zero");
            }

            _maxDepth = maxDepth;
            _engine = engine;

            // Two tokens are "live" during parsing: the lookahead and the one
            // currently being consumed. As a safety margin against overwriting
            // a token that is still in use, the ring buffer holds 5 tokens
            // instead of 2.
            _tokenBuffer = new Token[5];
            for (int i = 0; i < _tokenBuffer.Length; i++)
            {
                _tokenBuffer[i] = new Token();
            }

            _tokenBufferIndex = 0;
        }

        private int _index; // position in the stream
        private int _length; // length of the stream
        private Token _lookahead = null!;
        private string _source = null!;

        private readonly Token[] _tokenBuffer;
        private int _tokenBufferIndex;

        /// <summary>Returns true when <paramref name="ch"/> is an ASCII digit '0'..'9'.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool IsDecimalDigit(char ch)
        {
            // * For characters before '0' the subtraction is negative and wraps
            //   around to a large value because of the unsigned (uint) cast.
            // * For characters after '9' the result is positive, but > 9.
            // * For digits the result is between 0 and 9.
            return ((uint) (ch - '0')) <= 9;
        }

        /// <summary>Returns true when <paramref name="ch"/> is 'a'..'f'.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool IsLowerCaseHexAlpha(char ch)
        {
            return ((uint) (ch - 'a')) <= 5;
        }

        /// <summary>Returns true when <paramref name="ch"/> is 'A'..'F'.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool IsUpperCaseHexAlpha(char ch)
        {
            return ((uint) (ch - 'A')) <= 5;
        }

        /// <summary>Returns true when <paramref name="ch"/> is a hexadecimal digit in either case.</summary>
        private static bool IsHexDigit(char ch)
        {
            return IsDecimalDigit(ch) || IsLowerCaseHexAlpha(ch) || IsUpperCaseHexAlpha(ch);
        }

        /// <summary>Returns true for the four whitespace characters skipped between tokens.</summary>
        private static bool IsWhiteSpace(char ch)
        {
            return (ch == ' ') || (ch == '\t') || (ch == '\n') || (ch == '\r');
        }

        /// <summary>Returns true for LF, CR, U+2028 and U+2029.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool IsLineTerminator(char ch)
        {
            return (ch == 10) || (ch == 13) || (ch == 0x2028) || (ch == 0x2029);
        }

        /// <summary>
        /// Reads the four hexadecimal digits of a <c>\uXXXX</c> escape starting at the
        /// current index and returns the decoded UTF-16 code unit.
        /// </summary>
        private char ScanHexEscape()
        {
            int code = char.MinValue;

            for (int i = 0; i < 4; ++i)
            {
                // The bound must be strictly "< _length": reading _source[_length]
                // would throw IndexOutOfRangeException on a truncated escape
                // instead of reporting a syntax error.
                if (_index < _length && IsHexDigit(_source[_index]))
                {
                    char ch = char.ToLower(_source[_index++], CultureInfo.InvariantCulture);
                    code = code * 16 + "0123456789abcdef".IndexOf(ch);
                }
                else
                {
                    ThrowError(_index, Messages.ExpectedHexadecimalDigit);
                }
            }

            return (char) code;
        }

        /// <summary>
        /// Skips whitespace starting at the current index and returns the first
        /// non-whitespace character, or <see cref="char.MinValue"/> at end of input.
        /// </summary>
        private char ReadToNextSignificantCharacter()
        {
            char result = _index < _length ? _source[_index] : char.MinValue;
            while (IsWhiteSpace(result))
            {
                if ((++_index) >= _length)
                {
                    return char.MinValue;
                }

                result = _source[_index];
            }

            return result;
        }

        /// <summary>
        /// Fills the next slot of the token ring buffer with the given data and returns it.
        /// The returned instance is reused once the ring buffer wraps around.
        /// </summary>
        private Token CreateToken(Tokens type, string text, char firstCharacter, JsValue value, in TextRange range)
        {
            Token result = _tokenBuffer[_tokenBufferIndex++];
            if (_tokenBufferIndex >= _tokenBuffer.Length)
            {
                _tokenBufferIndex = 0;
            }

            result.Type = type;
            result.Text = text;
            result.FirstCharacter = firstCharacter;
            result.Value = value;
            result.Range = range;
            return result;
        }

        /// <summary>Scans a single-character punctuator at the current index.</summary>
        private Token ScanPunctuator()
        {
            int start = _index;
            char code = start < _source.Length ? _source[_index] : char.MinValue;

            // Throws for any character that is not a known punctuator.
            string value = ScanPunctuatorValue(start, code);
            ++_index;
            return CreateToken(Tokens.Punctuator, value, code, JsValue.Undefined, new TextRange(start, _index));
        }

        /// <summary>
        /// Maps a punctuator character to its (non-allocated) string form, or throws
        /// an "unexpected token" syntax error for anything else.
        /// </summary>
        private string ScanPunctuatorValue(int start, char code)
        {
            switch (code)
            {
                case '.': return ".";
                case ',': return ",";
                case '{': return "{";
                case '}': return "}";
                case '[': return "[";
                case ']': return "]";
                case ':': return ":";
                default:
                    ThrowError(start, Messages.UnexpectedToken, code);
                    return null!;
            }
        }

        /// <summary>
        /// Scans a JSON number (optional minus, integer part, optional fraction,
        /// optional exponent) starting at the current index.
        /// </summary>
        private Token ScanNumericLiteral()
        {
            using var sb = new ValueStringBuilder(stackalloc char[64]);
            var start = _index;
            var ch = _source.CharCodeAt(_index);
            var canBeInteger = true;

            // Number starts with a '-'
            if (ch == '-')
            {
                sb.Append(ch);
                ch = _source.CharCodeAt(++_index);
            }

            if (ch != '.')
            {
                var firstCharacter = ch;
                sb.Append(ch);
                ch = _source.CharCodeAt(++_index);

                if (firstCharacter == '0')
                {
                    if (sb.Length == 1)
                    {
                        // An unsigned literal starting with '0' is always created
                        // through the double path below.
                        canBeInteger = false;
                    }

                    // A leading zero must not be followed by another digit
                    // ('09' and '-09' are both illegal in JSON).
                    if (IsDecimalDigit(ch))
                    {
                        ThrowError(_index, Messages.UnexpectedToken, ch);
                    }
                }

                while (IsDecimalDigit(ch = _source.CharCodeAt(_index)))
                {
                    sb.Append(ch);
                    _index++;
                }
            }

            if (ch == '.')
            {
                canBeInteger = false;
                sb.Append(ch);
                _index++;

                // The JSON grammar requires at least one digit after the decimal point.
                if (!IsDecimalDigit(_source.CharCodeAt(_index)))
                {
                    ThrowError(_index, Messages.UnexpectedToken, _source.CharCodeAt(_index));
                }

                while (IsDecimalDigit(ch = _source.CharCodeAt(_index)))
                {
                    sb.Append(ch);
                    _index++;
                }
            }

            if (ch is 'e' or 'E')
            {
                canBeInteger = false;
                sb.Append(ch);
                ch = _source.CharCodeAt(++_index);

                if (ch is '+' or '-')
                {
                    sb.Append(ch);
                    ch = _source.CharCodeAt(++_index);
                }

                if (IsDecimalDigit(ch))
                {
                    while (IsDecimalDigit(ch = _source.CharCodeAt(_index)))
                    {
                        sb.Append(ch);
                        _index++;
                    }
                }
                else
                {
                    ThrowError(_index, Messages.UnexpectedToken, _source.CharCodeAt(_index));
                }
            }

            var number = sb.ToString();

            JsNumber value;
            // A zero result is excluded from the integer path so that "-0" is
            // parsed as a double and keeps its sign.
            if (canBeInteger
                && long.TryParse(number, NumberStyles.Integer, CultureInfo.InvariantCulture, out var longResult)
                && longResult != 0)
            {
                value = JsNumber.Create(longResult);
            }
            else
            {
                value = new JsNumber(double.Parse(
                    number,
                    NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
                    CultureInfo.InvariantCulture));
            }

            return CreateToken(Tokens.Number, number, '\0', value, new TextRange(start, _index));
        }

        /// <summary>Scans the literal <c>true</c> or <c>false</c>; throws for anything else.</summary>
        private Token ScanBooleanLiteral()
        {
            var start = _index;
            if (ConsumeMatch("true"))
            {
                return CreateToken(Tokens.BooleanLiteral, "true", 't', JsBoolean.True, new TextRange(start, _index));
            }

            if (ConsumeMatch("false"))
            {
                return CreateToken(Tokens.BooleanLiteral, "false", 'f', JsBoolean.False, new TextRange(start, _index));
            }

            ThrowError(start, Messages.UnexpectedTokenIllegal);
            return null!;
        }

        /// <summary>
        /// If the source contains <paramref name="text"/> at the current index, advances
        /// past it and returns true; otherwise leaves the index untouched and returns false.
        /// </summary>
        private bool ConsumeMatch(string text)
        {
            var start = _index;
            var length = text.Length;
            if (start + length - 1 < _source.Length && _source.AsSpan(start, length).SequenceEqual(text.AsSpan()))
            {
                _index += length;
                return true;
            }

            return false;
        }

        /// <summary>Scans the literal <c>null</c>; throws for anything else.</summary>
        private Token ScanNullLiteral()
        {
            int start = _index;
            if (ConsumeMatch("null"))
            {
                return CreateToken(Tokens.NullLiteral, "null", 'n', JsValue.Null, new TextRange(start, _index));
            }

            ThrowError(start, Messages.UnexpectedTokenIllegal);
            return null!;
        }

        /// <summary>
        /// Scans a double-quoted string literal starting at the current index and
        /// decodes its escape sequences.
        /// </summary>
        private Token ScanStringLiteral(ref State state)
        {
            char quote = _source[_index];

            int start = _index;
            ++_index;

            using var sb = new ValueStringBuilder(stackalloc char[64]);
            while (_index < _length)
            {
                char ch = _source[_index++];

                if (ch == quote)
                {
                    // Mark the literal as properly terminated.
                    quote = char.MinValue;
                    break;
                }

                if (ch <= 31)
                {
                    // Raw control characters are not allowed inside a string.
                    ThrowError(_index - 1, Messages.InvalidCharacter);
                }

                if (ch == '\\')
                {
                    ch = _source.CharCodeAt(_index++);

                    switch (ch)
                    {
                        case '"':
                            sb.Append('"');
                            break;
                        case '\\':
                            sb.Append('\\');
                            break;
                        case '/':
                            sb.Append('/');
                            break;
                        case 'n':
                            sb.Append('\n');
                            break;
                        case 'r':
                            sb.Append('\r');
                            break;
                        case 't':
                            sb.Append('\t');
                            break;
                        case 'u':
                            sb.Append(ScanHexEscape());
                            break;
                        case 'b':
                            sb.Append('\b');
                            break;
                        case 'f':
                            sb.Append('\f');
                            break;
                        default:
                            ThrowError(_index - 1, Messages.UnexpectedToken, ch);
                            break;
                    }
                }
                else if (IsLineTerminator(ch))
                {
                    break;
                }
                else
                {
                    sb.Append(ch);
                }
            }

            if (quote != 0)
            {
                // unterminated string literal
                ThrowError(_index, Messages.UnexpectedEOS);
            }

            var value = sb.ToString();
            return CreateToken(Tokens.String, value, '\"', new JsString(value), new TextRange(start, _index));
        }

        /// <summary>
        /// Skips whitespace and scans the next token, dispatching on its first character.
        /// Returns an EOF token at end of input.
        /// </summary>
        private Token Advance(ref State state)
        {
            char ch = ReadToNextSignificantCharacter();

            if (ch == char.MinValue)
            {
                return CreateToken(Tokens.EOF, string.Empty, '\0', JsValue.Undefined, new TextRange(_index, _index));
            }

            // String literal starts with double quote (#34).
            // Single quotes (#39) are not allowed in JSON.
            if (ch == '"')
            {
                return ScanStringLiteral(ref state);
            }

            if (ch == '-') // Negative number
            {
                if (IsDecimalDigit(_source.CharCodeAt(_index + 1)))
                {
                    return ScanNumericLiteral();
                }

                // A lone '-' is not a punctuator, so this reports the error.
                return ScanPunctuator();
            }

            if (IsDecimalDigit(ch))
            {
                return ScanNumericLiteral();
            }

            if (ch == 't' || ch == 'f')
            {
                return ScanBooleanLiteral();
            }

            if (ch == 'n')
            {
                return ScanNullLiteral();
            }

            return ScanPunctuator();
        }

        /// <summary>
        /// Consumes and returns the current lookahead token and scans the next one
        /// into the lookahead.
        /// </summary>
        private Token Lex(ref State state)
        {
            Token token = _lookahead;
            _index = token.Range.End;
            _lookahead = Advance(ref state);

            // Scanning the lookahead moved _index; rewind it to the end of the
            // token that is being handed out.
            _index = token.Range.End;
            return token;
        }

        /// <summary>Scans the next token into the lookahead without moving the index.</summary>
        private void Peek(ref State state)
        {
            int pos = _index;
            _lookahead = Advance(ref state);
            _index = pos;
        }

        [DoesNotReturn]
        private void ThrowDepthLimitReached(Token token)
        {
            ThrowError(token.Range.Start, Messages.MaxDepthLevelReached);
        }

        [DoesNotReturn]
        private void ThrowError(Token token, string messageFormat, params object[] arguments)
        {
            ThrowError(token.Range.Start, messageFormat, arguments);
        }

        /// <summary>
        /// Formats the message with the invariant culture, appends the source position
        /// and raises a syntax error in the engine's realm.
        /// </summary>
        [DoesNotReturn]
        private void ThrowError(int position, string messageFormat, params object[] arguments)
        {
            var msg = string.Format(CultureInfo.InvariantCulture, messageFormat, arguments);
            ExceptionHelper.ThrowSyntaxError(_engine.Realm, $"{msg} at position {position}");
        }

        /// <summary>Throws the syntax error that matches the kind of the unexpected token.</summary>
        [DoesNotReturn]
        private void ThrowUnexpected(Token token)
        {
            if (token.Type == Tokens.EOF)
            {
                ThrowError(token, Messages.UnexpectedEOS);
            }

            if (token.Type == Tokens.Number)
            {
                ThrowError(token, Messages.UnexpectedNumber);
            }

            if (token.Type == Tokens.String)
            {
                ThrowError(token, Messages.UnexpectedString);
            }

            // BooleanLiteral, NullLiteral, or Punctuator.
            ThrowError(token, Messages.UnexpectedToken, token.Text);
        }

        /// <summary>
        /// Consumes the next token and throws unless it is the specified punctuator.
        /// </summary>
        private void Expect(ref State state, char value)
        {
            Token token = Lex(ref state);
            if (token.Type != Tokens.Punctuator || value != token.FirstCharacter)
            {
                ThrowUnexpected(token);
            }
        }

        /// <summary>
        /// Returns true if the lookahead token is the specified punctuator.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool Match(char value)
        {
            return _lookahead.Type == Tokens.Punctuator && value == _lookahead.FirstCharacter;
        }

        /// <summary>Parses a JSON array; the lookahead must be the opening '['.</summary>
        private JsArray ParseJsonArray(ref State state)
        {
            if ((++state.CurrentDepth) > _maxDepth)
            {
                ThrowDepthLimitReached(_lookahead);
            }

            /*
             To speed things up, the list allocation is deferred.
             Elements are first stored in an array rented from the .NET array pool.
             If the JSON array has fewer elements than that buffer can hold, the
             Jint array is constructed directly from the buffered values.
             When the number of elements exceeds the buffer size, the elements
             list is created and filled with the buffer content. From then on the
             buffer acts as an intermediate store that is flushed to the list
             each time it is full.
             */

            List<JsValue>? elements = null;

            Expect(ref state, '[');

            int bufferIndex = 0;
            JsArray? result = null;

            JsValue[] buffer = ArrayPool<JsValue>.Shared.Rent(16);
            try
            {
                while (!Match(']'))
                {
                    buffer[bufferIndex++] = ParseJsonValue(ref state);

                    if (!Match(']'))
                    {
                        Expect(ref state, ',');
                    }

                    if (bufferIndex >= buffer.Length)
                    {
                        if (elements is null)
                        {
                            elements = new List<JsValue>(buffer);
                        }
                        else
                        {
                            elements.AddRange(buffer);
                        }

                        bufferIndex = 0;
                    }
                }

                // bufferIndex == 0 has two meanings:
                // * empty JSON array (elements is null)
                // * the buffer has just been flushed (elements is NOT null)
                if (bufferIndex > 0)
                {
                    if (elements is null)
                    {
                        // No list was needed, all values fit into the buffer.
                        // The Jint array can be constructed from a right-sized copy.
                        var data = new JsValue[bufferIndex];
                        System.Array.Copy(buffer, data, length: bufferIndex);
                        result = new JsArray(_engine, data);
                    }
                    else
                    {
                        // A list exists already; flush the remaining buffered items to it.
                        for (var i = 0; i < bufferIndex; ++i)
                        {
                            elements.Add(buffer[i]);
                        }
                    }
                }
                else if (elements is null)
                {
                    // The JSON array did not have any elements, aka: []
                    result = new JsArray(_engine);
                }
            }
            finally
            {
                // Clear on return so the shared pool does not keep the parsed
                // values reachable after this call.
                ArrayPool<JsValue>.Shared.Return(buffer, clearArray: true);
            }

            Expect(ref state, ']');
            state.CurrentDepth--;

            return result ?? new JsArray(_engine, elements!.ToArray());
        }

        /// <summary>Parses a JSON object; the lookahead must be the opening '{'.</summary>
        private JsObject ParseJsonObject(ref State state)
        {
            if ((++state.CurrentDepth) > _maxDepth)
            {
                ThrowDepthLimitReached(_lookahead);
            }

            Expect(ref state, '{');

            var obj = new JsObject(_engine);

            while (!Match('}'))
            {
                Tokens type = _lookahead.Type;
                if (type != Tokens.String)
                {
                    ThrowUnexpected(Lex(ref state));
                }

                var nameToken = Lex(ref state);
                var name = nameToken.Text;
                if (PropertyNameContainsInvalidCharacters(name))
                {
                    ThrowError(nameToken, Messages.InvalidCharacter);
                }

                Expect(ref state, ':');
                var value = ParseJsonValue(ref state);

                obj.FastSetDataProperty(name, value);

                if (!Match('}'))
                {
                    Expect(ref state, ',');
                }
            }

            Expect(ref state, '}');
            state.CurrentDepth--;

            return obj;
        }

        /// <summary>
        /// Returns true when the (already decoded) property name contains a character
        /// at or below U+001F other than a tab.
        /// </summary>
        private static bool PropertyNameContainsInvalidCharacters(string propertyName)
        {
            const char max = (char) 31;
            foreach (var c in propertyName)
            {
                if (c != '\t' && c <= max)
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Parses the value the lookahead token starts.
        /// Optimization: for literals the value was already computed when the token
        /// was scanned, so it is taken from the token instead of being parsed again.
        /// </summary>
        private JsValue ParseJsonValue(ref State state)
        {
            Tokens type = _lookahead.Type;

            switch (type)
            {
                case Tokens.NullLiteral:
                case Tokens.BooleanLiteral:
                case Tokens.String:
                case Tokens.Number:
                    return Lex(ref state).Value;

                case Tokens.Punctuator:
                    if (_lookahead.FirstCharacter == '[')
                    {
                        return ParseJsonArray(ref state);
                    }

                    if (_lookahead.FirstCharacter == '{')
                    {
                        return ParseJsonObject(ref state);
                    }

                    ThrowUnexpected(Lex(ref state));
                    break;
            }

            ThrowUnexpected(Lex(ref state));

            // can't be reached
            return JsValue.Null;
        }

        /// <summary>
        /// Parses <paramref name="code"/> as a single JSON value.
        /// </summary>
        /// <param name="code">The JSON text.</param>
        /// <returns>The parsed value.</returns>
        /// <remarks>
        /// A syntax error is raised through <c>ExceptionHelper.ThrowSyntaxError</c> when the
        /// text is malformed, nests deeper than the configured maximum depth, or has
        /// trailing content after the value.
        /// </remarks>
        public JsValue Parse(string code)
        {
            _source = code;
            _index = 0;
            _length = _source.Length;
            _lookahead = null!;

            State state = new State();

            Peek(ref state);
            JsValue jsv = ParseJsonValue(ref state);

            Peek(ref state);

            if (_lookahead.Type != Tokens.EOF)
            {
                ThrowError(_lookahead, Messages.UnexpectedToken, _lookahead.Text);
            }

            return jsv;
        }

        [StructLayout(LayoutKind.Auto)]
        private ref struct State
        {
            /// <summary>
            /// The current recursion depth
            /// </summary>
            public int CurrentDepth { get; set; }
        }

        private enum Tokens
        {
            NullLiteral,
            BooleanLiteral,
            String,
            Number,
            Punctuator,
            EOF,
        }

        /// <summary>Mutable token instance, reused through the parser's ring buffer.</summary>
        private sealed class Token
        {
            public Tokens Type;
            public char FirstCharacter;
            public JsValue Value = JsValue.Undefined;
            public string Text = null!;
            public TextRange Range;
        }

        /// <summary>Start (inclusive) and end (exclusive) index of a token in the source.</summary>
        [StructLayout(LayoutKind.Auto)]
        private readonly struct TextRange
        {
            public TextRange(int start, int end)
            {
                Start = start;
                End = end;
            }

            public int Start { get; }
            public int End { get; }
        }

        private static class Messages
        {
            public const string InvalidCharacter = "Invalid character in JSON";
            public const string ExpectedHexadecimalDigit = "Expected hexadecimal digit in JSON";
            public const string UnexpectedToken = "Unexpected token '{0}' in JSON";
            public const string UnexpectedTokenIllegal = "Unexpected token ILLEGAL in JSON";
            public const string UnexpectedNumber = "Unexpected number in JSON";
            public const string UnexpectedString = "Unexpected string in JSON";
            public const string UnexpectedEOS = "Unexpected end of JSON input";
            public const string MaxDepthLevelReached = "Max. depth level of JSON reached";
        }
    }

    internal static class StringExtensions
    {
        /// <summary>
        /// Returns the character at <paramref name="index"/>, or <see cref="char.MinValue"/>
        /// when the index is past the end of the string.
        /// </summary>
        internal static char CharCodeAt(this string source, int index)
        {
            if (index > source.Length - 1)
            {
                // char.MinValue is used as the null value
                return char.MinValue;
            }

            return source[index];
        }
    }
}