JsonParser.cs 25 KB


  1. using System.Globalization;
  2. using System.Runtime.CompilerServices;
  3. using Esprima;
  4. using Esprima.Ast;
  5. using Jint.Native.Object;
  6. using Jint.Pooling;
  7. using Jint.Runtime;
  8. using Range = Esprima.Range;
  9. namespace Jint.Native.Json
  10. {
  11. public class JsonParser
  12. {
  // Engine whose realm supplies the intrinsics used to build results and to raise syntax errors.
  private readonly Engine _engine;

  // Input text and scanner cursor; all of these are reset at the start of every Parse call.
  private string _source = null!;
  private int _length; // length of the stream
  private int _index; // position in the stream
  private int _lineNumber;
  private int _lineStart;

  // Most recently scanned token (one-token lookahead) and the location recorded by CollectToken.
  private Token _lookahead = null!;
  private Location _location;

  // Per-parse bookkeeping (marker stack, optional token list).
  private State _state;
  private Extra _extra = null!;

  /// <summary>
  /// Creates a parser bound to <paramref name="engine"/>.
  /// </summary>
  /// <param name="engine">Engine whose realm is used for created objects and thrown errors.</param>
  public JsonParser(Engine engine) => _engine = engine;
  /// <summary>Returns true when <paramref name="ch"/> is an ASCII digit '0'..'9'.</summary>
  private static bool IsDecimalDigit(char ch) => ch is >= '0' and <= '9';
  /// <summary>Returns true when <paramref name="ch"/> is an ASCII hexadecimal digit (0-9, a-f, A-F).</summary>
  private static bool IsHexDigit(char ch)
  {
      if (ch is >= '0' and <= '9')
      {
          return true;
      }

      if (ch is >= 'a' and <= 'f')
      {
          return true;
      }

      return ch is >= 'A' and <= 'F';
  }
  /// <summary>Returns true when <paramref name="ch"/> is an ASCII octal digit '0'..'7'.</summary>
  private static bool IsOctalDigit(char ch) => ch is >= '0' and <= '7';
  /// <summary>
  /// Returns true for the four characters treated as insignificant whitespace:
  /// space, horizontal tab, line feed and carriage return.
  /// </summary>
  private static bool IsWhiteSpace(char ch)
  {
      switch (ch)
      {
          case ' ':
          case '\t':
          case '\n':
          case '\r':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Returns true for LF (U+000A), CR (U+000D), LINE SEPARATOR (U+2028)
  /// and PARAGRAPH SEPARATOR (U+2029).
  /// </summary>
  private static bool IsLineTerminator(char ch) =>
      ch is '\n' or '\r' or '\u2028' or '\u2029';
  /// <summary>Returns true when <paramref name="ch"/> is one of the letters that make up the word "null".</summary>
  private static bool IsNullChar(char ch) => ch is 'n' or 'u' or 'l';
  /// <summary>
  /// Returns true when <paramref name="ch"/> is one of the letters that occur
  /// in the words "true" or "false".
  /// </summary>
  private static bool IsTrueOrFalseChar(char ch)
  {
      switch (ch)
      {
          case 't':
          case 'f':
          case 'r':
          case 'a':
          case 'u':
          case 'l':
          case 'e':
          case 's':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Reads exactly four hexadecimal digits starting at the current index (the
  /// payload of a <c>\uXXXX</c> escape) and returns the UTF-16 code unit they encode.
  /// </summary>
  /// <returns>The decoded character.</returns>
  /// <remarks>
  /// Reports <see cref="Messages.ExpectedHexadecimalDigit"/> through ThrowError when
  /// a digit is missing, including when the input ends in the middle of the escape.
  /// </remarks>
  private char ScanHexEscape()
  {
      int code = 0;
      for (int i = 0; i < 4; ++i)
      {
          // The bound must be strict: _source[_length] does not exist, so a
          // truncated escape has to be reported as a syntax error instead of
          // surfacing as an IndexOutOfRangeException.
          if (_index < _length && IsHexDigit(_source[_index]))
          {
              char ch = _source[_index++];

              // ch is known to be 0-9, a-f or A-F; OR-ing 0x20 folds upper case to lower case.
              int digit = ch <= '9' ? ch - '0' : (ch | 0x20) - 'a' + 10;
              code = code * 16 + digit;
          }
          else
          {
              ThrowError(_index, Messages.ExpectedHexadecimalDigit);
          }
      }

      return (char) code;
  }
  /// <summary>Advances the cursor past any run of whitespace characters at the current position.</summary>
  private void SkipWhiteSpace()
  {
      for (; _index < _length; ++_index)
      {
          if (!IsWhiteSpace(_source[_index]))
          {
              return;
          }
      }
  }
  /// <summary>
  /// Scans a single-character punctuator at the current position.
  /// </summary>
  /// <returns>A <see cref="Tokens.Punctuator"/> token covering that one character.</returns>
  /// <remarks>
  /// Any character outside the recognised set (or the end of input, represented
  /// by <see cref="char.MinValue"/>) is reported as an unexpected token.
  /// </remarks>
  private Token ScanPunctuator()
  {
      int start = _index;
      char current = start < _source.Length ? _source[_index] : char.MinValue;

      // Single-character punctuators recognised by the scanner.
      bool recognised = current is '.' or '(' or ')' or ';' or ',' or '{' or '}'
          or '[' or ']' or ':' or '?' or '~';

      if (!recognised)
      {
          ThrowError(start, Messages.UnexpectedToken, current);
          return null!;
      }

      ++_index;
      string text = TypeConverter.ToString(current);
      return new Token
      {
          Type = Tokens.Punctuator,
          Text = text,
          Value = text,
          LineNumber = _lineNumber,
          LineStart = _lineStart,
          Range = new[] { start, _index }
      };
  }
  /// <summary>
  /// Scans a numeric literal (optional minus sign, integer part, optional
  /// fraction, optional exponent) starting at the current position.
  /// </summary>
  /// <returns>
  /// A <see cref="Tokens.Number"/> token whose <c>Text</c> is the scanned
  /// source slice and whose <c>Value</c> is the parsed <see cref="double"/>.
  /// </returns>
  /// <remarks>
  /// Compared with a naive scan this enforces the JSON number grammar in two places:
  /// a leading zero may not be followed by another digit even after a minus sign
  /// ("-09"), and a decimal point must be followed by at least one digit ("1.").
  /// Violations are reported through ThrowError as unexpected tokens.
  /// </remarks>
  private Token ScanNumericLiteral()
  {
      int start = _index;
      char ch = _source.CharCodeAt(_index);

      // Optional leading minus sign.
      if (ch == '-')
      {
          ch = _source.CharCodeAt(++_index);
      }

      if (ch != '.')
      {
          // Integer part: the first character must be a digit.
          if (!IsDecimalDigit(ch))
          {
              ThrowError(_index, Messages.UnexpectedToken, ch);
          }

          char firstDigit = ch;
          ch = _source.CharCodeAt(++_index);

          // A number such as '09' or '-09' is illegal. The check looks at the
          // first digit rather than the accumulated text so the sign cannot hide it.
          if (firstDigit == '0' && IsDecimalDigit(ch))
          {
              ThrowError(_index, Messages.UnexpectedToken, ch);
          }

          while (IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              ++_index;
          }

          ch = _source.CharCodeAt(_index);
      }

      if (ch == '.')
      {
          ++_index;

          // The fraction needs at least one digit.
          if (!IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              ThrowError(_index, Messages.UnexpectedToken, _source.CharCodeAt(_index));
          }

          while (IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              ++_index;
          }

          ch = _source.CharCodeAt(_index);
      }

      if (ch == 'e' || ch == 'E')
      {
          ++_index;
          ch = _source.CharCodeAt(_index);
          if (ch == '+' || ch == '-')
          {
              ++_index;
          }

          // The exponent needs at least one digit.
          if (!IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              ThrowError(_index, Messages.UnexpectedToken, _source.CharCodeAt(_index));
          }

          while (IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              ++_index;
          }
      }

      // Slice once instead of concatenating one character at a time.
      string number = _source.Substring(start, _index - start);

      return new Token
      {
          Type = Tokens.Number,
          Text = number,
          Value = Double.Parse(number, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture),
          LineNumber = _lineNumber,
          LineStart = _lineStart,
          Range = new[] { start, _index }
      };
  }
  /// <summary>
  /// Scans the literal <c>true</c> or <c>false</c> at the current position.
  /// </summary>
  /// <returns>
  /// A <see cref="Tokens.BooleanLiteral"/> token whose <c>Value</c> is the boxed boolean.
  /// </returns>
  /// <remarks>Anything else is reported as an illegal token at the starting position.</remarks>
  private Token ScanBooleanLiteral()
  {
      int start = _index;

      bool isTrue = ConsumeMatch("true");
      if (!isTrue && !ConsumeMatch("false"))
      {
          ThrowError(start, Messages.UnexpectedTokenIllegal);
          return null!;
      }

      return new Token
      {
          Type = Tokens.BooleanLiteral,
          Text = isTrue ? "true" : "false",
          Value = isTrue,
          LineNumber = _lineNumber,
          LineStart = _lineStart,
          Range = new[] { start, _index }
      };
  }
  /// <summary>
  /// If the source contains <paramref name="text"/> exactly at the current
  /// position, moves the cursor past it.
  /// </summary>
  /// <param name="text">The literal text to look for.</param>
  /// <returns>True when the text matched and was consumed; otherwise false (cursor unchanged).</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private bool ConsumeMatch(string text)
  {
      int end = _index + text.Length;
      if (end > _source.Length)
      {
          // Not enough characters left for a match.
          return false;
      }

      if (!_source.AsSpan(_index, text.Length).SequenceEqual(text.AsSpan()))
      {
          return false;
      }

      _index = end;
      return true;
  }
  /// <summary>
  /// Scans the literal named by <c>Null.Text</c> at the current position.
  /// </summary>
  /// <returns>A <see cref="Tokens.NullLiteral"/> token whose <c>Value</c> is <c>Null.Instance</c>.</returns>
  /// <remarks>Anything else is reported as an illegal token at the starting position.</remarks>
  private Token ScanNullLiteral()
  {
      int start = _index;

      if (!ConsumeMatch(Null.Text))
      {
          ThrowError(start, Messages.UnexpectedTokenIllegal);
          return null!;
      }

      return new Token
      {
          Type = Tokens.NullLiteral,
          Text = Null.Text,
          Value = Null.Instance,
          LineNumber = _lineNumber,
          LineStart = _lineStart,
          Range = new[] { start, _index }
      };
  }
  /// <summary>
  /// Scans a quoted string literal starting at the current position and decodes
  /// its escape sequences.
  /// </summary>
  /// <returns>
  /// A <see cref="Tokens.String"/> token whose <c>Text</c> and <c>Value</c> are
  /// the decoded string contents (without the surrounding quotes).
  /// </returns>
  /// <remarks>
  /// Raw characters up to U+001F are rejected as invalid, unknown escapes are
  /// reported as unexpected tokens, and reaching the end of input before the
  /// closing quote is reported as an unexpected end of input.
  /// U+2028 and U+2029 are ordinary characters inside a JSON string and are
  /// therefore copied through instead of terminating the scan.
  /// </remarks>
  private Token ScanStringLiteral()
  {
      using var wrapper = StringBuilderPool.Rent();
      var sb = wrapper.Builder;

      char quote = _source[_index];
      int start = _index;
      ++_index;

      bool terminated = false;
      while (_index < _length)
      {
          char ch = _source[_index++];

          if (ch == quote)
          {
              terminated = true;
              break;
          }

          // Unescaped control characters are not allowed inside a string.
          if (ch <= 31)
          {
              ThrowError(_index - 1, Messages.InvalidCharacter);
          }

          if (ch != '\\')
          {
              sb.Append(ch);
              continue;
          }

          // CharCodeAt yields char.MinValue past the end of input, which falls
          // into the default branch below and is reported as an error.
          ch = _source.CharCodeAt(_index++);
          switch (ch)
          {
              case '"':
                  sb.Append('"');
                  break;
              case '\\':
                  sb.Append('\\');
                  break;
              case '/':
                  sb.Append('/');
                  break;
              case 'n':
                  sb.Append('\n');
                  break;
              case 'r':
                  sb.Append('\r');
                  break;
              case 't':
                  sb.Append('\t');
                  break;
              case 'u':
                  sb.Append(ScanHexEscape());
                  break;
              case 'b':
                  sb.Append('\b');
                  break;
              case 'f':
                  sb.Append('\f');
                  break;
              default:
                  ThrowError(_index - 1, Messages.UnexpectedToken, ch);
                  break;
          }
      }

      if (!terminated)
      {
          // unterminated string literal
          ThrowError(_index, Messages.UnexpectedEOS);
      }

      string value = sb.ToString();
      return new Token
      {
          Type = Tokens.String,
          Text = value,
          Value = value,
          LineNumber = _lineNumber,
          LineStart = _lineStart,
          Range = new[] { start, _index }
      };
  }
  /// <summary>
  /// Skips whitespace and scans the next token from the current position.
  /// </summary>
  /// <returns>
  /// The scanned token, or a <see cref="Tokens.EOF"/> token with an empty range
  /// when the end of the input has been reached.
  /// </returns>
  /// <remarks>
  /// Dispatch is by first character: a double quote starts a string, a digit or
  /// a minus sign followed by a digit starts a number, 't'/'f' start a boolean
  /// and 'n' starts null. Everything else goes to <see cref="ScanPunctuator"/>,
  /// which reports unknown characters. A leading '.' is deliberately NOT treated
  /// as the start of a number, because JSON numbers need an integer part (".5" is invalid).
  /// </remarks>
  private Token Advance()
  {
      SkipWhiteSpace();

      if (_index >= _length)
      {
          return new Token
          {
              Type = Tokens.EOF,
              LineNumber = _lineNumber,
              LineStart = _lineStart,
              Range = new[] { _index, _index }
          };
      }

      char ch = _source[_index];

      // String literals start with a double quote; single quotes are not allowed in JSON.
      if (ch == '"')
      {
          return ScanStringLiteral();
      }

      if (IsDecimalDigit(ch))
      {
          return ScanNumericLiteral();
      }

      // A minus sign only starts a number when a digit follows it.
      if (ch == '-' && IsDecimalDigit(_source.CharCodeAt(_index + 1)))
      {
          return ScanNumericLiteral();
      }

      if (ch == 't' || ch == 'f')
      {
          return ScanBooleanLiteral();
      }

      if (ch == 'n')
      {
          return ScanNullLiteral();
      }

      // Punctuators such as ( ) : , { } [ ] and every unrecognised character.
      return ScanPunctuator();
  }
  /// <summary>
  /// Scans the next token like <see cref="Advance"/>, records the source
  /// location it spans in <c>_location</c> and, unless it is the end-of-input
  /// token, appends a copy of it to <c>_extra.Tokens</c>.
  /// </summary>
  /// <returns>The token returned by <see cref="Advance"/>.</returns>
  /// <remarks>
  /// The recorded copy carries the raw source slice as both <c>Text</c> and <c>Value</c>.
  /// Callers only use this method when <c>_extra.Tokens</c> is not null.
  /// </remarks>
  private Token CollectToken()
  {
      var start = Position.From(
          line: _lineNumber,
          column: _index - _lineStart);

      Token token = Advance();

      var end = Position.From(
          line: _lineNumber,
          column: _index - _lineStart);

      _location = Location.From(start, end, _source);

      if (token.Type != Tokens.EOF)
      {
          int tokenStart = token.Range[0];
          int tokenEnd = token.Range[1];

          // Substring takes (startIndex, length), so the end index has to be
          // converted to a length; passing it directly over-reads or throws.
          var value = _source.Substring(tokenStart, tokenEnd - tokenStart);

          _extra.Tokens.Add(new Token
          {
              Type = token.Type,
              Text = value,
              Value = value,
              Range = new[] { tokenStart, tokenEnd },
          });
      }

      return token;
  }
  /// <summary>
  /// Consumes the current lookahead token and scans the one that follows it.
  /// </summary>
  /// <returns>The token that was the lookahead before the call.</returns>
  /// <remarks>
  /// The cursor is placed at the end of the consumed token both before and
  /// after scanning, so scanning the new lookahead does not move the parser's
  /// logical position.
  /// </remarks>
  private Token Lex()
  {
      Token consumed = _lookahead;
      int resumeAt = consumed.Range[1];
      int line = consumed.LineNumber ?? 0;
      int lineStart = consumed.LineStart;

      // Position the scanner right after the consumed token...
      _index = resumeAt;
      _lineNumber = line;
      _lineStart = lineStart;

      _lookahead = _extra.Tokens == null ? Advance() : CollectToken();

      // ...and rewind to that same position once the lookahead is known.
      _index = resumeAt;
      _lineNumber = line;
      _lineStart = lineStart;

      return consumed;
  }
  /// <summary>
  /// Scans the next token into <c>_lookahead</c> without moving the cursor:
  /// index, line number and line start are restored afterwards.
  /// </summary>
  private void Peek()
  {
      var saved = (Index: _index, Line: _lineNumber, LineStart: _lineStart);

      _lookahead = _extra.Tokens == null ? Advance() : CollectToken();

      _index = saved.Index;
      _lineNumber = saved.Line;
      _lineStart = saved.LineStart;
  }
  /// <summary>
  /// Pushes the current position onto the marker stack: column and line when
  /// location tracking is on, followed by the index when range tracking is on.
  /// </summary>
  private void MarkStart()
  {
      var markers = _state.MarkerStack;

      if (_extra.Loc.HasValue)
      {
          int column = _index - _lineStart;
          markers.Push(column);
          markers.Push(_lineNumber);
      }

      if (_extra.Range != null)
      {
          markers.Push(_index);
      }
  }
  /// <summary>
  /// Pops the markers pushed by <see cref="MarkStart"/> and stamps
  /// <paramref name="node"/> with the range and/or location spanning from
  /// that start to the current position.
  /// </summary>
  /// <typeparam name="T">Concrete node type.</typeparam>
  /// <param name="node">Node to annotate.</param>
  /// <returns>The same <paramref name="node"/> instance.</returns>
  private T MarkEnd<T>(T node) where T : Node
  {
      var markers = _state.MarkerStack;

      if (_extra.Range != null)
      {
          int rangeStart = markers.Pop();
          node.Range = Range.From(rangeStart, _index);
      }

      if (!_extra.Loc.HasValue)
      {
          return node;
      }

      // Markers come off in reverse push order: line first, then column.
      int startLine = markers.Pop();
      int startColumn = markers.Pop();

      var start = Position.From(line: startLine, column: startColumn);
      var end = Position.From(line: _lineNumber, column: _index - _lineStart);
      node.Location = Location.From(start: start, end: end, source: _source);
      PostProcess(node);

      return node;
  }
  /// <summary>
  /// Finishes marking <paramref name="node"/> only if it has not been marked yet.
  /// </summary>
  /// <typeparam name="T">Concrete node type.</typeparam>
  /// <param name="node">Node to annotate.</param>
  /// <returns>The same <paramref name="node"/> instance.</returns>
  /// <remarks>
  /// When the node already carries a non-default range or location, the pending
  /// markers are discarded from the stack without touching the node; otherwise
  /// the work is delegated to MarkEnd.
  /// </remarks>
  public T MarkEndIf<T>(T node) where T : Node
  {
      bool alreadyMarked = node.Range != default || node.Location != default;
      if (!alreadyMarked)
      {
          MarkEnd(node);
          return node;
      }

      var markers = _state.MarkerStack;

      if (_extra.Loc.HasValue)
      {
          // Drop the line and column markers.
          markers.Pop();
          markers.Pop();
      }

      if (_extra.Range != null)
      {
          // Drop the index marker.
          markers.Pop();
      }

      return node;
  }
  /// <summary>
  /// Hook invoked by MarkEnd after a node's location has been set.
  /// Currently returns <paramref name="node"/> unchanged.
  /// </summary>
  /// <param name="node">The node that was just annotated.</param>
  /// <returns>The same <paramref name="node"/> instance.</returns>
  public Node PostProcess(Node node) => node;
  /// <summary>
  /// Raises a syntax error positioned at the start of <paramref name="token"/>.
  /// </summary>
  /// <param name="token">Token whose start index is reported.</param>
  /// <param name="messageFormat">Composite format string for the message.</param>
  /// <param name="arguments">Values substituted into <paramref name="messageFormat"/>.</param>
  private void ThrowError(Token token, string messageFormat, params object[] arguments)
      => ThrowError(token.Range[0], messageFormat, arguments);
  /// <summary>
  /// Formats the message and raises a syntax error in the engine's realm,
  /// with the source position appended to the text.
  /// </summary>
  /// <param name="position">Index into the source that the error refers to.</param>
  /// <param name="messageFormat">Composite format string for the message.</param>
  /// <param name="arguments">Values substituted into <paramref name="messageFormat"/>.</param>
  private void ThrowError(int position, string messageFormat, params object[] arguments)
  {
      string formatted = string.Format(messageFormat, arguments);
      ExceptionHelper.ThrowSyntaxError(_engine.Realm, $"{formatted} at position {position}");
  }
  /// <summary>
  /// Raises the syntax error that corresponds to an unexpected <paramref name="token"/>:
  /// a dedicated message for end of input, numbers and strings, and a generic
  /// "unexpected token" message quoting the token text for everything else.
  /// </summary>
  /// <param name="token">The offending token.</param>
  private void ThrowUnexpected(Token token)
  {
      switch (token.Type)
      {
          case Tokens.EOF:
              ThrowError(token, Messages.UnexpectedEOS);
              break;
          case Tokens.Number:
              ThrowError(token, Messages.UnexpectedNumber);
              break;
          case Tokens.String:
              ThrowError(token, Messages.UnexpectedString);
              break;
      }

      // BooleanLiteral, NullLiteral, or Punctuator.
      ThrowError(token, Messages.UnexpectedToken, token.Text);
  }
  /// <summary>
  /// Consumes the next token and requires it to be the punctuator <paramref name="value"/>.
  /// </summary>
  /// <param name="value">The punctuator text that must come next.</param>
  /// <remarks>Any other token is reported through ThrowUnexpected.</remarks>
  private void Expect(string value)
  {
      Token next = Lex();
      bool isExpected = next.Type == Tokens.Punctuator && value.Equals(next.Value);
      if (!isExpected)
      {
          ThrowUnexpected(next);
      }
  }
  /// <summary>
  /// Tests, without consuming anything, whether the lookahead token is the
  /// punctuator <paramref name="value"/>.
  /// </summary>
  /// <param name="value">The punctuator text to compare against.</param>
  /// <returns>True when the lookahead is that punctuator.</returns>
  private bool Match(string value)
  {
      if (_lookahead.Type != Tokens.Punctuator)
      {
          return false;
      }

      return value.Equals(_lookahead.Value);
  }
  /// <summary>
  /// Parses a JSON array: '[' followed by zero or more comma-separated values and ']'.
  /// </summary>
  /// <returns>An array object built from the parsed elements.</returns>
  /// <remarks>
  /// The JSON grammar has no elisions and no trailing comma, so inputs such as
  /// "[,1]" or "[1,]" are rejected: after '[' or ',' a value is mandatory, and
  /// ParseJsonValue reports the stray ',' or ']' as an unexpected token.
  /// </remarks>
  private ObjectInstance ParseJsonArray()
  {
      var elements = new List<JsValue>();

      Expect("[");

      if (!Match("]"))
      {
          while (true)
          {
              elements.Add(ParseJsonValue());

              if (Match("]"))
              {
                  break;
              }

              // Anything other than ',' here (including end of input) is an error.
              Expect(",");
          }
      }

      Expect("]");

      return _engine.Realm.Intrinsics.Array.ConstructFast(elements);
  }
  /// <summary>
  /// Parses a JSON object: '{' followed by zero or more comma-separated
  /// <c>"name": value</c> members and '}'.
  /// </summary>
  /// <returns>A newly constructed object carrying one data property per member.</returns>
  /// <remarks>
  /// The JSON grammar has no trailing comma, so "{"a":1,}" is rejected: after
  /// '{' (for a non-empty object) or ',' a string member name is mandatory.
  /// Member names must be string tokens; any other token is reported through
  /// ThrowUnexpected.
  /// </remarks>
  public ObjectInstance ParseJsonObject()
  {
      Expect("{");

      var obj = _engine.Realm.Intrinsics.Object.Construct(Arguments.Empty);

      if (!Match("}"))
      {
          while (true)
          {
              if (_lookahead.Type != Tokens.String)
              {
                  ThrowUnexpected(Lex());
              }

              var nameToken = Lex();
              var name = nameToken.Value.ToString();

              // NOTE(review): this check runs on the decoded name, so escaped
              // control characters (e.g. "\n" written as an escape) are rejected
              // too - confirm that this is intended.
              if (PropertyNameContainsInvalidCharacters(name))
              {
                  ThrowError(nameToken, Messages.InvalidCharacter);
              }

              Expect(":");
              var value = ParseJsonValue();

              obj.FastSetDataProperty(name, value);

              if (Match("}"))
              {
                  break;
              }

              // Anything other than ',' here (including end of input) is an error.
              Expect(",");
          }
      }

      Expect("}");

      return obj;
  }
  /// <summary>
  /// Reports whether <paramref name="propertyName"/> contains a character in
  /// the range U+0000..U+001F other than horizontal tab.
  /// </summary>
  /// <param name="propertyName">The name to inspect.</param>
  /// <returns>True when such a character is present; false otherwise (including for an empty name).</returns>
  private static bool PropertyNameContainsInvalidCharacters(string propertyName)
  {
      const char lastControlChar = (char) 31;

      for (int i = 0; i < propertyName.Length; i++)
      {
          char current = propertyName[i];
          if (current == '\t')
          {
              continue;
          }

          if (current <= lastControlChar)
          {
              return true;
          }
      }

      return false;
  }
  /// <summary>
  /// Parses one JSON value, dispatching on the type of the lookahead token.
  /// </summary>
  /// <remarks>
  /// Literal tokens were already decoded by the scanner, so their
  /// <c>Value</c> is taken directly from the token returned by Lex().
  /// Arrays and objects are delegated to ParseJsonArray / ParseJsonObject;
  /// any other token is reported through ThrowUnexpected.
  /// </remarks>
  /// <returns>The parsed value.</returns>
  private JsValue ParseJsonValue()
  {
      Tokens kind = _lookahead.Type;

      MarkStart();

      if (kind == Tokens.NullLiteral)
      {
          Lex();
          return Null.Instance;
      }

      if (kind == Tokens.BooleanLiteral)
      {
          // Map onto the shared boolean instances.
          return (bool) Lex().Value ? JsBoolean.True : JsBoolean.False;
      }

      if (kind == Tokens.String)
      {
          return new JsString((string) Lex().Value);
      }

      if (kind == Tokens.Number)
      {
          return (double) Lex().Value;
      }

      if (Match("["))
      {
          return ParseJsonArray();
      }

      if (Match("{"))
      {
          return ParseJsonObject();
      }

      ThrowUnexpected(Lex());

      // can't be reached
      return Null.Instance;
  }
  /// <summary>
  /// Parses <paramref name="code"/> as JSON text using default options.
  /// </summary>
  /// <param name="code">The JSON text.</param>
  /// <returns>The parsed value.</returns>
  public JsValue Parse(string code) => Parse(code, null);
  /// <summary>
  /// Parses <paramref name="code"/> as JSON text.
  /// </summary>
  /// <param name="code">The JSON text.</param>
  /// <param name="options">
  /// Optional parser options; when <c>options.Tokens</c> is set, scanned tokens
  /// are collected while parsing.
  /// </param>
  /// <returns>The parsed value.</returns>
  /// <remarks>
  /// All scanner state is reset on entry. A syntax error is raised when
  /// anything other than end of input follows the top-level value. The
  /// per-parse <c>_extra</c> data is replaced with a fresh instance on exit,
  /// whether parsing succeeded or not.
  /// </remarks>
  public JsValue Parse(string code, ParserOptions? options)
  {
      // Reset the scanner.
      _source = code;
      _index = 0;
      _lineNumber = 1;
      _lineStart = 0;
      _length = _source.Length;
      _lookahead = null!;

      _state = new State
      {
          AllowIn = true,
          LabelSet = new HashSet<string>(),
          InFunctionBody = false,
          InIteration = false,
          InSwitch = false,
          LastCommentStart = -1,
          MarkerStack = new Stack<int>()
      };

      // Non-null Range and Loc switch on marker tracking in MarkStart / MarkEnd.
      _extra = new Extra
      {
          Range = new int[0],
          Loc = 0,
      };

      if (options != null && options.Tokens)
      {
          _extra.Tokens = new List<Token>();
      }

      try
      {
          MarkStart();
          Peek();
          JsValue result = ParseJsonValue();

          // Only end of input may follow the top-level value.
          Peek();
          if (_lookahead.Type != Tokens.EOF)
          {
              ThrowError(_lookahead, Messages.UnexpectedToken, _lookahead.Text);
          }

          return result;
      }
      finally
      {
          _extra = new Extra();
      }
  }
  /// <summary>
  /// Optional per-parse settings and outputs.
  /// </summary>
  private sealed class Extra
  {
      /// <summary>When it has a value, MarkStart/MarkEnd track line and column markers.</summary>
      public int? Loc;

      /// <summary>When not null, MarkStart/MarkEnd track index markers.</summary>
      public int[]? Range;

      /// <summary>
      /// Receives a copy of every scanned token when token collection is on;
      /// left null otherwise (the scanner checks for null before using it).
      /// </summary>
      public List<Token> Tokens = null!;
  }
  /// <summary>
  /// Kinds of token produced by the scanner.
  /// </summary>
  private enum Tokens
  {
      /// <summary>The null literal.</summary>
      NullLiteral,

      /// <summary>The literals true and false.</summary>
      BooleanLiteral,

      /// <summary>A quoted string.</summary>
      String,

      /// <summary>A numeric literal.</summary>
      Number,

      /// <summary>A single-character punctuator.</summary>
      Punctuator,

      /// <summary>End of input.</summary>
      EOF,
  }
  /// <summary>
  /// A token produced by the scanner.
  /// </summary>
  class Token
  {
      /// <summary>Kind of token.</summary>
      public Tokens Type;

      /// <summary>Text of the token as set by the scanner that produced it.</summary>
      public string Text = null!;

      /// <summary>
      /// Decoded value: a string, a boxed double or bool, or the null instance,
      /// depending on <see cref="Type"/>.
      /// </summary>
      public object Value = null!;

      /// <summary>Two-element array: start index and end index (exclusive) in the source.</summary>
      public int[] Range = null!;

      /// <summary>Line number recorded when the token was scanned, if any.</summary>
      public int? LineNumber;

      /// <summary>Line-start index recorded when the token was scanned.</summary>
      public int LineStart;
  }
  /// <summary>
  /// Message templates for syntax errors. Templates containing <c>{0}</c> are
  /// composite format strings that take the offending token as their argument.
  /// </summary>
  static class Messages
  {
      public const string InvalidCharacter = "Invalid character in JSON";

      public const string ExpectedHexadecimalDigit = "Expected hexadecimal digit in JSON";

      public const string UnexpectedToken = "Unexpected token '{0}' in JSON";

      public const string UnexpectedTokenIllegal = "Unexpected token ILLEGAL in JSON";

      public const string UnexpectedNumber = "Unexpected number in JSON";

      public const string UnexpectedString = "Unexpected string in JSON";

      public const string UnexpectedEOS = "Unexpected end of JSON input";
  }
  /// <summary>
  /// Parser state created afresh for every parse. Of these fields, the code in
  /// this class only reads <see cref="MarkerStack"/>; the others are merely
  /// initialised by Parse.
  /// </summary>
  struct State
  {
      public int LastCommentStart;

      public bool AllowIn;

      public HashSet<string> LabelSet;

      public bool InFunctionBody;

      public bool InIteration;

      public bool InSwitch;

      /// <summary>Start markers pushed by MarkStart and popped by MarkEnd / MarkEndIf.</summary>
      public Stack<int> MarkerStack;
  }
  737. }
  /// <summary>
  /// String helpers used by the JSON scanner.
  /// </summary>
  internal static class StringExtensions
  {
      /// <summary>
      /// Returns the character at <paramref name="index"/>, or
      /// <see cref="char.MinValue"/> (used as the "no character" value) when the
      /// index lies at or beyond the end of <paramref name="source"/>.
      /// </summary>
      /// <param name="source">The string to read from.</param>
      /// <param name="index">Zero-based position.</param>
      /// <returns>The character, or <see cref="char.MinValue"/> past the end.</returns>
      public static char CharCodeAt(this string source, int index)
          => index >= source.Length ? char.MinValue : source[index];
  }
  750. }