// JsonParser.cs 23 KB
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Linq;
  5. using Jint.Native.Object;
  6. using Jint.Parser;
  7. using Jint.Parser.Ast;
  8. using Jint.Runtime;
  9. namespace Jint.Native.Json
  10. {
  11. public class JsonParser
  12. {
  // Engine whose SyntaxError, Array and Object intrinsics are used while parsing.
  private readonly Engine _engine;

  // Scanner state.
  private string _source;   // text being parsed
  private int _length;      // length of the stream
  private int _index;       // position in the stream
  private int _lineNumber;
  private int _lineStart;

  // Parser state.
  private Token _lookahead;
  private Location _location;
  private State _state;
  private Extra _extra;

  /// <summary>
  /// Creates a parser bound to the given engine.
  /// </summary>
  /// <param name="engine">Engine stored for later use by the parsing methods.</param>
  public JsonParser(Engine engine)
  {
      _engine = engine;
  }
  /// <summary>
  /// Tells whether <paramref name="ch"/> is one of the ASCII digits '0' through '9'.
  /// </summary>
  private static bool IsDecimalDigit(char ch)
  {
      if (ch < '0')
      {
          return false;
      }

      return ch <= '9';
  }
  /// <summary>
  /// Tells whether <paramref name="ch"/> is a hexadecimal digit (0-9, a-f or A-F).
  /// </summary>
  private static bool IsHexDigit(char ch)
  {
      if (ch >= '0' && ch <= '9')
      {
          return true;
      }

      if (ch >= 'a' && ch <= 'f')
      {
          return true;
      }

      return ch >= 'A' && ch <= 'F';
  }
  /// <summary>
  /// Tells whether <paramref name="ch"/> is one of the octal digits '0' through '7'.
  /// </summary>
  private static bool IsOctalDigit(char ch)
  {
      bool outsideRange = ch < '0' || ch > '7';
      return !outsideRange;
  }
  /// <summary>
  /// Tells whether <paramref name="ch"/> is a space, tab, line feed or carriage return.
  /// </summary>
  private static bool IsWhiteSpace(char ch)
  {
      switch (ch)
      {
          case ' ':
          case '\t':
          case '\n':
          case '\r':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Tells whether <paramref name="ch"/> is LF (U+000A), CR (U+000D), U+2028 or U+2029.
  /// </summary>
  private static bool IsLineTerminator(char ch)
  {
      switch (ch)
      {
          case '\n':
          case '\r':
          case '\u2028':
          case '\u2029':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Reads the hexadecimal digits of an escape sequence starting at the current
  /// index and returns the character they encode.
  /// </summary>
  /// <param name="prefix">'u' reads four digits; any other value reads two.</param>
  /// <returns>The character whose code is the value of the digits read.</returns>
  /// <exception cref="JavaScriptException">
  /// Raised with the engine's SyntaxError when a required digit is missing.
  /// </exception>
  private char ScanHexEscape(char prefix)
  {
      int digitCount = prefix == 'u' ? 4 : 2;
      int value = 0;

      for (int remaining = digitCount; remaining > 0; --remaining)
      {
          bool hasDigit = _index < _length && IsHexDigit(_source.CharCodeAt(_index));
          if (!hasDigit)
          {
              throw new JavaScriptException(_engine.SyntaxError, string.Format("Expected hexadecimal digit:{0}", _source));
          }

          char digit = _source.CharCodeAt(_index);
          _index++;

          // Case-insensitive lookup gives the digit's numeric value (0..15).
          int digitValue = "0123456789abcdef".IndexOf(digit.ToString(), StringComparison.OrdinalIgnoreCase);
          value = value * 16 + digitValue;
      }

      return (char) value;
  }
  /// <summary>
  /// Moves the current index forward past any run of characters accepted by
  /// <c>IsWhiteSpace</c>, stopping at the first other character or at the end of input.
  /// </summary>
  private void SkipWhiteSpace()
  {
      while (_index < _length && IsWhiteSpace(_source.CharCodeAt(_index)))
      {
          ++_index;
      }
  }
  /// <summary>
  /// Scans the single-character punctuator at the current index.
  /// </summary>
  /// <returns>A punctuator token whose value is the one-character string.</returns>
  /// <exception cref="JavaScriptException">
  /// Raised with the engine's SyntaxError when the character is not one of
  /// <c>. ( ) ; , { } [ ] : ? ~</c>.
  /// </exception>
  private Token ScanPunctuator()
  {
      int start = _index;
      char code = _source.CharCodeAt(_index);

      // The full set of accepted single-character punctuators.
      if (".();,{}[]:?~".IndexOf(code) < 0)
      {
          throw new JavaScriptException(_engine.SyntaxError, string.Format(Messages.UnexpectedToken, code));
      }

      ++_index;

      var token = new Token();
      token.Type = Tokens.Punctuator;
      token.Value = code.ToString();
      token.LineNumber = _lineNumber;
      token.LineStart = _lineStart;
      token.Range = new[] {start, _index};
      return token;
  }
  /// <summary>
  /// Scans a numeric literal (integer part, optional fraction, optional exponent)
  /// starting at the current index.
  /// </summary>
  /// <returns>
  /// A number token whose value is the parsed <see cref="double"/>. The text is
  /// parsed with the invariant culture so '.' is always the decimal separator.
  /// </returns>
  /// <exception cref="JavaScriptException">
  /// Raised with the engine's SyntaxError when a leading '0' is followed by another
  /// digit, or when an exponent marker is not followed by at least one digit.
  /// </exception>
  private Token ScanNumericLiteral()
  {
      int start = _index;
      char ch = _source.CharCodeAt(_index);

      if (ch != '.')
      {
          char first = _source.CharCodeAt(_index++);
          ch = _source.CharCodeAt(_index);

          // A number starting with '0' followed by a digit (such as '09') is illegal.
          if (first == '0' && IsDecimalDigit(ch))
          {
              throw new JavaScriptException(_engine.SyntaxError, string.Format(Messages.UnexpectedToken, ch));
          }

          while (IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              ++_index;
          }
          ch = _source.CharCodeAt(_index);
      }

      if (ch == '.')
      {
          ++_index;
          while (IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              ++_index;
          }
          ch = _source.CharCodeAt(_index);
      }

      if (ch == 'e' || ch == 'E')
      {
          ++_index;
          ch = _source.CharCodeAt(_index);
          if (ch == '+' || ch == '-')
          {
              ++_index;
          }

          if (!IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              string message = _index < _length
                  ? string.Format(Messages.UnexpectedToken, _source.CharCodeAt(_index))
                  : Messages.UnexpectedEOS;
              throw new JavaScriptException(_engine.SyntaxError, message);
          }

          while (IsDecimalDigit(_source.CharCodeAt(_index)))
          {
              ++_index;
          }
      }

      // Every character consumed above belongs to the literal, so take it in one slice.
      string number = _source.Substring(start, _index - start);

      return new Token
          {
              Type = Tokens.Number,
              Value = Double.Parse(number, NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture),
              LineNumber = _lineNumber,
              LineStart = _lineStart,
              Range = new[] {start, _index}
          };
  }
  /// <summary>
  /// Scans a quoted string literal starting at the current index. The character at
  /// the current index is taken as the quote character and the literal runs until
  /// the next unescaped occurrence of that character.
  /// </summary>
  /// <returns>A string token whose value is the decoded text without the quotes.</returns>
  /// <exception cref="JavaScriptException">
  /// Raised with the engine's SyntaxError when the literal is not closed before a
  /// line terminator or the end of input, or (via ScanHexEscape) when a \u or \x
  /// escape lacks its hexadecimal digits.
  /// </exception>
  private Token ScanStringLiteral()
  {
      var str = new System.Text.StringBuilder();
      char quote = _source.CharCodeAt(_index);
      int start = _index;
      bool terminated = false;
      ++_index;

      while (_index < _length)
      {
          char ch = _source.CharCodeAt(_index++);

          if (ch == quote)
          {
              terminated = true;
              break;
          }

          if (ch == '\\')
          {
              ch = _source.CharCodeAt(_index++);
              switch (ch)
              {
                  case 'n':
                      str.Append('\n');
                      break;
                  case 'r':
                      str.Append('\r');
                      break;
                  case 't':
                      str.Append('\t');
                      break;
                  case 'u':
                  case 'x':
                      // ScanHexEscape throws on malformed digits, so any value it
                      // returns (including U+0000) is the decoded character.
                      str.Append(ScanHexEscape(ch));
                      break;
                  case 'b':
                      str.Append('\b');
                      break;
                  case 'f':
                      str.Append('\f');
                      break;
                  case 'v':
                      str.Append('\v');
                      break;
                  default:
                      if (IsOctalDigit(ch))
                      {
                          int code = ch - '0';
                          if (_index < _length && IsOctalDigit(_source.CharCodeAt(_index)))
                          {
                              code = code * 8 + (_source.CharCodeAt(_index++) - '0');
                              // 3 digits are only allowed when string starts
                              // with 0, 1, 2, 3
                              if (ch <= '3' &&
                                  _index < _length &&
                                  IsOctalDigit(_source.CharCodeAt(_index)))
                              {
                                  code = code * 8 + (_source.CharCodeAt(_index++) - '0');
                              }
                          }
                          str.Append((char) code);
                      }
                      else
                      {
                          // Any other escaped character stands for itself.
                          str.Append(ch);
                      }
                      break;
              }
          }
          else if (IsLineTerminator(ch))
          {
              // An unescaped line terminator ends the scan with the literal unclosed.
              break;
          }
          else
          {
              str.Append(ch);
          }
      }

      if (!terminated)
      {
          throw new JavaScriptException(_engine.SyntaxError, string.Format(Messages.UnexpectedToken, "ILLEGAL"));
      }

      return new Token
          {
              Type = Tokens.String,
              Value = str.ToString(),
              LineNumber = _lineNumber,
              LineStart = _lineStart,
              Range = new[] {start, _index}
          };
  }
  /// <summary>
  /// Skips white space and scans the next token from the source.
  /// </summary>
  /// <returns>
  /// An EOF token at the end of input; otherwise a string, number, boolean, null or
  /// punctuator token.
  /// </returns>
  /// <exception cref="JavaScriptException">
  /// Raised by the scanners with the engine's SyntaxError when the input at the
  /// current index does not start a recognised token.
  /// </exception>
  private Token Advance()
  {
      SkipWhiteSpace();

      if (_index >= _length)
      {
          return new Token
              {
                  Type = Tokens.EOF,
                  LineNumber = _lineNumber,
                  LineStart = _lineStart,
                  Range = new[] {_index, _index}
              };
      }

      char ch = _source.CharCodeAt(_index);

      // Very common: ( and ) and :
      if (ch == '(' || ch == ')' || ch == ':')
      {
          return ScanPunctuator();
      }

      // String literal starts with single quote (#39) or double quote (#34).
      if (ch == '\'' || ch == '"')
      {
          return ScanStringLiteral();
      }

      // Dot (.) char #46 can also start a floating-point number, hence the need
      // to check the next character.
      if (ch == '.')
      {
          if (IsDecimalDigit(_source.CharCodeAt(_index + 1)))
          {
              return ScanNumericLiteral();
          }
          return ScanPunctuator();
      }

      // A minus sign directly followed by a digit starts a negative number: scan
      // the magnitude, negate it, and widen the range to cover the sign.
      if (ch == '-' && IsDecimalDigit(_source.CharCodeAt(_index + 1)))
      {
          int signStart = _index;
          ++_index;
          Token number = ScanNumericLiteral();
          number.Value = -((double) number.Value);
          number.Range[0] = signStart;
          return number;
      }

      if (IsDecimalDigit(ch))
      {
          return ScanNumericLiteral();
      }

      Token keyword = ScanKeywordLiteral();
      if (keyword != null)
      {
          return keyword;
      }

      // Anything else is either a punctuator or a syntax error.
      return ScanPunctuator();
  }

  // Scans "true", "false" or "null" at the current index; returns null (and
  // consumes nothing) when none of them is present there.
  private Token ScanKeywordLiteral()
  {
      string[] words = {"true", "false", "null"};

      foreach (string word in words)
      {
          if (string.CompareOrdinal(_source, _index, word, 0, word.Length) != 0)
          {
              continue;
          }

          int start = _index;
          _index += word.Length;

          return new Token
              {
                  Type = word == "null" ? Tokens.NullLiteral : Tokens.BooleanLiteral,
                  // The literal text is kept as the value; boolean conversion
                  // compares it with "true".
                  Value = word,
                  LineNumber = _lineNumber,
                  LineStart = _lineStart,
                  Range = new[] {start, _index}
              };
      }

      return null;
  }
  /// <summary>
  /// Scans the next token like <c>Advance</c>, records its start and end position
  /// in <c>_location</c>, and appends a copy of every non-EOF token (type, source
  /// text and range) to <c>_extra.Tokens</c>.
  /// </summary>
  /// <returns>The token produced by <c>Advance</c>.</returns>
  private Token CollectToken()
  {
      var startPosition = new Position
          {
              Line = _lineNumber,
              Column = _index - _lineStart
          };
      _location = new Location {Start = startPosition};

      Token scanned = Advance();

      _location.End = new Position
          {
              Line = _lineNumber,
              Column = _index - _lineStart
          };

      if (scanned.Type == Tokens.EOF)
      {
          return scanned;
      }

      int from = scanned.Range[0];
      int to = scanned.Range[1];

      var recorded = new Token();
      recorded.Type = scanned.Type;
      recorded.Value = _source.Slice(from, to);
      recorded.Range = new[] {from, to};
      _extra.Tokens.Add(recorded);

      return scanned;
  }
  /// <summary>
  /// Consumes the current lookahead token: positions the scanner at its end,
  /// scans the following token into <c>_lookahead</c>, then restores the scanner
  /// position to the end of the consumed token.
  /// </summary>
  /// <returns>The token that was the lookahead on entry.</returns>
  private Token Lex()
  {
      Token consumed = _lookahead;

      _index = consumed.Range[1];
      _lineNumber = consumed.LineNumber.GetValueOrDefault();
      _lineStart = consumed.LineStart;

      if (_extra.Tokens != null)
      {
          _lookahead = CollectToken();
      }
      else
      {
          _lookahead = Advance();
      }

      // Scanning moved the cursor; put it back at the end of the consumed token.
      _index = consumed.Range[1];
      _lineNumber = consumed.LineNumber.GetValueOrDefault();
      _lineStart = consumed.LineStart;

      return consumed;
  }
  /// <summary>
  /// Scans the next token into <c>_lookahead</c> without moving the scanner:
  /// index, line number and line start are saved first and restored afterwards.
  /// </summary>
  private void Peek()
  {
      int savedIndex = _index;
      int savedLineNumber = _lineNumber;
      int savedLineStart = _lineStart;

      if (_extra.Tokens != null)
      {
          _lookahead = CollectToken();
      }
      else
      {
          _lookahead = Advance();
      }

      _index = savedIndex;
      _lineNumber = savedLineNumber;
      _lineStart = savedLineStart;
  }
  /// <summary>
  /// Pushes the current position onto the marker stack: column then line number
  /// when location tracking is on, followed by the index when range tracking is on.
  /// </summary>
  private void MarkStart()
  {
      if (_extra.Loc != null)
      {
          int column = _index - _lineStart;
          _state.MarkerStack.Push(column);
          _state.MarkerStack.Push(_lineNumber);
      }

      if (_extra.Range != null)
      {
          _state.MarkerStack.Push(_index);
      }
  }
  /// <summary>
  /// Pops the markers pushed by <c>MarkStart</c> and stamps them on
  /// <paramref name="node"/>: the range when range tracking is on, and the
  /// location (followed by <c>PostProcess</c>) when location tracking is on.
  /// </summary>
  /// <returns>The same node instance.</returns>
  private T MarkEnd<T>(T node) where T : SyntaxNode
  {
      if (_extra.Range != null)
      {
          int rangeStart = _state.MarkerStack.Pop();
          node.Range = new[] {rangeStart, _index};
      }

      if (!_extra.Loc.HasValue)
      {
          return node;
      }

      // Markers come off in reverse push order: line first, then column.
      int startLine = _state.MarkerStack.Pop();
      int startColumn = _state.MarkerStack.Pop();

      var startPosition = new Position {Line = startLine, Column = startColumn};
      var endPosition = new Position {Line = _lineNumber, Column = _index - _lineStart};

      node.Location = new Location {Start = startPosition, End = endPosition};
      PostProcess(node);

      return node;
  }
  /// <summary>
  /// Finishes marking <paramref name="node"/>. A node with neither range nor
  /// location is passed to <c>MarkEnd</c>; otherwise the pending markers are
  /// popped and discarded.
  /// </summary>
  /// <returns>The same node instance.</returns>
  public T MarkEndIf<T>(T node) where T : SyntaxNode
  {
      bool unmarked = node.Range == null && node.Location == null;
      if (unmarked)
      {
          MarkEnd(node);
          return node;
      }

      // Already marked: drop the markers that were pushed for it.
      if (_extra.Loc.HasValue)
      {
          _state.MarkerStack.Pop();
          _state.MarkerStack.Pop();
      }

      if (_extra.Range != null)
      {
          _state.MarkerStack.Pop();
      }

      return node;
  }
  /// <summary>
  /// Copies the configured source name, when there is one, onto the node's location.
  /// </summary>
  /// <returns>The same node instance.</returns>
  public SyntaxNode PostProcess(SyntaxNode node)
  {
      string sourceName = _extra.Source;
      if (sourceName == null)
      {
          return node;
      }

      node.Location.Source = sourceName;
      return node;
  }
  /// <summary>
  /// Builds an array through the engine's Array constructor from the given values.
  /// </summary>
  /// <param name="values">Elements of the new array, in order.</param>
  /// <returns>The object returned by the engine's Array constructor.</returns>
  public ObjectInstance CreateArrayInstance(IEnumerable<JsValue> values)
  {
      JsValue[] items = values.ToArray();
      return _engine.Array.Construct(items);
  }
  /// <summary>
  /// Formats a message and throws it as a <c>ParserError</c>. Position details come
  /// from <paramref name="token"/> when it carries a line number, otherwise from
  /// the scanner's current position.
  /// </summary>
  /// <param name="token">Token the error relates to.</param>
  /// <param name="messageFormat">Composite format string for the message.</param>
  /// <param name="arguments">Values substituted into the format string.</param>
  private void ThrowError(Token token, string messageFormat, params object[] arguments)
  {
      string msg = System.String.Format(messageFormat, arguments);
      ParserError error;

      if (!token.LineNumber.HasValue)
      {
          error = new ParserError("Line " + _lineNumber + ": " + msg);
          error.Index = _index;
          error.LineNumber = _lineNumber;
          error.Column = _index - _lineStart + 1;
      }
      else
      {
          error = new ParserError("Line " + token.LineNumber + ": " + msg);
          error.Index = token.Range[0];
          error.LineNumber = token.LineNumber.Value;
          error.Column = token.Range[0] - _lineStart + 1;
      }

      error.Description = msg;
      throw error;
  }
  /// <summary>
  /// Throws the error that corresponds to an unexpected token, choosing the
  /// message by token type.
  /// </summary>
  /// <param name="token">The token that was not expected.</param>
  private void ThrowUnexpected(Token token)
  {
      switch (token.Type)
      {
          case Tokens.EOF:
              ThrowError(token, Messages.UnexpectedEOS);
              break;
          case Tokens.Number:
              ThrowError(token, Messages.UnexpectedNumber);
              break;
          case Tokens.String:
              ThrowError(token, Messages.UnexpectedString);
              break;
      }

      // BooleanLiteral, NullLiteral, or Punctuator.
      ThrowError(token, Messages.UnexpectedToken, token.Value as string);
  }
  /// <summary>
  /// Consumes the next token and requires it to be the given punctuator;
  /// any other token is reported through <c>ThrowUnexpected</c>.
  /// </summary>
  /// <param name="value">Text of the required punctuator.</param>
  private void Expect(string value)
  {
      Token token = Lex();
      bool isExpected = token.Type == Tokens.Punctuator && value.Equals(token.Value);
      if (!isExpected)
      {
          ThrowUnexpected(token);
      }
  }
  /// <summary>
  /// Tests, without consuming anything, whether the lookahead token is the given punctuator.
  /// </summary>
  /// <param name="value">Text of the punctuator to test for.</param>
  private bool Match(string value)
  {
      if (_lookahead.Type != Tokens.Punctuator)
      {
          return false;
      }

      return value.Equals(_lookahead.Value);
  }
  /// <summary>
  /// Parses a JSON array: '[' followed by zero or more comma-separated values and ']'.
  /// A comma must be followed by a value, so elisions (<c>[,1]</c>) and trailing
  /// commas (<c>[1,]</c>) are reported as unexpected tokens.
  /// </summary>
  /// <returns>The array built by <c>CreateArrayInstance</c> from the parsed elements.</returns>
  private ObjectInstance ParseJsonArray()
  {
      var elements = new List<JsValue>();

      Expect("[");

      if (!Match("]"))
      {
          elements.Add(ParseJsonValue());

          while (Match(","))
          {
              Lex();
              // ParseJsonValue rejects ']' and ',' here, which rules out
              // trailing commas and empty slots.
              elements.Add(ParseJsonValue());
          }
      }

      Expect("]");

      return CreateArrayInstance(elements);
  }
  /// <summary>
  /// Parses a JSON object: '{' followed by zero or more comma-separated
  /// <c>"name": value</c> members and '}'. A comma must be followed by another
  /// member, so a trailing comma (<c>{"a":1,}</c>) is reported as an unexpected token.
  /// </summary>
  /// <returns>A new object holding one property per parsed member.</returns>
  /// <exception cref="JavaScriptException">
  /// Raised with the engine's SyntaxError when a property name fails
  /// <c>IsValidJsonObjectPropertyName</c>.
  /// </exception>
  public ObjectInstance ParseJsonObject()
  {
      Expect("{");

      var obj = _engine.Object.Construct(Arguments.Empty);

      if (!Match("}"))
      {
          ParseJsonMember(obj);

          while (Match(","))
          {
              Lex();
              ParseJsonMember(obj);
          }
      }

      Expect("}");

      return obj;
  }

  // Parses one "name": value member and adds it to obj as a property.
  private void ParseJsonMember(ObjectInstance obj)
  {
      // Member names must be string tokens.
      if (_lookahead.Type != Tokens.String)
      {
          ThrowUnexpected(Lex());
      }

      var name = Lex().Value.ToString();
      if (!IsValidJsonObjectPropertyName(name))
      {
          throw new JavaScriptException(_engine.SyntaxError, string.Format("Invalid character in property name '{0}'", name));
      }

      Expect(":");
      var value = ParseJsonValue();

      obj.FastAddProperty(name, value, true, true, true);
  }
  /// <summary>
  /// Checks that a property name contains no character with a code of 31 or below.
  /// * @path ch15/15.12/15.12.2/15.12.2-2-3.js
  /// * @description JSON.parse - parsing an object where property name ends with a null character
  /// </summary>
  /// <param name="propertyName">The decoded property name to check.</param>
  /// <returns>False when any character has a code of 31 or below; true otherwise.</returns>
  private bool IsValidJsonObjectPropertyName(string propertyName)
  {
      foreach (char c in propertyName)
      {
          if (c < '\u0020')
          {
              return false;
          }
      }

      return true;
  }
  /// <summary>
  /// Parses one JSON value, chosen by the lookahead token: null, string, number,
  /// boolean, array or object. Every branch consumes the tokens of the value it returns.
  /// </summary>
  /// <returns>The parsed value.</returns>
  private JsValue ParseJsonValue()
  {
      Tokens type = _lookahead.Type;
      MarkStart();

      if (type == Tokens.NullLiteral)
      {
          // Consume the token so the caller sees what follows the value.
          Lex();
          return Null.Instance;
      }

      if (type == Tokens.String)
      {
          return JsValue.FromObject(Lex().Value);
      }

      if (type == Tokens.Number)
      {
          return JsValue.FromObject(Lex().Value);
      }

      if (type == Tokens.BooleanLiteral)
      {
          // The token value is compared with the text "true".
          return "true".Equals(Lex().Value);
      }

      if (Match("["))
      {
          return ParseJsonArray();
      }

      if (Match("{"))
      {
          return ParseJsonObject();
      }

      ThrowUnexpected(Lex());

      // can't be reached
      return Null.Instance;
  }
  /// <summary>
  /// Parses JSON text without parser options.
  /// </summary>
  /// <param name="code">The JSON text.</param>
  /// <returns>The parsed value.</returns>
  public JsValue Parse(string code)
  {
      ParserOptions noOptions = null;
      return Parse(code, noOptions);
  }
  /// <summary>
  /// Parses JSON text into a value. The whole input must be a single JSON value,
  /// optionally surrounded by white space.
  /// </summary>
  /// <param name="code">The JSON text.</param>
  /// <param name="options">
  /// Optional settings; may be null. A non-empty <c>Source</c> is recorded as the
  /// source name and <c>Tokens</c> turns on token collection.
  /// </param>
  /// <returns>The parsed value.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="code"/> is null.</exception>
  /// <exception cref="JavaScriptException">
  /// Raised with the engine's SyntaxError when another token follows the parsed
  /// value, or by the scanners on malformed input.
  /// </exception>
  public JsValue Parse(string code, ParserOptions options)
  {
      if (code == null)
      {
          throw new ArgumentNullException("code");
      }

      _source = code;
      _index = 0;
      _lineNumber = (_source.Length > 0) ? 1 : 0;
      _lineStart = 0;
      _length = _source.Length;
      _lookahead = null;

      _state = new State
          {
              AllowIn = true,
              LabelSet = new HashSet<string>(),
              InFunctionBody = false,
              InIteration = false,
              InSwitch = false,
              LastCommentStart = -1,
              MarkerStack = new Stack<int>()
          };

      _extra = new Extra
          {
              Range = new int[0],
              Loc = 0,
          };

      if (options != null)
      {
          if (!System.String.IsNullOrEmpty(options.Source))
          {
              _extra.Source = options.Source;
          }

          if (options.Tokens)
          {
              _extra.Tokens = new List<Token>();
          }
      }

      try
      {
          MarkStart();
          Peek();
          JsValue result = ParseJsonValue();

          // Consuming the value left the following token in the lookahead;
          // anything other than end of input is trailing garbage.
          if (_lookahead.Type != Tokens.EOF)
          {
              throw new JavaScriptException(_engine.SyntaxError, string.Format(Messages.UnexpectedToken, _lookahead.Value));
          }

          return result;
      }
      finally
      {
          // Reset the per-parse bookkeeping whether parsing succeeded or threw.
          _extra = new Extra();
      }
  }
  /// <summary>
  /// Per-parse bookkeeping switches. The parser tests each member for
  /// null / no value before using the corresponding feature.
  /// </summary>
  private class Extra
  {
      // Source name copied onto node locations when not null.
      public string Source;

      // Token collection is on when this list is not null.
      public List<Token> Tokens;

      // Range markers are pushed and popped when this is not null.
      public int[] Range;

      // Line/column markers are pushed and popped when this has a value.
      public int? Loc;
  }
  /// <summary>
  /// Kinds of token the scanner and parser distinguish.
  /// </summary>
  private enum Tokens
  {
      NullLiteral = 0,
      BooleanLiteral = 1,
      String = 2,
      Number = 3,
      Punctuator = 4,
      EOF = 5
  }
  /// <summary>
  /// A scanned token: its kind, value and position in the source.
  /// </summary>
  class Token
  {
      // Kind of token.
      public Tokens Type;

      // Token payload: scanners store a string or a double here.
      public object Value;

      // Two-element array: start index and end index in the source.
      public int[] Range;

      // Line number recorded when the token was scanned; may be absent.
      public int? LineNumber;

      // Line-start index recorded when the token was scanned.
      public int LineStart;
  }
  /// <summary>
  /// Message templates used when reporting syntax errors.
  /// </summary>
  static class Messages
  {
      // Takes one format argument: the offending token.
      public const string UnexpectedToken = "Unexpected token {0}";

      public const string UnexpectedString = "Unexpected string";
      public const string UnexpectedNumber = "Unexpected number";
      public const string UnexpectedEOS = "Unexpected end of input";
  }
  689. }
  690. }