Scanner.cs 23 KB


  1. using Lua.Internal;
  2. using System.Globalization;
  3. using System.Text;
  4. using static System.Diagnostics.Debug;
  5. namespace Lua.CodeAnalysis.Compilation;
  6. using static Constants;
  7. struct Scanner
  8. {
  // Lua state associated with this scanner (not referenced by the scanning methods in this struct).
  public LuaState L;

  // Scratch buffer collecting the characters of the token currently being scanned.
  public PooledList<char> Buffer;

  // Character source. NOTE(review): Position/TryRead/Span are used on it, so this is
  // presumably a project-defined reader rather than System.IO.TextReader - confirm.
  public TextReader R;

  // Most recently read character, or EndOfStream once the reader is exhausted.
  public int Current;

  // Running line counter, updated by IncrementLineNumber.
  public int LineNumber;

  // Value LineNumber had when Next() was last entered.
  public int LastLine;

  // Chunk name handed to LuaDebug.WriteShortSource when an error is raised.
  public string Source;

  // One-token look-ahead slot; a TkEos token marks it as empty (see Next/LookAhead).
  public Token LookAheadToken;

  // Reader position recorded right after the last line break; used to compute error columns.
  int lastNewLinePos;

  // Token most recently produced by Next().
  public Token Token;

  // Kind code of the current token.
  public int T
  {
      get { return Token.T; }
  }
  // Token kind codes. Single-character tokens use their own character code; every
  // multi-character token gets a code at or above FirstReserved, which lies beyond the
  // range of a 16-bit char so the two groups can never collide.
  public const int FirstReserved = ushort.MaxValue + 257;

  // Value stored in Current once the reader has no more characters.
  public const int EndOfStream = -1;

  public const int MaxInt = int.MaxValue >> (1 + 1); // 536870911 (int.MaxValue / 4)

  // Reserved words, in the same order as the leading entries of the tokens table.
  public const int TkAnd = FirstReserved;
  public const int TkBreak = TkAnd + 1;
  public const int TkDo = TkBreak + 1;
  public const int TkElse = TkDo + 1;
  public const int TkElseif = TkElse + 1;
  public const int TkEnd = TkElseif + 1;
  public const int TkFalse = TkEnd + 1;
  public const int TkFor = TkFalse + 1;
  public const int TkFunction = TkFor + 1;
  public const int TkGoto = TkFunction + 1;
  public const int TkIf = TkGoto + 1;
  public const int TkIn = TkIf + 1;
  public const int TkLocal = TkIn + 1;
  public const int TkNil = TkLocal + 1;
  public const int TkNot = TkNil + 1;
  public const int TkOr = TkNot + 1;
  public const int TkRepeat = TkOr + 1;
  public const int TkReturn = TkRepeat + 1;
  public const int TkThen = TkReturn + 1;
  public const int TkTrue = TkThen + 1;
  public const int TkUntil = TkTrue + 1;
  public const int TkWhile = TkUntil + 1;

  // Multi-character operators and the end-of-stream marker.
  public const int TkConcat = TkWhile + 1;
  public const int TkDots = TkConcat + 1;
  public const int TkEq = TkDots + 1;
  public const int TkGe = TkEq + 1;
  public const int TkLe = TkGe + 1;
  public const int TkNe = TkLe + 1;
  public const int TkDoubleColon = TkNe + 1;
  public const int TkEos = TkDoubleColon + 1;

  // Tokens that carry a value (number, identifier text, string text).
  public const int TkNumber = TkEos + 1;
  public const int TkName = TkNumber + 1;
  public const int TkString = TkName + 1;

  // Number of reserved words (TkAnd..TkWhile inclusive).
  public const int ReservedCount = TkWhile - FirstReserved + 1;
  // Display text for every token code >= FirstReserved, indexed by (code - FirstReserved).
  static readonly string[] tokens =
  [
      // reserved words
      "and", "break", "do", "else", "elseif", "end",
      "false", "for", "function", "goto", "if", "in",
      "local", "nil", "not", "or", "repeat", "return",
      "then", "true", "until", "while",
      // multi-character operators and end of stream
      "..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
      // value-carrying tokens
      "<number>", "<name>", "<string>"
  ];

  /// <summary>Read-only view over the token display-text table.</summary>
  public static ReadOnlySpan<string> Tokens
  {
      get { return tokens; }
  }
  /// <summary>
  /// Raises a compile error at <paramref name="position"/>, reporting the current token as the "near" token.
  /// </summary>
  /// <param name="position">Reader position the error is attributed to.</param>
  /// <param name="message">Error text.</param>
  public void SyntaxError(int position, string message) => ScanError(position, message, Token.T);
  /// <summary>
  /// Raises a compile error at the reader's current position, reporting the current token as the "near" token.
  /// </summary>
  /// <param name="message">Error text.</param>
  public void SyntaxError(string message) => ScanError(R.Position, message, Token.T);
  /// <summary>
  /// Raises a syntax error saying that token <paramref name="t"/> was expected.
  /// </summary>
  /// <param name="position">Reader position the error is attributed to.</param>
  /// <param name="t">
  /// Token kind code. Typed as <c>int</c> because reserved-token codes start at
  /// <see cref="FirstReserved"/> (ushort.MaxValue + 257) and do not fit in a 16-bit
  /// <c>char</c>; a <c>char</c> argument still converts implicitly, so existing callers compile unchanged.
  /// </param>
  public void ErrorExpected(int position, int t)
  {
      SyntaxError(position, TokenToString(t) + " expected");
  }
  /// <summary>
  /// Raises a "malformed number" error. The offending source text is installed as the
  /// current token so that it appears as the "near" token in the message.
  /// </summary>
  /// <param name="numberStartPosition">Start index of the numeral within <c>R.Span</c>.</param>
  /// <param name="position">Error position; the reported text ends at <c>position - 1</c> (exclusive).</param>
  public void NumberError(int numberStartPosition, int position)
  {
      Buffer.Clear();
      var literal = R.Span[numberStartPosition..(position - 1)].ToString();
      Token = new(numberStartPosition, TkString, literal);
      ScanError(position, "malformed number", TkString);
  }
  /// <summary>Returns true when <paramref name="c"/> is a line feed or a carriage return.</summary>
  public static bool IsNewLine(int c) => c == '\n' || c == '\r';
  /// <summary>Returns true when <paramref name="c"/> is an ASCII digit '0'..'9'.</summary>
  public static bool IsDecimal(int c) => c >= '0' && c <= '9';
  /// <summary>
  /// Produces the display text of <paramref name="t"/>: its own text for names and strings,
  /// its numeric value for numbers, the character itself for single-character tokens,
  /// the quoted spelling for reserved words and operators, and the unquoted table entry otherwise.
  /// </summary>
  public static string TokenToString(Token t)
  {
      var kind = t.T;
      if (kind is TkName or TkString)
      {
          return t.S;
      }

      if (kind == TkNumber)
      {
          return $"{t.N}";
      }

      if (kind < FirstReserved)
      {
          // TODO check for printable rune
          return $"{(char)kind}";
      }

      var spelling = tokens[kind - FirstReserved];
      return kind < TkEos ? $"'{spelling}'" : spelling;
  }
  /// <summary>
  /// Produces the display text for token kind <paramref name="t"/>. For names, strings and
  /// numbers the text or value is taken from the scanner's current <see cref="Token"/>.
  /// </summary>
  public string TokenToString(int t)
  {
      if (t is TkName or TkString)
      {
          return Token.S;
      }

      if (t == TkNumber)
      {
          return $"{Token.N}";
      }

      if (t < FirstReserved)
      {
          // TODO check for printable rune
          return $"{(char)t}";
      }

      var spelling = tokens[t - FirstReserved];
      return t < TkEos ? $"'{spelling}'" : spelling;
  }
  /// <summary>
  /// Produces the display text for a token kind without consulting any token value:
  /// the character itself below <see cref="FirstReserved"/>, otherwise the table entry,
  /// quoted for every code up to and including <see cref="TkString"/>.
  /// </summary>
  public static string TokenRuteToString(int t)
  {
      if (t < FirstReserved)
      {
          // TODO check for printable rune
          return $"{(char)t}";
      }

      var spelling = tokens[t - FirstReserved];
      return t <= TkString ? $"'{spelling}'" : spelling;
  }
  /// <summary>
  /// Throws a <c>LuaCompileException</c> describing an error at <paramref name="pos"/>.
  /// </summary>
  /// <param name="pos">Reader position of the error; the column is derived relative to the last line break.</param>
  /// <param name="message">Error text.</param>
  /// <param name="token">Kind of the token to report as "near"; 0 means no near-token.</param>
  public void ScanError(int pos, string message, int token)
  {
      Span<char> scratch = stackalloc char[59];
      var written = LuaDebug.WriteShortSource(Source, scratch);
      var chunkName = scratch[..written].ToString();

      string? nearToken = token != 0 ? TokenToString(token) : null;

      throw new LuaCompileException(chunkName, new(LineNumber, pos - lastNewLinePos + 1), pos - 1, message, nearToken);
  }
  /// <summary>
  /// Consumes the line break at <see cref="Current"/> and bumps the line counter.
  /// A two-character break made of different newline characters ("\r\n" or "\n\r")
  /// counts as a single line. Raises a syntax error when the counter reaches <c>MaxLine</c>.
  /// </summary>
  public void IncrementLineNumber()
  {
      var first = Current;
      Assert(IsNewLine(first));
      Advance();

      var secondHalfOfPair = IsNewLine(Current) && Current != first;
      if (secondHalfOfPair)
      {
          Advance();
      }

      // Remember where the new line starts so error columns can be computed later.
      lastNewLinePos = R.Position;

      LineNumber++;
      if (LineNumber >= MaxLine)
      {
          SyntaxError(lastNewLinePos, "chunk has too many lines");
      }
  }
  /// <summary>
  /// Reads the next character from <see cref="R"/> into <see cref="Current"/>,
  /// or stores <see cref="EndOfStream"/> when the reader has nothing left.
  /// </summary>
  public void Advance()
  {
      if (R.TryRead(out var next))
      {
          Current = next;
      }
      else
      {
          Current = EndOfStream;
      }
  }
  /// <summary>Appends <see cref="Current"/> to the token buffer, then reads the next character.</summary>
  public void SaveAndAdvance()
  {
      Buffer.Add((char)Current);
      Advance();
  }
  /// <summary>
  /// Reads the next character, then appends <paramref name="c"/> (not the character just skipped) to the token buffer.
  /// </summary>
  public void AdvanceAndSave(int c)
  {
      Advance();
      Buffer.Add((char)c);
  }
  /// <summary>Appends <paramref name="c"/>, narrowed to <c>char</c>, to the token buffer.</summary>
  public void Save(int c) => Buffer.Add((char)c);
  /// <summary>
  /// If <see cref="Current"/> is one of the characters in <paramref name="str"/>, saves it,
  /// advances and returns true; otherwise leaves the scanner untouched and returns false.
  /// A NUL character never matches.
  /// </summary>
  public bool CheckNext(string str)
  {
      if (Current != 0 && str.Contains((char)Current))
      {
          SaveAndAdvance();
          return true;
      }

      return false;
  }
  /// <summary>
  /// Consumes a long-bracket prefix: the bracket at <see cref="Current"/> followed by any run of '='.
  /// Every consumed character is saved to the token buffer.
  /// </summary>
  /// <returns>
  /// The number of '=' characters when the run is followed by the same bracket again
  /// (which is left unconsumed); otherwise <c>-(count) - 1</c>.
  /// </returns>
  public int SkipSeparator()
  {
      var bracket = Current;
      Assert(bracket is '[' or ']');

      SaveAndAdvance();
      var level = 0;
      while (Current == '=')
      {
          SaveAndAdvance();
          level++;
      }

      return Current == bracket ? level : -level - 1;
  }
  /// <summary>
  /// Reads the body of a long bracket (long string or long comment) up to the closing
  /// bracket of level <paramref name="sep"/>. Expects <see cref="Current"/> to be the
  /// second opening bracket, with the opening prefix already in the token buffer.
  /// </summary>
  /// <param name="comment">True for a comment: ordinary characters are not buffered and "" is returned.</param>
  /// <param name="sep">Bracket level, i.e. the number of '=' in the delimiter.</param>
  /// <returns>The text between the delimiters, or "" for a comment.</returns>
  public string ReadMultiLine(bool comment, int sep)
  {
      SaveAndAdvance();

      // A line break directly after the opening bracket is skipped, not stored.
      if (IsNewLine(Current))
      {
          IncrementLineNumber();
      }

      while (true)
      {
          var ch = Current;
          if (ch == EndOfStream)
          {
              ScanError(R.Position, comment ? "unfinished long comment" : "unfinished long string", TkEos);
          }
          else if (ch == ']')
          {
              if (SkipSeparator() != sep)
              {
                  // Not our closing delimiter; what was consumed stays in the buffer as content.
                  continue;
              }

              SaveAndAdvance();
              var text = "";
              if (!comment)
              {
                  // Strip the "[==[" and "]==]" delimiters that were buffered along with the content.
                  text = Buffer.AsSpan().Slice(2 + sep, Buffer.Length - (4 + (2 * sep))).ToString();
              }

              Buffer.Clear();
              return text;
          }
          else if (ch is '\r' or '\n')
          {
              // Any line-break form is stored as a single '\n'.
              Save('\n');
              IncrementLineNumber();
          }
          else
          {
              if (!comment)
              {
                  Save(ch);
              }

              Advance();
          }
      }
  }
  /// <summary>
  /// Saves and consumes a run of decimal digits starting at <see cref="Current"/>.
  /// </summary>
  /// <returns>The first character that is not a decimal digit (left unconsumed).</returns>
  public int ReadDigits()
  {
      while (IsDecimal(Current))
      {
          SaveAndAdvance();
      }

      return Current;
  }
  /// <summary>Returns true when <paramref name="c"/> is an ASCII hexadecimal digit (0-9, a-f, A-F).</summary>
  public static bool IsHexadecimal(int c)
  {
      if (c >= '0' && c <= '9')
      {
          return true;
      }

      if (c >= 'a' && c <= 'f')
      {
          return true;
      }

      return c >= 'A' && c <= 'F';
  }
  /// <summary>
  /// Accumulates a run of hexadecimal digits starting at <see cref="Current"/> onto
  /// <paramref name="x"/> (each digit multiplies the running value by 16).
  /// </summary>
  /// <param name="x">Value accumulated so far (0 for the integer part, the integer part when reading the fraction).</param>
  /// <param name="position">Position counter; incremented once on entry when at least one digit follows, and once per digit consumed.</param>
  /// <returns>
  /// <c>n</c>: the accumulated value; <c>c</c>: the character that ended the run (left unconsumed);
  /// <c>i</c>: the number of digits consumed. When the digits are immediately followed by a
  /// letter or underscore other than 'p'/'P' (e.g. "0x1g") the numeral is malformed and
  /// <c>(n, 0, 0)</c> is returned.
  /// </returns>
  public (double n, int c, int i) ReadHexNumber(double x, ref int position)
  {
      var c = Current;
      var n = x;
      if (!IsHexadecimal(c))
      {
          return (n, c, 0);
      }

      position++;
      var i = 0;
      for (;;)
      {
          switch (c)
          {
              case >= '0' and <= '9':
                  c = c - '0';
                  break;
              case >= 'a' and <= 'f':
                  c = c - 'a' + 10;
                  break;
              case >= 'A' and <= 'F':
                  c = c - 'A' + 10;
                  break;
              case 'p' or 'P':
                  // Binary-exponent marker: a legitimate end of the digit run.
                  return (n, c, i);
              default:
                  if (IsLetter(c))
                  {
                      // An identifier character glued to the digits makes the numeral malformed.
                      return (n, 0, 0);
                  }

                  // Anything else (end of stream, whitespace, operators, brackets, ';', ...)
                  // simply terminates the numeral, so "0x10+1" or "t[0x1]" scan correctly
                  // and the digit count needed to scale a fraction is preserved.
                  return (n, c, i);
          }

          Advance();
          position++;
          (c, n, i) = (Current, (n * 16.0) + c, i + 1);
      }
  }
  /// <summary>
  /// Scans a numeric literal whose first digit is at <see cref="Current"/> and returns it as a number token.
  /// Handles decimal literals (optional fraction and 'e'/'E' exponent) and hexadecimal
  /// literals introduced by "0x"/"0X" (optional fraction and 'p'/'P' binary exponent).
  /// Raises "malformed number" through <see cref="NumberError"/> when the text cannot be converted.
  /// </summary>
  /// <param name="pos">Position of the literal as seen by the caller; stored in the returned token.</param>
  public Token ReadNumber(int pos)
  {
      var numberStart = pos - 1;
      var first = Current;
      Assert(IsDecimal(first));
      SaveAndAdvance();

      var isHex = first == '0' && CheckNext("Xx");
      if (!isHex)
      {
          // Decimal literal: digits [ '.' digits ] [ ('e'|'E') [sign] digits ]
          var next = ReadDigits();
          if (next == '.')
          {
              SaveAndAdvance();
              next = ReadDigits();
          }

          if (next is 'e' or 'E')
          {
              SaveAndAdvance();
              if (Current is '+' or '-')
              {
                  SaveAndAdvance();
              }

              _ = ReadDigits();
          }

          var digits = Buffer.AsSpan();
          if (digits.StartsWith("0"))
          {
              if (digits.Length == 1)
              {
                  Buffer.Clear();
                  return new(pos, 0d);
              }

              // Collapse a run of leading zeros down to a single one before parsing.
              while (digits.Length > 1 && digits[0] == '0' && digits[1] == '0')
              {
                  digits = digits[1..];
              }
          }

          if (!double.TryParse(digits, NumberStyles.Float, CultureInfo.InvariantCulture, out var value))
          {
              NumberError(numberStart, pos);
          }

          Buffer.Clear();
          return new(pos, value);
      }

      // Hexadecimal literal. The "0x" prefix is dropped from the buffer; the buffer is
      // only reused later to collect the digits of the binary exponent.
      pos++;
      Buffer.Clear();
      var exponent = 0;
      var (mantissa, stop, integerDigits) = ReadHexNumber(0, ref pos);
      if (stop == '.')
      {
          Advance();
          (mantissa, stop, exponent) = ReadHexNumber(mantissa, ref pos);
      }

      if (integerDigits == 0 && exponent == 0)
      {
          NumberError(numberStart, pos);
      }

      // Each fractional hex digit scales the value by 2^-4.
      exponent *= -4;
      if (stop is 'p' or 'P')
      {
          Advance();
          var negativeExponent = false;
          var sign = Current;
          if (sign is '+' or '-')
          {
              negativeExponent = sign == '-';
              Advance();
          }

          if (!IsDecimal(Current))
          {
              NumberError(numberStart, pos + 1);
          }

          _ = ReadDigits();
          if (!long.TryParse(Buffer.AsSpan(), NumberStyles.Float, CultureInfo.InvariantCulture, out var e))
          {
              NumberError(numberStart, pos + 1);
          }
          else if (negativeExponent)
          {
              exponent += (int)-e;
          }
          else
          {
              exponent += (int)e;
          }

          Buffer.Clear();
      }

      return new(pos, mantissa * Math.Pow(2, exponent));
  }
  // Single-character escape sequences: the character after the backslash mapped to the character it stands for.
  static readonly Dictionary<int, char> escapes = new()
  {
      ['a'] = '\a',
      ['b'] = '\b',
      ['f'] = '\f',
      ['n'] = '\n',
      ['r'] = '\r',
      ['t'] = '\t',
      ['v'] = '\v',
      ['\\'] = '\\',
      ['"'] = '"',
      ['\''] = '\''
  };
  /// <summary>
  /// Raises an error for a bad escape sequence. The sequence is rebuilt as <c>'\...'</c>
  /// from <paramref name="c"/> (stopping at the first <see cref="EndOfStream"/> entry) and
  /// installed as the current token so it is reported as the "near" token.
  /// </summary>
  /// <param name="pos">Reader position of the error.</param>
  /// <param name="c">Characters that followed the backslash.</param>
  /// <param name="message">Error text.</param>
  public void EscapeError(int pos, ReadOnlySpan<int> c, string message)
  {
      Buffer.Clear();
      Save('\'');
      Save('\\');
      for (var k = 0; k < c.Length && c[k] != EndOfStream; k++)
      {
          Save(c[k]);
      }

      Save('\'');

      var quoted = Buffer.AsSpan().ToString();
      Token = new(pos - Buffer.Length, TkString, quoted);
      Buffer.Clear();
      ScanError(pos, message, TkString);
  }
  /// <summary>
  /// Reads a <c>\xXX</c> escape. On entry <see cref="Current"/> is the 'x'; exactly two
  /// hexadecimal digits are consumed after it.
  /// </summary>
  /// <returns>The value of the two digits.</returns>
  public int ReadHexEscape()
  {
      Advance();

      // Characters seen so far, kept only for the error message.
      Span<int> seen = stackalloc int[3] { 'x', 0, 0 };
      var value = 0;
      for (var slot = 1; slot < seen.Length; slot++)
      {
          var digit = Current;
          seen[slot] = digit;
          if (digit is >= '0' and <= '9')
          {
              digit -= '0';
          }
          else if (digit is >= 'a' and <= 'f')
          {
              digit -= 'a' - 10;
          }
          else if (digit is >= 'A' and <= 'F')
          {
              digit -= 'A' - 10;
          }
          else
          {
              EscapeError(R.Position - 1, seen.Slice(0, slot + 1), "hexadecimal digit expected");
          }

          Advance();
          value = (value << 4) + digit;
      }

      return value;
  }
  /// <summary>
  /// Reads a decimal escape (<c>\d</c>, <c>\dd</c> or <c>\ddd</c>) whose first digit is at
  /// <see cref="Current"/>, consuming at most three digits.
  /// </summary>
  /// <returns>The decoded value; an error is raised when it exceeds 255.</returns>
  public int ReadDecimalEscape()
  {
      Span<int> digits = stackalloc int[3] { 0, 0, 0 };
      var value = 0;
      var pos = R.Position;
      var count = 0;
      while (count < digits.Length && IsDecimal(Current))
      {
          var d = Current;
          digits[count] = d;
          count++;
          value = (10 * value) + d - '0';
          Advance();
          pos = R.Position;
      }

      if (value > 255)
      {
          EscapeError(pos - 1, digits, "decimal escape too large");
      }

      return value;
  }
  /// <summary>
  /// Scans a quoted string whose opening delimiter is at <see cref="Current"/> and returns
  /// it as a <see cref="TkString"/> token with escape sequences decoded. Raises
  /// "unfinished string" on end of stream or an unescaped line break.
  /// </summary>
  public Token ReadString()
  {
      var pos = R.Position;
      var delimiter = Current;
      SaveAndAdvance();
      while (Current != delimiter)
      {
          switch (Current)
          {
              case EndOfStream:
                  Token = new(R.Position - Buffer.Length, TkString, Buffer.AsSpan().ToString());
                  ScanError(R.Position, "unfinished string", TkEos);
                  break;

              case '\n' or '\r':
                  Token = new(R.Position - Buffer.Length, TkString, Buffer.AsSpan().ToString());
                  ScanError(R.Position, "unfinished string", TkString);
                  break;

              case '\\':
                  Advance();
                  var escaped = Current;
                  if (escapes.TryGetValue(escaped, out var replacement))
                  {
                      AdvanceAndSave(replacement);
                      break;
                  }

                  switch (escaped)
                  {
                      case '\n' or '\r':
                          // Escaped line break: stored as a single '\n'.
                          IncrementLineNumber();
                          Save('\n');
                          break;

                      case EndOfStream:
                          // Nothing to do; the outer loop reports the unfinished string.
                          break;

                      case 'x':
                          Save(ReadHexEscape());
                          break;

                      case 'z':
                          // "\z" skips the following run of whitespace, line breaks included.
                          Advance();
                          while (IsWhiteSpace(Current))
                          {
                              if (IsNewLine(Current))
                              {
                                  IncrementLineNumber();
                              }
                              else
                              {
                                  Advance();
                              }
                          }

                          break;

                      case >= '0' and <= '9':
                          Save(ReadDecimalEscape());
                          break;

                      default:
                          EscapeError(R.Position - 1, [escaped], "invalid escape sequence");
                          break;
                  }

                  break;

              default:
                  SaveAndAdvance();
                  break;
          }
      }

      // Consume the closing delimiter; both delimiters are in the buffer and get stripped below.
      SaveAndAdvance();
      var contentLength = Buffer.Length - 2;
      var text = Buffer.AsSpan().Slice(1, contentLength).ToString();
      Buffer.Clear();
      return new(pos, TkString, text);
  }
  /// <summary>
  /// Returns true when <paramref name="s"/> is one of the reserved words ("and" .. "while").
  /// Only the first <see cref="ReservedCount"/> entries of the token table are consulted,
  /// so operator spellings ("..", "==") and placeholders ("&lt;eof&gt;", "&lt;name&gt;") are not reported as reserved.
  /// </summary>
  public static bool IsReserved(string s)
  {
      foreach (var keyword in Tokens[..ReservedCount])
      {
          if (s == keyword)
          {
              return true;
          }
      }

      return false;
  }
  /// <summary>
  /// Turns the buffered text into a token: the matching token-table code when the text equals
  /// a table entry, otherwise a <see cref="TkName"/>. The buffer is cleared.
  /// </summary>
  public Token ReservedOrName()
  {
      var start = R.Position - Buffer.Length;
      var text = Buffer.AsSpan().ToString();
      Buffer.Clear();

      var index = 0;
      foreach (var candidate in Tokens)
      {
          if (text == candidate)
          {
              return new(start, index + FirstReserved, text);
          }

          index++;
      }

      return new(start, TkName, text);
  }
  /// <summary>
  /// Scans and returns the next token, skipping whitespace, line breaks, NUL characters and comments.
  /// Returns a <see cref="TkEos"/> token at end of stream.
  /// </summary>
  /// <remarks>
  /// <c>pos</c> tracks the reader position at which the token being returned starts. It is
  /// refreshed after everything that is skipped - whitespace, NUL, line breaks and comments -
  /// so a token directly following a line break or comment does not inherit a stale position
  /// from before the skipped text.
  /// </remarks>
  public Token Scan()
  {
      const bool comment = true, str = false;
      var pos = R.Position;
      while (true)
      {
          var c = Current;
          switch (c)
          {
              case '\n':
              case '\r':
                  IncrementLineNumber();
                  pos = R.Position;
                  break;
              case ' ':
              case '\f':
              case '\t':
              case '\v':
                  Advance();
                  pos = R.Position;
                  break;
              case '-':
                  Advance();
                  if (Current != '-')
                  {
                      return new(pos, '-');
                  }

                  // "--" starts a comment: long ("--[[ ... ]]", "--[==[ ... ]==]") or to end of line.
                  Advance();
                  if (Current == '[')
                  {
                      var sep = SkipSeparator();
                      if (sep >= 0)
                      {
                          _ = ReadMultiLine(comment, sep);
                          pos = R.Position;
                          break;
                      }

                      // Not a long bracket after all; discard what SkipSeparator buffered.
                      Buffer.Clear();
                  }

                  while (!IsNewLine(Current) && Current != EndOfStream)
                  {
                      Advance();
                  }

                  pos = R.Position;
                  break;
              case '[':
              {
                  var sep = SkipSeparator();
                  if (sep >= 0)
                  {
                      return new(pos, TkString, ReadMultiLine(str, sep));
                  }

                  Buffer.Clear();
                  if (sep == -1)
                  {
                      // A lone '[' with no '=' run: ordinary bracket token.
                      return new(pos, '[');
                  }

                  ScanError(pos, "invalid long string delimiter", TkString);
                  break;
              }
              case '=':
                  Advance();
                  if (Current != '=')
                  {
                      return new(pos, '=');
                  }

                  Advance();
                  return new(pos, TkEq);
              case '<':
                  Advance();
                  if (Current != '=')
                  {
                      return new(pos, '<');
                  }

                  Advance();
                  return new(pos, TkLe);
              case '>':
                  Advance();
                  if (Current != '=')
                  {
                      return new(pos, '>');
                  }

                  Advance();
                  return new(pos, TkGe);
              case '~':
                  Advance();
                  if (Current != '=')
                  {
                      return new(pos, '~');
                  }

                  Advance();
                  return new(pos, TkNe);
              case ':':
                  Advance();
                  if (Current != ':')
                  {
                      return new(pos, ':');
                  }

                  Advance();
                  return new(pos, TkDoubleColon);
              case '"':
              case '\'':
                  return ReadString();
              case EndOfStream:
                  return new(pos, TkEos);
              case '.':
                  SaveAndAdvance();
                  if (CheckNext("."))
                  {
                      if (CheckNext("."))
                      {
                          Buffer.Clear();
                          return new(pos, TkDots);
                      }

                      Buffer.Clear();
                      return new(pos, TkConcat);
                  }

                  if (!IsDigit(Current))
                  {
                      Buffer.Clear();
                      return new(pos, '.');
                  }

                  // ".5"-style numeral: the '.' is already in the buffer.
                  return ReadNumber(pos);
              case 0:
                  Advance();
                  pos = R.Position;
                  break;
              default:
              {
                  if (IsDigit(c))
                  {
                      return ReadNumber(pos);
                  }

                  if (IsLetter(c))
                  {
                      for (; IsLetter(c) || IsDigit(c); c = Current)
                      {
                          SaveAndAdvance();
                      }

                      return ReservedOrName();
                  }

                  // Any other character is a single-character token with its own code.
                  Advance();
                  return new(pos, c);
              }
          }
      }
  }
  /// <summary>
  /// Advances to the next token. A pending look-ahead token is consumed first (and its slot
  /// reset to an empty <see cref="TkEos"/> marker); otherwise a fresh token is scanned.
  /// <see cref="LastLine"/> records the line number seen on entry.
  /// </summary>
  public void Next()
  {
      LastLine = LineNumber;
      if (LookAheadToken.T == TkEos)
      {
          Token = Scan();
          return;
      }

      Token = LookAheadToken;
      LookAheadToken = new(0, TkEos);
  }
  /// <summary>
  /// Scans one token ahead without consuming the current one and stores it in
  /// <see cref="LookAheadToken"/>. The look-ahead slot must be empty on entry.
  /// </summary>
  /// <returns>The kind of the look-ahead token.</returns>
  public int LookAhead()
  {
      Assert(LookAheadToken.T == TkEos);
      var ahead = Scan();
      LookAheadToken = ahead;
      return LookAheadToken.T;
  }
  /// <summary>
  /// If the current token has kind <paramref name="t"/>, consumes it and returns true;
  /// otherwise leaves the scanner untouched and returns false.
  /// </summary>
  public bool TestNext(int t)
  {
      if (Token.T == t)
      {
          Next();
          return true;
      }

      return false;
  }
  /// <summary>
  /// Raises an "&lt;token&gt; expected" syntax error unless the current token has kind <paramref name="t"/>.
  /// </summary>
  /// <param name="t">Expected token kind.</param>
  public void Check(int t)
  {
      if (Token.T != t)
      {
          // The kind code is kept as an int all the way to TokenToString: narrowing it to a
          // 16-bit char would wrap every code at or above FirstReserved (ushort.MaxValue + 257)
          // and the message would name an unrelated character instead of e.g. 'end'.
          SyntaxError(R.Position, TokenToString(t) + " expected");
      }
  }
  /// <summary>
  /// Consumes the closing token <paramref name="what"/>, or raises a syntax error when it is missing.
  /// </summary>
  /// <param name="what">Kind of the closing token that is required.</param>
  /// <param name="who">Kind of the opening token being closed (used in the message).</param>
  /// <param name="where">Line of the opening token; when it differs from the current line the message mentions it.</param>
  public void CheckMatch(int what, int who, int where)
  {
      if (TestNext(what))
      {
          return;
      }

      if (where == LineNumber)
      {
          // Kind code stays an int: narrowing to char would wrap codes at or above
          // FirstReserved and name the wrong symbol in the message.
          SyntaxError(R.Position, TokenToString(what) + " expected");
      }
      else
      {
          SyntaxError(R.Position, $"{TokenToString(what)} expected (to close {TokenToString(who)} at line {where})");
      }
  }
  // True for space, tab, line feed, carriage return, form feed and vertical tab.
  static bool IsWhiteSpace(int c)
  {
      switch (c)
      {
          case ' ':
          case '\t':
          case '\n':
          case '\r':
          case '\f':
          case '\v':
              return true;
          default:
              return false;
      }
  }
  // True for the ASCII digits '0'..'9'.
  static bool IsDigit(int c) => c >= '0' && c <= '9';
  // True for '_' and the ASCII letters a-z / A-Z (all of which lie below ushort.MaxValue).
  static bool IsLetter(int c)
  {
      if (c >= ushort.MaxValue)
      {
          return false;
      }

      return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  }
  757. }