gdscript_tokenizer.h 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. /*************************************************************************/
  2. /* gdscript_tokenizer.h */
  3. /*************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /*************************************************************************/
  8. /* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */
  9. /* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /*************************************************************************/
  30. #ifndef GDSCRIPT_TOKENIZER_H
  31. #define GDSCRIPT_TOKENIZER_H
  32. #include "core/list.h"
  33. #include "core/set.h"
  34. #include "core/variant.h"
  35. #include "core/vector.h"
  36. class GDScriptTokenizer {
  37. public:
  38. enum CursorPlace {
  39. CURSOR_NONE,
  40. CURSOR_BEGINNING,
  41. CURSOR_MIDDLE,
  42. CURSOR_END,
  43. };
  44. struct Token {
  45. enum Type {
  46. EMPTY,
  47. // Basic
  48. ANNOTATION,
  49. IDENTIFIER,
  50. LITERAL,
  51. // Comparison
  52. LESS,
  53. LESS_EQUAL,
  54. GREATER,
  55. GREATER_EQUAL,
  56. EQUAL_EQUAL,
  57. BANG_EQUAL,
  58. // Logical
  59. AND,
  60. OR,
  61. NOT,
  62. AMPERSAND_AMPERSAND,
  63. PIPE_PIPE,
  64. BANG,
  65. // Bitwise
  66. AMPERSAND,
  67. PIPE,
  68. TILDE,
  69. CARET,
  70. LESS_LESS,
  71. GREATER_GREATER,
  72. // Math
  73. PLUS,
  74. MINUS,
  75. STAR,
  76. SLASH,
  77. PERCENT,
  78. // Assignment
  79. EQUAL,
  80. PLUS_EQUAL,
  81. MINUS_EQUAL,
  82. STAR_EQUAL,
  83. SLASH_EQUAL,
  84. PERCENT_EQUAL,
  85. LESS_LESS_EQUAL,
  86. GREATER_GREATER_EQUAL,
  87. AMPERSAND_EQUAL,
  88. PIPE_EQUAL,
  89. CARET_EQUAL,
  90. // Control flow
  91. IF,
  92. ELIF,
  93. ELSE,
  94. FOR,
  95. WHILE,
  96. BREAK,
  97. CONTINUE,
  98. PASS,
  99. RETURN,
  100. MATCH,
  101. // Keywords
  102. AS,
  103. ASSERT,
  104. AWAIT,
  105. BREAKPOINT,
  106. CLASS,
  107. CLASS_NAME,
  108. CONST,
  109. ENUM,
  110. EXTENDS,
  111. FUNC,
  112. IN,
  113. IS,
  114. NAMESPACE,
  115. PRELOAD,
  116. SELF,
  117. SIGNAL,
  118. STATIC,
  119. SUPER,
  120. TRAIT,
  121. VAR,
  122. VOID,
  123. YIELD,
  124. // Punctuation
  125. BRACKET_OPEN,
  126. BRACKET_CLOSE,
  127. BRACE_OPEN,
  128. BRACE_CLOSE,
  129. PARENTHESIS_OPEN,
  130. PARENTHESIS_CLOSE,
  131. COMMA,
  132. SEMICOLON,
  133. PERIOD,
  134. PERIOD_PERIOD,
  135. COLON,
  136. DOLLAR,
  137. FORWARD_ARROW,
  138. UNDERSCORE,
  139. // Whitespace
  140. NEWLINE,
  141. INDENT,
  142. DEDENT,
  143. // Constants
  144. CONST_PI,
  145. CONST_TAU,
  146. CONST_INF,
  147. CONST_NAN,
  148. // Error message improvement
  149. VCS_CONFLICT_MARKER,
  150. BACKTICK,
  151. QUESTION_MARK,
  152. // Special
  153. ERROR,
  154. TK_EOF, // "EOF" is reserved
  155. TK_MAX
  156. };
  157. Type type = EMPTY;
  158. Variant literal;
  159. int start_line = 0, end_line = 0, start_column = 0, end_column = 0;
  160. int leftmost_column = 0, rightmost_column = 0; // Column span for multiline tokens.
  161. int cursor_position = -1;
  162. CursorPlace cursor_place = CURSOR_NONE;
  163. String source;
  164. const char *get_name() const;
  165. bool is_identifier() const;
  166. bool is_node_name() const;
  167. StringName get_identifier() const { return source; }
  168. Token(Type p_type) {
  169. type = p_type;
  170. }
  171. Token() {
  172. type = EMPTY;
  173. }
  174. };
  175. private:
  176. String source;
  177. const CharType *_source = nullptr;
  178. const CharType *_current = nullptr;
  179. int line = -1, column = -1;
  180. int cursor_line = -1, cursor_column = -1;
  181. int tab_size = 4;
  182. // Keep track of multichar tokens.
  183. const CharType *_start = nullptr;
  184. int start_line = 0, start_column = 0;
  185. int leftmost_column = 0, rightmost_column = 0;
  186. // Info cache.
  187. bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
  188. bool multiline_mode = false;
  189. List<Token> error_stack;
  190. bool pending_newline = false;
  191. Token last_newline;
  192. int pending_indents = 0;
  193. List<int> indent_stack;
  194. List<CharType> paren_stack;
  195. CharType indent_char = '\0';
  196. int position = 0;
  197. int length = 0;
  198. _FORCE_INLINE_ bool _is_at_end() { return position >= length; }
  199. _FORCE_INLINE_ CharType _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }
  200. int indent_level() const { return indent_stack.size(); }
  201. bool has_error() const { return !error_stack.empty(); }
  202. Token pop_error();
  203. CharType _advance();
  204. void _skip_whitespace();
  205. void check_indent();
  206. Token make_error(const String &p_message);
  207. void push_error(const String &p_message);
  208. void push_error(const Token &p_error);
  209. Token make_paren_error(CharType p_paren);
  210. Token make_token(Token::Type p_type);
  211. Token make_literal(const Variant &p_literal);
  212. Token make_identifier(const StringName &p_identifier);
  213. Token check_vcs_marker(CharType p_test, Token::Type p_double_type);
  214. void push_paren(CharType p_char);
  215. bool pop_paren(CharType p_expected);
  216. void newline(bool p_make_token);
  217. Token number();
  218. Token potential_identifier();
  219. Token string();
  220. Token annotation();
  221. public:
  222. Token scan();
  223. void set_source_code(const String &p_source_code);
  224. int get_cursor_line() const;
  225. int get_cursor_column() const;
  226. void set_cursor_position(int p_line, int p_column);
  227. void set_multiline_mode(bool p_state);
  228. bool is_past_cursor() const;
  229. static String get_token_name(Token::Type p_token_type);
  230. GDScriptTokenizer();
  231. };
  232. #endif