gdscript_tokenizer.h 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. /**************************************************************************/
  2. /* gdscript_tokenizer.h */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #pragma once
  31. #include "core/templates/hash_map.h"
  32. #include "core/templates/list.h"
  33. #include "core/templates/vector.h"
  34. #include "core/variant/variant.h"
  35. class GDScriptTokenizer {
  36. public:
  37. enum CursorPlace {
  38. CURSOR_NONE,
  39. CURSOR_BEGINNING,
  40. CURSOR_MIDDLE,
  41. CURSOR_END,
  42. };
  43. struct Token {
  44. // If this enum changes, please increment the TOKENIZER_VERSION in gdscript_tokenizer_buffer.h
  45. enum Type {
  46. EMPTY,
  47. // Basic
  48. ANNOTATION,
  49. IDENTIFIER,
  50. LITERAL,
  51. // Comparison
  52. LESS,
  53. LESS_EQUAL,
  54. GREATER,
  55. GREATER_EQUAL,
  56. EQUAL_EQUAL,
  57. BANG_EQUAL,
  58. // Logical
  59. AND,
  60. OR,
  61. NOT,
  62. AMPERSAND_AMPERSAND,
  63. PIPE_PIPE,
  64. BANG,
  65. // Bitwise
  66. AMPERSAND,
  67. PIPE,
  68. TILDE,
  69. CARET,
  70. LESS_LESS,
  71. GREATER_GREATER,
  72. // Math
  73. PLUS,
  74. MINUS,
  75. STAR,
  76. STAR_STAR,
  77. SLASH,
  78. PERCENT,
  79. // Assignment
  80. EQUAL,
  81. PLUS_EQUAL,
  82. MINUS_EQUAL,
  83. STAR_EQUAL,
  84. STAR_STAR_EQUAL,
  85. SLASH_EQUAL,
  86. PERCENT_EQUAL,
  87. LESS_LESS_EQUAL,
  88. GREATER_GREATER_EQUAL,
  89. AMPERSAND_EQUAL,
  90. PIPE_EQUAL,
  91. CARET_EQUAL,
  92. // Control flow
  93. IF,
  94. ELIF,
  95. ELSE,
  96. FOR,
  97. WHILE,
  98. BREAK,
  99. CONTINUE,
  100. PASS,
  101. RETURN,
  102. MATCH,
  103. WHEN,
  104. // Keywords
  105. ABSTRACT,
  106. AS,
  107. ASSERT,
  108. AWAIT,
  109. BREAKPOINT,
  110. CLASS,
  111. CLASS_NAME,
  112. TK_CONST, // Conflict with WinAPI.
  113. ENUM,
  114. EXTENDS,
  115. FUNC,
  116. TK_IN, // Conflict with WinAPI.
  117. IS,
  118. NAMESPACE,
  119. PRELOAD,
  120. SELF,
  121. SIGNAL,
  122. STATIC,
  123. SUPER,
  124. TRAIT,
  125. VAR,
  126. TK_VOID, // Conflict with WinAPI.
  127. YIELD,
  128. // Punctuation
  129. BRACKET_OPEN,
  130. BRACKET_CLOSE,
  131. BRACE_OPEN,
  132. BRACE_CLOSE,
  133. PARENTHESIS_OPEN,
  134. PARENTHESIS_CLOSE,
  135. COMMA,
  136. SEMICOLON,
  137. PERIOD,
  138. PERIOD_PERIOD,
  139. PERIOD_PERIOD_PERIOD,
  140. COLON,
  141. DOLLAR,
  142. FORWARD_ARROW,
  143. UNDERSCORE,
  144. // Whitespace
  145. NEWLINE,
  146. INDENT,
  147. DEDENT,
  148. // Constants
  149. CONST_PI,
  150. CONST_TAU,
  151. CONST_INF,
  152. CONST_NAN,
  153. // Error message improvement
  154. VCS_CONFLICT_MARKER,
  155. BACKTICK,
  156. QUESTION_MARK,
  157. // Special
  158. ERROR,
  159. TK_EOF, // "EOF" is reserved
  160. TK_MAX
  161. };
  162. Type type = EMPTY;
  163. Variant literal;
  164. int start_line = 0, end_line = 0, start_column = 0, end_column = 0;
  165. int cursor_position = -1;
  166. CursorPlace cursor_place = CURSOR_NONE;
  167. String source;
  168. const char *get_name() const;
  169. String get_debug_name() const;
  170. bool can_precede_bin_op() const;
  171. bool is_identifier() const;
  172. bool is_node_name() const;
  173. StringName get_identifier() const { return literal; }
  174. Token(Type p_type) {
  175. type = p_type;
  176. }
  177. Token() {}
  178. };
  179. #ifdef TOOLS_ENABLED
  180. struct CommentData {
  181. String comment;
  182. // true: Comment starts at beginning of line or after indentation.
  183. // false: Inline comment (starts after some code).
  184. bool new_line = false;
  185. CommentData() {}
  186. CommentData(const String &p_comment, bool p_new_line) {
  187. comment = p_comment;
  188. new_line = p_new_line;
  189. }
  190. };
  191. virtual const HashMap<int, CommentData> &get_comments() const = 0;
  192. #endif // TOOLS_ENABLED
  193. static String get_token_name(Token::Type p_token_type);
  194. #ifdef TOOLS_ENABLED
  195. // This is a temporary solution, as Tokens are not able to store their position, only lines and columns.
  196. virtual int get_current_position() const { return 0; }
  197. virtual String get_source_code() const { return ""; }
  198. #endif // TOOLS_ENABLED
  199. virtual int get_cursor_line() const = 0;
  200. virtual int get_cursor_column() const = 0;
  201. virtual void set_cursor_position(int p_line, int p_column) = 0;
  202. virtual void set_multiline_mode(bool p_state) = 0;
  203. virtual bool is_past_cursor() const = 0;
  204. virtual void push_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.
  205. virtual void pop_expression_indented_block() = 0; // For lambdas, or blocks inside expressions.
  206. virtual bool is_text() = 0;
  207. virtual Token scan() = 0;
  208. virtual ~GDScriptTokenizer() {}
  209. };
  210. class GDScriptTokenizerText : public GDScriptTokenizer {
  211. String source;
  212. const char32_t *_source = nullptr;
  213. const char32_t *_current = nullptr;
  214. int line = -1, column = -1;
  215. int cursor_line = -1, cursor_column = -1;
  216. int tab_size = 4;
  217. // Keep track of multichar tokens.
  218. const char32_t *_start = nullptr;
  219. int start_line = 0, start_column = 0;
  220. // Info cache.
  221. bool line_continuation = false; // Whether this line is a continuation of the previous, like when using '\'.
  222. bool multiline_mode = false;
  223. List<Token> error_stack;
  224. bool pending_newline = false;
  225. Token last_token;
  226. Token last_newline;
  227. int pending_indents = 0;
  228. List<int> indent_stack;
  229. List<List<int>> indent_stack_stack; // For lambdas, which require manipulating the indentation point.
  230. List<char32_t> paren_stack;
  231. char32_t indent_char = '\0';
  232. int position = 0;
  233. int length = 0;
  234. Vector<int> continuation_lines;
  235. #ifdef DEBUG_ENABLED
  236. Vector<String> keyword_list;
  237. #endif // DEBUG_ENABLED
  238. #ifdef TOOLS_ENABLED
  239. HashMap<int, CommentData> comments;
  240. #endif // TOOLS_ENABLED
  241. _FORCE_INLINE_ bool _is_at_end() { return position >= length; }
  242. _FORCE_INLINE_ char32_t _peek(int p_offset = 0) { return position + p_offset >= 0 && position + p_offset < length ? _current[p_offset] : '\0'; }
  243. int indent_level() const { return indent_stack.size(); }
  244. bool has_error() const { return !error_stack.is_empty(); }
  245. Token pop_error();
  246. char32_t _advance();
  247. String _get_indent_char_name(char32_t ch);
  248. void _skip_whitespace();
  249. void check_indent();
  250. #ifdef DEBUG_ENABLED
  251. void make_keyword_list();
  252. #endif // DEBUG_ENABLED
  253. Token make_error(const String &p_message);
  254. void push_error(const String &p_message);
  255. void push_error(const Token &p_error);
  256. Token make_paren_error(char32_t p_paren);
  257. Token make_token(Token::Type p_type);
  258. Token make_literal(const Variant &p_literal);
  259. Token make_identifier(const StringName &p_identifier);
  260. Token check_vcs_marker(char32_t p_test, Token::Type p_double_type);
  261. void push_paren(char32_t p_char);
  262. bool pop_paren(char32_t p_expected);
  263. void newline(bool p_make_token);
  264. Token number();
  265. Token potential_identifier();
  266. Token string();
  267. Token annotation();
  268. public:
  269. void set_source_code(const String &p_source_code);
  270. const Vector<int> &get_continuation_lines() const { return continuation_lines; }
  271. #ifdef TOOLS_ENABLED
  272. virtual int get_current_position() const override { return position; }
  273. virtual String get_source_code() const override { return source; }
  274. #endif // TOOLS_ENABLED
  275. virtual int get_cursor_line() const override;
  276. virtual int get_cursor_column() const override;
  277. virtual void set_cursor_position(int p_line, int p_column) override;
  278. virtual void set_multiline_mode(bool p_state) override;
  279. virtual bool is_past_cursor() const override;
  280. virtual void push_expression_indented_block() override; // For lambdas, or blocks inside expressions.
  281. virtual void pop_expression_indented_block() override; // For lambdas, or blocks inside expressions.
  282. virtual bool is_text() override { return true; }
  283. #ifdef TOOLS_ENABLED
  284. virtual const HashMap<int, CommentData> &get_comments() const override {
  285. return comments;
  286. }
  287. #endif // TOOLS_ENABLED
  288. virtual Token scan() override;
  289. GDScriptTokenizerText();
  290. };