Explorar o código

Allow mixed tabs and spaces when indentation does not depend on tab size

(hopefully) Closes #30937, fixes #32612
Bojidar Marinov %!s(int64=5) %!d(string=hai) anos
pai
achega
afbde3314a

+ 50 - 26
modules/gdscript/gdscript_parser.cpp

@@ -89,8 +89,8 @@ bool GDScriptParser::_enter_indent_block(BlockNode *p_block) {
 
 	if (tokenizer->get_token() != GDScriptTokenizer::TK_NEWLINE) {
 		// be more python-like
-		int current = tab_level.back()->get();
-		tab_level.push_back(current);
+		IndentLevel current_level = indent_level.back()->get();
+		indent_level.push_back(current_level);
 		return true;
 		//_set_error("newline expected after ':'.");
 		//return false;
@@ -105,12 +105,19 @@ bool GDScriptParser::_enter_indent_block(BlockNode *p_block) {
 		} else if (tokenizer->get_token(1) != GDScriptTokenizer::TK_NEWLINE) {
 
 			int indent = tokenizer->get_token_line_indent();
-			int current = tab_level.back()->get();
-			if (indent <= current) {
+			int tabs = tokenizer->get_token_line_tab_indent();
+			IndentLevel current_level = indent_level.back()->get();
+			IndentLevel new_indent(indent, tabs);
+			if (new_indent.is_mixed(current_level)) {
+				_set_error("Mixed tabs and spaces in indentation.");
 				return false;
 			}
 
-			tab_level.push_back(indent);
+			if (indent <= current_level.indent) {
+				return false;
+			}
+
+			indent_level.push_back(new_indent);
 			tokenizer->advance();
 			return true;
 
@@ -2213,7 +2220,7 @@ GDScriptParser::PatternNode *GDScriptParser::_parse_pattern(bool p_static) {
 }
 
 void GDScriptParser::_parse_pattern_block(BlockNode *p_block, Vector<PatternBranchNode *> &p_branches, bool p_static) {
-	int indent_level = tab_level.back()->get();
+	IndentLevel current_level = indent_level.back()->get();
 
 	p_block->has_return = true;
 
@@ -2228,7 +2235,7 @@ void GDScriptParser::_parse_pattern_block(BlockNode *p_block, Vector<PatternBran
 		if (error_set)
 			return;
 
-		if (indent_level > tab_level.back()->get()) {
+		if (current_level.indent > indent_level.back()->get().indent) {
 			break; // go back a level
 		}
 
@@ -2685,7 +2692,7 @@ void GDScriptParser::_transform_match_statment(MatchNode *p_match_statement) {
 
 void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) {
 
-	int indent_level = tab_level.back()->get();
+	IndentLevel current_level = indent_level.back()->get();
 
 #ifdef DEBUG_ENABLED
 
@@ -2698,9 +2705,13 @@ void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) {
 	bool is_first_line = true;
 
 	while (true) {
-		if (!is_first_line && tab_level.back()->prev() && tab_level.back()->prev()->get() == indent_level) {
+		if (!is_first_line && indent_level.back()->prev() && indent_level.back()->prev()->get().indent == current_level.indent) {
+			if (indent_level.back()->prev()->get().is_mixed(current_level)) {
+				_set_error("Mixed tabs and spaces in indentation.");
+				return;
+			}
 			// pythonic single-line expression, don't parse future lines
-			tab_level.pop_back();
+			indent_level.pop_back();
 			p_block->end_line = tokenizer->get_token_line();
 			return;
 		}
@@ -2710,7 +2721,7 @@ void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) {
 		if (error_set)
 			return;
 
-		if (indent_level > tab_level.back()->get()) {
+		if (current_level.indent > indent_level.back()->get().indent) {
 			p_block->end_line = tokenizer->get_token_line();
 			return; //go back a level
 		}
@@ -2914,14 +2925,14 @@ void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) {
 					while (tokenizer->get_token() == GDScriptTokenizer::TK_NEWLINE && _parse_newline())
 						;
 
-					if (tab_level.back()->get() < indent_level) { //not at current indent level
+					if (indent_level.back()->get().indent < current_level.indent) { //not at current indent level
 						p_block->end_line = tokenizer->get_token_line();
 						return;
 					}
 
 					if (tokenizer->get_token() == GDScriptTokenizer::TK_CF_ELIF) {
 
-						if (tab_level.back()->get() > indent_level) {
+						if (indent_level.back()->get().indent > current_level.indent) {
 
 							_set_error("Invalid indentation.");
 							return;
@@ -2969,7 +2980,7 @@ void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) {
 
 					} else if (tokenizer->get_token() == GDScriptTokenizer::TK_CF_ELSE) {
 
-						if (tab_level.back()->get() > indent_level) {
+						if (indent_level.back()->get().indent > current_level.indent) {
 							_set_error("Invalid indentation.");
 							return;
 						}
@@ -3341,32 +3352,45 @@ bool GDScriptParser::_parse_newline() {
 
 	if (tokenizer->get_token(1) != GDScriptTokenizer::TK_EOF && tokenizer->get_token(1) != GDScriptTokenizer::TK_NEWLINE) {
 
+		IndentLevel current_level = indent_level.back()->get();
 		int indent = tokenizer->get_token_line_indent();
-		int current_indent = tab_level.back()->get();
+		int tabs = tokenizer->get_token_line_tab_indent();
+		IndentLevel new_level(indent, tabs);
+
+		if (new_level.is_mixed(current_level)) {
+			_set_error("Mixed tabs and spaces in indentation.");
+			return false;
+		}
 
-		if (indent > current_indent) {
+		if (indent > current_level.indent) {
 			_set_error("Unexpected indentation.");
 			return false;
 		}
 
-		if (indent < current_indent) {
+		if (indent < current_level.indent) {
 
-			while (indent < current_indent) {
+			while (indent < current_level.indent) {
 
 				//exit block
-				if (tab_level.size() == 1) {
+				if (indent_level.size() == 1) {
 					_set_error("Invalid indentation. Bug?");
 					return false;
 				}
 
-				tab_level.pop_back();
+				indent_level.pop_back();
 
-				if (tab_level.back()->get() < indent) {
+				if (indent_level.back()->get().indent < indent) {
 
 					_set_error("Unindent does not match any outer indentation level.");
 					return false;
 				}
-				current_indent = tab_level.back()->get();
+
+				if (indent_level.back()->get().is_mixed(current_level)) {
+					_set_error("Mixed tabs and spaces in indentation.");
+					return false;
+				}
+
+				current_level = indent_level.back()->get();
 			}
 
 			tokenizer->advance();
@@ -3464,7 +3488,7 @@ void GDScriptParser::_parse_extends(ClassNode *p_class) {
 
 void GDScriptParser::_parse_class(ClassNode *p_class) {
 
-	int indent_level = tab_level.back()->get();
+	IndentLevel current_level = indent_level.back()->get();
 
 	while (true) {
 
@@ -3472,7 +3496,7 @@ void GDScriptParser::_parse_class(ClassNode *p_class) {
 		if (error_set)
 			return;
 
-		if (indent_level > tab_level.back()->get()) {
+		if (current_level.indent > indent_level.back()->get().indent) {
 			p_class->end_line = tokenizer->get_token_line();
 			return; //go back a level
 		}
@@ -8546,8 +8570,8 @@ void GDScriptParser::clear() {
 	validating = false;
 	for_completion = false;
 	error_set = false;
-	tab_level.clear();
-	tab_level.push_back(0);
+	indent_level.clear();
+	indent_level.push_back(IndentLevel(0, 0));
 	error_line = 0;
 	error_column = 0;
 	pending_newline = -1;

+ 21 - 1
modules/gdscript/gdscript_parser.h

@@ -552,7 +552,27 @@ private:
 
 	int pending_newline;
 
-	List<int> tab_level;
+	struct IndentLevel {
+		int indent;
+		int tabs;
+
+		bool is_mixed(IndentLevel other) {
+			return (
+					(indent == other.indent && tabs != other.tabs) ||
+					(indent > other.indent && tabs < other.tabs) ||
+					(indent < other.indent && tabs > other.tabs));
+		}
+
+		IndentLevel() :
+				indent(0),
+				tabs(0) {}
+
+		IndentLevel(int p_indent, int p_tabs) :
+				indent(p_indent),
+				tabs(p_tabs) {}
+	};
+
+	List<IndentLevel> indent_level;
 
 	String base_path;
 	String self_path;

+ 27 - 44
modules/gdscript/gdscript_tokenizer.cpp

@@ -450,11 +450,11 @@ void GDScriptTokenizerText::_make_error(const String &p_error) {
 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
 }
 
-void GDScriptTokenizerText::_make_newline(int p_spaces) {
+void GDScriptTokenizerText::_make_newline(int p_indentation, int p_tabs) {
 
 	TokenData &tk = tk_rb[tk_rb_pos];
 	tk.type = TK_NEWLINE;
-	tk.constant = p_spaces;
+	tk.constant = Vector2(p_indentation, p_tabs);
 	tk.line = line;
 	tk.col = column;
 	tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE;
@@ -511,33 +511,6 @@ void GDScriptTokenizerText::_advance() {
 			case ' ':
 				INCPOS(1);
 				continue;
-			case '\n': {
-				line++;
-				INCPOS(1);
-				column = 1;
-				int i = 0;
-				while (true) {
-					if (GETCHAR(i) == ' ') {
-						if (file_indent_type == INDENT_NONE) file_indent_type = INDENT_SPACES;
-						if (file_indent_type != INDENT_SPACES) {
-							_make_error("Spaces used for indentation in tab-indented file!");
-							return;
-						}
-					} else if (GETCHAR(i) == '\t') {
-						if (file_indent_type == INDENT_NONE) file_indent_type = INDENT_TABS;
-						if (file_indent_type != INDENT_TABS) {
-							_make_error("Tabs used for indentation in space-indented file!");
-							return;
-						}
-					} else {
-						break; // not indentation anymore
-					}
-					i++;
-				}
-
-				_make_newline(i);
-				return;
-			}
 			case '#': { // line comment skip
 #ifdef DEBUG_ENABLED
 				String comment;
@@ -565,33 +538,34 @@ void GDScriptTokenizerText::_advance() {
 					ignore_warnings = true;
 				}
 #endif // DEBUG_ENABLED
+				FALLTHROUGH;
+			}
+			case '\n': {
+				line++;
 				INCPOS(1);
+				bool used_spaces = false;
+				int tabs = 0;
 				column = 1;
-				line++;
 				int i = 0;
 				while (true) {
 					if (GETCHAR(i) == ' ') {
-						if (file_indent_type == INDENT_NONE) file_indent_type = INDENT_SPACES;
-						if (file_indent_type != INDENT_SPACES) {
-							_make_error("Spaces used for indentation in tab-indented file!");
-							return;
-						}
+						i++;
+						used_spaces = true;
 					} else if (GETCHAR(i) == '\t') {
-						if (file_indent_type == INDENT_NONE) file_indent_type = INDENT_TABS;
-						if (file_indent_type != INDENT_TABS) {
-							_make_error("Tabs used for indentation in space-indented file!");
+						if (used_spaces) {
+							_make_error("Spaces used before tabs on a line");
 							return;
 						}
+						i++;
+						tabs++;
 					} else {
 						break; // not indentation anymore
 					}
-					i++;
 				}
 
-				_make_newline(i);
+				_make_newline(i, tabs);
 				return;
-
-			} break;
+			}
 			case '/': {
 
 				switch (GETCHAR(1)) {
@@ -1112,7 +1086,6 @@ void GDScriptTokenizerText::set_code(const String &p_code) {
 	ignore_warnings = false;
 #endif // DEBUG_ENABLED
 	last_error = "";
-	file_indent_type = INDENT_NONE;
 	for (int i = 0; i < MAX_LOOKAHEAD + 1; i++)
 		_advance();
 }
@@ -1187,7 +1160,17 @@ int GDScriptTokenizerText::get_token_line_indent(int p_offset) const {
 
 	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
 	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
-	return tk_rb[ofs].constant;
+	return tk_rb[ofs].constant.operator Vector2().x;
+}
+
+int GDScriptTokenizerText::get_token_line_tab_indent(int p_offset) const {
+
+	ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0);
+	ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0);
+
+	int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE;
+	ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0);
+	return tk_rb[ofs].constant.operator Vector2().y;
 }
 
 String GDScriptTokenizerText::get_token_error(int p_offset) const {

+ 4 - 6
modules/gdscript/gdscript_tokenizer.h

@@ -168,6 +168,7 @@ public:
 	virtual int get_token_line(int p_offset = 0) const = 0;
 	virtual int get_token_column(int p_offset = 0) const = 0;
 	virtual int get_token_line_indent(int p_offset = 0) const = 0;
+	virtual int get_token_line_tab_indent(int p_offset = 0) const = 0;
 	virtual String get_token_error(int p_offset = 0) const = 0;
 	virtual void advance(int p_amount = 1) = 0;
 #ifdef DEBUG_ENABLED
@@ -205,7 +206,7 @@ class GDScriptTokenizerText : public GDScriptTokenizer {
 	};
 
 	void _make_token(Token p_type);
-	void _make_newline(int p_spaces = 0);
+	void _make_newline(int p_indentation = 0, int p_tabs = 0);
 	void _make_identifier(const StringName &p_identifier);
 	void _make_built_in_func(GDScriptFunctions::Function p_func);
 	void _make_constant(const Variant &p_constant);
@@ -222,11 +223,6 @@ class GDScriptTokenizerText : public GDScriptTokenizer {
 	int tk_rb_pos;
 	String last_error;
 	bool error_flag;
-	enum {
-		INDENT_NONE,
-		INDENT_SPACES,
-		INDENT_TABS,
-	} file_indent_type;
 
 #ifdef DEBUG_ENABLED
 	Vector<Pair<int, String> > warning_skips;
@@ -245,6 +241,7 @@ public:
 	virtual int get_token_line(int p_offset = 0) const;
 	virtual int get_token_column(int p_offset = 0) const;
 	virtual int get_token_line_indent(int p_offset = 0) const;
+	virtual int get_token_line_tab_indent(int p_offset = 0) const;
 	virtual const Variant &get_token_constant(int p_offset = 0) const;
 	virtual String get_token_error(int p_offset = 0) const;
 	virtual void advance(int p_amount = 1);
@@ -283,6 +280,7 @@ public:
 	virtual int get_token_line(int p_offset = 0) const;
 	virtual int get_token_column(int p_offset = 0) const;
 	virtual int get_token_line_indent(int p_offset = 0) const;
+	virtual int get_token_line_tab_indent(int p_offset = 0) const { return 0; }
 	virtual const Variant &get_token_constant(int p_offset = 0) const;
 	virtual String get_token_error(int p_offset = 0) const;
 	virtual void advance(int p_amount = 1);