2 jaren geleden · 7548e043fc
--- a/doc/classes/ProjectSettings.xml
+++ b/doc/classes/ProjectSettings.xml
@@ -384,6 +384,9 @@
 
				 		<member name="debug/gdscript/warnings/assert_always_true" type="int" setter="" getter="" default="1">
			
 
				 			When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when an [code]assert[/code] call always evaluates to true.
			
 
				 		</member>
			
 
				+		<member name="debug/gdscript/warnings/confusable_identifier" type="int" setter="" getter="" default="1">
			
 
				+			When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when an identifier contains characters that can be confused with something else, like when mixing different alphabets.
			
 
				+		</member>
			
 
				 		<member name="debug/gdscript/warnings/constant_used_as_function" type="int" setter="" getter="" default="1">
			
 
				 			When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when a constant is used as a function.
			
 
				 		</member>
			
--- a/modules/gdscript/gdscript_parser.cpp
+++ b/modules/gdscript/gdscript_parser.cpp
@@ -41,6 +41,7 @@
 
				 #include "core/os/os.h"
			
 
				 #include "core/string/string_builder.h"
			
 
				 #include "gdscript_warning.h"
			
 
				+#include "servers/text_server.h"
			
 
				 #endif // DEBUG_ENABLED
			
 
				 
			
 
				 #ifdef TOOLS_ENABLED
			
@@ -186,24 +187,6 @@ void GDScriptParser::push_error(const String &p_message, const Node *p_origin) {
 
				 }
			
 
				 
			
 
				 #ifdef DEBUG_ENABLED
			
 
				-void GDScriptParser::push_warning(const Node *p_source, GDScriptWarning::Code p_code, const String &p_symbol1, const String &p_symbol2, const String &p_symbol3, const String &p_symbol4) {
			
 
				-	ERR_FAIL_COND(p_source == nullptr);
			
 
				-	Vector<String> symbols;
			
 
				-	if (!p_symbol1.is_empty()) {
			
 
				-		symbols.push_back(p_symbol1);
			
 
				-	}
			
 
				-	if (!p_symbol2.is_empty()) {
			
 
				-		symbols.push_back(p_symbol2);
			
 
				-	}
			
 
				-	if (!p_symbol3.is_empty()) {
			
 
				-		symbols.push_back(p_symbol3);
			
 
				-	}
			
 
				-	if (!p_symbol4.is_empty()) {
			
 
				-		symbols.push_back(p_symbol4);
			
 
				-	}
			
 
				-	push_warning(p_source, p_code, symbols);
			
 
				-}
			
 
				-
			
 
				 void GDScriptParser::push_warning(const Node *p_source, GDScriptWarning::Code p_code, const Vector<String> &p_symbols) {
			
 
				 	ERR_FAIL_COND(p_source == nullptr);
			
 
				 	if (is_ignoring_warnings) {
			
@@ -2251,7 +2234,14 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_expression(bool p_can_assi
 
				 }
			
 
				 
			
 
				 GDScriptParser::IdentifierNode *GDScriptParser::parse_identifier() {
			
 
				-	return static_cast<IdentifierNode *>(parse_identifier(nullptr, false));
			
 
				+	IdentifierNode *identifier = static_cast<IdentifierNode *>(parse_identifier(nullptr, false));
			
 
				+#ifdef DEBUG_ENABLED
			
 
				+	// Check for spoofing here (if available in TextServer) since this isn't called inside expressions. This is only relevant for declarations.
			
 
				+	if (identifier && TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY) && TS->spoof_check(identifier->name.operator String())) {
			
 
				+		push_warning(identifier, GDScriptWarning::CONFUSABLE_IDENTIFIER, identifier->name.operator String());
			
 
				+	}
			
 
				+#endif
			
 
				+	return identifier;
			
 
				 }
			
 
				 
			
 
				 GDScriptParser::ExpressionNode *GDScriptParser::parse_identifier(ExpressionNode *p_previous_operand, bool p_can_assign) {
			
--- a/modules/gdscript/gdscript_parser.h
+++ b/modules/gdscript/gdscript_parser.h
@@ -1361,8 +1361,11 @@ private:
 
				 	void clear();
			
 
				 	void push_error(const String &p_message, const Node *p_origin = nullptr);
			
 
				 #ifdef DEBUG_ENABLED
			
 
				-	void push_warning(const Node *p_source, GDScriptWarning::Code p_code, const String &p_symbol1 = String(), const String &p_symbol2 = String(), const String &p_symbol3 = String(), const String &p_symbol4 = String());
			
 
				 	void push_warning(const Node *p_source, GDScriptWarning::Code p_code, const Vector<String> &p_symbols);
			
 
				+	template <typename... Symbols>
			
 
				+	void push_warning(const Node *p_source, GDScriptWarning::Code p_code, const Symbols &...p_symbols) {
			
 
				+		push_warning(p_source, p_code, Vector<String>{ p_symbols... });
			
 
				+	}
			
 
				 #endif
			
 
				 
			
 
				 	void make_completion_context(CompletionType p_type, Node *p_node, int p_argument = -1, bool p_force = false);
			
--- a/modules/gdscript/gdscript_tokenizer.cpp
+++ b/modules/gdscript/gdscript_tokenizer.cpp
@@ -31,10 +31,14 @@
 
				 #include "gdscript_tokenizer.h"
			
 
				 
			
 
				 #include "core/error/error_macros.h"
			
 
				+#include "core/string/char_utils.h"
			
 
				 
			
 
				 #ifdef TOOLS_ENABLED
			
 
				 #include "editor/editor_settings.h"
			
 
				 #endif
			
 
				+#ifdef DEBUG_ENABLED
			
 
				+#include "servers/text_server.h"
			
 
				+#endif
			
 
				 
			
 
				 static const char *token_names[] = {
			
 
				 	"Empty", // EMPTY,
			
@@ -435,10 +439,12 @@ GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, To
 
				 }
			
 
				 
			
 
				 GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
			
 
				-	if (!is_ascii_identifier_char(_peek())) {
			
 
				+	if (is_unicode_identifier_start(_peek())) {
			
 
				+		_advance(); // Consume start character.
			
 
				+	} else {
			
 
				 		push_error("Expected annotation identifier after \"@\".");
			
 
				 	}
			
 
				-	while (is_ascii_identifier_char(_peek())) {
			
 
				+	while (is_unicode_identifier_continue(_peek())) {
			
 
				 		// Consume all identifier characters.
			
 
				 		_advance();
			
 
				 	}
			
@@ -447,7 +453,6 @@ GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
 
				 	return annotation;
			
 
				 }
			
 
				 
			
 
				-GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
			
 
				 #define KEYWORDS(KEYWORD_GROUP, KEYWORD)     \
			
 
				 	KEYWORD_GROUP('a')                       \
			
 
				 	KEYWORD("as", Token::AS)                 \
			
@@ -512,8 +517,21 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
 
				 #define MIN_KEYWORD_LENGTH 2
			
 
				 #define MAX_KEYWORD_LENGTH 10
			
 
				 
			
 
				-	// Consume all alphanumeric characters.
			
 
				-	while (is_ascii_identifier_char(_peek())) {
			
 
				+#ifdef DEBUG_ENABLED
			
 
				+void GDScriptTokenizer::make_keyword_list() {
			
 
				+#define KEYWORD_LINE(keyword, token_type) keyword,
			
 
				+#define KEYWORD_GROUP_IGNORE(group)
			
 
				+	keyword_list = {
			
 
				+		KEYWORDS(KEYWORD_GROUP_IGNORE, KEYWORD_LINE)
			
 
				+	};
			
 
				+#undef KEYWORD_LINE
			
 
				+#undef KEYWORD_GROUP_IGNORE
			
 
				+}
			
 
				+#endif // DEBUG_ENABLED
			
 
				+
			
 
				+GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
			
 
				+	// Consume all identifier characters.
			
 
				+	while (is_unicode_identifier_continue(_peek())) {
			
 
				 		_advance();
			
 
				 	}
			
 
				 
			
@@ -565,15 +583,28 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
 
				 	}
			
 
				 
			
 
				 	// Not a keyword, so must be an identifier.
			
 
				-	return make_identifier(name);
			
 
				+	Token id = make_identifier(name);
			
 
				+
			
 
				+#ifdef DEBUG_ENABLED
			
 
				+	// Additional checks for identifiers but only in debug and if it's available in TextServer.
			
 
				+	if (TS->has_feature(TextServer::FEATURE_UNICODE_SECURITY)) {
			
 
				+		int64_t confusable = TS->is_confusable(name, keyword_list);
			
 
				+		if (confusable >= 0) {
			
 
				+			push_error(vformat(R"(Identifier "%s" is visually similar to the GDScript keyword "%s" and thus not allowed.)", name, keyword_list[confusable]));
			
 
				+		}
			
 
				+	}
			
 
				+#endif // DEBUG_ENABLED
			
 
				+
			
 
				+	return id;
			
 
				 
			
 
				-#undef KEYWORDS
			
 
				-#undef MIN_KEYWORD_LENGTH
			
 
				-#undef MAX_KEYWORD_LENGTH
			
 
				 #undef KEYWORD_GROUP_CASE
			
 
				 #undef KEYWORD
			
 
				 }
			
 
				 
			
 
				+#undef MAX_KEYWORD_LENGTH
			
 
				+#undef MIN_KEYWORD_LENGTH
			
 
				+#undef KEYWORDS
			
 
				+
			
 
				 void GDScriptTokenizer::newline(bool p_make_token) {
			
 
				 	// Don't overwrite previous newline, nor create if we want a line continuation.
			
 
				 	if (p_make_token && !pending_newline && !line_continuation) {
			
@@ -720,7 +751,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
 
				 		error.rightmost_column = column + 1;
			
 
				 		push_error(error);
			
 
				 		has_error = true;
			
 
				-	} else if (is_ascii_identifier_char(_peek())) {
			
 
				+	} else if (is_unicode_identifier_start(_peek()) || is_unicode_identifier_continue(_peek())) {
			
 
				 		// Letter at the end of the number.
			
 
				 		push_error("Invalid numeric notation.");
			
 
				 	}
			
@@ -1311,7 +1342,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
 
				 
			
 
				 	if (is_digit(c)) {
			
 
				 		return number();
			
 
				-	} else if (is_ascii_identifier_char(c)) {
			
 
				+	} else if (is_unicode_identifier_start(c)) {
			
 
				 		return potential_identifier();
			
 
				 	}
			
 
				 
			
@@ -1504,7 +1535,11 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
 
				 			}
			
 
				 
			
 
				 		default:
			
 
				-			return make_error(vformat(R"(Unknown character "%s".)", String(&c, 1)));
			
 
				+			if (is_whitespace(c)) {
			
 
				+				return make_error(vformat(R"(Invalid white space character "\\u%X".)", static_cast<int32_t>(c)));
			
 
				+			} else {
			
 
				+				return make_error(vformat(R"(Unknown character "%s".)", String(&c, 1)));
			
 
				+			}
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -1514,4 +1549,7 @@ GDScriptTokenizer::GDScriptTokenizer() {
 
				 		tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size");
			
 
				 	}
			
 
				 #endif // TOOLS_ENABLED
			
 
				+#ifdef DEBUG_ENABLED
			
 
				+	make_keyword_list();
			
 
				+#endif // DEBUG_ENABLED
			
 
				 }
			
--- a/modules/gdscript/gdscript_tokenizer.h
+++ b/modules/gdscript/gdscript_tokenizer.h
@@ -224,6 +224,9 @@ private:
 
				 	char32_t indent_char = '\0';
			
 
				 	int position = 0;
			
 
				 	int length = 0;
			
 
				+#ifdef DEBUG_ENABLED
			
 
				+	Vector<String> keyword_list;
			
 
				+#endif // DEBUG_ENABLED
			
 
				 
			
 
				 #ifdef TOOLS_ENABLED
			
 
				 	HashMap<int, CommentData> comments;
			
@@ -239,6 +242,10 @@ private:
 
				 	void _skip_whitespace();
			
 
				 	void check_indent();
			
 
				 
			
 
				+#ifdef DEBUG_ENABLED
			
 
				+	void make_keyword_list();
			
 
				+#endif // DEBUG_ENABLED
			
 
				+
			
 
				 	Token make_error(const String &p_message);
			
 
				 	void push_error(const String &p_message);
			
 
				 	void push_error(const Token &p_error);
			
--- a/modules/gdscript/gdscript_warning.cpp
+++ b/modules/gdscript/gdscript_warning.cpp
@@ -155,6 +155,10 @@ String GDScriptWarning::get_message() const {
 
				 			CHECK_SYMBOLS(2);
			
 
				 			return vformat(R"(The function '%s()' is a static function but was called from an instance. Instead, it should be directly called from the type: '%s.%s()'.)", symbols[0], symbols[1], symbols[0]);
			
 
				 		}
			
 
				+		case CONFUSABLE_IDENTIFIER: {
			
 
				+			CHECK_SYMBOLS(1);
			
 
				+			return vformat(R"(The identifier "%s" has misleading characters and might be confused with something else.)", symbols[0]);
			
 
				+		}
			
 
				 		case WARNING_MAX:
			
 
				 			break; // Can't happen, but silences warning
			
 
				 	}
			
@@ -219,6 +223,7 @@ String GDScriptWarning::get_name_from_code(Code p_code) {
 
				 		"SHADOWED_GLOBAL_IDENTIFIER",
			
 
				 		"INT_ASSIGNED_TO_ENUM",
			
 
				 		"STATIC_CALLED_ON_INSTANCE",
			
 
				+		"CONFUSABLE_IDENTIFIER",
			
 
				 	};
			
 
				 
			
 
				 	static_assert((sizeof(names) / sizeof(*names)) == WARNING_MAX, "Amount of warning types don't match the amount of warning names.");
			
--- a/modules/gdscript/gdscript_warning.h
+++ b/modules/gdscript/gdscript_warning.h
@@ -78,6 +78,7 @@ public:
 
				 		SHADOWED_GLOBAL_IDENTIFIER, // A global class or function has the same name as variable.
			
 
				 		INT_ASSIGNED_TO_ENUM, // An integer value was assigned to an enum-typed variable without casting.
			
 
				 		STATIC_CALLED_ON_INSTANCE, // A static method was called on an instance of a class instead of on the class itself.
			
 
				+		CONFUSABLE_IDENTIFIER, // The identifier contains misleading characters that can be confused. E.g. "usеr" (has Cyrillic "е" instead of Latin "e").
			
 
				 		WARNING_MAX,
			
 
				 	};
			
 
				 
			
--- a/modules/gdscript/tests/scripts/analyzer/warnings/lambda_unused_arg.out
+++ b/modules/gdscript/tests/scripts/analyzer/warnings/lambda_unused_arg.out
@@ -2,4 +2,4 @@ GDTEST_OK
 
				 >> WARNING
			
 
				 >> Line: 2
			
 
				 >> UNUSED_PARAMETER
			
 
				->>
			
 
				+>> The parameter 'unused' is never used in the function ''. If this is intended, prefix it with an underscore: '_unused'
			
--- a/modules/gdscript/tests/scripts/parser/errors/identifier_similar_to_keyword.gd
+++ b/modules/gdscript/tests/scripts/parser/errors/identifier_similar_to_keyword.gd
@@ -0,0 +1,3 @@
 
				+func test():
			
 
				+	var аs # Using Cyrillic "а".
			
 
				+	print(аs)
			
--- a/modules/gdscript/tests/scripts/parser/errors/identifier_similar_to_keyword.out
+++ b/modules/gdscript/tests/scripts/parser/errors/identifier_similar_to_keyword.out
@@ -0,0 +1,2 @@
 
				+GDTEST_PARSER_ERROR
			
 
				+Identifier "аs" is visually similar to the GDScript keyword "as" and thus not allowed.
			
--- a/modules/gdscript/tests/scripts/parser/features/unicode_identifiers.gd
+++ b/modules/gdscript/tests/scripts/parser/features/unicode_identifiers.gd
@@ -0,0 +1,35 @@
 
				+const π = PI
			
 
				+var ㄥ = π
			
 
				+
			
 
				+func test():
			
 
				+	var փորձարկում = "test"
			
 
				+	prints("փորձարկում", փորձարկում)
			
 
				+	var امتحان = "test"
			
 
				+	prints("امتحان", امتحان)
			
 
				+	var পরীক্ষা = "test"
			
 
				+	prints("পরীক্ষা", পরীক্ষা)
			
 
				+	var тест = "test"
			
 
				+	prints("тест", тест)
			
 
				+	var जाँच = "test"
			
 
				+	prints("जाँच", जाँच)
			
 
				+	var 기준 = "test"
			
 
				+	prints("기준", 기준)
			
 
				+	var 测试 = "test"
			
 
				+	prints("测试", 测试)
			
 
				+	var テスト = "test"
			
 
				+	prints("テスト", テスト)
			
 
				+	var 試験 = "test"
			
 
				+	prints("試験", 試験)
			
 
				+	var പരീക്ഷ = "test"
			
 
				+	prints("പരീക്ഷ", പരീക്ഷ)
			
 
				+	var ทดสอบ = "test"
			
 
				+	prints("ทดสอบ", ทดสอบ)
			
 
				+	var δοκιμή = "test"
			
 
				+	prints("δοκιμή", δοκιμή)
			
 
				+
			
 
				+	const d = 1.1
			
 
				+	_process(d)
			
 
				+	print(is_equal_approx(ㄥ, PI + (d * PI)))
			
 
				+
			
 
				+func _process(Δ: float) -> void:
			
 
				+	ㄥ += Δ * π
			
--- a/modules/gdscript/tests/scripts/parser/features/unicode_identifiers.out
+++ b/modules/gdscript/tests/scripts/parser/features/unicode_identifiers.out
@@ -0,0 +1,14 @@
 
				+GDTEST_OK
			
 
				+փորձարկում test
			
 
				+امتحان test
			
 
				+পরীক্ষা test
			
 
				+тест test
			
 
				+जाँच test
			
 
				+기준 test
			
 
				+测试 test
			
 
				+テスト test
			
 
				+試験 test
			
 
				+പരീക്ഷ test
			
 
				+ทดสอบ test
			
 
				+δοκιμή test
			
 
				+true
			
--- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd
+++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.gd
@@ -0,0 +1,5 @@
 
				+func test():
			
 
				+	var port = 0 # Only latin characters.
			
 
				+	var pοrt = 1 # The "ο" is Greek omicron.
			
 
				+
			
 
				+	prints(port, pοrt)
			
--- a/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out
+++ b/modules/gdscript/tests/scripts/parser/warnings/confusable_identifier.out
@@ -0,0 +1,6 @@
 
				+GDTEST_OK
			
 
				+>> WARNING
			
 
				+>> Line: 3
			
 
				+>> CONFUSABLE_IDENTIFIER
			
 
				+>> The identifier "pοrt" has misleading characters and might be confused with something else.
			
 
				+0 1