2 anos atrás · 2964c7d51c
--- a/modules/gdscript/editor/gdscript_highlighter.cpp
+++ b/modules/gdscript/editor/gdscript_highlighter.cpp
@@ -52,6 +52,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 
				 	bool in_keyword = false;
			
 
				 	bool in_word = false;
			
 
				 	bool in_number = false;
			
 
				+	bool in_raw_string = false;
			
 
				 	bool in_node_path = false;
			
 
				 	bool in_node_ref = false;
			
 
				 	bool in_annotation = false;
			
@@ -234,15 +235,33 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 
				 							}
			
 
				 
			
 
				 							if (str[from] == '\\') {
			
 
				-								Dictionary escape_char_highlighter_info;
			
 
				-								escape_char_highlighter_info["color"] = symbol_color;
			
 
				-								color_map[from] = escape_char_highlighter_info;
			
 
				+								if (!in_raw_string) {
			
 
				+									Dictionary escape_char_highlighter_info;
			
 
				+									escape_char_highlighter_info["color"] = symbol_color;
			
 
				+									color_map[from] = escape_char_highlighter_info;
			
 
				+								}
			
 
				 
			
 
				 								from++;
			
 
				 
			
 
				-								Dictionary region_continue_highlighter_info;
			
 
				-								region_continue_highlighter_info["color"] = region_color;
			
 
				-								color_map[from + 1] = region_continue_highlighter_info;
			
 
				+								if (!in_raw_string) {
			
 
				+									int esc_len = 0;
			
 
				+									if (str[from] == 'u') {
			
 
				+										esc_len = 4;
			
 
				+									} else if (str[from] == 'U') {
			
 
				+										esc_len = 6;
			
 
				+									}
			
 
				+									for (int k = 0; k < esc_len && from < line_length - 1; k++) {
			
 
				+										if (!is_hex_digit(str[from + 1])) {
			
 
				+											break;
			
 
				+										}
			
 
				+										from++;
			
 
				+									}
			
 
				+
			
 
				+									Dictionary region_continue_highlighter_info;
			
 
				+									region_continue_highlighter_info["color"] = region_color;
			
 
				+									color_map[from + 1] = region_continue_highlighter_info;
			
 
				+								}
			
 
				+
			
 
				 								continue;
			
 
				 							}
			
 
				 
			
@@ -489,6 +508,12 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 
				 			in_member_variable = false;
			
 
				 		}
			
 
				 
			
 
				+		if (!in_raw_string && in_region == -1 && str[j] == 'r' && j < line_length - 1 && (str[j + 1] == '"' || str[j + 1] == '\'')) {
			
 
				+			in_raw_string = true;
			
 
				+		} else if (in_raw_string && in_region == -1) {
			
 
				+			in_raw_string = false;
			
 
				+		}
			
 
				+
			
 
				 		// Keep symbol color for binary '&&'. In the case of '&&&' use StringName color for the last ampersand.
			
 
				 		if (!in_string_name && in_region == -1 && str[j] == '&' && !is_binary_op) {
			
 
				 			if (j >= 2 && str[j - 1] == '&' && str[j - 2] != '&' && prev_is_binary_op) {
			
@@ -520,7 +545,9 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 
				 			in_annotation = false;
			
 
				 		}
			
 
				 
			
 
				-		if (in_node_ref) {
			
 
				+		if (in_raw_string) {
			
 
				+			color = string_color;
			
 
				+		} else if (in_node_ref) {
			
 
				 			next_type = NODE_REF;
			
 
				 			color = node_ref_color;
			
 
				 		} else if (in_annotation) {
			
@@ -692,7 +719,7 @@ void GDScriptSyntaxHighlighter::_update_cache() {
 
				 	}
			
 
				 
			
 
				 	/* Strings */
			
 
				-	const Color string_color = EDITOR_GET("text_editor/theme/highlighting/string_color");
			
 
				+	string_color = EDITOR_GET("text_editor/theme/highlighting/string_color");
			
 
				 	List<String> strings;
			
 
				 	gdscript->get_string_delimiters(&strings);
			
 
				 	for (const String &string : strings) {
			
--- a/modules/gdscript/editor/gdscript_highlighter.h
+++ b/modules/gdscript/editor/gdscript_highlighter.h
@@ -78,6 +78,7 @@ private:
 
				 	Color built_in_type_color;
			
 
				 	Color number_color;
			
 
				 	Color member_color;
			
 
				+	Color string_color;
			
 
				 	Color node_path_color;
			
 
				 	Color node_ref_color;
			
 
				 	Color annotation_color;
			
--- a/modules/gdscript/gdscript_editor.cpp
+++ b/modules/gdscript/gdscript_editor.cpp
@@ -59,6 +59,7 @@ void GDScriptLanguage::get_string_delimiters(List<String> *p_delimiters) const {
 
				 	p_delimiters->push_back("' '");
			
 
				 	p_delimiters->push_back("\"\"\" \"\"\"");
			
 
				 	p_delimiters->push_back("''' '''");
			
 
				+	// NOTE: StringName, NodePath and r-strings are not listed here.
			
 
				 }
			
 
				 
			
 
				 bool GDScriptLanguage::is_using_templates() {
			
--- a/modules/gdscript/gdscript_tokenizer.cpp
+++ b/modules/gdscript/gdscript_tokenizer.cpp
@@ -857,10 +857,14 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
 
				 		STRING_NODEPATH,
			
 
				 	};
			
 
				 
			
 
				+	bool is_raw = false;
			
 
				 	bool is_multiline = false;
			
 
				 	StringType type = STRING_REGULAR;
			
 
				 
			
 
				-	if (_peek(-1) == '&') {
			
 
				+	if (_peek(-1) == 'r') {
			
 
				+		is_raw = true;
			
 
				+		_advance();
			
 
				+	} else if (_peek(-1) == '&') {
			
 
				 		type = STRING_NAME;
			
 
				 		_advance();
			
 
				 	} else if (_peek(-1) == '^') {
			
@@ -890,7 +894,12 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
 
				 		char32_t ch = _peek();
			
 
				 
			
 
				 		if (ch == 0x200E || ch == 0x200F || (ch >= 0x202A && ch <= 0x202E) || (ch >= 0x2066 && ch <= 0x2069)) {
			
 
				-			Token error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, 16) + "\") to avoid confusion.");
			
 
				+			Token error;
			
 
				+			if (is_raw) {
			
 
				+				error = make_error("Invisible text direction control character present in the string, use regular string literal instead of r-string.");
			
 
				+			} else {
			
 
				+				error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, 16) + "\") to avoid confusion.");
			
 
				+			}
			
 
				 			error.start_column = column;
			
 
				 			error.leftmost_column = error.start_column;
			
 
				 			error.end_column = column + 1;
			
@@ -905,144 +914,164 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
 
				 				return make_error("Unterminated string.");
			
 
				 			}
			
 
				 
			
 
				-			// Grab escape character.
			
 
				-			char32_t code = _peek();
			
 
				-			_advance();
			
 
				-			if (_is_at_end()) {
			
 
				-				return make_error("Unterminated string.");
			
 
				-			}
			
 
				+			if (is_raw) {
			
 
				+				if (_peek() == quote_char) {
			
 
				+					_advance();
			
 
				+					if (_is_at_end()) {
			
 
				+						return make_error("Unterminated string.");
			
 
				+					}
			
 
				+					result += '\\';
			
 
				+					result += quote_char;
			
 
				+				} else if (_peek() == '\\') { // For `\\\"`.
			
 
				+					_advance();
			
 
				+					if (_is_at_end()) {
			
 
				+						return make_error("Unterminated string.");
			
 
				+					}
			
 
				+					result += '\\';
			
 
				+					result += '\\';
			
 
				+				} else {
			
 
				+					result += '\\';
			
 
				+				}
			
 
				+			} else {
			
 
				+				// Grab escape character.
			
 
				+				char32_t code = _peek();
			
 
				+				_advance();
			
 
				+				if (_is_at_end()) {
			
 
				+					return make_error("Unterminated string.");
			
 
				+				}
			
 
				 
			
 
				-			char32_t escaped = 0;
			
 
				-			bool valid_escape = true;
			
 
				+				char32_t escaped = 0;
			
 
				+				bool valid_escape = true;
			
 
				 
			
 
				-			switch (code) {
			
 
				-				case 'a':
			
 
				-					escaped = '\a';
			
 
				-					break;
			
 
				-				case 'b':
			
 
				-					escaped = '\b';
			
 
				-					break;
			
 
				-				case 'f':
			
 
				-					escaped = '\f';
			
 
				-					break;
			
 
				-				case 'n':
			
 
				-					escaped = '\n';
			
 
				-					break;
			
 
				-				case 'r':
			
 
				-					escaped = '\r';
			
 
				-					break;
			
 
				-				case 't':
			
 
				-					escaped = '\t';
			
 
				-					break;
			
 
				-				case 'v':
			
 
				-					escaped = '\v';
			
 
				-					break;
			
 
				-				case '\'':
			
 
				-					escaped = '\'';
			
 
				-					break;
			
 
				-				case '\"':
			
 
				-					escaped = '\"';
			
 
				-					break;
			
 
				-				case '\\':
			
 
				-					escaped = '\\';
			
 
				-					break;
			
 
				-				case 'U':
			
 
				-				case 'u': {
			
 
				-					// Hexadecimal sequence.
			
 
				-					int hex_len = (code == 'U') ? 6 : 4;
			
 
				-					for (int j = 0; j < hex_len; j++) {
			
 
				-						if (_is_at_end()) {
			
 
				-							return make_error("Unterminated string.");
			
 
				+				switch (code) {
			
 
				+					case 'a':
			
 
				+						escaped = '\a';
			
 
				+						break;
			
 
				+					case 'b':
			
 
				+						escaped = '\b';
			
 
				+						break;
			
 
				+					case 'f':
			
 
				+						escaped = '\f';
			
 
				+						break;
			
 
				+					case 'n':
			
 
				+						escaped = '\n';
			
 
				+						break;
			
 
				+					case 'r':
			
 
				+						escaped = '\r';
			
 
				+						break;
			
 
				+					case 't':
			
 
				+						escaped = '\t';
			
 
				+						break;
			
 
				+					case 'v':
			
 
				+						escaped = '\v';
			
 
				+						break;
			
 
				+					case '\'':
			
 
				+						escaped = '\'';
			
 
				+						break;
			
 
				+					case '\"':
			
 
				+						escaped = '\"';
			
 
				+						break;
			
 
				+					case '\\':
			
 
				+						escaped = '\\';
			
 
				+						break;
			
 
				+					case 'U':
			
 
				+					case 'u': {
			
 
				+						// Hexadecimal sequence.
			
 
				+						int hex_len = (code == 'U') ? 6 : 4;
			
 
				+						for (int j = 0; j < hex_len; j++) {
			
 
				+							if (_is_at_end()) {
			
 
				+								return make_error("Unterminated string.");
			
 
				+							}
			
 
				+
			
 
				+							char32_t digit = _peek();
			
 
				+							char32_t value = 0;
			
 
				+							if (is_digit(digit)) {
			
 
				+								value = digit - '0';
			
 
				+							} else if (digit >= 'a' && digit <= 'f') {
			
 
				+								value = digit - 'a';
			
 
				+								value += 10;
			
 
				+							} else if (digit >= 'A' && digit <= 'F') {
			
 
				+								value = digit - 'A';
			
 
				+								value += 10;
			
 
				+							} else {
			
 
				+								// Make error, but keep parsing the string.
			
 
				+								Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
			
 
				+								error.start_column = column;
			
 
				+								error.leftmost_column = error.start_column;
			
 
				+								error.end_column = column + 1;
			
 
				+								error.rightmost_column = error.end_column;
			
 
				+								push_error(error);
			
 
				+								valid_escape = false;
			
 
				+								break;
			
 
				+							}
			
 
				+
			
 
				+							escaped <<= 4;
			
 
				+							escaped |= value;
			
 
				+
			
 
				+							_advance();
			
 
				 						}
			
 
				-
			
 
				-						char32_t digit = _peek();
			
 
				-						char32_t value = 0;
			
 
				-						if (is_digit(digit)) {
			
 
				-							value = digit - '0';
			
 
				-						} else if (digit >= 'a' && digit <= 'f') {
			
 
				-							value = digit - 'a';
			
 
				-							value += 10;
			
 
				-						} else if (digit >= 'A' && digit <= 'F') {
			
 
				-							value = digit - 'A';
			
 
				-							value += 10;
			
 
				-						} else {
			
 
				-							// Make error, but keep parsing the string.
			
 
				-							Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
			
 
				-							error.start_column = column;
			
 
				-							error.leftmost_column = error.start_column;
			
 
				-							error.end_column = column + 1;
			
 
				-							error.rightmost_column = error.end_column;
			
 
				-							push_error(error);
			
 
				-							valid_escape = false;
			
 
				+					} break;
			
 
				+					case '\r':
			
 
				+						if (_peek() != '\n') {
			
 
				+							// Carriage return without newline in string. (???)
			
 
				+							// Just add it to the string and keep going.
			
 
				+							result += ch;
			
 
				+							_advance();
			
 
				 							break;
			
 
				 						}
			
 
				-
			
 
				-						escaped <<= 4;
			
 
				-						escaped |= value;
			
 
				-
			
 
				-						_advance();
			
 
				-					}
			
 
				-				} break;
			
 
				-				case '\r':
			
 
				-					if (_peek() != '\n') {
			
 
				-						// Carriage return without newline in string. (???)
			
 
				-						// Just add it to the string and keep going.
			
 
				-						result += ch;
			
 
				-						_advance();
			
 
				+						[[fallthrough]];
			
 
				+					case '\n':
			
 
				+						// Escaping newline.
			
 
				+						newline(false);
			
 
				+						valid_escape = false; // Don't add to the string.
			
 
				 						break;
			
 
				-					}
			
 
				-					[[fallthrough]];
			
 
				-				case '\n':
			
 
				-					// Escaping newline.
			
 
				-					newline(false);
			
 
				-					valid_escape = false; // Don't add to the string.
			
 
				-					break;
			
 
				-				default:
			
 
				-					Token error = make_error("Invalid escape in string.");
			
 
				-					error.start_column = column - 2;
			
 
				-					error.leftmost_column = error.start_column;
			
 
				-					push_error(error);
			
 
				-					valid_escape = false;
			
 
				-					break;
			
 
				-			}
			
 
				-			// Parse UTF-16 pair.
			
 
				-			if (valid_escape) {
			
 
				-				if ((escaped & 0xfffffc00) == 0xd800) {
			
 
				-					if (prev == 0) {
			
 
				-						prev = escaped;
			
 
				-						prev_pos = column - 2;
			
 
				-						continue;
			
 
				-					} else {
			
 
				-						Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
			
 
				+					default:
			
 
				+						Token error = make_error("Invalid escape in string.");
			
 
				 						error.start_column = column - 2;
			
 
				 						error.leftmost_column = error.start_column;
			
 
				 						push_error(error);
			
 
				 						valid_escape = false;
			
 
				-						prev = 0;
			
 
				+						break;
			
 
				+				}
			
 
				+				// Parse UTF-16 pair.
			
 
				+				if (valid_escape) {
			
 
				+					if ((escaped & 0xfffffc00) == 0xd800) {
			
 
				+						if (prev == 0) {
			
 
				+							prev = escaped;
			
 
				+							prev_pos = column - 2;
			
 
				+							continue;
			
 
				+						} else {
			
 
				+							Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate.");
			
 
				+							error.start_column = column - 2;
			
 
				+							error.leftmost_column = error.start_column;
			
 
				+							push_error(error);
			
 
				+							valid_escape = false;
			
 
				+							prev = 0;
			
 
				+						}
			
 
				+					} else if ((escaped & 0xfffffc00) == 0xdc00) {
			
 
				+						if (prev == 0) {
			
 
				+							Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate.");
			
 
				+							error.start_column = column - 2;
			
 
				+							error.leftmost_column = error.start_column;
			
 
				+							push_error(error);
			
 
				+							valid_escape = false;
			
 
				+						} else {
			
 
				+							escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
			
 
				+							prev = 0;
			
 
				+						}
			
 
				 					}
			
 
				-				} else if ((escaped & 0xfffffc00) == 0xdc00) {
			
 
				-					if (prev == 0) {
			
 
				-						Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate");
			
 
				-						error.start_column = column - 2;
			
 
				+					if (prev != 0) {
			
 
				+						Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate.");
			
 
				+						error.start_column = prev_pos;
			
 
				 						error.leftmost_column = error.start_column;
			
 
				 						push_error(error);
			
 
				-						valid_escape = false;
			
 
				-					} else {
			
 
				-						escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
			
 
				 						prev = 0;
			
 
				 					}
			
 
				 				}
			
 
				-				if (prev != 0) {
			
 
				-					Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
			
 
				-					error.start_column = prev_pos;
			
 
				-					error.leftmost_column = error.start_column;
			
 
				-					push_error(error);
			
 
				-					prev = 0;
			
 
				-				}
			
 
				-			}
			
 
				 
			
 
				-			if (valid_escape) {
			
 
				-				result += escaped;
			
 
				+				if (valid_escape) {
			
 
				+					result += escaped;
			
 
				+				}
			
 
				 			}
			
 
				 		} else if (ch == quote_char) {
			
 
				 			if (prev != 0) {
			
@@ -1416,6 +1445,9 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
 
				 
			
 
				 	if (is_digit(c)) {
			
 
				 		return number();
			
 
				+	} else if (c == 'r' && (_peek() == '"' || _peek() == '\'')) {
			
 
				+		// Raw string literals.
			
 
				+		return string();
			
 
				 	} else if (is_unicode_identifier_start(c)) {
			
 
				 		return potential_identifier();
			
 
				 	}
			
--- a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.gd
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.gd
@@ -0,0 +1,2 @@
 
				+func test():
			
 
				+	print(r"\")
			
--- a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.out
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_1.out
@@ -0,0 +1,2 @@
 
				+GDTEST_PARSER_ERROR
			
 
				+Unterminated string.
			
--- a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.gd
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.gd
@@ -0,0 +1,2 @@
 
				+func test():
			
 
				+	print(r"\\"")
			
--- a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.out
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_2.out
@@ -0,0 +1,2 @@
 
				+GDTEST_PARSER_ERROR
			
 
				+Unterminated string.
			
--- a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.gd
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.gd
@@ -0,0 +1,3 @@
 
				+func test():
			
 
				+	#         v
			
 
				+	print(r"['"]*")
			
--- a/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.out
+++ b/modules/gdscript/tests/scripts/parser/errors/bad_r_string_3.out
@@ -0,0 +1,2 @@
 
				+GDTEST_PARSER_ERROR
			
 
				+Closing "]" doesn't have an opening counterpart.
			
--- a/modules/gdscript/tests/scripts/parser/features/r_strings.gd
+++ b/modules/gdscript/tests/scripts/parser/features/r_strings.gd
@@ -0,0 +1,22 @@
 
				+func test():
			
 
				+	print(r"test ' \' \" \\ \n \t \u2023 test")
			
 
				+	print(r"\n\\[\t ]*(\w+)")
			
 
				+	print(r"")
			
 
				+	print(r"\"")
			
 
				+	print(r"\\\"")
			
 
				+	print(r"\\")
			
 
				+	print(r"\" \\\" \\\\\"")
			
 
				+	print(r"\ \\ \\\ \\\\ \\\\\ \\")
			
 
				+	print(r'"')
			
 
				+	print(r'"(?:\\.|[^"])*"')
			
 
				+	print(r"""""")
			
 
				+	print(r"""test \t "test"="" " \" \\\" \ \\ \\\ test""")
			
 
				+	print(r'''r"""test \t "test"="" " \" \\\" \ \\ \\\ test"""''')
			
 
				+	print(r"\t
			
 
				+			\t")
			
 
				+	print(r"\t \
			
 
				+			\t")
			
 
				+	print(r"""\t
			
 
				+			\t""")
			
 
				+	print(r"""\t \
			
 
				+			\t""")
			
--- a/modules/gdscript/tests/scripts/parser/features/r_strings.out
+++ b/modules/gdscript/tests/scripts/parser/features/r_strings.out
@@ -0,0 +1,22 @@
 
				+GDTEST_OK
			
 
				+test ' \' \" \\ \n \t \u2023 test
			
 
				+\n\\[\t ]*(\w+)
			
 
				+
			
 
				+\"
			
 
				+\\\"
			
 
				+\\
			
 
				+\" \\\" \\\\\"
			
 
				+\ \\ \\\ \\\\ \\\\\ \\
			
 
				+"
			
 
				+"(?:\\.|[^"])*"
			
 
				+
			
 
				+test \t "test"="" " \" \\\" \ \\ \\\ test
			
 
				+r"""test \t "test"="" " \" \\\" \ \\ \\\ test"""
			
 
				+\t
			
 
				+			\t
			
 
				+\t \
			
 
				+			\t
			
 
				+\t
			
 
				+			\t
			
 
				+\t \
			
 
				+			\t
			
--- a/modules/regex/doc_classes/RegEx.xml
+++ b/modules/regex/doc_classes/RegEx.xml
@@ -10,7 +10,7 @@
 
				 		var regex = RegEx.new()
			
 
				 		regex.compile("\\w-(\\d+)")
			
 
				 		[/codeblock]
			
 
				-		The search pattern must be escaped first for GDScript before it is escaped for the expression. For example, [code]compile("\\d+")[/code] would be read by RegEx as [code]\d+[/code]. Similarly, [code]compile("\"(?:\\\\.|[^\"])*\"")[/code] would be read as [code]"(?:\\.|[^"])*"[/code].
			
 
				+		The search pattern must be escaped first for GDScript before it is escaped for the expression. For example, [code]compile("\\d+")[/code] would be read by RegEx as [code]\d+[/code]. Similarly, [code]compile("\"(?:\\\\.|[^\"])*\"")[/code] would be read as [code]"(?:\\.|[^"])*"[/code]. In GDScript, you can also use raw string literals (r-strings). For example, [code]compile(r'"(?:\\.|[^"])*"')[/code] would be read as the same expression.
			
 
				 		Using [method search], you can find the pattern within the given text. If a pattern is found, [RegExMatch] is returned and you can retrieve details of the results using methods such as [method RegExMatch.get_string] and [method RegExMatch.get_start].
			
 
				 		[codeblock]
			
 
				 		var regex = RegEx.new()