|
@@ -857,10 +857,14 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
|
|
|
STRING_NODEPATH,
|
|
|
};
|
|
|
|
|
|
+ bool is_raw = false;
|
|
|
bool is_multiline = false;
|
|
|
StringType type = STRING_REGULAR;
|
|
|
|
|
|
- if (_peek(-1) == '&') {
|
|
|
+ if (_peek(-1) == 'r') {
|
|
|
+ is_raw = true;
|
|
|
+ _advance();
|
|
|
+ } else if (_peek(-1) == '&') {
|
|
|
type = STRING_NAME;
|
|
|
_advance();
|
|
|
} else if (_peek(-1) == '^') {
|
|
@@ -890,7 +894,12 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
|
|
|
char32_t ch = _peek();
|
|
|
|
|
|
if (ch == 0x200E || ch == 0x200F || (ch >= 0x202A && ch <= 0x202E) || (ch >= 0x2066 && ch <= 0x2069)) {
|
|
|
- Token error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, 16) + "\") to avoid confusion.");
|
|
|
+ Token error;
|
|
|
+ if (is_raw) {
|
|
|
+ error = make_error("Invisible text direction control character present in the string, use regular string literal instead of r-string.");
|
|
|
+ } else {
|
|
|
+ error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, 16) + "\") to avoid confusion.");
|
|
|
+ }
|
|
|
error.start_column = column;
|
|
|
error.leftmost_column = error.start_column;
|
|
|
error.end_column = column + 1;
|
|
@@ -905,144 +914,164 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
|
|
|
return make_error("Unterminated string.");
|
|
|
}
|
|
|
|
|
|
- // Grab escape character.
|
|
|
- char32_t code = _peek();
|
|
|
- _advance();
|
|
|
- if (_is_at_end()) {
|
|
|
- return make_error("Unterminated string.");
|
|
|
- }
|
|
|
+ if (is_raw) {
|
|
|
+ if (_peek() == quote_char) {
|
|
|
+ _advance();
|
|
|
+ if (_is_at_end()) {
|
|
|
+ return make_error("Unterminated string.");
|
|
|
+ }
|
|
|
+ result += '\\';
|
|
|
+ result += quote_char;
|
|
|
+ } else if (_peek() == '\\') { // For `\\\"`.
|
|
|
+ _advance();
|
|
|
+ if (_is_at_end()) {
|
|
|
+ return make_error("Unterminated string.");
|
|
|
+ }
|
|
|
+ result += '\\';
|
|
|
+ result += '\\';
|
|
|
+ } else {
|
|
|
+ result += '\\';
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ // Grab escape character.
|
|
|
+ char32_t code = _peek();
|
|
|
+ _advance();
|
|
|
+ if (_is_at_end()) {
|
|
|
+ return make_error("Unterminated string.");
|
|
|
+ }
|
|
|
|
|
|
- char32_t escaped = 0;
|
|
|
- bool valid_escape = true;
|
|
|
+ char32_t escaped = 0;
|
|
|
+ bool valid_escape = true;
|
|
|
|
|
|
- switch (code) {
|
|
|
- case 'a':
|
|
|
- escaped = '\a';
|
|
|
- break;
|
|
|
- case 'b':
|
|
|
- escaped = '\b';
|
|
|
- break;
|
|
|
- case 'f':
|
|
|
- escaped = '\f';
|
|
|
- break;
|
|
|
- case 'n':
|
|
|
- escaped = '\n';
|
|
|
- break;
|
|
|
- case 'r':
|
|
|
- escaped = '\r';
|
|
|
- break;
|
|
|
- case 't':
|
|
|
- escaped = '\t';
|
|
|
- break;
|
|
|
- case 'v':
|
|
|
- escaped = '\v';
|
|
|
- break;
|
|
|
- case '\'':
|
|
|
- escaped = '\'';
|
|
|
- break;
|
|
|
- case '\"':
|
|
|
- escaped = '\"';
|
|
|
- break;
|
|
|
- case '\\':
|
|
|
- escaped = '\\';
|
|
|
- break;
|
|
|
- case 'U':
|
|
|
- case 'u': {
|
|
|
- // Hexadecimal sequence.
|
|
|
- int hex_len = (code == 'U') ? 6 : 4;
|
|
|
- for (int j = 0; j < hex_len; j++) {
|
|
|
- if (_is_at_end()) {
|
|
|
- return make_error("Unterminated string.");
|
|
|
+ switch (code) {
|
|
|
+ case 'a':
|
|
|
+ escaped = '\a';
|
|
|
+ break;
|
|
|
+ case 'b':
|
|
|
+ escaped = '\b';
|
|
|
+ break;
|
|
|
+ case 'f':
|
|
|
+ escaped = '\f';
|
|
|
+ break;
|
|
|
+ case 'n':
|
|
|
+ escaped = '\n';
|
|
|
+ break;
|
|
|
+ case 'r':
|
|
|
+ escaped = '\r';
|
|
|
+ break;
|
|
|
+ case 't':
|
|
|
+ escaped = '\t';
|
|
|
+ break;
|
|
|
+ case 'v':
|
|
|
+ escaped = '\v';
|
|
|
+ break;
|
|
|
+ case '\'':
|
|
|
+ escaped = '\'';
|
|
|
+ break;
|
|
|
+ case '\"':
|
|
|
+ escaped = '\"';
|
|
|
+ break;
|
|
|
+ case '\\':
|
|
|
+ escaped = '\\';
|
|
|
+ break;
|
|
|
+ case 'U':
|
|
|
+ case 'u': {
|
|
|
+ // Hexadecimal sequence.
|
|
|
+ int hex_len = (code == 'U') ? 6 : 4;
|
|
|
+ for (int j = 0; j < hex_len; j++) {
|
|
|
+ if (_is_at_end()) {
|
|
|
+ return make_error("Unterminated string.");
|
|
|
+ }
|
|
|
+
|
|
|
+ char32_t digit = _peek();
|
|
|
+ char32_t value = 0;
|
|
|
+ if (is_digit(digit)) {
|
|
|
+ value = digit - '0';
|
|
|
+ } else if (digit >= 'a' && digit <= 'f') {
|
|
|
+ value = digit - 'a';
|
|
|
+ value += 10;
|
|
|
+ } else if (digit >= 'A' && digit <= 'F') {
|
|
|
+ value = digit - 'A';
|
|
|
+ value += 10;
|
|
|
+ } else {
|
|
|
+ // Make error, but keep parsing the string.
|
|
|
+ Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
|
|
|
+ error.start_column = column;
|
|
|
+ error.leftmost_column = error.start_column;
|
|
|
+ error.end_column = column + 1;
|
|
|
+ error.rightmost_column = error.end_column;
|
|
|
+ push_error(error);
|
|
|
+ valid_escape = false;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ escaped <<= 4;
|
|
|
+ escaped |= value;
|
|
|
+
|
|
|
+ _advance();
|
|
|
}
|
|
|
-
|
|
|
- char32_t digit = _peek();
|
|
|
- char32_t value = 0;
|
|
|
- if (is_digit(digit)) {
|
|
|
- value = digit - '0';
|
|
|
- } else if (digit >= 'a' && digit <= 'f') {
|
|
|
- value = digit - 'a';
|
|
|
- value += 10;
|
|
|
- } else if (digit >= 'A' && digit <= 'F') {
|
|
|
- value = digit - 'A';
|
|
|
- value += 10;
|
|
|
- } else {
|
|
|
- // Make error, but keep parsing the string.
|
|
|
- Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
|
|
|
- error.start_column = column;
|
|
|
- error.leftmost_column = error.start_column;
|
|
|
- error.end_column = column + 1;
|
|
|
- error.rightmost_column = error.end_column;
|
|
|
- push_error(error);
|
|
|
- valid_escape = false;
|
|
|
+ } break;
|
|
|
+ case '\r':
|
|
|
+ if (_peek() != '\n') {
|
|
|
+ // Carriage return without newline in string. (???)
|
|
|
+ // Just add it to the string and keep going.
|
|
|
+ result += ch;
|
|
|
+ _advance();
|
|
|
break;
|
|
|
}
|
|
|
-
|
|
|
- escaped <<= 4;
|
|
|
- escaped |= value;
|
|
|
-
|
|
|
- _advance();
|
|
|
- }
|
|
|
- } break;
|
|
|
- case '\r':
|
|
|
- if (_peek() != '\n') {
|
|
|
- // Carriage return without newline in string. (???)
|
|
|
- // Just add it to the string and keep going.
|
|
|
- result += ch;
|
|
|
- _advance();
|
|
|
+ [[fallthrough]];
|
|
|
+ case '\n':
|
|
|
+ // Escaping newline.
|
|
|
+ newline(false);
|
|
|
+ valid_escape = false; // Don't add to the string.
|
|
|
break;
|
|
|
- }
|
|
|
- [[fallthrough]];
|
|
|
- case '\n':
|
|
|
- // Escaping newline.
|
|
|
- newline(false);
|
|
|
- valid_escape = false; // Don't add to the string.
|
|
|
- break;
|
|
|
- default:
|
|
|
- Token error = make_error("Invalid escape in string.");
|
|
|
- error.start_column = column - 2;
|
|
|
- error.leftmost_column = error.start_column;
|
|
|
- push_error(error);
|
|
|
- valid_escape = false;
|
|
|
- break;
|
|
|
- }
|
|
|
- // Parse UTF-16 pair.
|
|
|
- if (valid_escape) {
|
|
|
- if ((escaped & 0xfffffc00) == 0xd800) {
|
|
|
- if (prev == 0) {
|
|
|
- prev = escaped;
|
|
|
- prev_pos = column - 2;
|
|
|
- continue;
|
|
|
- } else {
|
|
|
- Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
|
|
|
+ default:
|
|
|
+ Token error = make_error("Invalid escape in string.");
|
|
|
error.start_column = column - 2;
|
|
|
error.leftmost_column = error.start_column;
|
|
|
push_error(error);
|
|
|
valid_escape = false;
|
|
|
- prev = 0;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ // Parse UTF-16 pair.
|
|
|
+ if (valid_escape) {
|
|
|
+ if ((escaped & 0xfffffc00) == 0xd800) {
|
|
|
+ if (prev == 0) {
|
|
|
+ prev = escaped;
|
|
|
+ prev_pos = column - 2;
|
|
|
+ continue;
|
|
|
+ } else {
|
|
|
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate.");
|
|
|
+ error.start_column = column - 2;
|
|
|
+ error.leftmost_column = error.start_column;
|
|
|
+ push_error(error);
|
|
|
+ valid_escape = false;
|
|
|
+ prev = 0;
|
|
|
+ }
|
|
|
+ } else if ((escaped & 0xfffffc00) == 0xdc00) {
|
|
|
+ if (prev == 0) {
|
|
|
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate.");
|
|
|
+ error.start_column = column - 2;
|
|
|
+ error.leftmost_column = error.start_column;
|
|
|
+ push_error(error);
|
|
|
+ valid_escape = false;
|
|
|
+ } else {
|
|
|
+ escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
|
|
|
+ prev = 0;
|
|
|
+ }
|
|
|
}
|
|
|
- } else if ((escaped & 0xfffffc00) == 0xdc00) {
|
|
|
- if (prev == 0) {
|
|
|
- Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate");
|
|
|
- error.start_column = column - 2;
|
|
|
+ if (prev != 0) {
|
|
|
+ Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate.");
|
|
|
+ error.start_column = prev_pos;
|
|
|
error.leftmost_column = error.start_column;
|
|
|
push_error(error);
|
|
|
- valid_escape = false;
|
|
|
- } else {
|
|
|
- escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
|
|
|
prev = 0;
|
|
|
}
|
|
|
}
|
|
|
- if (prev != 0) {
|
|
|
- Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
|
|
|
- error.start_column = prev_pos;
|
|
|
- error.leftmost_column = error.start_column;
|
|
|
- push_error(error);
|
|
|
- prev = 0;
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- if (valid_escape) {
|
|
|
- result += escaped;
|
|
|
+ if (valid_escape) {
|
|
|
+ result += escaped;
|
|
|
+ }
|
|
|
}
|
|
|
} else if (ch == quote_char) {
|
|
|
if (prev != 0) {
|
|
@@ -1416,6 +1445,9 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
|
|
|
|
|
|
if (is_digit(c)) {
|
|
|
return number();
|
|
|
+ } else if (c == 'r' && (_peek() == '"' || _peek() == '\'')) {
|
|
|
+ // Raw string literals.
|
|
|
+ return string();
|
|
|
} else if (is_unicode_identifier_start(c)) {
|
|
|
return potential_identifier();
|
|
|
}
|