Browse Source

Merge pull request #46551 from bruvzg/json_utf16

JSON parser: add UTF-16 surrogate pairs support.
Rémi Verschelde 4 years ago
parent
commit
b98b2744a6
1 changed files with 46 additions and 0 deletions
  1. 46 0
      core/io/json.cpp

+ 46 - 0
core/io/json.cpp

@@ -234,6 +234,52 @@ Error JSON::_get_token(const char32_t *p_str, int &index, int p_len, Token &r_to
 								}
 								}
 								index += 4; //will add at the end anyway
 								index += 4; //will add at the end anyway
 
 
+								if ((res & 0xfffffc00) == 0xd800) {
+									if (p_str[index + 1] != '\\' || p_str[index + 2] != 'u') {
+										r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate";
+										return ERR_PARSE_ERROR;
+									}
+									index += 2;
+									char32_t trail = 0;
+									for (int j = 0; j < 4; j++) {
+										char32_t c = p_str[index + j + 1];
+										if (c == 0) {
+											r_err_str = "Unterminated String";
+											return ERR_PARSE_ERROR;
+										}
+										if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+											r_err_str = "Malformed hex constant in string";
+											return ERR_PARSE_ERROR;
+										}
+										char32_t v;
+										if (c >= '0' && c <= '9') {
+											v = c - '0';
+										} else if (c >= 'a' && c <= 'f') {
+											v = c - 'a';
+											v += 10;
+										} else if (c >= 'A' && c <= 'F') {
+											v = c - 'A';
+											v += 10;
+										} else {
+											ERR_PRINT("Bug parsing hex constant.");
+											v = 0;
+										}
+
+										trail <<= 4;
+										trail |= v;
+									}
+									if ((trail & 0xfffffc00) == 0xdc00) {
+										res = (res << 10UL) + trail - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
+										index += 4; //will add at the end anyway
+									} else {
+										r_err_str = "Invalid UTF-16 sequence in string, unpaired lead surrogate";
+										return ERR_PARSE_ERROR;
+									}
+								} else if ((res & 0xfffffc00) == 0xdc00) {
+									r_err_str = "Invalid UTF-16 sequence in string, unpaired trail surrogate";
+									return ERR_PARSE_ERROR;
+								}
+
 							} break;
 							} break;
 							default: {
 							default: {
 								res = next;
 								res = next;