3 years ago · 0c5431644d
--- a/core/io/file_access.cpp
+++ b/core/io/file_access.cpp
@@ -388,9 +388,7 @@ String FileAccess::get_as_utf8_string() const {
 
				 	w[len] = 0;
			
 
				 
			
 
				 	String s;
			
 
				-	if (s.parse_utf8((const char *)w)) {
			
 
				-		return String();
			
 
				-	}
			
 
				+	s.parse_utf8((const char *)w);
			
 
				 	return s;
			
 
				 }
			
 
				 
			
@@ -516,7 +514,6 @@ String FileAccess::get_pascal_string() {
 
				 
			
 
				 	String ret;
			
 
				 	ret.parse_utf8(cs.ptr());
			
 
				-
			
 
				 	return ret;
			
 
				 }
			
 
				 
			
--- a/core/io/marshalls.cpp
+++ b/core/io/marshalls.cpp
@@ -78,7 +78,7 @@ static Error _decode_string(const uint8_t *&buf, int &len, int *r_len, String &r
 
				 	ERR_FAIL_COND_V(strlen < 0 || strlen + pad > len, ERR_FILE_EOF);
			
 
				 
			
 
				 	String str;
			
 
				-	ERR_FAIL_COND_V(str.parse_utf8((const char *)buf, strlen), ERR_INVALID_DATA);
			
 
				+	ERR_FAIL_COND_V(str.parse_utf8((const char *)buf, strlen) != OK, ERR_INVALID_DATA);
			
 
				 	r_string = str;
			
 
				 
			
 
				 	// Add padding
			
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -323,7 +323,13 @@ void String::copy_from(const char *p_cstr) {
 
				 	char32_t *dst = this->ptrw();
			
 
				 
			
 
				 	for (size_t i = 0; i <= len; i++) {
			
 
				-		dst[i] = p_cstr[i];
			
 
				+		uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
			
 
				+		if (c == 0 && i < len) {
			
 
				+			print_unicode_error("NUL character", true);
			
 
				+			dst[i] = 0x20;
			
 
				+		} else {
			
 
				+			dst[i] = c;
			
 
				+		}
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -350,7 +356,13 @@ void String::copy_from(const char *p_cstr, const int p_clip_to) {
 
				 	char32_t *dst = this->ptrw();
			
 
				 
			
 
				 	for (int i = 0; i < len; i++) {
			
 
				-		dst[i] = p_cstr[i];
			
 
				+		uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
			
 
				+		if (c == 0) {
			
 
				+			print_unicode_error("NUL character", true);
			
 
				+			dst[i] = 0x20;
			
 
				+		} else {
			
 
				+			dst[i] = c;
			
 
				+		}
			
 
				 	}
			
 
				 	dst[len] = 0;
			
 
				 }
			
@@ -376,14 +388,21 @@ void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
 
				 }
			
 
				 
			
 
				 void String::copy_from(const char32_t &p_char) {
			
 
				+	if (p_char == 0) { // NUL is rejected: reported, then we return before resize(), so nothing is stored.
			
 
				+		print_unicode_error("NUL character", true); // NOTE(review): the critical message mentions replacement with spaces, but this path stores nothing.
			
 
				+		return;
			
 
				+	}
			
 
				+	if ((p_char & 0xfffff800) == 0xd800) { // Mask test matches the surrogate range 0xd800-0xdfff.
			
 
				+		print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char)); // Report only; the value is still stored below.
			
 
				+	}
			
 
				+	if (p_char > 0x10ffff) { // Beyond the 0x10ffff upper bound checked here.
			
 
				+		print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char)); // Report only; the value is still stored below.
			
 
				+	}
			
 
				+
			
 
				 	resize(2);
			
 
				+
			
 
				 	char32_t *dst = ptrw();
			
 
				-	if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
			
 
				-		print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
			
 
				-		dst[0] = 0xfffd;
			
 
				-	} else {
			
 
				-		dst[0] = p_char;
			
 
				-	}
			
 
				+	dst[0] = p_char; // Stored as is; the removed lines substituted 0xfffd for surrogates and out-of-range values.
			
 
				 	dst[1] = 0;
			
 
				 }
			
 
				 
			
@@ -437,12 +456,18 @@ void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
 
				 	dst[p_length] = 0;
			
 
				 
			
 
				 	for (int i = 0; i < p_length; i++) {
			
 
				-		if ((p_char[i] >= 0xd800 && p_char[i] <= 0xdfff) || (p_char[i] > 0x10ffff)) {
			
 
				-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char[i], 16) + ".");
			
 
				-			dst[i] = 0xfffd;
			
 
				-		} else {
			
 
				-			dst[i] = p_char[i];
			
 
				+		if (p_char[i] == 0) {
			
 
				+			print_unicode_error("NUL character", true);
			
 
				+			dst[i] = 0x20;
			
 
				+			continue;
			
 
				+		}
			
 
				+		if ((p_char[i] & 0xfffff800) == 0xd800) {
			
 
				+			print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
			
 
				 		}
			
 
				+		if (p_char[i] > 0x10ffff) {
			
 
				+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
			
 
				+		}
			
 
				+		dst[i] = p_char[i];
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -481,7 +506,7 @@ String operator+(const wchar_t *p_chr, const String &p_str) {
 
				 	// wchar_t is 16-bit
			
 
				 	String tmp = String::utf16((const char16_t *)p_chr);
			
 
				 #else
			
 
				-	// wchar_t is 32-bi
			
 
				+	// wchar_t is 32-bit
			
 
				 	String tmp = (const char32_t *)p_chr;
			
 
				 #endif
			
 
				 	tmp += p_str;
			
@@ -527,7 +552,13 @@ String &String::operator+=(const char *p_str) {
 
				 	char32_t *dst = ptrw() + lhs_len;
			
 
				 
			
 
				 	for (size_t i = 0; i <= rhs_len; i++) {
			
 
				-		dst[i] = p_str[i];
			
 
				+		uint8_t c = p_str[i] >= 0 ? p_str[i] : uint8_t(256 + p_str[i]);
			
 
				+		if (c == 0 && i < rhs_len) {
			
 
				+			print_unicode_error("NUL character", true);
			
 
				+			dst[i] = 0x20;
			
 
				+		} else {
			
 
				+			dst[i] = c;
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	return *this;
			
@@ -550,15 +581,21 @@ String &String::operator+=(const char32_t *p_str) {
 
				 }
			
 
				 
			
 
				 String &String::operator+=(char32_t p_char) {
			
 
				+	if (p_char == 0) {
			
 
				+		print_unicode_error("NUL character", true);
			
 
				+		return *this;
			
 
				+	}
			
 
				+	if ((p_char & 0xfffff800) == 0xd800) {
			
 
				+		print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char));
			
 
				+	}
			
 
				+	if (p_char > 0x10ffff) {
			
 
				+		print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char));
			
 
				+	}
			
 
				+
			
 
				 	const int lhs_len = length();
			
 
				 	resize(lhs_len + 2);
			
 
				 	char32_t *dst = ptrw();
			
 
				-	if ((p_char >= 0xd800 && p_char <= 0xdfff) || (p_char > 0x10ffff)) {
			
 
				-		print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(p_char, 16) + ".");
			
 
				-		dst[lhs_len] = 0xfffd;
			
 
				-	} else {
			
 
				-		dst[lhs_len] = p_char;
			
 
				-	}
			
 
				+	dst[lhs_len] = p_char;
			
 
				 	dst[lhs_len + 1] = 0;
			
 
				 
			
 
				 	return *this;
			
@@ -1583,6 +1620,14 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				+void String::print_unicode_error(const String &p_message, bool p_critical) const { // Formats p_message into a "Unicode parsing error" line and passes it to print_error().
			
 
				+	if (p_critical) { // Critical variant: the emitted text additionally states that characters were replaced with spaces.
			
 
				+		print_error(vformat("Unicode parsing error, some characters were replaced with spaces: %s", p_message));
			
 
				+	} else { // Non-critical variant: plain prefix followed by p_message.
			
 
				+		print_error(vformat("Unicode parsing error: %s", p_message));
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 CharString String::ascii(bool p_allow_extended) const {
			
 
				 	if (!length()) {
			
 
				 		return CharString();
			
@@ -1596,7 +1641,7 @@ CharString String::ascii(bool p_allow_extended) const {
 
				 		if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
			
 
				 			cs[i] = c;
			
 
				 		} else {
			
 
				-			print_error("Unicode parsing error: Cannot represent " + num_int64(c, 16) + " as ASCII/Latin-1 character.");
			
 
				+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
			
 
				 			cs[i] = 0x20;
			
 
				 		}
			
 
				 	}
			
@@ -1611,11 +1656,9 @@ String String::utf8(const char *p_utf8, int p_len) {
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-bool String::parse_utf8(const char *p_utf8, int p_len) {
			
 
				-#define UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-8?");
			
 
				-
			
 
				+Error String::parse_utf8(const char *p_utf8, int p_len) {
			
 
				 	if (!p_utf8) {
			
 
				-		return true;
			
 
				+		return ERR_INVALID_DATA;
			
 
				 	}
			
 
				 
			
 
				 	String aux;
			
@@ -1635,14 +1678,17 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	bool decode_error = false;
			
 
				+	bool decode_failed = false;
			
 
				 	{
			
 
				 		const char *ptrtmp = p_utf8;
			
 
				 		const char *ptrtmp_limit = &p_utf8[p_len];
			
 
				 		int skip = 0;
			
 
				+		uint8_t c_start = 0;
			
 
				 		while (ptrtmp != ptrtmp_limit && *ptrtmp) {
			
 
				-			if (skip == 0) {
			
 
				-				uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
			
 
				+			uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
			
 
				 
			
 
				+			if (skip == 0) {
			
 
				 				/* Determine the number of characters in sequence */
			
 
				 				if ((c & 0x80) == 0) {
			
 
				 					skip = 0;
			
@@ -1652,20 +1698,34 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 
				 					skip = 2;
			
 
				 				} else if ((c & 0xf8) == 0xf0) {
			
 
				 					skip = 3;
			
 
				+				} else if ((c & 0xfc) == 0xf8) {
			
 
				+					skip = 4;
			
 
				+				} else if ((c & 0xfe) == 0xfc) {
			
 
				+					skip = 5;
			
 
				 				} else {
			
 
				-					UNICERROR("invalid skip at " + num_int64(cstr_size));
			
 
				-					return true; //invalid utf8
			
 
				+					skip = 0;
			
 
				+					print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true);
			
 
				+					decode_failed = true;
			
 
				 				}
			
 
				+				c_start = c;
			
 
				 
			
 
				 				if (skip == 1 && (c & 0x1e) == 0) {
			
 
				-					UNICERROR("overlong rejected at " + num_int64(cstr_size));
			
 
				-					return true; //reject overlong
			
 
				+					print_unicode_error(vformat("Overlong encoding (%x ...)", c));
			
 
				+					decode_error = true;
			
 
				 				}
			
 
				-
			
 
				 				str_size++;
			
 
				-
			
 
				 			} else {
			
 
				-				--skip;
			
 
				+				if ((c_start == 0xe0 && skip == 2 && c < 0xa0) || (c_start == 0xf0 && skip == 3 && c < 0x90) || (c_start == 0xf8 && skip == 4 && c < 0x88) || (c_start == 0xfc && skip == 5 && c < 0x84)) {
			
 
				+					print_unicode_error(vformat("Overlong encoding (%x %x ...)", c_start, c));
			
 
				+					decode_error = true;
			
 
				+				}
			
 
				+				if (c < 0x80 || c > 0xbf) {
			
 
				+					print_unicode_error(vformat("Invalid UTF-8 continuation byte (%x ... %x ...)", c_start, c), true);
			
 
				+					decode_failed = true;
			
 
				+					skip = 0;
			
 
				+				} else {
			
 
				+					--skip;
			
 
				+				}
			
 
				 			}
			
 
				 
			
 
				 			cstr_size++;
			
@@ -1673,80 +1733,91 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 
				 		}
			
 
				 
			
 
				 		if (skip) {
			
 
				-			UNICERROR("no space left");
			
 
				-			return true; //not enough space
			
 
				+			print_unicode_error(vformat("Missing %d UTF-8 continuation byte(s)", skip), true);
			
 
				+			decode_failed = true;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	if (str_size == 0) {
			
 
				 		clear();
			
 
				-		return false;
			
 
				+		return OK; // empty string
			
 
				 	}
			
 
				 
			
 
				 	resize(str_size + 1);
			
 
				 	char32_t *dst = ptrw();
			
 
				 	dst[str_size] = 0;
			
 
				 
			
 
				+	int skip = 0;
			
 
				+	uint32_t unichar = 0;
			
 
				 	while (cstr_size) {
			
 
				-		int len = 0;
			
 
				-
			
 
				-		/* Determine the number of characters in sequence */
			
 
				-		if ((*p_utf8 & 0x80) == 0) {
			
 
				-			len = 1;
			
 
				-		} else if ((*p_utf8 & 0xe0) == 0xc0) {
			
 
				-			len = 2;
			
 
				-		} else if ((*p_utf8 & 0xf0) == 0xe0) {
			
 
				-			len = 3;
			
 
				-		} else if ((*p_utf8 & 0xf8) == 0xf0) {
			
 
				-			len = 4;
			
 
				-		} else {
			
 
				-			UNICERROR("invalid len");
			
 
				-			return true; //invalid UTF8
			
 
				-		}
			
 
				-
			
 
				-		if (len > cstr_size) {
			
 
				-			UNICERROR("no space left");
			
 
				-			return true; //not enough space
			
 
				-		}
			
 
				-
			
 
				-		if (len == 2 && (*p_utf8 & 0x1E) == 0) {
			
 
				-			UNICERROR("no space left");
			
 
				-			return true; //reject overlong
			
 
				-		}
			
 
				-
			
 
				-		/* Convert the first character */
			
 
				-
			
 
				-		uint32_t unichar = 0;
			
 
				-
			
 
				-		if (len == 1) {
			
 
				-			unichar = *p_utf8;
			
 
				+		uint8_t c = *p_utf8 >= 0 ? *p_utf8 : uint8_t(256 + *p_utf8);
			
 
				+
			
 
				+		if (skip == 0) {
			
 
				+			/* Determine the number of characters in sequence */
			
 
				+			if ((c & 0x80) == 0) {
			
 
				+				*(dst++) = c;
			
 
				+				unichar = 0;
			
 
				+				skip = 0;
			
 
				+			} else if ((c & 0xe0) == 0xc0) {
			
 
				+				unichar = (0xff >> 3) & c;
			
 
				+				skip = 1;
			
 
				+			} else if ((c & 0xf0) == 0xe0) {
			
 
				+				unichar = (0xff >> 4) & c;
			
 
				+				skip = 2;
			
 
				+			} else if ((c & 0xf8) == 0xf0) {
			
 
				+				unichar = (0xff >> 5) & c;
			
 
				+				skip = 3;
			
 
				+			} else if ((c & 0xfc) == 0xf8) {
			
 
				+				unichar = (0xff >> 6) & c;
			
 
				+				skip = 4;
			
 
				+			} else if ((c & 0xfe) == 0xfc) {
			
 
				+				unichar = (0xff >> 7) & c;
			
 
				+				skip = 5;
			
 
				+			} else {
			
 
				+				*(dst++) = 0x20;
			
 
				+				unichar = 0;
			
 
				+				skip = 0;
			
 
				+			}
			
 
				 		} else {
			
 
				-			unichar = (0xff >> (len + 1)) & *p_utf8;
			
 
				-
			
 
				-			for (int i = 1; i < len; i++) {
			
 
				-				if ((p_utf8[i] & 0xc0) != 0x80) {
			
 
				-					UNICERROR("invalid utf8");
			
 
				-					return true; //invalid utf8
			
 
				-				}
			
 
				-				if (unichar == 0 && i == 2 && ((p_utf8[i] & 0x7f) >> (7 - len)) == 0) {
			
 
				-					UNICERROR("invalid utf8 overlong");
			
 
				-					return true; //no overlong
			
 
				+			if (c < 0x80 || c > 0xbf) {
			
 
				+				*(dst++) = 0x20;
			
 
				+				skip = 0;
			
 
				+			} else {
			
 
				+				unichar = (unichar << 6) | (c & 0x3f);
			
 
				+				--skip;
			
 
				+				if (skip == 0) {
			
 
				+					if (unichar == 0) {
			
 
				+						print_unicode_error("NUL character", true);
			
 
				+						decode_failed = true;
			
 
				+						unichar = 0x20;
			
 
				+					}
			
 
				+					if ((unichar & 0xfffff800) == 0xd800) {
			
 
				+						print_unicode_error(vformat("Unpaired surrogate (%x)", unichar));
			
 
				+						decode_error = true;
			
 
				+					}
			
 
				+					if (unichar > 0x10ffff) {
			
 
				+						print_unicode_error(vformat("Invalid unicode codepoint (%x)", unichar));
			
 
				+						decode_error = true;
			
 
				+					}
			
 
				+					*(dst++) = unichar;
			
 
				 				}
			
 
				-				unichar = (unichar << 6) | (p_utf8[i] & 0x3f);
			
 
				 			}
			
 
				 		}
			
 
				-		if (unichar >= 0xd800 && unichar <= 0xdfff) {
			
 
				-			UNICERROR("invalid code point");
			
 
				-			return CharString();
			
 
				-		}
			
 
				 
			
 
				-		*(dst++) = unichar;
			
 
				-		cstr_size -= len;
			
 
				-		p_utf8 += len;
			
 
				+		cstr_size--;
			
 
				+		p_utf8++;
			
 
				+	}
			
 
				+	if (skip) {
			
 
				+		*(dst++) = 0x20;
			
 
				 	}
			
 
				 
			
 
				-	return false;
			
 
				-#undef UNICERROR
			
 
				+	if (decode_failed) {
			
 
				+		return ERR_INVALID_DATA;
			
 
				+	} else if (decode_error) {
			
 
				+		return ERR_PARSE_ERROR;
			
 
				+	} else {
			
 
				+		return OK;
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 CharString String::utf8() const {
			
@@ -1765,15 +1836,17 @@ CharString String::utf8() const {
 
				 			fl += 2;
			
 
				 		} else if (c <= 0xffff) { // 16 bits
			
 
				 			fl += 3;
			
 
				-		} else if (c <= 0x0010ffff) { // 21 bits
			
 
				+		} else if (c <= 0x001fffff) { // 21 bits
			
 
				 			fl += 4;
			
 
				+		} else if (c <= 0x03ffffff) { // 26 bits
			
 
				+			fl += 5;
			
 
				+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
			
 
				+		} else if (c <= 0x7fffffff) { // 31 bits
			
 
				+			fl += 6;
			
 
				+			print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
			
 
				 		} else {
			
 
				-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
			
 
				-			return CharString();
			
 
				-		}
			
 
				-		if (c >= 0xd800 && c <= 0xdfff) {
			
 
				-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
			
 
				-			return CharString();
			
 
				+			fl += 1;
			
 
				+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -1799,11 +1872,26 @@ CharString String::utf8() const {
 
				 			APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
			
 
				 			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
			
 
				 			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
			
 
				-		} else { // 21 bits
			
 
				+		} else if (c <= 0x001fffff) { // 21 bits
			
 
				 			APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
			
 
				 			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
			
 
				 			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
			
 
				 			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
			
 
				+		} else if (c <= 0x03ffffff) { // 26 bits
			
 
				+			APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
			
 
				+		} else if (c <= 0x7fffffff) { // 31 bits
			
 
				+			APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
			
 
				+			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
			
 
				+		} else {
			
 
				+			APPEND_CHAR(0x20);
			
 
				 		}
			
 
				 	}
			
 
				 #undef APPEND_CHAR
			
@@ -1819,11 +1907,9 @@ String String::utf16(const char16_t *p_utf16, int p_len) {
 
				 	return ret;
			
 
				 }
			
 
				 
			
 
				-bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
			
 
				-#define UNICERROR(m_err) print_error("Unicode parsing error: " + String(m_err) + ". Is the string valid UTF-16?");
			
 
				-
			
 
				+Error String::parse_utf16(const char16_t *p_utf16, int p_len) {
			
 
				 	if (!p_utf16) {
			
 
				-		return true;
			
 
				+		return ERR_INVALID_DATA;
			
 
				 	}
			
 
				 
			
 
				 	String aux;
			
@@ -1850,80 +1936,90 @@ bool String::parse_utf16(const char16_t *p_utf16, int p_len) {
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	bool decode_error = false;
			
 
				 	{
			
 
				 		const char16_t *ptrtmp = p_utf16;
			
 
				 		const char16_t *ptrtmp_limit = &p_utf16[p_len];
			
 
				-		int skip = 0;
			
 
				+		uint32_t c_prev = 0;
			
 
				+		bool skip = false;
			
 
				 		while (ptrtmp != ptrtmp_limit && *ptrtmp) {
			
 
				 			uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;
			
 
				-			if (skip == 0) {
			
 
				-				if ((c & 0xfffffc00) == 0xd800) {
			
 
				-					skip = 1; // lead surrogate
			
 
				-				} else if ((c & 0xfffffc00) == 0xdc00) {
			
 
				-					UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
			
 
				-					return true; // invalid UTF16
			
 
				-				} else {
			
 
				-					skip = 0;
			
 
				+
			
 
				+			if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
			
 
				+				if (skip) {
			
 
				+					print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
			
 
				+					decode_error = true;
			
 
				 				}
			
 
				-				str_size++;
			
 
				-			} else {
			
 
				-				if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
			
 
				-					--skip;
			
 
				+				skip = true;
			
 
				+			} else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
			
 
				+				if (skip) {
			
 
				+					str_size--;
			
 
				 				} else {
			
 
				-					UNICERROR("invalid utf16 surrogate at " + num_int64(cstr_size));
			
 
				-					return true; // invalid UTF16
			
 
				+					print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c));
			
 
				+					decode_error = true;
			
 
				 				}
			
 
				+				skip = false;
			
 
				+			} else {
			
 
				+				skip = false;
			
 
				 			}
			
 
				 
			
 
				+			c_prev = c;
			
 
				+			str_size++;
			
 
				 			cstr_size++;
			
 
				 			ptrtmp++;
			
 
				 		}
			
 
				 
			
 
				 		if (skip) {
			
 
				-			UNICERROR("no space left");
			
 
				-			return true; // not enough space
			
 
				+			print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev));
			
 
				+			decode_error = true;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	if (str_size == 0) {
			
 
				 		clear();
			
 
				-		return false;
			
 
				+		return OK; // empty string
			
 
				 	}
			
 
				 
			
 
				 	resize(str_size + 1);
			
 
				 	char32_t *dst = ptrw();
			
 
				 	dst[str_size] = 0;
			
 
				 
			
 
				+	bool skip = false;
			
 
				+	uint32_t c_prev = 0;
			
 
				 	while (cstr_size) {
			
 
				-		int len = 0;
			
 
				 		uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;
			
 
				 
			
 
				-		if ((c & 0xfffffc00) == 0xd800) {
			
 
				-			len = 2;
			
 
				+		if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
			
 
				+			if (skip) {
			
 
				+				*(dst++) = c_prev; // unpaired, store as is
			
 
				+			}
			
 
				+			skip = true;
			
 
				+		} else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
			
 
				+			if (skip) {
			
 
				+				*(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair
			
 
				+			} else {
			
 
				+				*(dst++) = c; // unpaired, store as is
			
 
				+			}
			
 
				+			skip = false;
			
 
				 		} else {
			
 
				-			len = 1;
			
 
				+			*(dst++) = c;
			
 
				+			skip = false;
			
 
				 		}
			
 
				 
			
 
				-		if (len > cstr_size) {
			
 
				-			UNICERROR("no space left");
			
 
				-			return true; //not enough space
			
 
				-		}
			
 
				-
			
 
				-		uint32_t unichar = 0;
			
 
				-		if (len == 1) {
			
 
				-			unichar = c;
			
 
				-		} else {
			
 
				-			uint32_t c2 = (byteswap) ? BSWAP16(p_utf16[1]) : p_utf16[1];
			
 
				-			unichar = (c << 10UL) + c2 - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
			
 
				-		}
			
 
				+		cstr_size--;
			
 
				+		p_utf16++;
			
 
				+		c_prev = c;
			
 
				+	}
			
 
				 
			
 
				-		*(dst++) = unichar;
			
 
				-		cstr_size -= len;
			
 
				-		p_utf16 += len;
			
 
				+	if (skip) {
			
 
				+		*(dst++) = c_prev;
			
 
				 	}
			
 
				 
			
 
				-	return false;
			
 
				-#undef UNICERROR
			
 
				+	if (decode_error) {
			
 
				+		return ERR_PARSE_ERROR;
			
 
				+	} else {
			
 
				+		return OK;
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 Char16String String::utf16() const {
			
@@ -1938,15 +2034,14 @@ Char16String String::utf16() const {
 
				 		uint32_t c = d[i];
			
 
				 		if (c <= 0xffff) { // 16 bits.
			
 
				 			fl += 1;
			
 
				+			if ((c & 0xfffff800) == 0xd800) {
			
 
				+				print_unicode_error(vformat("Unpaired surrogate (%x)", c));
			
 
				+			}
			
 
				 		} else if (c <= 0x10ffff) { // 32 bits.
			
 
				 			fl += 2;
			
 
				 		} else {
			
 
				-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
			
 
				-			return Char16String();
			
 
				-		}
			
 
				-		if (c >= 0xd800 && c <= 0xdfff) {
			
 
				-			print_error("Unicode parsing error: Invalid unicode codepoint " + num_int64(c, 16) + ".");
			
 
				-			return Char16String();
			
 
				+			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true);
			
 
				+			fl += 1;
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -1965,9 +2060,11 @@ Char16String String::utf16() const {
 
				 
			
 
				 		if (c <= 0xffff) { // 16 bits.
			
 
				 			APPEND_CHAR(c);
			
 
				-		} else { // 32 bits.
			
 
				+		} else if (c <= 0x10ffff) { // 32 bits.
			
 
				 			APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
			
 
				 			APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
			
 
				+		} else {
			
 
				+			APPEND_CHAR(0x20);
			
 
				 		}
			
 
				 	}
			
 
				 #undef APPEND_CHAR
			
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@@ -271,6 +271,9 @@ public:
 
				 
			
 
				 	bool is_valid_string() const;
			
 
				 
			
 
				+	/* debug, error messages */
			
 
				+	void print_unicode_error(const String &p_message, bool p_critical = false) const;
			
 
				+
			
 
				 	/* complex helpers */
			
 
				 	String substr(int p_from, int p_chars = -1) const;
			
 
				 	int find(const String &p_str, int p_from = 0) const; ///< return <0 if failed
			
@@ -373,11 +376,11 @@ public:
 
				 
			
 
				 	CharString ascii(bool p_allow_extended = false) const;
			
 
				 	CharString utf8() const;
			
 
				-	bool parse_utf8(const char *p_utf8, int p_len = -1); //return true on error
			
 
				+	Error parse_utf8(const char *p_utf8, int p_len = -1);
			
 
				 	static String utf8(const char *p_utf8, int p_len = -1);
			
 
				 
			
 
				 	Char16String utf16() const;
			
 
				-	bool parse_utf16(const char16_t *p_utf16, int p_len = -1); //return true on error
			
 
				+	Error parse_utf16(const char16_t *p_utf16, int p_len = -1);
			
 
				 	static String utf16(const char16_t *p_utf16, int p_len = -1);
			
 
				 
			
 
				 	static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */
			
--- a/drivers/unix/dir_access_unix.cpp
+++ b/drivers/unix/dir_access_unix.cpp
@@ -343,7 +343,7 @@ Error DirAccessUnix::change_dir(String p_dir) {
 
				 	String prev_dir;
			
 
				 	char real_current_dir_name[2048];
			
 
				 	ERR_FAIL_COND_V(getcwd(real_current_dir_name, 2048) == nullptr, ERR_BUG);
			
 
				-	if (prev_dir.parse_utf8(real_current_dir_name)) {
			
 
				+	if (prev_dir.parse_utf8(real_current_dir_name) != OK) {
			
 
				 		prev_dir = real_current_dir_name; //no utf8, maybe latin?
			
 
				 	}
			
 
				 
			
@@ -505,7 +505,7 @@ DirAccessUnix::DirAccessUnix() {
 
				 	// set current directory to an absolute path of the current directory
			
 
				 	char real_current_dir_name[2048];
			
 
				 	ERR_FAIL_COND(getcwd(real_current_dir_name, 2048) == nullptr);
			
 
				-	if (current_dir.parse_utf8(real_current_dir_name)) {
			
 
				+	if (current_dir.parse_utf8(real_current_dir_name) != OK) {
			
 
				 		current_dir = real_current_dir_name;
			
 
				 	}
			
 
				 
			
--- a/drivers/unix/os_unix.cpp
+++ b/drivers/unix/os_unix.cpp
@@ -313,7 +313,12 @@ Error OS_Unix::execute(const String &p_path, const List<String> &p_arguments, St
 
				 			if (p_pipe_mutex) {
			
 
				 				p_pipe_mutex->lock();
			
 
				 			}
			
 
				-			(*r_pipe) += String::utf8(buf);
			
 
				+			String pipe_out;
			
 
				+			if (pipe_out.parse_utf8(buf) == OK) {
			
 
				+				(*r_pipe) += pipe_out;
			
 
				+			} else {
			
 
				+				(*r_pipe) += String(buf); // If not valid UTF-8 try decode as Latin-1
			
 
				+			}
			
 
				 			if (p_pipe_mutex) {
			
 
				 				p_pipe_mutex->unlock();
			
 
				 			}
			
--- a/modules/gdscript/gdscript.cpp
+++ b/modules/gdscript/gdscript.cpp
@@ -1050,7 +1050,7 @@ Error GDScript::load_source_code(const String &p_path) {
 
				 	w[len] = 0;
			
 
				 
			
 
				 	String s;
			
 
				-	if (s.parse_utf8((const char *)w)) {
			
 
				+	if (s.parse_utf8((const char *)w) != OK) {
			
 
				 		ERR_FAIL_V_MSG(ERR_INVALID_DATA, "Script '" + p_path + "' contains invalid unicode (UTF-8), so it was not loaded. Please ensure that scripts are saved in valid UTF-8 unicode.");
			
 
				 	}
			
 
				 
			
--- a/modules/gdscript/gdscript_cache.cpp
+++ b/modules/gdscript/gdscript_cache.cpp
@@ -157,7 +157,7 @@ String GDScriptCache::get_source_code(const String &p_path) {
 
				 	source_file.write[len] = 0;
			
 
				 
			
 
				 	String source;
			
 
				-	if (source.parse_utf8((const char *)source_file.ptr())) {
			
 
				+	if (source.parse_utf8((const char *)source_file.ptr()) != OK) {
			
 
				 		ERR_FAIL_V_MSG("", "Script '" + p_path + "' contains invalid unicode (UTF-8), so it was not loaded. Please ensure that scripts are saved in valid UTF-8 unicode.");
			
 
				 	}
			
 
				 	return source;
			
--- a/modules/mono/build_scripts/make_android_mono_config.py
+++ b/modules/mono/build_scripts/make_android_mono_config.py
@@ -43,7 +43,7 @@ String get_godot_android_mono_config() {
 
				 	Compression::decompress(w, config_uncompressed_size, config_compressed_data,
			
 
				 			config_compressed_size, Compression::MODE_DEFLATE);
			
 
				 	String s;
			
 
				-	if (s.parse_utf8((const char *)w, data.size())) {
			
 
				+	if (s.parse_utf8((const char *)w, data.size()) != OK) {
			
 
				 		ERR_FAIL_V(String());
			
 
				 	}
			
 
				 	return s;
			
--- a/modules/mono/utils/path_utils.cpp
+++ b/modules/mono/utils/path_utils.cpp
@@ -62,7 +62,8 @@ String cwd() {
 
				 	}
			
 
				 
			
 
				 	String result;
			
 
				-	if (result.parse_utf16(buffer.ptr())) {
			
 
				+	result.parse_utf16(buffer.ptr());
			
 
				+	if (result.is_empty()) {
			
 
				 		return ".";
			
 
				 	}
			
 
				 	return result.simplify_path();
			
@@ -73,7 +74,7 @@ String cwd() {
 
				 	}
			
 
				 
			
 
				 	String result;
			
 
				-	if (result.parse_utf8(buffer)) {
			
 
				+	if (result.parse_utf8(buffer) != OK) {
			
 
				 		return ".";
			
 
				 	}
			
 
				 
			
@@ -114,7 +115,8 @@ String realpath(const String &p_path) {
 
				 	::CloseHandle(hFile);
			
 
				 
			
 
				 	String result;
			
 
				-	if (result.parse_utf16(buffer.ptr())) {
			
 
				+	result.parse_utf16(buffer.ptr());
			
 
				+	if (result.is_empty()) {
			
 
				 		return p_path;
			
 
				 	}
			
 
				 
			
@@ -127,10 +129,10 @@ String realpath(const String &p_path) {
 
				 	}
			
 
				 
			
 
				 	String result;
			
 
				-	bool parse_ok = result.parse_utf8(resolved_path);
			
 
				+	Error parse_ok = result.parse_utf8(resolved_path);
			
 
				 	::free(resolved_path);
			
 
				 
			
 
				-	if (parse_ok) {
			
 
				+	if (parse_ok != OK) {
			
 
				 		return p_path;
			
 
				 	}
			
 
				 
			
--- a/modules/mono/utils/string_utils.cpp
+++ b/modules/mono/utils/string_utils.cpp
@@ -178,7 +178,7 @@ Error read_all_file_utf8(const String &p_path, String &r_content) {
 
				 	w[len] = 0;
			
 
				 
			
 
				 	String source;
			
 
				-	if (source.parse_utf8((const char *)w)) {
			
 
				+	if (source.parse_utf8((const char *)w) != OK) {
			
 
				 		ERR_FAIL_V(ERR_INVALID_DATA);
			
 
				 	}
			
 
				 
			
--- a/scene/resources/text_file.cpp
+++ b/scene/resources/text_file.cpp
@@ -64,7 +64,7 @@ Error TextFile::load_text(const String &p_path) {
 
				 	w[len] = 0;
			
 
				 
			
 
				 	String s;
			
 
				-	ERR_FAIL_COND_V_MSG(s.parse_utf8((const char *)w), ERR_INVALID_DATA, "Script '" + p_path + "' contains invalid unicode (UTF-8), so it was not loaded. Please ensure that scripts are saved in valid UTF-8 unicode.");
			
 
				+	ERR_FAIL_COND_V_MSG(s.parse_utf8((const char *)w) != OK, ERR_INVALID_DATA, "Script '" + p_path + "' contains invalid unicode (UTF-8), so it was not loaded. Please ensure that scripts are saved in valid UTF-8 unicode.");
			
 
				 	text = s;
			
 
				 	path = p_path;
			
 
				 	return OK;
			
--- a/tests/core/string/test_string.h
+++ b/tests/core/string/test_string.h
@@ -89,12 +89,12 @@ TEST_CASE("[String] UTF8") {
 
				 	static const char32_t u32str[] = { 0x0045, 0x0020, 0x304A, 0x360F, 0x3088, 0x3046, 0x1F3A4, 0 };
			
 
				 	static const uint8_t u8str[] = { 0x45, 0x20, 0xE3, 0x81, 0x8A, 0xE3, 0x98, 0x8F, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xF0, 0x9F, 0x8E, 0xA4, 0 };
			
 
				 	String s = u32str;
			
 
				-	bool err = s.parse_utf8(s.utf8().get_data());
			
 
				-	CHECK(!err);
			
 
				+	Error err = s.parse_utf8(s.utf8().get_data());
			
 
				+	CHECK(err == OK);
			
 
				 	CHECK(s == u32str);
			
 
				 
			
 
				 	err = s.parse_utf8((const char *)u8str);
			
 
				-	CHECK(!err);
			
 
				+	CHECK(err == OK);
			
 
				 	CHECK(s == u32str);
			
 
				 
			
 
				 	CharString cs = (const char *)u8str;
			
@@ -106,12 +106,12 @@ TEST_CASE("[String] UTF16") {
 
				 	static const char32_t u32str[] = { 0x0045, 0x0020, 0x304A, 0x360F, 0x3088, 0x3046, 0x1F3A4, 0 };
			
 
				 	static const char16_t u16str[] = { 0x0045, 0x0020, 0x304A, 0x360F, 0x3088, 0x3046, 0xD83C, 0xDFA4, 0 };
			
 
				 	String s = u32str;
			
 
				-	bool err = s.parse_utf16(s.utf16().get_data());
			
 
				-	CHECK(!err);
			
 
				+	Error err = s.parse_utf16(s.utf16().get_data());
			
 
				+	CHECK(err == OK);
			
 
				 	CHECK(s == u32str);
			
 
				 
			
 
				 	err = s.parse_utf16(u16str);
			
 
				-	CHECK(!err);
			
 
				+	CHECK(err == OK);
			
 
				 	CHECK(s == u32str);
			
 
				 
			
 
				 	Char16String cs = u16str;
			
@@ -123,8 +123,8 @@ TEST_CASE("[String] UTF8 with BOM") {
 
				 	static const char32_t u32str[] = { 0x0045, 0x0020, 0x304A, 0x360F, 0x3088, 0x3046, 0x1F3A4, 0 };
			
 
				 	static const uint8_t u8str[] = { 0xEF, 0xBB, 0xBF, 0x45, 0x20, 0xE3, 0x81, 0x8A, 0xE3, 0x98, 0x8F, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xF0, 0x9F, 0x8E, 0xA4, 0 };
			
 
				 	String s;
			
 
				-	bool err = s.parse_utf8((const char *)u8str);
			
 
				-	CHECK(!err);
			
 
				+	Error err = s.parse_utf8((const char *)u8str);
			
 
				+	CHECK(err == OK);
			
 
				 	CHECK(s == u32str);
			
 
				 
			
 
				 	CharString cs = (const char *)u8str;
			
@@ -137,12 +137,12 @@ TEST_CASE("[String] UTF16 with BOM") {
 
				 	static const char16_t u16str[] = { 0xFEFF, 0x0020, 0x0045, 0x304A, 0x360F, 0x3088, 0x3046, 0xD83C, 0xDFA4, 0 };
			
 
				 	static const char16_t u16str_swap[] = { 0xFFFE, 0x2000, 0x4500, 0x4A30, 0x0F36, 0x8830, 0x4630, 0x3CD8, 0xA4DF, 0 };
			
 
				 	String s;
			
 
				-	bool err = s.parse_utf16(u16str);
			
 
				-	CHECK(!err);
			
 
				+	Error err = s.parse_utf16(u16str);
			
 
				+	CHECK(err == OK);
			
 
				 	CHECK(s == u32str);
			
 
				 
			
 
				 	err = s.parse_utf16(u16str_swap);
			
 
				-	CHECK(!err);
			
 
				+	CHECK(err == OK);
			
 
				 	CHECK(s == u32str);
			
 
				 
			
 
				 	Char16String cs = u16str;
			
@@ -152,29 +152,48 @@ TEST_CASE("[String] UTF16 with BOM") {
 
				 	CHECK(String::utf16(cs) == s);
			
 
				 }
			
 
				 
			
 
				-TEST_CASE("[String] Invalid UTF8") {
			
 
				+TEST_CASE("[String] Invalid UTF8 (non-standard)") {
			
 
				 	ERR_PRINT_OFF
			
 
				-	static const uint8_t u8str[] = { 0x45, 0xE3, 0x81, 0x8A, 0x8F, 0xE3, 0xE3, 0x98, 0x8F, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xF0, 0x9F, 0x8E, 0xA4, 0 };
			
 
				+	static const uint8_t u8str[] = { 0x45, 0xE3, 0x81, 0x8A, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xF0, 0x9F, 0x8E, 0xA4, 0xF0, 0x82, 0x82, 0xAC, 0xED, 0xA0, 0x81, 0 };
			
 
				+	//                               +     +2                +2                +2                +3                      overlong +3             unpaired +2
			
 
				+	static const char32_t u32str[] = { 0x45, 0x304A, 0x3088, 0x3046, 0x1F3A4, 0x20AC, 0xD801, 0 };
			
 
				 	String s;
			
 
				-	bool err = s.parse_utf8((const char *)u8str);
			
 
				-	CHECK(err);
			
 
				-	CHECK(s.is_empty());
			
 
				+	Error err = s.parse_utf8((const char *)u8str);
			
 
				+	CHECK(err == ERR_PARSE_ERROR);
			
 
				+	CHECK(s == u32str);
			
 
				 
			
 
				 	CharString cs = (const char *)u8str;
			
 
				-	CHECK(String::utf8(cs).is_empty());
			
 
				+	CHECK(String::utf8(cs) == s);
			
 
				 	ERR_PRINT_ON
			
 
				 }
			
 
				 
			
 
				-TEST_CASE("[String] Invalid UTF16") {
			
 
				+TEST_CASE("[String] Invalid UTF8 (unrecoverable)") {
			
 
				+	ERR_PRINT_OFF
			
 
				+	static const uint8_t u8str[] = { 0x45, 0xE3, 0x81, 0x8A, 0x8F, 0xE3, 0xE3, 0x98, 0x8F, 0xE3, 0x82, 0x88, 0xE3, 0x81, 0x86, 0xC0, 0x80, 0xF0, 0x9F, 0x8E, 0xA4, 0xF0, 0x82, 0x82, 0xAC, 0xED, 0xA0, 0x81, 0 };
			
 
				+	//                               +     +2                inv   +2    inv   inv   inv   +2                +2                ovl NUL +1  +3                      overlong +3             unpaired +2
			
 
				+	static const char32_t u32str[] = { 0x45, 0x304A, 0x20, 0x20, 0x20, 0x20, 0x3088, 0x3046, 0x20, 0x1F3A4, 0x20AC, 0xD801, 0 };
			
 
				+	String s;
			
 
				+	Error err = s.parse_utf8((const char *)u8str);
			
 
				+	CHECK(err == ERR_INVALID_DATA);
			
 
				+	CHECK(s == u32str);
			
 
				+
			
 
				+	CharString cs = (const char *)u8str;
			
 
				+	CHECK(String::utf8(cs) == s);
			
 
				+	ERR_PRINT_ON
			
 
				+}
			
 
				+
			
 
				+TEST_CASE("[String] Invalid UTF16 (non-standard)") {
			
 
				 	ERR_PRINT_OFF
			
 
				 	static const char16_t u16str[] = { 0x0045, 0x304A, 0x3088, 0x3046, 0xDFA4, 0 };
			
 
				+	//                                 +       +       +       +       unpaired
			
 
				+	static const char32_t u32str[] = { 0x0045, 0x304A, 0x3088, 0x3046, 0xDFA4, 0 };
			
 
				 	String s;
			
 
				-	bool err = s.parse_utf16(u16str);
			
 
				-	CHECK(err);
			
 
				-	CHECK(s.is_empty());
			
 
				+	Error err = s.parse_utf16(u16str);
			
 
				+	CHECK(err == ERR_PARSE_ERROR);
			
 
				+	CHECK(s == u32str);
			
 
				 
			
 
				 	Char16String cs = u16str;
			
 
				-	CHECK(String::utf16(cs).is_empty());
			
 
				+	CHECK(String::utf16(cs) == s);
			
 
				 	ERR_PRINT_ON
			
 
				 }
			
 
				 
			
@@ -262,8 +281,8 @@ TEST_CASE("[String] Test chr") {
 
				 	CHECK(String::chr('H') == "H");
			
 
				 	CHECK(String::chr(0x3012)[0] == 0x3012);
			
 
				 	ERR_PRINT_OFF
			
 
				-	CHECK(String::chr(0xd812)[0] == 0xfffd); // Unpaired UTF-16 surrogate
			
 
				-	CHECK(String::chr(0x20d812)[0] == 0xfffd); // Outside UTF-32 range
			
 
				+	CHECK(String::chr(0xd812)[0] == 0xd812); // Unpaired UTF-16 surrogate
			
 
				+	CHECK(String::chr(0x20d812)[0] == 0x20d812); // Outside UTF-32 range
			
 
				 	ERR_PRINT_ON
			
 
				 }
			
 
				 
			
@@ -1125,9 +1144,9 @@ TEST_CASE("[String] lstrip and rstrip") {
 
				 #undef STRIP_TEST
			
 
				 }
			
 
				 
			
 
				-TEST_CASE("[String] ensuring empty string into parse_utf8 passes empty string") {
			
 
				+TEST_CASE("[String] Ensuring empty string into parse_utf8 passes empty string") {
			
 
				 	String empty;
			
 
				-	CHECK(empty.parse_utf8(nullptr, -1));
			
 
				+	CHECK(empty.parse_utf8(nullptr, -1) == ERR_INVALID_DATA);
			
 
				 }
			
 
				 
			
 
				 TEST_CASE("[String] Cyrillic to_lower()") {
			
@@ -1440,8 +1459,8 @@ TEST_CASE("[String] validate_node_name") {
 
				 	String name_with_spaces = "Name with spaces";
			
 
				 	CHECK(name_with_spaces.validate_node_name() == "Name with spaces");
			
 
				 
			
 
				-	String name_with_kana = "Name with kana ゴドツ";
			
 
				-	CHECK(name_with_kana.validate_node_name() == "Name with kana ゴドツ");
			
 
				+	String name_with_kana = U"Name with kana ゴドツ";
			
 
				+	CHECK(name_with_kana.validate_node_name() == U"Name with kana ゴドツ");
			
 
				 
			
 
				 	String name_with_invalid_chars = "Name with invalid characters :.@removed!";
			
 
				 	CHECK(name_with_invalid_chars.validate_node_name() == "Name with invalid characters removed!");