Ver Fonte

Expose `copy_from_unchecked` as `append_utf32_unchecked` and `String::utf32_unchecked` in `String` for high-performance string copies. Expose `append_wstring` and `String::wstring` for platform strings.

Lukas Tenbrink há 6 meses atrás
pai
commit
d1fd42bf3c
2 ficheiros alterados com 44 adições e 31 exclusões
  1. 18 16
      core/string/ustring.cpp
  2. 26 15
      core/string/ustring.h

+ 18 - 16
core/string/ustring.cpp

@@ -166,11 +166,13 @@ void String::append_latin1(const Span<char> &p_cstr) {
 	*dst = 0;
 }
 
-void String::append_utf32(const Span<char32_t> &p_cstr) {
+Error String::append_utf32(const Span<char32_t> &p_cstr) {
 	if (p_cstr.is_empty()) {
-		return;
+		return OK;
 	}
 
+	Error error = OK;
+
 	const int prev_length = length();
 	resize_uninitialized(prev_length + p_cstr.size() + 1);
 	const char32_t *src = p_cstr.ptr();
@@ -184,29 +186,29 @@ void String::append_utf32(const Span<char32_t> &p_cstr) {
 			// NUL in string is allowed by the unicode standard, but unsupported in our implementation right now.
 			print_unicode_error("Unexpected NUL character", true);
 			*dst = _replacement_char;
+			error = ERR_PARSE_ERROR;
 		} else if (unlikely((chr & 0xfffff800) == 0xd800)) {
 			print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr), true);
 			*dst = _replacement_char;
+			error = ERR_PARSE_ERROR;
 		} else if (unlikely(chr > 0x10ffff)) {
 			print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr), true);
 			*dst = _replacement_char;
+			error = ERR_PARSE_ERROR;
 		} else {
 			*dst = chr;
 		}
 	}
 	*dst = 0;
+	return error;
 }
 
-// assumes the following have already been validated:
-// p_char != nullptr
-// p_length > 0
-// p_length <= p_char strlen
-// p_char is a valid UTF32 string
-void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
-	resize_uninitialized(p_length + 1); // + 1 for \0
-	char32_t *dst = ptrw();
-	memcpy(dst, p_char, p_length * sizeof(char32_t));
-	*(dst + p_length) = _null;
+void String::append_utf32_unchecked(const Span<char32_t> &p_span) {
+	const int prev_length = length();
+	resize_uninitialized(prev_length + p_span.size() + 1); // + 1 for \0
+	char32_t *dst = ptrw() + prev_length;
+	memcpy(dst, p_span.ptr(), p_span.size() * sizeof(char32_t));
+	*(dst + p_span.size()) = _null;
 }
 
 String String::operator+(const String &p_str) const {
@@ -266,7 +268,7 @@ String &String::operator+=(const String &p_str) {
 		*this = p_str;
 		return *this;
 	}
-	append_utf32(p_str);
+	append_utf32_unchecked(p_str);
 	return *this;
 }
 
@@ -3048,7 +3050,7 @@ String String::substr(int p_from, int p_chars) const {
 	}
 
 	String s;
-	s.copy_from_unchecked(&get_data()[p_from], p_chars);
+	s.append_utf32_unchecked(Span(ptr() + p_from, p_chars));
 	return s;
 }
 
@@ -4225,7 +4227,7 @@ String String::left(int p_len) const {
 	}
 
 	String s;
-	s.copy_from_unchecked(&get_data()[0], p_len);
+	s.append_utf32_unchecked(Span(ptr(), p_len));
 	return s;
 }
 
@@ -4243,7 +4245,7 @@ String String::right(int p_len) const {
 	}
 
 	String s;
-	s.copy_from_unchecked(&get_data()[length() - p_len], p_len);
+	s.append_utf32_unchecked(Span(ptr() + length() - p_len, p_len));
 	return s;
 }
 

+ 26 - 15
core/string/ustring.h

@@ -268,9 +268,6 @@ class [[nodiscard]] String {
 	static constexpr char32_t _null = 0;
 	static constexpr char32_t _replacement_char = 0xfffd;
 
-	// Known-length copy.
-	void copy_from_unchecked(const char32_t *p_char, int p_length);
-
 	// NULL-terminated c string copy - automatically parse the string to find the length.
 	void append_latin1(const char *p_cstr) {
 		append_latin1(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0));
@@ -278,17 +275,6 @@ class [[nodiscard]] String {
 	void append_utf32(const char32_t *p_cstr) {
 		append_utf32(Span(p_cstr, p_cstr ? strlen(p_cstr) : 0));
 	}
-
-	// wchar_t copy_from depends on the platform.
-	void append_wstring(const Span<wchar_t> &p_cstr) {
-#ifdef WINDOWS_ENABLED
-		// wchar_t is 16-bit, parse as UTF-16
-		append_utf16((const char16_t *)p_cstr.ptr(), p_cstr.size());
-#else
-		// wchar_t is 32-bit, copy directly
-		append_utf32((Span<char32_t> &)p_cstr);
-#endif
-	}
 	void append_wstring(const wchar_t *p_cstr) {
 #ifdef WINDOWS_ENABLED
 		// wchar_t is 16-bit, parse as UTF-16
@@ -562,13 +548,38 @@ public:
 	}
 	static String utf16(const Span<char16_t> &p_range) { return utf16(p_range.ptr(), p_range.size()); }
 
-	void append_utf32(const Span<char32_t> &p_cstr);
+	// How a wchar_t string is parsed depends on the platform's wchar_t width.
+	Error append_wstring(const Span<wchar_t> &p_cstr) {
+#ifdef WINDOWS_ENABLED
+		// wchar_t is 16-bit, parse as UTF-16
+		return append_utf16((const char16_t *)p_cstr.ptr(), p_cstr.size());
+#else
+		// wchar_t is 32-bit, copy directly
+		return append_utf32((Span<char32_t> &)p_cstr);
+#endif
+	}
+	static String wstring(const Span<wchar_t> &p_string) {
+		String string;
+		string.append_wstring(p_string);
+		return string;
+	}
+
+	Error append_utf32(const Span<char32_t> &p_cstr);
 	static String utf32(const Span<char32_t> &p_span) {
 		String string;
 		string.append_utf32(p_span);
 		return string;
 	}
 
+	// Like append_utf32, but skips validation of the input code points (and is thus faster).
+	// Prefer this function for conversion from trusted UTF-32 strings.
+	void append_utf32_unchecked(const Span<char32_t> &p_span);
+	static String utf32_unchecked(const Span<char32_t> &p_string) {
+		String string;
+		string.append_utf32_unchecked(p_string);
+		return string;
+	}
+
 	static uint32_t hash(const char32_t *p_cstr, int p_len); /* hash the string */
 	static uint32_t hash(const char32_t *p_cstr); /* hash the string */
 	static uint32_t hash(const wchar_t *p_cstr, int p_len); /* hash the string */