Parcourir la source

Optimize `String` construction from statically known strings by allowing `strlen` to be evaluated at compile time where possible.

Lukas Tenbrink il y a 8 mois
Parent
commit
a3f48f7047
2 fichiers modifiés avec 177 ajouts et 179 suppressions
  1. 11 147
      core/string/ustring.cpp
  2. 166 32
      core/string/ustring.h

+ 11 - 147
core/string/ustring.cpp

@@ -65,15 +65,6 @@ const char16_t Char16String::_null = 0;
 const char32_t String::_null = 0;
 const char32_t String::_replacement_char = 0xfffd;
 
-// strlen equivalent function for char32_t * arguments.
-_FORCE_INLINE_ size_t strlen(const char32_t *p_str) {
-	const char32_t *ptr = p_str;
-	while (*ptr != 0) {
-		++ptr;
-	}
-	return ptr - p_str;
-}
-
 bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end) {
 	const String &s = p_s;
 	int beg = CLAMP(p_col, 0, s.length());
@@ -313,80 +304,32 @@ Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r
 	return OK;
 }
 
-void String::copy_from(const char *p_cstr) {
-	// copy Latin-1 encoded c-string directly
-	if (!p_cstr) {
+void String::copy_from(const StrRange<char> &p_cstr) { // Replaces this string's contents with the p_cstr.len chars at p_cstr.c_str, widening each char to one char32_t.
+	if (p_cstr.len == 0) { // Empty range: clear the string; c_str is never dereferenced on this path.
 		resize(0);
 		return;
 	}
 
-	const size_t len = strlen(p_cstr);
+	resize(p_cstr.len + 1); // Room for len characters plus the terminating 0.
 
-	if (len == 0) {
-		resize(0);
-		return;
-	}
-
-	resize(len + 1); // include 0
-
-	const char *end = p_cstr + len;
+	const char *src = p_cstr.c_str; // Local read cursor; p_cstr is a const reference and is not advanced.
+	const char *end = src + p_cstr.len; // One past the last char to copy.
 	char32_t *dst = ptrw();
 
-	for (; p_cstr < end; ++p_cstr, ++dst) {
+	for (; src < end; ++src, ++dst) {
 		// If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly.
-		*dst = static_cast<uint8_t>(*p_cstr);
+		*dst = static_cast<uint8_t>(*src); // Going through uint8_t keeps every source char in 0..255 before it is widened.
 	}
 	*dst = 0;
 }
 
-void String::copy_from(const char *p_cstr, const int p_clip_to) {
-	// copy Latin-1 encoded c-string directly
-	if (!p_cstr) {
-		resize(0);
-		return;
-	}
-
-	int len = 0;
-	const char *ptr = p_cstr;
-	while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
-		len++;
-	}
-
-	if (len == 0) {
+void String::copy_from(const StrRange<char32_t> &p_cstr) { // Replaces this string's contents with the p_cstr.len characters starting at p_cstr.c_str.
+	if (p_cstr.len == 0) { // Empty range: clear the string here, because copy_from_unchecked is documented to assume p_length > 0.
 		resize(0);
 		return;
 	}
 
-	resize(len + 1); // include 0
-
-	const char *end = p_cstr + len;
-	char32_t *dst = ptrw();
-
-	for (; p_cstr < end; ++p_cstr, ++dst) {
-		// If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly.
-		*dst = static_cast<uint8_t>(*p_cstr);
-	}
-	*dst = 0;
-}
-
-void String::copy_from(const wchar_t *p_cstr) {
-#ifdef WINDOWS_ENABLED
-	// wchar_t is 16-bit, parse as UTF-16
-	parse_utf16((const char16_t *)p_cstr);
-#else
-	// wchar_t is 32-bit, copy directly
-	copy_from((const char32_t *)p_cstr);
-#endif
-}
-
-void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
-#ifdef WINDOWS_ENABLED
-	// wchar_t is 16-bit, parse as UTF-16
-	parse_utf16((const char16_t *)p_cstr, p_clip_to);
-#else
-	// wchar_t is 32-bit, copy directly
-	copy_from((const char32_t *)p_cstr, p_clip_to);
-#endif
+	copy_from_unchecked(p_cstr.c_str, p_cstr.len); // NOTE(review): len is size_t but copy_from_unchecked takes int, and c_str is assumed non-null whenever len > 0 - confirm both hold for all callers.
 }
 
 void String::copy_from(const char32_t &p_char) {
@@ -412,42 +355,6 @@ void String::copy_from(const char32_t &p_char) {
 	dst[1] = 0;
 }
 
-void String::copy_from(const char32_t *p_cstr) {
-	if (!p_cstr) {
-		resize(0);
-		return;
-	}
-
-	const int len = strlen(p_cstr);
-
-	if (len == 0) {
-		resize(0);
-		return;
-	}
-
-	copy_from_unchecked(p_cstr, len);
-}
-
-void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
-	if (!p_cstr) {
-		resize(0);
-		return;
-	}
-
-	int len = 0;
-	const char32_t *ptr = p_cstr;
-	while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
-		len++;
-	}
-
-	if (len == 0) {
-		resize(0);
-		return;
-	}
-
-	copy_from_unchecked(p_cstr, len);
-}
-
 // assumes the following have already been validated:
 // p_char != nullptr
 // p_length > 0
@@ -475,18 +382,6 @@ void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
 	*dst = 0;
 }
 
-void String::operator=(const char *p_str) {
-	copy_from(p_str);
-}
-
-void String::operator=(const char32_t *p_str) {
-	copy_from(p_str);
-}
-
-void String::operator=(const wchar_t *p_str) {
-	copy_from(p_str);
-}
-
 String String::operator+(const String &p_str) const {
 	String res = *this;
 	res += p_str;
@@ -697,7 +592,7 @@ bool String::operator==(const String &p_str) const {
 	return true;
 }
 
-bool String::operator==(const StrRange &p_str_range) const {
+bool String::operator==(const StrRange<char32_t> &p_str_range) const {
 	int len = p_str_range.len;
 
 	if (length() != len) {
@@ -2512,37 +2407,6 @@ Char16String String::utf16() const {
 	return utf16s;
 }
 
-String::String(const char *p_str) {
-	copy_from(p_str);
-}
-
-String::String(const wchar_t *p_str) {
-	copy_from(p_str);
-}
-
-String::String(const char32_t *p_str) {
-	copy_from(p_str);
-}
-
-String::String(const char *p_str, int p_clip_to_len) {
-	copy_from(p_str, p_clip_to_len);
-}
-
-String::String(const wchar_t *p_str, int p_clip_to_len) {
-	copy_from(p_str, p_clip_to_len);
-}
-
-String::String(const char32_t *p_str, int p_clip_to_len) {
-	copy_from(p_str, p_clip_to_len);
-}
-
-String::String(const StrRange &p_range) {
-	if (!p_range.c_str) {
-		return;
-	}
-	copy_from(p_range.c_str, p_range.len);
-}
-
 int64_t String::hex_to_int() const {
 	int len = length();
 	if (len == 0) {

+ 166 - 32
core/string/ustring.h

@@ -39,6 +39,89 @@
 #include "core/typedefs.h"
 #include "core/variant/array.h"
 
+/*************************************************************************/
+/*  Utility Functions                                                    */
+/*************************************************************************/
+
+// Not defined by std.
+// strlen equivalent for char16_t * arguments: counts the char16_t units before the first 0. p_str is dereferenced without a null check, so it must be non-null and 0-terminated.
+constexpr size_t strlen(const char16_t *p_str) {
+	const char16_t *ptr = p_str;
+	while (*ptr != 0) { // Advance until the terminating 0 unit.
+		++ptr;
+	}
+	return ptr - p_str; // Distance from the start == number of units before the terminator.
+}
+
+// strlen equivalent for char32_t * arguments: counts the char32_t units before the first 0. p_str is dereferenced without a null check, so it must be non-null and 0-terminated.
+constexpr size_t strlen(const char32_t *p_str) {
+	const char32_t *ptr = p_str;
+	while (*ptr != 0) { // Advance until the terminating 0 unit.
+		++ptr;
+	}
+	return ptr - p_str; // Distance from the start == number of units before the terminator.
+}
+
+// strlen equivalent for wchar_t * arguments: forwards to the char16_t overload when WINDOWS_ENABLED is defined and to the char32_t overload otherwise.
+constexpr size_t strlen(const wchar_t *str) {
+	// Cast via const void * with two static_casts because reinterpret_cast is not allowed in constexpr. NOTE(review): converting from const void * to an object pointer is itself not a constant expression before C++26 - confirm this overload is really usable at compile time.
+#ifdef WINDOWS_ENABLED
+	// wchar_t is 16-bit here, so measure it as a char16_t string.
+	return strlen(static_cast<const char16_t *>(static_cast<const void *>(str)));
+#else
+	// wchar_t is 32-bit here, so measure it as a char32_t string.
+	return strlen(static_cast<const char32_t *>(static_cast<const void *>(str)));
+#endif
+}
+
+constexpr size_t _strlen_clipped(const char *p_str, int p_clip_to_len) { // Length of p_str up to its first 0, capped at p_clip_to_len characters; a negative p_clip_to_len means "no cap".
+	if (p_clip_to_len < 0) {
+		return strlen(p_str); // NOTE(review): no char overload is visible in this hunk, so this is presumably the C library strlen, which the standard does not declare constexpr - confirm compile-time use relies on a compiler builtin.
+	}
+
+	int len = 0;
+	while (len < p_clip_to_len && *(p_str++) != 0) { // The cap is tested first, so at most p_clip_to_len characters are read.
+		len++;
+	}
+	return len;
+}
+
+constexpr size_t _strlen_clipped(const char32_t *p_str, int p_clip_to_len) { // Length of p_str up to its first 0, capped at p_clip_to_len characters; a negative p_clip_to_len means "no cap".
+	if (p_clip_to_len < 0) {
+		return strlen(p_str); // Uncapped: full scan using the char32_t strlen overload.
+	}
+
+	int len = 0;
+	while (len < p_clip_to_len && *(p_str++) != 0) { // The cap is tested first, so at most p_clip_to_len characters are read.
+		len++;
+	}
+	return len;
+}
+
+/*************************************************************************/
+/*  StrRange                                                             */
+/*************************************************************************/
+
+template <typename Element> // Element is the character type of the range.
+struct StrRange { // Pointer + length pair describing a run of characters; the struct stores no copy of the characters.
+	const Element *c_str; // First character of the range; nullptr when built from nullptr.
+	size_t len; // Number of characters in the range.
+
+	explicit StrRange(const std::nullptr_t p_cstring) : // Empty range.
+			c_str(nullptr), len(0) {}
+
+	explicit StrRange(const Element *p_cstring, const size_t p_len) : // Caller supplies the length; nothing is scanned or validated here.
+			c_str(p_cstring), len(p_len) {}
+
+	template <size_t len> // NOTE(review): this parameter shares its name with the member `len`; the array extent itself is not used as the length.
+	explicit StrRange(const Element (&p_cstring)[len]) : // Array source: the length comes from strlen, so the range ends at the first 0 in the array.
+			c_str(p_cstring), len(strlen(p_cstring)) {}
+
+	static StrRange from_c_str(const Element *p_cstring) { // Range over a 0-terminated string; a null pointer gives length 0.
+		return StrRange(p_cstring, p_cstring ? strlen(p_cstring) : 0);
+	}
+};
+
 /*************************************************************************/
 /*  CharProxy                                                            */
 /*************************************************************************/
@@ -119,6 +202,7 @@ public:
 	int length() const { return size() ? size() - 1 : 0; }
 	const char16_t *get_data() const;
 	operator const char16_t *() const { return get_data(); }
+	explicit operator StrRange<char16_t>() const { return StrRange(get_data(), length()); } // Explicit conversion to a (get_data(), length()) range; only the pointer and length are stored.
 
 protected:
 	void copy_from(const char16_t *p_cstr);
@@ -161,6 +245,7 @@ public:
 	int length() const { return size() ? size() - 1 : 0; }
 	const char *get_data() const;
 	operator const char *() const { return get_data(); }
+	explicit operator StrRange<char>() const { return StrRange(get_data(), length()); } // Explicit conversion to a (get_data(), length()) range; only the pointer and length are stored.
 
 protected:
 	void copy_from(const char *p_cstr);
@@ -170,31 +255,59 @@ protected:
 /*  String                                                               */
 /*************************************************************************/
 
-struct StrRange {
-	const char32_t *c_str;
-	int len;
-
-	StrRange(const char32_t *p_c_str = nullptr, int p_len = 0) {
-		c_str = p_c_str;
-		len = p_len;
-	}
-};
-
 class String {
 	CowData<char32_t> _cowdata;
 	static const char32_t _null;
 	static const char32_t _replacement_char;
 
-	void copy_from(const char *p_cstr);
-	void copy_from(const char *p_cstr, const int p_clip_to);
-	void copy_from(const wchar_t *p_cstr);
-	void copy_from(const wchar_t *p_cstr, const int p_clip_to);
-	void copy_from(const char32_t *p_cstr);
-	void copy_from(const char32_t *p_cstr, const int p_clip_to);
-
+	// Known-length copy.
+	void copy_from(const StrRange<char> &p_cstr);
+	void copy_from(const StrRange<char32_t> &p_cstr);
 	void copy_from(const char32_t &p_char);
+	void copy_from_unchecked(const char32_t *p_char, int p_length);
 
-	void copy_from_unchecked(const char32_t *p_char, const int p_length);
+	// NUL-terminated C string copy: the length is found by scanning for the terminator (from_c_str maps a null pointer to an empty range).
+	void copy_from(const char *p_cstr) {
+		copy_from(StrRange<char>::from_c_str(p_cstr));
+	}
+	void copy_from(const char *p_cstr, int p_clip_to) { // Copies at most p_clip_to characters of p_cstr; a negative p_clip_to means no limit, and a null p_cstr is passed on as a zero-length range.
+		copy_from(StrRange(p_cstr, p_cstr ? _strlen_clipped(p_cstr, p_clip_to) : 0));
+	}
+	void copy_from(const char32_t *p_cstr) { // 0-terminated char32_t copy; from_c_str measures the string and maps a null pointer to an empty range.
+		copy_from(StrRange<char32_t>::from_c_str(p_cstr));
+	}
+	void copy_from(const char32_t *p_cstr, int p_clip_to) { // Copies at most p_clip_to characters of p_cstr; a negative p_clip_to means no limit, and a null p_cstr is passed on as a zero-length range.
+		copy_from(StrRange(p_cstr, p_cstr ? _strlen_clipped(p_cstr, p_clip_to) : 0));
+	}
+
+	// wchar_t copy_from depends on the platform: UTF-16 parsing when WINDOWS_ENABLED is defined, direct char32_t copy otherwise.
+	void copy_from(const StrRange<wchar_t> &p_cstr) {
+#ifdef WINDOWS_ENABLED
+		// wchar_t is 16-bit, parse as UTF-16; the range length is forwarded as parse_utf16's second argument.
+		parse_utf16((const char16_t *)p_cstr.c_str, p_cstr.len);
+#else
+		// wchar_t is 32-bit, copy directly. NOTE(review): the C-style cast below reinterprets StrRange<wchar_t> as StrRange<char32_t> and drops const; consider building a StrRange<char32_t> from c_str and len instead.
+		copy_from((StrRange<char32_t> &)p_cstr);
+#endif
+	}
+	void copy_from(const wchar_t *p_cstr) { // 0-terminated wchar_t copy; the pointer is reinterpreted according to the platform's wchar_t width.
+#ifdef WINDOWS_ENABLED
+		// wchar_t is 16-bit, parse as UTF-16
+		parse_utf16((const char16_t *)p_cstr);
+#else
+		// wchar_t is 32-bit, so forward to the const char32_t * overload.
+		copy_from((const char32_t *)p_cstr);
+#endif
+	}
+	void copy_from(const wchar_t *p_cstr, int p_clip_to) { // Clipped wchar_t copy; p_clip_to is forwarded unchanged to the platform-specific path.
+#ifdef WINDOWS_ENABLED
+		// wchar_t is 16-bit, parse as UTF-16; p_clip_to is passed as parse_utf16's second argument.
+		parse_utf16((const char16_t *)p_cstr, p_clip_to);
+#else
+		// wchar_t is 32-bit, so forward to the clipped const char32_t * overload.
+		copy_from((const char32_t *)p_cstr, p_clip_to);
+#endif
+	}
 
 	bool _base_is_subsequence_of(const String &p_string, bool case_insensitive) const;
 	int _count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const;
@@ -227,6 +340,8 @@ public:
 	}
 	_FORCE_INLINE_ CharProxy<char32_t> operator[](int p_index) { return CharProxy<char32_t>(p_index, _cowdata); }
 
+	/* Compatibility Operators */
+
 	bool operator==(const String &p_str) const;
 	bool operator!=(const String &p_str) const;
 	String operator+(const String &p_str) const;
@@ -238,16 +353,10 @@ public:
 	String &operator+=(const wchar_t *p_str);
 	String &operator+=(const char32_t *p_str);
 
-	/* Compatibility Operators */
-
-	void operator=(const char *p_str);
-	void operator=(const wchar_t *p_str);
-	void operator=(const char32_t *p_str);
-
 	bool operator==(const char *p_str) const;
 	bool operator==(const wchar_t *p_str) const;
 	bool operator==(const char32_t *p_str) const;
-	bool operator==(const StrRange &p_str_range) const;
+	bool operator==(const StrRange<char32_t> &p_str_range) const;
 
 	bool operator!=(const char *p_str) const;
 	bool operator!=(const wchar_t *p_str) const;
@@ -493,13 +602,38 @@ public:
 	Vector<uint8_t> to_utf32_buffer() const;
 	Vector<uint8_t> to_wchar_buffer() const;
 
-	String(const char *p_str);
-	String(const wchar_t *p_str);
-	String(const char32_t *p_str);
-	String(const char *p_str, int p_clip_to_len);
-	String(const wchar_t *p_str, int p_clip_to_len);
-	String(const char32_t *p_str, int p_clip_to_len);
-	String(const StrRange &p_range);
+	// Constructors for NUL-terminated C strings; each forwards to the matching copy_from overload, and p_clip_to_len (where present) is passed through as the clip length.
+	String(const char *p_cstr) {
+		copy_from(p_cstr);
+	}
+	String(const wchar_t *p_cstr) {
+		copy_from(p_cstr);
+	}
+	String(const char32_t *p_cstr) {
+		copy_from(p_cstr);
+	}
+	String(const char *p_cstr, int p_clip_to_len) {
+		copy_from(p_cstr, p_clip_to_len);
+	}
+	String(const wchar_t *p_cstr, int p_clip_to_len) {
+		copy_from(p_cstr, p_clip_to_len);
+	}
+	String(const char32_t *p_cstr, int p_clip_to_len) {
+		copy_from(p_cstr, p_clip_to_len);
+	}
+
+	// Assignment from NUL-terminated C strings; each forwards to the matching copy_from overload and returns void, so these assignments cannot be chained.
+	void operator=(const char *p_cstr) {
+		copy_from(p_cstr);
+	}
+	void operator=(const wchar_t *p_cstr) {
+		copy_from(p_cstr);
+	}
+	void operator=(const char32_t *p_cstr) {
+		copy_from(p_cstr);
+	}
+
+	explicit operator StrRange<char32_t>() const { return StrRange(get_data(), length()); } // Explicit conversion to a (get_data(), length()) range; only the pointer and length are stored.
 };
 
 bool operator==(const char *p_chr, const String &p_str);