Browse Source

Merge pull request #101247 from Ivorforce/string-find-cleanup

Clean up `String::find` and similar functions to remove duplicate code, and speed up comparison.
Thaddeus Crews 3 weeks ago
parent
commit
8deb221829
3 changed files with 62 additions and 167 deletions
  1. 39 162
      core/string/ustring.cpp
  2. 12 5
      core/templates/span.h
  3. 11 0
      tests/core/string/test_string.h

+ 39 - 162
core/string/ustring.cpp

@@ -63,6 +63,17 @@ static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
 	return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
 	return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c);
 }
 }
 
 
+// Case-insensitive version of are_spans_equal
+template <typename T1, typename T2>
+static bool strings_equal_lower(const T1 *p_lhs_begin, const T2 *p_rhs_begin, size_t p_len) {
+	for (size_t i = 0; i < p_len; ++i) {
+		if (_find_lower(p_lhs_begin[i]) != _find_lower(p_rhs_begin[i])) {
+			return false;
+		}
+	}
+	return true;
+}
+
 Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path, String &r_fragment) const {
 Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path, String &r_fragment) const {
 	// Splits the URL into scheme, host, port, path, fragment. Strip credentials when present.
 	// Splits the URL into scheme, host, port, path, fragment. Strip credentials when present.
 	String base = *this;
 	String base = *this;
@@ -3043,41 +3054,7 @@ int String::find(const char *p_str, int p_from) const {
 		return find_char(*p_str, p_from); // Optimize with single-char find.
 		return find_char(*p_str, p_from); // Optimize with single-char find.
 	}
 	}
 
 
-	const char32_t *src = get_data();
-
-	if (str_len == 1) {
-		const char32_t needle = p_str[0];
-
-		for (int i = p_from; i < len; i++) {
-			if (src[i] == needle) {
-				return i;
-			}
-		}
-
-	} else {
-		for (int i = p_from; i <= (len - str_len); i++) {
-			bool found = true;
-			for (int j = 0; j < str_len; j++) {
-				int read_pos = i + j;
-
-				if (read_pos >= len) {
-					ERR_PRINT("read_pos>=length()");
-					return -1;
-				}
-
-				if (src[read_pos] != (char32_t)p_str[j]) {
-					found = false;
-					break;
-				}
-			}
-
-			if (found) {
-				return i;
-			}
-		}
-	}
-
-	return -1;
+	return span().find_sequence(Span((const unsigned char *)p_str, str_len), p_from);
 }
 }
 
 
 int String::find_char(char32_t p_char, int p_from) const {
 int String::find_char(char32_t p_char, int p_from) const {
@@ -3110,36 +3087,21 @@ int String::findmk(const Vector<String> &p_keys, int p_from, int *r_key) const {
 	const char32_t *src = get_data();
 	const char32_t *src = get_data();
 
 
 	for (int i = p_from; i < len; i++) {
 	for (int i = p_from; i < len; i++) {
-		bool found = true;
 		for (int k = 0; k < key_count; k++) {
 		for (int k = 0; k < key_count; k++) {
-			found = true;
-			if (r_key) {
-				*r_key = k;
-			}
-			const char32_t *cmp = keys[k].get_data();
-			int l = keys[k].length();
-
-			for (int j = 0; j < l; j++) {
-				int read_pos = i + j;
+			const int str_len = keys[k].length();
 
 
-				if (read_pos >= len) {
-					found = false;
-					break;
-				}
+			if (i + str_len > len) {
+				continue; // Can't find this key here.
+			}
 
 
-				if (src[read_pos] != cmp[j]) {
-					found = false;
-					break;
+			const char32_t *str = keys[k].get_data();
+			if (are_spans_equal(src + i, str, str_len)) {
+				if (r_key) {
+					*r_key = k;
 				}
 				}
-			}
-			if (found) {
-				break;
+				return i;
 			}
 			}
 		}
 		}
-
-		if (found) {
-			return i;
-		}
 	}
 	}
 
 
 	return -1;
 	return -1;
@@ -3156,28 +3118,11 @@ int String::findn(const String &p_str, int p_from) const {
 		return -1; // Still out of bounds
 		return -1; // Still out of bounds
 	}
 	}
 
 
-	const char32_t *srcd = get_data();
+	const char32_t *src = get_data();
+	const char32_t *str = p_str.get_data();
 
 
 	for (int i = p_from; i <= (len - str_len); i++) {
 	for (int i = p_from; i <= (len - str_len); i++) {
-		bool found = true;
-		for (int j = 0; j < str_len; j++) {
-			int read_pos = i + j;
-
-			if (read_pos >= len) {
-				ERR_PRINT("read_pos>=length()");
-				return -1;
-			}
-
-			char32_t src = _find_lower(srcd[read_pos]);
-			char32_t dst = _find_lower(p_str[j]);
-
-			if (src != dst) {
-				found = false;
-				break;
-			}
-		}
-
-		if (found) {
+		if (strings_equal_lower(src + i, str, str_len)) {
 			return i;
 			return i;
 		}
 		}
 	}
 	}
@@ -3196,28 +3141,10 @@ int String::findn(const char *p_str, int p_from) const {
 		return -1; // Still out of bounds
 		return -1; // Still out of bounds
 	}
 	}
 
 
-	const char32_t *srcd = get_data();
+	const char32_t *src = get_data();
 
 
 	for (int i = p_from; i <= (len - str_len); i++) {
 	for (int i = p_from; i <= (len - str_len); i++) {
-		bool found = true;
-		for (int j = 0; j < str_len; j++) {
-			int read_pos = i + j;
-
-			if (read_pos >= len) {
-				ERR_PRINT("read_pos>=length()");
-				return -1;
-			}
-
-			char32_t src = _find_lower(srcd[read_pos]);
-			char32_t dst = _find_lower(p_str[j]);
-
-			if (src != dst) {
-				found = false;
-				break;
-			}
-		}
-
-		if (found) {
+		if (strings_equal_lower(src + i, p_str, str_len)) {
 			return i;
 			return i;
 		}
 		}
 	}
 	}
@@ -3255,31 +3182,12 @@ int String::rfind(const char *p_str, int p_from) const {
 		return -1; // Still out of bounds
 		return -1; // Still out of bounds
 	}
 	}
 
 
-	const char32_t *source = get_data();
-
-	for (int i = p_from; i >= 0; i--) {
-		bool found = true;
-		for (int j = 0; j < str_len; j++) {
-			int read_pos = i + j;
-
-			if (read_pos >= length()) {
-				ERR_PRINT("read_pos>=length()");
-				return -1;
-			}
-
-			const char32_t key_needle = p_str[j];
-			if (source[read_pos] != key_needle) {
-				found = false;
-				break;
-			}
-		}
-
-		if (found) {
-			return i;
-		}
+	if (str_len == 1) {
+		// Optimize with single-char implementation.
+		return span().rfind(p_str[0], p_from);
 	}
 	}
 
 
-	return -1;
+	return span().rfind_sequence(Span((const unsigned char *)p_str, str_len), p_from);
 }
 }
 
 
 int String::rfind_char(char32_t p_char, int p_from) const {
 int String::rfind_char(char32_t p_char, int p_from) const {
@@ -3303,28 +3211,16 @@ int String::rfindn(const String &p_str, int p_from) const {
 		return -1; // Still out of bounds
 		return -1; // Still out of bounds
 	}
 	}
 
 
+	if (str_len == 1) {
+		// Optimize with single-char implementation.
+		return span().rfind(p_str[0], p_from);
+	}
+
 	const char32_t *src = get_data();
 	const char32_t *src = get_data();
+	const char32_t *str = p_str.get_data();
 
 
 	for (int i = p_from; i >= 0; i--) {
 	for (int i = p_from; i >= 0; i--) {
-		bool found = true;
-		for (int j = 0; j < str_len; j++) {
-			int read_pos = i + j;
-
-			if (read_pos >= len) {
-				ERR_PRINT("read_pos>=length()");
-				return -1;
-			}
-
-			char32_t srcc = _find_lower(src[read_pos]);
-			char32_t dstc = _find_lower(p_str[j]);
-
-			if (srcc != dstc) {
-				found = false;
-				break;
-			}
-		}
-
-		if (found) {
+		if (strings_equal_lower(src + i, str, str_len)) {
 			return i;
 			return i;
 		}
 		}
 	}
 	}
@@ -3343,29 +3239,10 @@ int String::rfindn(const char *p_str, int p_from) const {
 		return -1; // Still out of bounds
 		return -1; // Still out of bounds
 	}
 	}
 
 
-	const char32_t *source = get_data();
+	const char32_t *src = get_data();
 
 
 	for (int i = p_from; i >= 0; i--) {
 	for (int i = p_from; i >= 0; i--) {
-		bool found = true;
-		for (int j = 0; j < str_len; j++) {
-			int read_pos = i + j;
-
-			if (read_pos >= len) {
-				ERR_PRINT("read_pos>=length()");
-				return -1;
-			}
-
-			const char32_t key_needle = p_str[j];
-			int srcc = _find_lower(source[read_pos]);
-			int keyc = _find_lower(key_needle);
-
-			if (srcc != keyc) {
-				found = false;
-				break;
-			}
-		}
-
-		if (found) {
+		if (strings_equal_lower(src + i, p_str, str_len)) {
 			return i;
 			return i;
 		}
 		}
 	}
 	}

+ 12 - 5
core/templates/span.h

@@ -116,11 +116,14 @@ public:
 
 
 	// Algorithms.
 	// Algorithms.
 	constexpr int64_t find(const T &p_val, uint64_t p_from = 0) const;
 	constexpr int64_t find(const T &p_val, uint64_t p_from = 0) const;
-	constexpr int64_t find_sequence(const Span<T> &p_span, uint64_t p_from = 0) const;
+	template <typename T1 = T>
+	constexpr int64_t find_sequence(const Span<T1> &p_span, uint64_t p_from = 0) const;
 	constexpr int64_t rfind(const T &p_val, uint64_t p_from) const;
 	constexpr int64_t rfind(const T &p_val, uint64_t p_from) const;
 	_FORCE_INLINE_ constexpr int64_t rfind(const T &p_val) const { return rfind(p_val, size() - 1); }
 	_FORCE_INLINE_ constexpr int64_t rfind(const T &p_val) const { return rfind(p_val, size() - 1); }
-	constexpr int64_t rfind_sequence(const Span<T> &p_span, uint64_t p_from) const;
-	_FORCE_INLINE_ constexpr int64_t rfind_sequence(const Span<T> &p_span) const { return rfind_sequence(p_span, size() - p_span.size()); }
+	template <typename T1 = T>
+	constexpr int64_t rfind_sequence(const Span<T1> &p_span, uint64_t p_from) const;
+	template <typename T1 = T>
+	_FORCE_INLINE_ constexpr int64_t rfind_sequence(const Span<T1> &p_span) const { return rfind_sequence(p_span, size() - p_span.size()); }
 	constexpr uint64_t count(const T &p_val) const;
 	constexpr uint64_t count(const T &p_val) const;
 	/// Find the index of the given value using binary search.
 	/// Find the index of the given value using binary search.
 	/// Note: Assumes that elements in the span are sorted. Otherwise, use find() instead.
 	/// Note: Assumes that elements in the span are sorted. Otherwise, use find() instead.
@@ -142,7 +145,8 @@ constexpr int64_t Span<T>::find(const T &p_val, uint64_t p_from) const {
 }
 }
 
 
 template <typename T>
 template <typename T>
-constexpr int64_t Span<T>::find_sequence(const Span<T> &p_span, uint64_t p_from) const {
+template <typename T1>
+constexpr int64_t Span<T>::find_sequence(const Span<T1> &p_span, uint64_t p_from) const {
 	for (uint64_t i = p_from; i <= size() - p_span.size(); i++) {
 	for (uint64_t i = p_from; i <= size() - p_span.size(); i++) {
 		if (are_spans_equal(ptr() + i, p_span.ptr(), p_span.size())) {
 		if (are_spans_equal(ptr() + i, p_span.ptr(), p_span.size())) {
 			return i;
 			return i;
@@ -154,6 +158,7 @@ constexpr int64_t Span<T>::find_sequence(const Span<T> &p_span, uint64_t p_from)
 
 
 template <typename T>
 template <typename T>
 constexpr int64_t Span<T>::rfind(const T &p_val, uint64_t p_from) const {
 constexpr int64_t Span<T>::rfind(const T &p_val, uint64_t p_from) const {
+	DEV_ASSERT(p_from < size());
 	for (int64_t i = p_from; i >= 0; i--) {
 	for (int64_t i = p_from; i >= 0; i--) {
 		if (ptr()[i] == p_val) {
 		if (ptr()[i] == p_val) {
 			return i;
 			return i;
@@ -163,7 +168,9 @@ constexpr int64_t Span<T>::rfind(const T &p_val, uint64_t p_from) const {
 }
 }
 
 
 template <typename T>
 template <typename T>
-constexpr int64_t Span<T>::rfind_sequence(const Span<T> &p_span, uint64_t p_from) const {
+template <typename T1>
+constexpr int64_t Span<T>::rfind_sequence(const Span<T1> &p_span, uint64_t p_from) const {
+	DEV_ASSERT(p_from + p_span.size() <= size());
 	for (int64_t i = p_from; i >= 0; i--) {
 	for (int64_t i = p_from; i >= 0; i--) {
 		if (are_spans_equal(ptr() + i, p_span.ptr(), p_span.size())) {
 		if (are_spans_equal(ptr() + i, p_span.ptr(), p_span.size())) {
 			return i;
 			return i;

+ 11 - 0
tests/core/string/test_string.h

@@ -396,6 +396,8 @@ TEST_CASE("[String] Find") {
 	MULTICHECK_STRING_EQ(s, find, "tty", 3);
 	MULTICHECK_STRING_EQ(s, find, "tty", 3);
 	MULTICHECK_STRING_EQ(s, find, "Revenge of the Monster Truck", -1);
 	MULTICHECK_STRING_EQ(s, find, "Revenge of the Monster Truck", -1);
 	MULTICHECK_STRING_INT_EQ(s, find, "Wo", 9, 13);
 	MULTICHECK_STRING_INT_EQ(s, find, "Wo", 9, 13);
+	MULTICHECK_STRING_INT_EQ(s, find, "Wo", 1000, -1);
+	MULTICHECK_STRING_INT_EQ(s, find, "Wo", -1, -1);
 	MULTICHECK_STRING_EQ(s, find, "", -1);
 	MULTICHECK_STRING_EQ(s, find, "", -1);
 	MULTICHECK_STRING_EQ(s, find, "Pretty Woman Woman", 0);
 	MULTICHECK_STRING_EQ(s, find, "Pretty Woman Woman", 0);
 	MULTICHECK_STRING_EQ(s, find, "WOMAN", -1);
 	MULTICHECK_STRING_EQ(s, find, "WOMAN", -1);
@@ -407,6 +409,8 @@ TEST_CASE("[String] Find") {
 	MULTICHECK_STRING_EQ(s, rfind, "man", 15);
 	MULTICHECK_STRING_EQ(s, rfind, "man", 15);
 	MULTICHECK_STRING_EQ(s, rfind, "WOMAN", -1);
 	MULTICHECK_STRING_EQ(s, rfind, "WOMAN", -1);
 	MULTICHECK_STRING_INT_EQ(s, rfind, "", 15, -1);
 	MULTICHECK_STRING_INT_EQ(s, rfind, "", 15, -1);
+	MULTICHECK_STRING_INT_EQ(s, rfind, "Wo", 1000, -1);
+	MULTICHECK_STRING_INT_EQ(s, rfind, "Wo", -1, 13);
 }
 }
 
 
 TEST_CASE("[String] Find character") {
 TEST_CASE("[String] Find character") {
@@ -426,6 +430,8 @@ TEST_CASE("[String] Find case insensitive") {
 	String s = "Pretty Whale Whale";
 	String s = "Pretty Whale Whale";
 	MULTICHECK_STRING_EQ(s, findn, "WHA", 7);
 	MULTICHECK_STRING_EQ(s, findn, "WHA", 7);
 	MULTICHECK_STRING_INT_EQ(s, findn, "WHA", 9, 13);
 	MULTICHECK_STRING_INT_EQ(s, findn, "WHA", 9, 13);
+	MULTICHECK_STRING_INT_EQ(s, findn, "WHA", 1000, -1);
+	MULTICHECK_STRING_INT_EQ(s, findn, "WHA", -1, -1);
 	MULTICHECK_STRING_EQ(s, findn, "Revenge of the Monster SawFish", -1);
 	MULTICHECK_STRING_EQ(s, findn, "Revenge of the Monster SawFish", -1);
 	MULTICHECK_STRING_EQ(s, findn, "", -1);
 	MULTICHECK_STRING_EQ(s, findn, "", -1);
 	MULTICHECK_STRING_EQ(s, findn, "wha", 7);
 	MULTICHECK_STRING_EQ(s, findn, "wha", 7);
@@ -437,6 +443,8 @@ TEST_CASE("[String] Find case insensitive") {
 	MULTICHECK_STRING_EQ(s, rfindn, "wha", 13);
 	MULTICHECK_STRING_EQ(s, rfindn, "wha", 13);
 	MULTICHECK_STRING_EQ(s, rfindn, "Wha", 13);
 	MULTICHECK_STRING_EQ(s, rfindn, "Wha", 13);
 	MULTICHECK_STRING_INT_EQ(s, rfindn, "", 13, -1);
 	MULTICHECK_STRING_INT_EQ(s, rfindn, "", 13, -1);
+	MULTICHECK_STRING_INT_EQ(s, rfindn, "WHA", 1000, -1);
+	MULTICHECK_STRING_INT_EQ(s, rfindn, "WHA", -1, 13);
 }
 }
 
 
 TEST_CASE("[String] Find MK") {
 TEST_CASE("[String] Find MK") {
@@ -453,6 +461,9 @@ TEST_CASE("[String] Find MK") {
 
 
 	CHECK(s.findmk(keys, 5, &key) == 9);
 	CHECK(s.findmk(keys, 5, &key) == 9);
 	CHECK(key == 2);
 	CHECK(key == 2);
+
+	CHECK(s.findmk(keys, -1, &key) == -1);
+	CHECK(s.findmk(keys, 1000, &key) == -1);
 }
 }
 
 
 TEST_CASE("[String] Find and replace") {
 TEST_CASE("[String] Find and replace") {