Browse Source

Various StringUtilities feature and safety improvements

- Add `StringView::empty()`.
- `ToCharacter` bounds checking instead of assuming zero-terminated strings.
- Construct UTF-8 iterator from string view.
Michael Ragazzon 1 year ago
parent
commit
574c78ffeb

+ 1 - 1
Backends/RmlUi_Platform_Win32.cpp

@@ -302,7 +302,7 @@ bool RmlWin32::WindowProcedure(Rml::Context* context, TextInputMethodEditor_Win3
 			{
 				// Second 16-bit code unit of a two-wide character.
 				Rml::String utf8 = ConvertToUTF8(std::wstring{first_u16_code_unit, c});
-				character = Rml::StringUtilities::ToCharacter(utf8.data());
+				character = Rml::StringUtilities::ToCharacter(utf8.data(), utf8.data() + utf8.size());
 			}
 			else if (c == '\r')
 			{

+ 4 - 2
Include/RmlUi/Core/StringUtilities.h

@@ -108,7 +108,7 @@ namespace StringUtilities {
 	RMLUICORE_API bool StringCompareCaseInsensitive(StringView lhs, StringView rhs);
 
 	// Decode the first code point in a zero-terminated UTF-8 string.
-	RMLUICORE_API Character ToCharacter(const char* p);
+	RMLUICORE_API Character ToCharacter(const char* p, const char* p_end);
 
 	// Encode a single code point as a UTF-8 string.
 	RMLUICORE_API String ToUTF8(Character character);
@@ -169,6 +169,7 @@ public:
 	inline const char* begin() const { return p_begin; }
 	inline const char* end() const { return p_end; }
 
+	inline bool empty() const { return p_begin == p_end; }
 	inline size_t size() const { return size_t(p_end - p_begin); }
 
 	explicit inline operator String() const { return String(p_begin, p_end); }
@@ -189,6 +190,7 @@ private:
 class RMLUICORE_API StringIteratorU8 {
 public:
 	StringIteratorU8(const char* p_begin, const char* p, const char* p_end);
+	StringIteratorU8(StringView string);
 	StringIteratorU8(const String& string);
 	StringIteratorU8(const String& string, size_t offset);
 	StringIteratorU8(const String& string, size_t offset, size_t count);
@@ -199,7 +201,7 @@ public:
 	StringIteratorU8& operator--();
 
 	// Returns the codepoint at the current position. The iterator must be dereferencable.
-	inline Character operator*() const { return StringUtilities::ToCharacter(p); }
+	inline Character operator*() const { return StringUtilities::ToCharacter(p, view.end()); }
 
 	// Returns false when the iterator is located just outside the valid part of the string.
 	explicit inline operator bool() const { return (p != view.begin() - 1) && (p != view.end()); }

+ 3 - 2
Source/Core/ElementText.cpp

@@ -212,7 +212,7 @@ bool ElementText::GenerateLine(String& line, int& line_length, float& line_width
 	bool break_at_endline =
 		white_space_property == WhiteSpace::Pre || white_space_property == WhiteSpace::Prewrap || white_space_property == WhiteSpace::Preline;
 
-	const TextShapingContext text_shaping_context{ computed.language(), computed.direction(), computed.letter_spacing() };
+	const TextShapingContext text_shaping_context{computed.language(), computed.direction(), computed.letter_spacing()};
 	TextTransform text_transform_property = computed.text_transform();
 	WordBreak word_break = computed.word_break();
 
@@ -229,7 +229,8 @@ bool ElementText::GenerateLine(String& line, int& line_length, float& line_width
 		const char* next_token_begin = token_begin;
 		Character previous_codepoint = Character::Null;
 		if (!line.empty())
-			previous_codepoint = StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&line.back(), line.data()));
+			previous_codepoint =
+				StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&line.back(), line.data()), line.data() + line.size());
 
 		// Generate the next token and determine its pixel-length.
 		bool break_line = BuildToken(token, next_token_begin, string_end, line.empty() && trim_whitespace_prefix, collapse_white_space,

+ 3 - 2
Source/Core/Elements/WidgetTextInput.cpp

@@ -1082,7 +1082,7 @@ float WidgetTextInput::GetAlignmentSpecificTextOffset(const Line& line) const
 		// For right alignment with soft-wrapped newlines, remove up to a single space to align the last word to the right edge.
 		const bool is_last_line = (line.value_offset + line.size == (int)value.size());
 		const bool is_soft_wrapped = (!is_last_line && line.editable_length == line.size);
-		if (is_soft_wrapped && editable_line_string.size() > 0 && *(editable_line_string.end() - 1) == ' ')
+		if (is_soft_wrapped && !editable_line_string.empty() && *(editable_line_string.end() - 1) == ' ')
 		{
 			editable_line_string = StringView(editable_line_string.begin(), editable_line_string.end() - 1);
 		}
@@ -1324,7 +1324,8 @@ Vector2f WidgetTextInput::FormatText(float height_constraint)
 				return 0.0f;
 			// We could join the whole string, and compare the result of the joined width to the individual widths of each string. Instead, we take
 			// the two neighboring characters from each string and compare the string width with and without kerning, which should be much faster.
-			const Character left_back = StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&left.back(), &left.front()));
+			const Character left_back =
+				StringUtilities::ToCharacter(StringUtilities::SeekBackwardUTF8(&left.back(), &left.front()), left.data() + left.size());
 			const String right_front_u8 =
 				right.substr(0, size_t(StringUtilities::SeekForwardUTF8(right.c_str() + 1, right.c_str() + right.size()) - right.c_str()));
 			const int width_kerning = ElementUtilities::GetStringWidth(text_element, right_front_u8, left_back);

+ 7 - 1
Source/Core/StringUtilities.cpp

@@ -430,8 +430,10 @@ bool StringUtilities::StringCompareCaseInsensitive(const StringView lhs, const S
 	return true;
 }
 
-Character StringUtilities::ToCharacter(const char* p)
+Character StringUtilities::ToCharacter(const char* p, const char* p_end)
 {
+	RMLUI_ASSERTMSG(p && p != p_end, "ToCharacter expects a valid, non-empty input string");
+
 	if ((*p & (1 << 7)) == 0)
 		return static_cast<Character>(*p);
 
@@ -459,6 +461,9 @@ Character StringUtilities::ToCharacter(const char* p)
 		return Character::Null;
 	}
 
+	if (p_end - p < num_bytes)
+		return Character::Null;
+
 	for (int i = 1; i < num_bytes; i++)
 	{
 		const char byte = *(p + i);
@@ -586,6 +591,7 @@ bool StringView::operator==(const StringView& other) const
 }
 
 StringIteratorU8::StringIteratorU8(const char* p_begin, const char* p, const char* p_end) : view(p_begin, p_end), p(p) {}
+StringIteratorU8::StringIteratorU8(StringView string) : view(string), p(view.begin()) {}
 StringIteratorU8::StringIteratorU8(const String& string) : view(string), p(string.data()) {}
 StringIteratorU8::StringIteratorU8(const String& string, size_t offset) : view(string), p(string.data() + offset) {}
 StringIteratorU8::StringIteratorU8(const String& string, size_t offset, size_t count) : view(string, 0, offset + count), p(string.data() + offset) {}