Browse Source

Use char16_t for utf16, and char32_t for underlying CodePoint type. Some refactoring.

Michael Ragazzon 6 years ago
parent
commit
b4e6b22e87

+ 8 - 14
Include/RmlUi/Core/StringUtilities.h

@@ -88,8 +88,6 @@ namespace StringUtilities
 	RMLUICORE_API String Replace(String subject, char search, char replace);
 	RMLUICORE_API String Replace(String subject, char search, char replace);
 
 
 	/// Checks if a given value is a whitespace character.
 	/// Checks if a given value is a whitespace character.
-	/// @param[in] x The character to evaluate.
-	/// @return True if the character is whitespace, false otherwise.
 	template < typename CharacterType >
 	template < typename CharacterType >
 	inline bool IsWhitespace(CharacterType x)
 	inline bool IsWhitespace(CharacterType x)
 	{
 	{
@@ -97,8 +95,6 @@ namespace StringUtilities
 	}
 	}
 
 
 	/// Strip whitespace characters from the beginning and end of a string.
 	/// Strip whitespace characters from the beginning and end of a string.
-	/// @param[in] string The string to trim.
-	/// @return The stripped string.
 	RMLUICORE_API String StripWhitespace(const String& string);
 	RMLUICORE_API String StripWhitespace(const String& string);
 
 
 	/// Operator for STL containers using strings.
 	/// Operator for STL containers using strings.
@@ -120,14 +116,14 @@ namespace StringUtilities
 	RMLUICORE_API size_t LengthUTF8(StringView string_view);
 	RMLUICORE_API size_t LengthUTF8(StringView string_view);
 
 
 	// Seek forward in a UTF-8 string, skipping continuation bytes.
 	// Seek forward in a UTF-8 string, skipping continuation bytes.
-	inline const char* SeekForwardU8(const char* p, const char* p_end)
+	inline const char* SeekForwardUTF8(const char* p, const char* p_end)
 	{
 	{
 		while (p != p_end && (*p & 0b1100'0000) == 0b1000'0000)
 		while (p != p_end && (*p & 0b1100'0000) == 0b1000'0000)
 			++p;
 			++p;
 		return p;
 		return p;
 	}
 	}
 	// Seek backward in a UTF-8 string, skipping continuation bytes.
 	// Seek backward in a UTF-8 string, skipping continuation bytes.
-	inline const char* SeekBackU8(const char* p, const char* p_begin)
+	inline const char* SeekBackwardUTF8(const char* p, const char* p_begin)
 	{
 	{
 		while ((p + 1) != p_begin && (*p & 0b1100'0000) == 0b1000'0000)
 		while ((p + 1) != p_begin && (*p & 0b1100'0000) == 0b1000'0000)
 			--p;
 			--p;
@@ -135,15 +131,13 @@ namespace StringUtilities
 	}
 	}
 
 
 
 
-	/// Converts a string in UTF-8 encoding to a wide string in UTF-16 encoding. The UTF-16 words will
-	/// be encoded as either big- or little-endian, depending on the host processor.
-	/// Reports a warning if the conversion fails.
-	RMLUICORE_API WString ToUTF16(const String& str);
+	/// Converts a string in UTF-8 encoding to a u16string in UTF-16 encoding.
+	/// Reports a warning if some or all characters could not be converted.
+	RMLUICORE_API U16String ToUTF16(const String& str);
 
 
-	/// Converts a wide string in UTF-16 encoding into a string in UTF-8 encoding. This
-	/// function assumes the endianness of the input words to be the same as the host processor.
-	/// Reports a warning if the conversion fails.
-	RMLUICORE_API String ToUTF8(const WString& wstr);
+	/// Converts a u16string in UTF-16 encoding into a string in UTF-8 encoding.
+	/// Reports a warning if some or all characters could not be converted.
+	RMLUICORE_API String ToUTF8(const U16String& u16str);
 }
 }
 
 
 
 

+ 2 - 8
Include/RmlUi/Core/Types.h

@@ -60,17 +60,11 @@ namespace Core {
 typedef unsigned char byte;
 typedef unsigned char byte;
 typedef double Time;
 typedef double Time;
 typedef void* ScriptObject;
 typedef void* ScriptObject;
-enum class CodePoint : unsigned int { Null, Replacement = 0xfffd };
+enum class CodePoint : char32_t { Null, Replacement = 0xfffd };
 
 
 }
 }
 }
 }
 
 
-#ifdef RMLUI_PLATFORM_WIN32
-typedef unsigned __int64 uint64_t;
-#else
-#include <inttypes.h>
-#endif
-
 #include "Colour.h"
 #include "Colour.h"
 #include "Vector2.h"
 #include "Vector2.h"
 #include "Vector3.h"
 #include "Vector3.h"
@@ -122,8 +116,8 @@ using FontFaceHandle = uintptr_t;
 
 
 // Strings
 // Strings
 using String = std::string;
 using String = std::string;
-using WString = std::wstring;
 using StringList = std::vector< String >;
 using StringList = std::vector< String >;
+using U16String = std::u16string;
 
 
 // Smart pointer types
 // Smart pointer types
 template<typename T>
 template<typename T>

+ 9 - 9
Samples/shell/src/win32/InputWin32.cpp

@@ -108,32 +108,32 @@ void InputWin32::ProcessWindowsEvent(UINT message, WPARAM w_param, LPARAM l_para
 
 
 		case WM_CHAR:
 		case WM_CHAR:
 		{
 		{
-			static wchar_t two_wide_char_first = 0;
+			static char16_t first_u16_code_unit = 0;
 
 
-			wchar_t w = (wchar_t)w_param;
-			Rml::Core::CodePoint code_point = (Rml::Core::CodePoint)w;
+			char16_t c = (char16_t)w_param;
+			Rml::Core::CodePoint code_point = (Rml::Core::CodePoint)c;
 
 
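 			// Windows delivers a character outside the Basic Multilingual Plane as a UTF-16 surrogate pair, sent as two separate WM_CHAR messages.
 			// Windows delivers a character outside the Basic Multilingual Plane as a UTF-16 surrogate pair, sent as two separate WM_CHAR messages.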
-			if (w >= 0xD800 && w < 0xDC00)
+			if (c >= 0xD800 && c < 0xDC00)
 			{
 			{
 				// High surrogate: first 16-bit code unit of a surrogate pair.
 				// High surrogate: first 16-bit code unit of a surrogate pair.
-				two_wide_char_first = w;
+				first_u16_code_unit = c;
 			}
 			}
 			else
 			else
 			{
 			{
-				if (w >= 0xDC00 && w < 0xE000 && two_wide_char_first != 0)
+				if (c >= 0xDC00 && c < 0xE000 && first_u16_code_unit != 0)
 				{
 				{
 					// Low surrogate: second 16-bit code unit of a surrogate pair.
 					// Low surrogate: second 16-bit code unit of a surrogate pair.
-					Rml::Core::String utf8 = Rml::Core::StringUtilities::ToUTF8({ two_wide_char_first, w });
+					Rml::Core::String utf8 = Rml::Core::StringUtilities::ToUTF8({ first_u16_code_unit, c });
 					code_point = Rml::Core::StringUtilities::ToCodePoint(utf8.data());
 					code_point = Rml::Core::StringUtilities::ToCodePoint(utf8.data());
 				}
 				}
-				else if (w == '\r')
+				else if (c == '\r')
 				{
 				{
 					// Windows sends new-lines as carriage returns; convert them to line feeds.
 					// Windows sends new-lines as carriage returns; convert them to line feeds.
 					code_point = (Rml::Core::CodePoint)'\n';
 					code_point = (Rml::Core::CodePoint)'\n';
 				}
 				}
 
 
-				two_wide_char_first = 0;
+				first_u16_code_unit = 0;
 
 
 				// Only send through printable characters.
 				// Only send through printable characters.
 				if ((unsigned int)code_point >= 32 || code_point == (Rml::Core::CodePoint)'\n')
 				if ((unsigned int)code_point >= 32 || code_point == (Rml::Core::CodePoint)'\n')

+ 11 - 11
Samples/shell/src/win32/ShellWin32.cpp

@@ -37,7 +37,7 @@ static LRESULT CALLBACK WindowProcedure(HWND window_handle, UINT message, WPARAM
 
 
 static bool activated = true;
 static bool activated = true;
 static bool running = false;
 static bool running = false;
-static Rml::Core::WString instance_name;
+static Rml::Core::U16String instance_name;
 static HWND window_handle = nullptr;
 static HWND window_handle = nullptr;
 static HINSTANCE instance_handle = nullptr;
 static HINSTANCE instance_handle = nullptr;
 
 
@@ -107,7 +107,7 @@ bool Shell::OpenWindow(const char* in_name, ShellRenderInterfaceExtensions *_she
 {
 {
 	WNDCLASSW window_class;
 	WNDCLASSW window_class;
 
 
-	Rml::Core::WString name = Rml::Core::StringUtilities::ToUTF16(Rml::Core::String(in_name));
+	Rml::Core::U16String name = Rml::Core::StringUtilities::ToUTF16(Rml::Core::String(in_name));
 
 
 	// Fill out the window class struct.
 	// Fill out the window class struct.
 	window_class.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC;
 	window_class.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC;
@@ -119,7 +119,7 @@ bool Shell::OpenWindow(const char* in_name, ShellRenderInterfaceExtensions *_she
 	window_class.hCursor = cursor_default;
 	window_class.hCursor = cursor_default;
 	window_class.hbrBackground = nullptr;
 	window_class.hbrBackground = nullptr;
 	window_class.lpszMenuName = nullptr;
 	window_class.lpszMenuName = nullptr;
-	window_class.lpszClassName = name.data();
+	window_class.lpszClassName = (LPCWSTR)name.data();
 
 
 	if (!RegisterClassW(&window_class))
 	if (!RegisterClassW(&window_class))
 	{
 	{
@@ -130,8 +130,8 @@ bool Shell::OpenWindow(const char* in_name, ShellRenderInterfaceExtensions *_she
 	}
 	}
 
 
 	window_handle = CreateWindowExW(WS_EX_APPWINDOW | WS_EX_WINDOWEDGE,
 	window_handle = CreateWindowExW(WS_EX_APPWINDOW | WS_EX_WINDOWEDGE,
-								   name.data(),	// Window class name.
-								   name.data(),
+								   (LPCWSTR)name.data(),	// Window class name.
+								   (LPCWSTR)name.data(),
 								   WS_CLIPSIBLINGS | WS_CLIPCHILDREN | WS_OVERLAPPEDWINDOW,
 								   WS_CLIPSIBLINGS | WS_CLIPCHILDREN | WS_OVERLAPPEDWINDOW,
 								   0, 0,	// Window position.
 								   0, 0,	// Window position.
 								   width, height,// Window size.
 								   width, height,// Window size.
@@ -191,7 +191,7 @@ void Shell::CloseWindow()
 	}
 	}
 
 
 	DestroyWindow(window_handle);  
 	DestroyWindow(window_handle);  
-	UnregisterClassW(instance_name.data(), instance_handle);
+	UnregisterClassW((LPCWSTR)instance_name.data(), instance_handle);
 }
 }
 
 
 // Returns a platform-dependent handle to the window.
 // Returns a platform-dependent handle to the window.
@@ -242,7 +242,7 @@ void Shell::DisplayError(const char* fmt, ...)
 	buffer[len + 1] = '\0';
 	buffer[len + 1] = '\0';
 	va_end(argument_list);
 	va_end(argument_list);
 
 
-	MessageBox(window_handle, Rml::Core::StringUtilities::ToUTF16(buffer).c_str(), L"Shell Error", MB_OK);
+	MessageBox(window_handle, (LPCWSTR)Rml::Core::StringUtilities::ToUTF16(buffer).c_str(), L"Shell Error", MB_OK);
 }
 }
 
 
 void Shell::Log(const char* fmt, ...)
 void Shell::Log(const char* fmt, ...)
@@ -262,7 +262,7 @@ void Shell::Log(const char* fmt, ...)
 	buffer[len + 1] = '\0';
 	buffer[len + 1] = '\0';
 	va_end(argument_list);
 	va_end(argument_list);
 
 
-	OutputDebugString(Rml::Core::StringUtilities::ToUTF16(buffer).c_str());
+	OutputDebugString((LPCWSTR)Rml::Core::StringUtilities::ToUTF16(buffer).c_str());
 }
 }
 
 
 double Shell::GetElapsedTime() 
 double Shell::GetElapsedTime() 
@@ -304,9 +304,9 @@ void Shell::SetClipboardText(const Rml::Core::String& text_utf8)
 
 
 		EmptyClipboard();
 		EmptyClipboard();
 
 
-		const Rml::Core::WString text = Rml::Core::StringUtilities::ToUTF16(text_utf8);
+		const Rml::Core::U16String text = Rml::Core::StringUtilities::ToUTF16(text_utf8);
 
 
-		size_t size = sizeof(wchar_t) * (text.size() + 1);
+		size_t size = sizeof(char16_t) * (text.size() + 1);
 
 
 		HGLOBAL clipboard_data = GlobalAlloc(GMEM_FIXED, size);
 		HGLOBAL clipboard_data = GlobalAlloc(GMEM_FIXED, size);
 		memcpy(clipboard_data, text.data(), size);
 		memcpy(clipboard_data, text.data(), size);
@@ -335,7 +335,7 @@ void Shell::GetClipboardText(Rml::Core::String& text)
 			return;
 			return;
 		}
 		}
 
 
-		const wchar_t* clipboard_text = (const wchar_t*)GlobalLock(clipboard_data);
+		const char16_t* clipboard_text = (const char16_t*)GlobalLock(clipboard_data);
 		if (clipboard_text)
 		if (clipboard_text)
 			text = Rml::Core::StringUtilities::ToUTF8(clipboard_text);
 			text = Rml::Core::StringUtilities::ToUTF8(clipboard_text);
 		GlobalUnlock(clipboard_data);
 		GlobalUnlock(clipboard_data);

+ 2 - 2
Source/Controls/WidgetTextInput.cpp

@@ -654,9 +654,9 @@ void WidgetTextInput::MoveCursorToCharacterBoundaries(bool forward)
 	const char* p = p_cursor;
 	const char* p = p_cursor;
 
 
 	if (forward)
 	if (forward)
-		p = Core::StringUtilities::SeekForwardU8(p_cursor, p_line_end);
+		p = Core::StringUtilities::SeekForwardUTF8(p_cursor, p_line_end);
 	else
 	else
-		p = Core::StringUtilities::SeekBackU8(p_cursor, p_line_begin);
+		p = Core::StringUtilities::SeekBackwardUTF8(p_cursor, p_line_begin);
 
 
 	if (p != p_cursor)
 	if (p != p_cursor)
 	{
 	{

+ 1 - 1
Source/Core/ElementTextDefault.cpp

@@ -223,7 +223,7 @@ bool ElementTextDefault::GenerateLine(String& line, int& line_length, float& lin
 		const char* next_token_begin = token_begin;
 		const char* next_token_begin = token_begin;
 		CodePoint previous_codepoint = CodePoint::Null;
 		CodePoint previous_codepoint = CodePoint::Null;
 		if (!line.empty())
 		if (!line.empty())
-			previous_codepoint = StringUtilities::ToCodePoint(StringUtilities::SeekBackU8(&line.back(), line.data()));
+			previous_codepoint = StringUtilities::ToCodePoint(StringUtilities::SeekBackwardUTF8(&line.back(), line.data()));
 
 
 		// Generate the next token and determine its pixel-length.
 		// Generate the next token and determine its pixel-length.
 		bool break_line = BuildToken(token, next_token_begin, string_end, line.empty() && trim_whitespace_prefix, collapse_white_space, break_at_endline, text_transform_property);
 		bool break_line = BuildToken(token, next_token_begin, string_end, line.empty() && trim_whitespace_prefix, collapse_white_space, break_at_endline, text_transform_property);

+ 59 - 71
Source/Core/StringUtilities.cpp

@@ -48,9 +48,6 @@
 namespace Rml {
 namespace Rml {
 namespace Core {
 namespace Core {
 
 
-static bool UTF8toUTF16(const String& input, WString& output);
-static bool UTF16toUTF8(const WString& input, String& output);
-
 
 
 static int FormatString(String& string, size_t max_size, const char* format, va_list argument_list)
 static int FormatString(String& string, size_t max_size, const char* format, va_list argument_list)
 {
 {
@@ -104,41 +101,6 @@ String StringUtilities::ToLower(const String& string) {
 	return str_lower;
 	return str_lower;
 }
 }
 
 
-WString StringUtilities::ToUTF16(const String& str)
-{
-	WString result;
-	if (!UTF8toUTF16(str, result))
-		Log::Message(Log::LT_WARNING, "Invalid characters encountered while converting UTF-8 string to UTF-16.");
-	return result;
-}
-
-String StringUtilities::ToUTF8(const WString& wstr)
-{
-	String result;
-	if(!UTF16toUTF8(wstr, result))
-		Log::Message(Log::LT_WARNING, "Invalid characters encountered while converting UTF-16 string to UTF-8.");
-	return result;
-}
-
-size_t StringUtilities::LengthUTF8(StringView string_view)
-{
-	const char* const p_end = string_view.end();
-
-	// Skip any continuation bytes at the beginning
-	const char* p = string_view.begin();
-
-	size_t num_continuation_bytes = 0;
-
-	while (p != p_end)
-	{
-		if ((*p & 0b1100'0000) == 0b1000'0000)
-			++num_continuation_bytes;
-		++p;
-	}
-
-	return string_view.size() - num_continuation_bytes;
-}
-
 String StringUtilities::Replace(String subject, const String& search, const String& replace)
 String StringUtilities::Replace(String subject, const String& search, const String& replace)
 {
 {
 	size_t pos = 0;
 	size_t pos = 0;
@@ -264,7 +226,6 @@ void StringUtilities::ExpandString(StringList& string_list, const String& string
 		string_list.emplace_back(start_ptr, end_ptr + 1);
 		string_list.emplace_back(start_ptr, end_ptr + 1);
 }
 }
 
 
-
 // Joins a list of string values into a single string separated by a character delimiter.
 // Joins a list of string values into a single string separated by a character delimiter.
 void StringUtilities::JoinString(String& string, const StringList& string_list, const char delimiter)
 void StringUtilities::JoinString(String& string, const StringList& string_list, const char delimiter)
 {
 {
@@ -276,8 +237,6 @@ void StringUtilities::JoinString(String& string, const StringList& string_list,
 	}
 	}
 }
 }
 
 
-
-
 // Strip whitespace characters from the beginning and end of a string.
 // Strip whitespace characters from the beginning and end of a string.
 String StringUtilities::StripWhitespace(const String& string)
 String StringUtilities::StripWhitespace(const String& string)
 {
 {
@@ -296,6 +255,13 @@ String StringUtilities::StripWhitespace(const String& string)
 	return String();
 	return String();
 }
 }
 
 
+// Operators for STL containers using strings.
+bool StringUtilities::StringComparei::operator()(const String& lhs, const String& rhs) const
+{
+	return strcasecmp(lhs.c_str(), rhs.c_str()) < 0;
+}
+
+
 CodePoint StringUtilities::ToCodePoint(const char* p)
 CodePoint StringUtilities::ToCodePoint(const char* p)
 {
 {
 	if ((*p & (1 << 7)) == 0)
 	if ((*p & (1 << 7)) == 0)
@@ -349,12 +315,13 @@ String StringUtilities::ToUTF8(CodePoint code_point)
 String StringUtilities::ToUTF8(const CodePoint* code_points, int num_code_points)
 String StringUtilities::ToUTF8(const CodePoint* code_points, int num_code_points)
 {
 {
 	String result;
 	String result;
+	result.reserve(num_code_points);
 
 
 	bool invalid_code_point = false;
 	bool invalid_code_point = false;
 
 
 	for (int i = 0; i < num_code_points; i++)
 	for (int i = 0; i < num_code_points; i++)
 	{
 	{
-		unsigned int c = (unsigned int)code_points[i];
+		char32_t c = (char32_t)code_points[i];
 
 
 		constexpr int l3 = 0b0000'0111;
 		constexpr int l3 = 0b0000'0111;
 		constexpr int l4 = 0b0000'1111;
 		constexpr int l4 = 0b0000'1111;
@@ -368,11 +335,11 @@ String StringUtilities::ToUTF8(const CodePoint* code_points, int num_code_points
 		if (c < 0x80)
 		if (c < 0x80)
 			result += (char)c;
 			result += (char)c;
 		else if (c < 0x800)
 		else if (c < 0x800)
-			result += { char(((c >> 6)& l5) | h2), char((c& l6) | h1) };
+			result += { char(((c >> 6) & l5) | h2), char((c & l6) | h1) };
 		else if (c < 0x10000)
 		else if (c < 0x10000)
-			result += { char(((c >> 12)& l4) | h3), char(((c >> 6)& l6) | h1), char((c& l6) | h1) };
+			result += { char(((c >> 12) & l4) | h3), char(((c >> 6) & l6) | h1), char((c & l6) | h1) };
 		else if (c <= 0x10FFFF)
 		else if (c <= 0x10FFFF)
-			result += { char(((c >> 18)& l3) | h4), char(((c >> 12)& l6) | h1), char(((c >> 6)& l6) | h1), char((c& l6) | h1) };
+			result += { char(((c >> 18) & l3) | h4), char(((c >> 12) & l6) | h1), char(((c >> 6) & l6) | h1), char((c & l6) | h1) };
 		else
 		else
 			invalid_code_point = true;
 			invalid_code_point = true;
 	}
 	}
@@ -383,19 +350,32 @@ String StringUtilities::ToUTF8(const CodePoint* code_points, int num_code_points
 	return result;
 	return result;
 }
 }
 
 
-// Operators for STL containers using strings.
-bool StringUtilities::StringComparei::operator()(const String& lhs, const String& rhs) const
+
+size_t StringUtilities::LengthUTF8(StringView string_view)
 {
 {
-	return strcasecmp(lhs.c_str(), rhs.c_str()) < 0;
-}
+	const char* const p_end = string_view.end();
 
 
+	// Count the continuation bytes (bit pattern 10xxxxxx); every other byte starts one character.
+	const char* p = string_view.begin();
+
+	size_t num_continuation_bytes = 0;
+
+	while (p != p_end)
+	{
+		if ((*p & 0b1100'0000) == 0b1000'0000)
+			++num_continuation_bytes;
+		++p;
+	}
 
 
+	return string_view.size() - num_continuation_bytes;
+}
 
 
-// Converts a character array in UTF-8 encoding to a wide string in UTF-16 encoding.
-static bool UTF8toUTF16(const String& input, WString& output)
+U16String StringUtilities::ToUTF16(const String& input)
 {
 {
+	U16String result;
+
 	if (input.empty())
 	if (input.empty())
-		return true;
+		return result;
 
 
 	std::vector<CodePoint> code_points;
 	std::vector<CodePoint> code_points;
 	code_points.reserve(input.size());
 	code_points.reserve(input.size());
@@ -403,26 +383,26 @@ static bool UTF8toUTF16(const String& input, WString& output)
 	for (auto it = StringIteratorU8(input); it; ++it)
 	for (auto it = StringIteratorU8(input); it; ++it)
 		code_points.push_back(*it);
 		code_points.push_back(*it);
 
 
-	output.reserve(input.size());
+	result.reserve(input.size());
 
 
 	bool valid_characters = true;
 	bool valid_characters = true;
 
 
 	for (CodePoint code_point : code_points)
 	for (CodePoint code_point : code_points)
 	{
 	{
-		unsigned int c = (unsigned int)code_point;
+		char32_t c = (char32_t)code_point;
 
 
 		if (c <= 0xD7FF || (c >= 0xE000 && c <= 0xFFFF))
 		if (c <= 0xD7FF || (c >= 0xE000 && c <= 0xFFFF))
 		{
 		{
 			// Single 16-bit code unit.
 			// Single 16-bit code unit.
-			output += (wchar_t)c;
+			result += (char16_t)c;
 		}
 		}
 		else if (c >= 0x10000 && c <= 0x10FFFF)
 		else if (c >= 0x10000 && c <= 0x10FFFF)
 		{
 		{
 			// Encode as two 16-bit code units.
 			// Encode as two 16-bit code units.
-			unsigned int c_shift = c - 0x10000;
-			wchar_t w1 = (0xD800 | ((c_shift >> 10) & 0x3FF));
-			wchar_t w2 = (0xDC00 | (c_shift & 0x3FF));
-			output += {w1, w2};
+			char32_t c_shift = c - 0x10000;
+			char16_t w1 = (0xD800 | ((c_shift >> 10) & 0x3FF));
+			char16_t w2 = (0xDC00 | (c_shift & 0x3FF));
+			result += {w1, w2};
 		}
 		}
 		else
 		else
 		{
 		{
@@ -430,26 +410,28 @@ static bool UTF8toUTF16(const String& input, WString& output)
 		}
 		}
 	}
 	}
 
 
-	return valid_characters;
+	if (!valid_characters)
+		Log::Message(Log::LT_WARNING, "Invalid characters encountered while converting UTF-8 string to UTF-16.");
+
+	return result;
 }
 }
 
 
-// Converts a wide string in UTF-16 encoding into a string in UTF-8 encoding.
-static bool UTF16toUTF8(const WString& input, String& output)
+String StringUtilities::ToUTF8(const U16String& input)
 {
 {
 	std::vector<CodePoint> code_points;
 	std::vector<CodePoint> code_points;
 	code_points.reserve(input.size());
 	code_points.reserve(input.size());
 
 
 	bool valid_input = true;
 	bool valid_input = true;
-	wchar_t w1 = 0;
+	char16_t w1 = 0;
 
 
-	for (wchar_t w : input)
+	for (char16_t w : input)
 	{
 	{
 		if (w <= 0xD7FF || w >= 0xE000)
 		if (w <= 0xD7FF || w >= 0xE000)
 		{
 		{
 			// Single 16-bit code unit.
 			// Single 16-bit code unit.
 			code_points.push_back((CodePoint)(w));
 			code_points.push_back((CodePoint)(w));
 		}
 		}
-		else 
+		else
 		{
 		{
 			// Two 16-bit code units.
 			// Two 16-bit code units.
 			if (!w1 && w < 0xDC00)
 			if (!w1 && w < 0xDC00)
@@ -458,7 +440,7 @@ static bool UTF16toUTF8(const WString& input, String& output)
 			}
 			}
 			else if (w1 && w >= 0xDC00)
 			else if (w1 && w >= 0xDC00)
 			{
 			{
-				code_points.push_back((CodePoint)(((((unsigned int)w1 & 0x3FF) << 10) | ((unsigned int)(w) & 0x3FF)) + 0x10000u));
+				code_points.push_back((CodePoint)(((((char32_t)w1 & 0x3FF) << 10) | ((char32_t)(w) & 0x3FF)) + 0x10000u));
 				w1 = 0;
 				w1 = 0;
 			}
 			}
 			else
 			else
@@ -468,12 +450,18 @@ static bool UTF16toUTF8(const WString& input, String& output)
 		}
 		}
 	}
 	}
 
 
-	if(code_points.size() > 0)
-		output = StringUtilities::ToUTF8(code_points.data(), (int)code_points.size());
+	String result;
+
+	if (code_points.size() > 0)
+		result = StringUtilities::ToUTF8(code_points.data(), (int)code_points.size());
 
 
-	return valid_input;
+	if (!valid_input)
+		Log::Message(Log::LT_WARNING, "Invalid characters encountered while converting UTF-16 string to UTF-8.");
+
+	return result;
 }
 }
 
 
+
 StringView::StringView(const char* p_begin, const char* p_end) : p_begin(p_begin), p_end(p_end)
 StringView::StringView(const char* p_begin, const char* p_end) : p_begin(p_begin), p_end(p_end)
 {
 {
 	RMLUI_ASSERT(p_end >= p_begin);
 	RMLUI_ASSERT(p_end >= p_begin);
@@ -520,11 +508,11 @@ StringIteratorU8& StringIteratorU8::operator--() {
 	return *this;
 	return *this;
 }
 }
 inline void StringIteratorU8::SeekBack() {
 inline void StringIteratorU8::SeekBack() {
-	p = StringUtilities::SeekBackU8(p, view.end());
+	p = StringUtilities::SeekBackwardUTF8(p, view.end());
 }
 }
 
 
 inline void StringIteratorU8::SeekForward() {
 inline void StringIteratorU8::SeekForward() {
-	p = StringUtilities::SeekForwardU8(p, view.end());
+	p = StringUtilities::SeekForwardUTF8(p, view.end());
 }
 }
 
 
 }
 }