6 năm trước cách đây · 2f6fd1494d
--- a/Include/RmlUi/Core/StringUtilities.h
+++ b/Include/RmlUi/Core/StringUtilities.h
@@ -79,19 +79,6 @@ namespace StringUtilities
 
				 	/// @param[in] delimiter Delimiter to insert between the individual values.
			
 
				 	RMLUICORE_API void JoinString(String& string, const StringList& string_list, const char delimiter = ',');
			
 
				 
			
 
				-	/// Converts a string in UTF-8 encoding to a wide string in UTF-16 encoding. The UTF-16 words will
			
 
				-	/// be encoded as either big- or little-endian, depending on the host processor.
			
 
				-	/// Reports a warning if the conversion fails.
			
 
				-	RMLUICORE_API WString ToUTF16(const String& str);
			
 
				-
			
 
				-	/// Converts a wide string in UTF-16 encoding into a string in UTF-8 encoding. This
			
 
				-	/// function assumes the endianness of the input words to be the same as the host processor.
			
 
				-	/// Reports a warning if the conversion fails.
			
 
				-	RMLUICORE_API String ToUTF8(const WString& wstr);
			
 
				-
			
 
				-	/// Returns number of characters in UTF8 string.
			
 
				-	RMLUICORE_API size_t LengthU8(StringView string_view);
			
 
				-
			
 
				 	/// Converts upper-case characters in string to lower-case.
			
 
				 	RMLUICORE_API String ToLower(const String& string);
			
 
				 
			
@@ -120,22 +107,43 @@ namespace StringUtilities
 
				 		bool operator()(const String& lhs, const String& rhs) const;
			
 
				 	};
			
 
				 
			
 
				+	// Decode the first code point in a zero-terminated UTF-8 string.
			
 
				 	RMLUICORE_API CodePoint ToCodePoint(const char* p);
			
 
				+
			
 
				+	// Encode a single code point as a UTF-8 string.
			
 
				 	RMLUICORE_API String ToUTF8(CodePoint code_point);
			
 
				+
			
 
				+	// Encode an array of code points as a UTF-8 string.
			
 
				 	RMLUICORE_API String ToUTF8(const CodePoint* code_points, int num_code_points);
			
 
				 
			
 
				+	/// Returns number of characters in a UTF-8 string.
			
 
				+	RMLUICORE_API size_t LengthUTF8(StringView string_view);
			
 
				+
			
 
				+	// Seek forward in a UTF-8 string, skipping continuation bytes.
			
 
				 	inline const char* SeekForwardU8(const char* p, const char* p_end)
				 	{
				 		// Advance past UTF-8 continuation bytes (bit pattern 10xxxxxx). The walk stops on the
				 		// first byte that is not a continuation byte, or at p_end, which is never dereferenced.
				 		for (; p != p_end; ++p)
				 		{
				 			const bool is_continuation = ((*p & 0xC0) == 0x80);
				 			if (!is_continuation)
				 				break;
				 		}
				 		return p;
				 	}
			
 
				+	// Seek backward in a UTF-8 string, skipping continuation bytes.
			
 
				 	inline const char* SeekBackU8(const char* p, const char* p_begin)
				 	{
				 		// Step backwards over UTF-8 continuation bytes (bit pattern 10xxxxxx). The walk ends on
				 		// the first byte that is not a continuation byte, or one position before p_begin; that
				 		// position is returned as-is and is never dereferenced.
				 		for (;;)
				 		{
				 			const bool before_begin = ((p + 1) == p_begin);
				 			if (before_begin || (*p & 0xC0) != 0x80)
				 				break;
				 			--p;
				 		}
				 		return p;
				 	}
			
 
				+
			
 
				+
			
 
				+	/// Converts a string in UTF-8 encoding to a wide string in UTF-16 encoding. The UTF-16 words will
			
 
				+	/// be encoded as either big- or little-endian, depending on the host processor.
			
 
				+	/// Reports a warning if the conversion fails.
			
 
				+	RMLUICORE_API WString ToUTF16(const String& str);
			
 
				+
			
 
				+	/// Converts a wide string in UTF-16 encoding into a string in UTF-8 encoding. This
			
 
				+	/// function assumes the endianness of the input words to be the same as the host processor.
			
 
				+	/// Reports a warning if the conversion fails.
			
 
				+	RMLUICORE_API String ToUTF8(const WString& wstr);
			
 
				 }
			
 
				 
			
 
				 
			
@@ -182,9 +190,9 @@ public:
 
				 	StringIteratorU8(const String& string, size_t offset);
			
 
				 	StringIteratorU8(const String& string, size_t offset, size_t count);
			
 
				 
			
 
				-	// Seeks forward to the next UTF8 character. Iterator must be valid.
			
 
				+	// Seeks forward to the next UTF-8 character. Iterator must be valid.
			
 
				 	StringIteratorU8& operator++();
			
 
				-	// Seeks back to the previous UTF8 character. Iterator must be valid.
			
 
				+	// Seeks back to the previous UTF-8 character. Iterator must be valid.
			
 
				 	StringIteratorU8& operator--();
			
 
				 
			
 
				 	// Returns the code point at the current position. The iterator must be dereferenceable.
			
--- a/Samples/basic/demo/data/demo.rml
+++ b/Samples/basic/demo/data/demo.rml
@@ -125,7 +125,7 @@ button:focus {
 
				 textarea {
			
 
				 	font-size: 18px;
			
 
				 	font-effect: outline(2px #006600);
			
 
				-	color: #ccc;
			
 
				+	color: #333;
			
 
				 	
			
 
				 }
			
 
				 </style>
			
--- a/Samples/shell/include/win32/InputWin32.h
+++ b/Samples/shell/include/win32/InputWin32.h
@@ -30,10 +30,12 @@
 
				 #define RMLUIINPUTWIN32_H
			
 
				 
			
 
				 #include <Input.h>
			
 
				-#if !defined _WIN32_WINNT || _WIN32_WINNT < 0x0500
			
 
				+#if !defined _WIN32_WINNT || _WIN32_WINNT < 0x0501
			
 
				 #undef _WIN32_WINNT
			
 
				-#define _WIN32_WINNT 0x0500
			
 
				+#define _WIN32_WINNT 0x0501
			
 
				 #endif
			
 
				+#define UNICODE
			
 
				+#define _UNICODE
			
 
				 #include <windows.h>
			
 
				 
			
 
				 /**
			
@@ -50,7 +52,6 @@ public:
 
				 
			
 
				 	/// Process the Windows message.
			
 
				 	static void ProcessWindowsEvent(UINT message, WPARAM w_param, LPARAM l_param);
			
 
				-private:
			
 
				 };
			
 
				 
			
 
				 #endif
			
--- a/Samples/shell/src/win32/InputWin32.cpp
+++ b/Samples/shell/src/win32/InputWin32.cpp
@@ -29,6 +29,7 @@
 
				 #include <win32/InputWin32.h>
			
 
				 #include <RmlUi/Core/Context.h>
			
 
				 #include <RmlUi/Core/Input.h>
			
 
				+#include <RmlUi/Core/StringUtilities.h>
			
 
				 #include <RmlUi/Debugger.h>
			
 
				 #include <Shell.h>
			
 
				 
			
@@ -52,7 +53,7 @@ void InputWin32::ProcessWindowsEvent(UINT message, WPARAM w_param, LPARAM l_para
 
				 {
			
 
				 	if (context == nullptr)
			
 
				 		return;
			
 
				-
			
 
				+	
			
 
				 	// Process all mouse and keyboard events
			
 
				 	switch (message)
			
 
				 	{
			
@@ -104,15 +105,40 @@ void InputWin32::ProcessWindowsEvent(UINT message, WPARAM w_param, LPARAM l_para
 
				 		}
			
 
				 		break;
			
 
				 
			
 
				+
			
 
				 		case WM_CHAR:
			
 
				 		{
			
 
				-			// Only send through printable characters.
			
 
				-			// TODO: Convert utf16 character to codepoint
			
 
				-			if (w_param >= 32)
			
 
				-				context->ProcessTextInput((Rml::Core::CodePoint) w_param);
			
 
				-			// Or endlines - Windows sends them through as carriage returns.
			
 
				-			else if (w_param == '\r')
			
 
				-				context->ProcessTextInput((Rml::Core::CodePoint)'\n');
			
 
				+			static wchar_t two_wide_char_first = 0;
			
 
				+
			
 
				+			wchar_t w = (wchar_t)w_param;
			
 
				+			Rml::Core::CodePoint code_point = (Rml::Core::CodePoint)w;
			
 
				+
			
 
				+			// Windows sends two-wide characters as two messages.
			
 
				+			if (w >= 0xD800 && w < 0xDC00)
			
 
				+			{
			
 
				+				// First 16-bit code unit of a two-wide character.
			
 
				+				two_wide_char_first = w;
			
 
				+			}
			
 
				+			else
			
 
				+			{
			
 
				+				if (w >= 0xDC00 && w < 0xE000 && two_wide_char_first != 0)
			
 
				+				{
			
 
				+					// Second 16-bit code unit of a two-wide character.
			
 
				+					Rml::Core::String utf8 = Rml::Core::StringUtilities::ToUTF8({ two_wide_char_first, w });
			
 
				+					code_point = Rml::Core::StringUtilities::ToCodePoint(utf8.data());
			
 
				+				}
			
 
				+				else if (w == '\r')
			
 
				+				{
			
 
				+					// Windows sends new-lines as carriage returns, convert to endlines.
			
 
				+					code_point = (Rml::Core::CodePoint)'\n';
			
 
				+				}
			
 
				+
			
 
				+				two_wide_char_first = 0;
			
 
				+
			
 
				+				// Only send through printable characters.
			
 
				+				if ((unsigned int)code_point >= 32 || code_point == (Rml::Core::CodePoint)'\n')
			
 
				+					context->ProcessTextInput(code_point);
			
 
				+			}
			
 
				 		}
			
 
				 		break;
			
 
				 
			
--- a/Samples/shell/src/win32/ShellWin32.cpp
+++ b/Samples/shell/src/win32/ShellWin32.cpp
@@ -30,7 +30,6 @@
 
				 #include <RmlUi/Core.h>
			
 
				 #include <win32/InputWin32.h>
			
 
				 #include "ShellFileInterface.h"
			
 
				-#include <windows.h>
			
 
				 #include <stdio.h>
			
 
				 #include <stdarg.h>
			
 
				 
			
@@ -38,7 +37,7 @@ static LRESULT CALLBACK WindowProcedure(HWND window_handle, UINT message, WPARAM
 
				 
			
 
				 static bool activated = true;
			
 
				 static bool running = false;
			
 
				-static const char* instance_name = nullptr;
			
 
				+static Rml::Core::WString instance_name;
			
 
				 static HWND window_handle = nullptr;
			
 
				 static HINSTANCE instance_handle = nullptr;
			
 
				 
			
@@ -65,10 +64,10 @@ bool Shell::Initialise()
 
				 	time_frequency = 1.0 / (double) time_ticks_per_second.QuadPart;
			
 
				 
			
 
				 	// Load cursors
			
 
				-	cursor_default = LoadCursorA(nullptr, IDC_ARROW);
			
 
				-	cursor_move = LoadCursorA(nullptr, IDC_SIZEALL);
			
 
				-	cursor_cross = LoadCursorA(nullptr, IDC_CROSS);
			
 
				-	cursor_unavailable = LoadCursorA(nullptr, IDC_NO);
			
 
				+	cursor_default = LoadCursor(nullptr, IDC_ARROW);
			
 
				+	cursor_move = LoadCursor(nullptr, IDC_SIZEALL);
			
 
				+	cursor_cross = LoadCursor(nullptr, IDC_CROSS);
			
 
				+	cursor_unavailable = LoadCursor(nullptr, IDC_NO);
			
 
				 
			
 
				 	Rml::Core::String root = FindSamplesRoot();
			
 
				 	
			
@@ -104,9 +103,11 @@ Rml::Core::String Shell::FindSamplesRoot()
 
				 }
			
 
				 
			
 
				 static ShellRenderInterfaceExtensions *shell_renderer = nullptr;
			
 
				-bool Shell::OpenWindow(const char* name, ShellRenderInterfaceExtensions *_shell_renderer, unsigned int width, unsigned int height, bool allow_resize)
			
 
				+bool Shell::OpenWindow(const char* in_name, ShellRenderInterfaceExtensions *_shell_renderer, unsigned int width, unsigned int height, bool allow_resize)
			
 
				 {
			
 
				-	WNDCLASS window_class;
			
 
				+	WNDCLASSW window_class;
			
 
				+
			
 
				+	Rml::Core::WString name = Rml::Core::StringUtilities::ToUTF16(Rml::Core::String(in_name));
			
 
				 
			
 
				 	// Fill out the window class struct.
			
 
				 	window_class.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC;
			
@@ -118,9 +119,9 @@ bool Shell::OpenWindow(const char* name, ShellRenderInterfaceExtensions *_shell_
 
				 	window_class.hCursor = cursor_default;
			
 
				 	window_class.hbrBackground = nullptr;
			
 
				 	window_class.lpszMenuName = nullptr;
			
 
				-	window_class.lpszClassName = name;
			
 
				+	window_class.lpszClassName = name.data();
			
 
				 
			
 
				-	if (!RegisterClass(&window_class))
			
 
				+	if (!RegisterClassW(&window_class))
			
 
				 	{
			
 
				 		DisplayError("Could not register window class.");
			
 
				 
			
@@ -128,9 +129,9 @@ bool Shell::OpenWindow(const char* name, ShellRenderInterfaceExtensions *_shell_
 
				 		return false;
			
 
				 	}
			
 
				 
			
 
				-	window_handle = CreateWindowEx(WS_EX_APPWINDOW | WS_EX_WINDOWEDGE,
			
 
				-								   name,	// Window class name.
			
 
				-								   name,
			
 
				+	window_handle = CreateWindowExW(WS_EX_APPWINDOW | WS_EX_WINDOWEDGE,
			
 
				+								   name.data(),	// Window class name.
			
 
				+								   name.data(),
			
 
				 								   WS_CLIPSIBLINGS | WS_CLIPCHILDREN | WS_OVERLAPPEDWINDOW,
			
 
				 								   0, 0,	// Window position.
			
 
				 								   width, height,// Window size.
			
@@ -190,7 +191,7 @@ void Shell::CloseWindow()
 
				 	}
			
 
				 
			
 
				 	DestroyWindow(window_handle);  
			
 
				-	UnregisterClass(instance_name, instance_handle);
			
 
				+	UnregisterClassW(instance_name.data(), instance_handle);
			
 
				 }
			
 
				 
			
 
				 // Returns a platform-dependent handle to the window.
			
@@ -241,7 +242,7 @@ void Shell::DisplayError(const char* fmt, ...)
 
				 	buffer[len + 1] = '\0';
			
 
				 	va_end(argument_list);
			
 
				 
			
 
				-	MessageBox(window_handle, buffer, "Shell Error", MB_OK);
			
 
				+	MessageBox(window_handle, Rml::Core::StringUtilities::ToUTF16(buffer).c_str(), L"Shell Error", MB_OK);
			
 
				 }
			
 
				 
			
 
				 void Shell::Log(const char* fmt, ...)
			
@@ -261,7 +262,7 @@ void Shell::Log(const char* fmt, ...)
 
				 	buffer[len + 1] = '\0';
			
 
				 	va_end(argument_list);
			
 
				 
			
 
				-	OutputDebugString(buffer);
			
 
				+	OutputDebugString(Rml::Core::StringUtilities::ToUTF16(buffer).c_str());
			
 
				 }
			
 
				 
			
 
				 double Shell::GetElapsedTime() 
			
@@ -371,8 +372,8 @@ static LRESULT CALLBACK WindowProcedure(HWND window_handle, UINT message, WPARAM
 
				 
			
 
				 		case WM_SIZE:
			
 
				 		{
			
 
				-			int width = LOWORD(l_param);;
			
 
				-			int height = HIWORD(l_param);;
			
 
				+			int width = LOWORD(l_param);
			
 
				+			int height = HIWORD(l_param);
			
 
				 			shell_renderer->SetViewport(width, height);
			
 
				 		}
			
 
				 		break;
			
--- a/Source/Controls/WidgetTextInput.cpp
+++ b/Source/Controls/WidgetTextInput.cpp
@@ -164,7 +164,7 @@ int WidgetTextInput::GetMaxLength() const
 
				 int WidgetTextInput::GetLength() const
			
 
				 {
			
 
				 	Core::String value = GetElement()->GetAttribute< Core::String >("value", "");
			
 
				-	size_t result = Core::StringUtilities::LengthU8(value);
			
 
				+	size_t result = Core::StringUtilities::LengthUTF8(value);
			
 
				 	return (int)result;
			
 
				 }
			
 
				 
			
--- a/Source/Core/StringUtilities.cpp
+++ b/Source/Core/StringUtilities.cpp
@@ -48,7 +48,7 @@
 
				 namespace Rml {
			
 
				 namespace Core {
			
 
				 
			
 
				-static bool UTF8toUCS2(const String& input, WString& output);
			
 
				+static bool UTF8toUTF16(const String& input, WString& output);
			
 
				 static bool UTF16toUTF8(const WString& input, String& output);
			
 
				 
			
 
				 
			
@@ -106,23 +106,21 @@ String StringUtilities::ToLower(const String& string) {
 
				 
			
 
				 WString StringUtilities::ToUTF16(const String& str)
			
 
				 {
			
 
				-	// TODO: Convert to UTF16 instead of UCS2
			
 
				 	WString result;
			
 
				-	if (!UTF8toUCS2(str, result))
			
 
				-		Log::Message(Log::LT_WARNING, "Failed to convert UTF8 string to UTF16.");
			
 
				+	if (!UTF8toUTF16(str, result))
			
 
				+		Log::Message(Log::LT_WARNING, "Invalid characters encountered while converting UTF-8 string to UTF-16.");
			
 
				 	return result;
			
 
				 }
			
 
				 
			
 
				 String StringUtilities::ToUTF8(const WString& wstr)
			
 
				 {
			
 
				-	/// TODO: Convert from UTF-16 instead.
			
 
				 	String result;
			
 
				 	if(!UTF16toUTF8(wstr, result))
			
 
				-		Log::Message(Log::LT_WARNING, "Failed to convert UCS2 string to UTF8.");
			
 
				+		Log::Message(Log::LT_WARNING, "Invalid characters encountered while converting UTF-16 string to UTF-8.");
			
 
				 	return result;
			
 
				 }
			
 
				 
			
 
				-size_t StringUtilities::LengthU8(StringView string_view)
			
 
				+size_t StringUtilities::LengthUTF8(StringView string_view)
			
 
				 {
			
 
				 	const char* const p_end = string_view.end();
			
 
				 
			
@@ -392,151 +390,50 @@ bool StringUtilities::StringComparei::operator()(const String& lhs, const String
 
				 }
			
 
				 
			
 
				 
			
 
				-// Defines, helper functions for the UTF8 / UCS2 conversion functions.
			
 
				-constexpr int _NXT = 0x80;
			
 
				-constexpr int _SEQ2 = 0xc0;
			
 
				-constexpr int _SEQ3 = 0xe0;
			
 
				-constexpr int _SEQ4 = 0xf0;
			
 
				-constexpr int _SEQ5 = 0xf8;
			
 
				-constexpr int _SEQ6 = 0xfc;
			
 
				 
			
 
				-constexpr int _BOM = 0xfeff;
			
 
				-	
			
 
				-static int __wchar_forbidden(unsigned int sym)
			
 
				+// Converts a character array in UTF-8 encoding to a wide string in UTF-16 encoding.
			
 
				+static bool UTF8toUTF16(const String& input, WString& output)
			
 
				 {
			
 
				-	// Surrogate pairs
			
 
				-	if (sym >= 0xd800 && sym <= 0xdfff)
			
 
				-		return -1;
			
 
				-	
			
 
				-	return 0;
			
 
				-}
			
 
				+	if (input.empty())
			
 
				+		return true;
			
 
				 
			
 
				-static int __utf8_forbidden(unsigned char octet)
			
 
				-{
			
 
				-	switch (octet)
			
 
				-	{
			
 
				-		case 0xc0:
			
 
				-		case 0xc1:
			
 
				-		case 0xf5:
			
 
				-		case 0xff:
			
 
				-			return -1;
			
 
				-			
			
 
				-		default:
			
 
				-			return 0;
			
 
				-	}
			
 
				-}
			
 
				+	std::vector<CodePoint> code_points;
			
 
				+	code_points.reserve(input.size());
			
 
				 
			
 
				+	for (auto it = StringIteratorU8(input); it; ++it)
			
 
				+		code_points.push_back(*it);
			
 
				 
			
 
				+	output.reserve(input.size());
			
 
				 
			
 
				-// Converts a character array in UTF-8 encoding to a vector of words.
			
 
				-static bool UTF8toUCS2(const String& input, WString& output)
			
 
				-{
			
 
				-	if (input.empty())
			
 
				-		return true;
			
 
				+	bool valid_characters = true;
			
 
				 
			
 
				-	output.reserve(input.size());
			
 
				-	
			
 
				-	unsigned char* p = (unsigned char*) input.c_str();
			
 
				-	unsigned char* end = p + input.size();
			
 
				-	
			
 
				-	// Skip the UTF-8 byte order marker if it exists.
			
 
				-	if (input.substr(0, 3) == "\xEF\xBB\xBF")
			
 
				-		p += 3;
			
 
				-	
			
 
				-	int num_bytes;
			
 
				-	for (; p < end; p += num_bytes)
			
 
				+	for (CodePoint code_point : code_points)
			
 
				 	{
			
 
				-		if (__utf8_forbidden(*p) != 0)
			
 
				-			return false;
			
 
				-		
			
 
				-		// Get number of bytes for one wide character.
			
 
				-		wchar_t high;
			
 
				-		num_bytes = 1;
			
 
				-		
			
 
				-		if ((*p & 0x80) == 0)
			
 
				-		{
			
 
				-			high = (wchar_t)*p;
			
 
				-		}
			
 
				-		else if ((*p & 0xe0) == _SEQ2)
			
 
				-		{
			
 
				-			num_bytes = 2;
			
 
				-			high = (wchar_t)(*p & 0x1f);
			
 
				-		}
			
 
				-		else if ((*p & 0xf0) == _SEQ3)
			
 
				-		{
			
 
				-			num_bytes = 3;
			
 
				-			high = (wchar_t)(*p & 0x0f);
			
 
				-		}
			
 
				-		else if ((*p & 0xf8) == _SEQ4)
			
 
				-		{
			
 
				-			num_bytes = 4;
			
 
				-			high = (wchar_t)(*p & 0x07);
			
 
				-		}
			
 
				-		else if ((*p & 0xfc) == _SEQ5)
			
 
				+		unsigned int c = (unsigned int)code_point;
			
 
				+
			
 
				+		if (c <= 0xD7FF || (c >= 0xE000 && c <= 0xFFFF))
			
 
				 		{
			
 
				-			num_bytes = 5;
			
 
				-			high = (wchar_t)(*p & 0x03);
			
 
				+			// Single 16-bit code unit.
			
 
				+			output += (wchar_t)c;
			
 
				 		}
			
 
				-		else if ((*p & 0xfe) == _SEQ6)
			
 
				+		else if (c >= 0x10000 && c <= 0x10FFFF)
			
 
				 		{
			
 
				-			num_bytes = 6;
			
 
				-			high = (wchar_t)(*p & 0x01);
			
 
				+			// Encode as two 16-bit code units.
			
 
				+			unsigned int c_shift = c - 0x10000;
			
 
				+			wchar_t w1 = (0xD800 | ((c_shift >> 10) & 0x3FF));
			
 
				+			wchar_t w2 = (0xDC00 | (c_shift & 0x3FF));
			
 
				+			output += {w1, w2};
			
 
				 		}
			
 
				 		else
			
 
				 		{
			
 
				-			return false;
			
 
				-		}
			
 
				-		
			
 
				-		// Does the sequence header tell us the truth about length?
			
 
				-		if (end - p <= num_bytes - 1)
			
 
				-		{
			
 
				-			return false;
			
 
				-		}
			
 
				-		
			
 
				-		// Validate the sequence. All symbols must have higher bits set to 10xxxxxx.
			
 
				-		if (num_bytes > 1)
			
 
				-		{
			
 
				-			int i;
			
 
				-			for (i = 1; i < num_bytes; i++)
			
 
				-			{
			
 
				-				if ((p[i] & 0b1100'0000) != _NXT)
			
 
				-					break;
			
 
				-			}
			
 
				-			
			
 
				-			if (i != num_bytes)
			
 
				-			{
			
 
				-				return false;
			
 
				-			}
			
 
				-		}
			
 
				-		
			
 
				-		// Make up a single UCS-4 (32-bit) character from the required number of UTF-8 tokens. The first byte has
			
 
				-		// been determined earlier, the second and subsequent bytes contribute the first six of their bits into the
			
 
				-		// final character code.
			
 
				-		unsigned int ucs4_char = 0;
			
 
				-		int num_bits = 0;
			
 
				-		for (int i = 1; i < num_bytes; i++)
			
 
				-		{
			
 
				-			ucs4_char |= (wchar_t)(p[num_bytes - i] & 0x3f) << num_bits;
			
 
				-			num_bits += 6;
			
 
				-		}
			
 
				-		ucs4_char |= high << num_bits;
			
 
				-		
			
 
				-		// Check for surrogate pairs.
			
 
				-		if (__wchar_forbidden(ucs4_char) != 0)
			
 
				-		{
			
 
				-			return false;
			
 
				+			valid_characters = false;
			
 
				 		}
			
 
				-		
			
 
				-		// Only add the character to the output if it exists in the Basic Multilingual Plane (ie, fits in a single
			
 
				-		// word).
			
 
				-		if (ucs4_char <= 0xffff)
			
 
				-			output.push_back((wchar_t) ucs4_char);
			
 
				 	}
			
 
				-	
			
 
				-	return true;
			
 
				+
			
 
				+	return valid_characters;
			
 
				 }
			
 
				 
			
 
				-// Converts an array of words in UCS-2 encoding into a character array in UTF-8 encoding.
			
 
				+// Converts a wide string in UTF-16 encoding into a string in UTF-8 encoding.
			
 
				 static bool UTF16toUTF8(const WString& input, String& output)
			
 
				 {
			
 
				 	std::vector<CodePoint> code_points;
			
@@ -545,25 +442,23 @@ static bool UTF16toUTF8(const WString& input, String& output)
 
				 	bool valid_input = true;
			
 
				 	wchar_t w1 = 0;
			
 
				 
			
 
				-	const wchar_t* w = input.data();
			
 
				-	const wchar_t* wlim = w + input.size();
			
 
				-	for (; w < wlim; w++)
			
 
				+	for (wchar_t w : input)
			
 
				 	{
			
 
				-		if (*w <= 0xD7FF || *w >= 0xE000)
			
 
				+		if (w <= 0xD7FF || w >= 0xE000)
			
 
				 		{
			
 
				 			// Single 16-bit code unit.
			
 
				-			code_points.push_back((CodePoint)(*w));
			
 
				+			code_points.push_back((CodePoint)(w));
			
 
				 		}
			
 
				 		else 
			
 
				 		{
			
 
				 			// Two 16-bit code units.
			
 
				-			if (!w1 && *w < 0xDC00)
			
 
				+			if (!w1 && w < 0xDC00)
			
 
				 			{
			
 
				-				w1 = *w;
			
 
				+				w1 = w;
			
 
				 			}
			
 
				-			else if (w1 && *w >= 0xDC00)
			
 
				+			else if (w1 && w >= 0xDC00)
			
 
				 			{
			
 
				-				code_points.push_back((CodePoint)(((((unsigned int)w1 & 0x3FF) << 10) | ((unsigned int)(*w) & 0x3FF)) + 0x10000u));
			
 
				+				code_points.push_back((CodePoint)(((((unsigned int)w1 & 0x3FF) << 10) | ((unsigned int)(w) & 0x3FF)) + 0x10000u));
			
 
				 				w1 = 0;
			
 
				 			}
			
 
				 			else
			
@@ -580,7 +475,9 @@ static bool UTF16toUTF8(const WString& input, String& output)
 
				 }
			
 
				 
			
 
				 StringView::StringView(const char* p_begin, const char* p_end) : p_begin(p_begin), p_end(p_end)
			
 
				-{}
			
 
				+{
			
 
				+	RMLUI_ASSERT(p_end >= p_begin);
			
 
				+}
			
 
				 StringView::StringView(const String& string) : p_begin(string.data()), p_end(string.data() + string.size())
			
 
				 {}
			
 
				 StringView::StringView(const String& string, size_t offset) : p_begin(string.data()), p_end(string.data() + string.size())