Browse Source

Proper conversion to and from UTF-16, use unicode for interaction with Windows API

Michael Ragazzon 6 years ago
parent
commit
2f6fd1494d

+ 23 - 15
Include/RmlUi/Core/StringUtilities.h

@@ -79,19 +79,6 @@ namespace StringUtilities
 	/// @param[in] delimiter Delimiter to insert between the individual values.
 	/// @param[in] delimiter Delimiter to insert between the individual values.
 	RMLUICORE_API void JoinString(String& string, const StringList& string_list, const char delimiter = ',');
 	RMLUICORE_API void JoinString(String& string, const StringList& string_list, const char delimiter = ',');
 
 
-	/// Converts a string in UTF-8 encoding to a wide string in UTF-16 encoding. The UTF-16 words will
-	/// be encoded as either big- or little-endian, depending on the host processor.
-	/// Reports a warning if the conversion fails.
-	RMLUICORE_API WString ToUTF16(const String& str);
-
-	/// Converts a wide string in UTF-16 encoding into a string in UTF-8 encoding. This
-	/// function assumes the endianness of the input words to be the same as the host processor.
-	/// Reports a warning if the conversion fails.
-	RMLUICORE_API String ToUTF8(const WString& wstr);
-
-	/// Returns number of characters in UTF8 string.
-	RMLUICORE_API size_t LengthU8(StringView string_view);
-
 	/// Converts upper-case characters in string to lower-case.
 	/// Converts upper-case characters in string to lower-case.
 	RMLUICORE_API String ToLower(const String& string);
 	RMLUICORE_API String ToLower(const String& string);
 
 
@@ -120,22 +107,43 @@ namespace StringUtilities
 		bool operator()(const String& lhs, const String& rhs) const;
 		bool operator()(const String& lhs, const String& rhs) const;
 	};
 	};
 
 
+	// Decode the first code point in a zero-terminated UTF-8 string.
 	RMLUICORE_API CodePoint ToCodePoint(const char* p);
 	RMLUICORE_API CodePoint ToCodePoint(const char* p);
+
+	// Encode a single code point as a UTF-8 string.
 	RMLUICORE_API String ToUTF8(CodePoint code_point);
 	RMLUICORE_API String ToUTF8(CodePoint code_point);
+
+	// Encode an array of code points as a UTF-8 string.
 	RMLUICORE_API String ToUTF8(const CodePoint* code_points, int num_code_points);
 	RMLUICORE_API String ToUTF8(const CodePoint* code_points, int num_code_points);
 
 
+	/// Returns the number of characters in a UTF-8 string.
+	RMLUICORE_API size_t LengthUTF8(StringView string_view);
+
+	// Seek forward in a UTF-8 string, skipping continuation bytes.
 	inline const char* SeekForwardU8(const char* p, const char* p_end)
 	inline const char* SeekForwardU8(const char* p, const char* p_end)
 	{
 	{
 		while (p != p_end && (*p & 0b1100'0000) == 0b1000'0000)
 		while (p != p_end && (*p & 0b1100'0000) == 0b1000'0000)
 			++p;
 			++p;
 		return p;
 		return p;
 	}
 	}
+	// Seek backward in a UTF-8 string, skipping continuation bytes.
 	inline const char* SeekBackU8(const char* p, const char* p_begin)
 	inline const char* SeekBackU8(const char* p, const char* p_begin)
 	{
 	{
 		while ((p + 1) != p_begin && (*p & 0b1100'0000) == 0b1000'0000)
 		while ((p + 1) != p_begin && (*p & 0b1100'0000) == 0b1000'0000)
 			--p;
 			--p;
 		return p;
 		return p;
 	}
 	}
+
+
+	/// Converts a string in UTF-8 encoding to a wide string in UTF-16 encoding. The UTF-16 words will
+	/// be encoded as either big- or little-endian, depending on the host processor.
+	/// Reports a warning if the conversion fails.
+	RMLUICORE_API WString ToUTF16(const String& str);
+
+	/// Converts a wide string in UTF-16 encoding into a string in UTF-8 encoding. This
+	/// function assumes the endianness of the input words to be the same as the host processor.
+	/// Reports a warning if the conversion fails.
+	RMLUICORE_API String ToUTF8(const WString& wstr);
 }
 }
 
 
 
 
@@ -182,9 +190,9 @@ public:
 	StringIteratorU8(const String& string, size_t offset);
 	StringIteratorU8(const String& string, size_t offset);
 	StringIteratorU8(const String& string, size_t offset, size_t count);
 	StringIteratorU8(const String& string, size_t offset, size_t count);
 
 
-	// Seeks forward to the next UTF8 character. Iterator must be valid.
+	// Seeks forward to the next UTF-8 character. Iterator must be valid.
 	StringIteratorU8& operator++();
 	StringIteratorU8& operator++();
-	// Seeks back to the previous UTF8 character. Iterator must be valid.
+	// Seeks back to the previous UTF-8 character. Iterator must be valid.
 	StringIteratorU8& operator--();
 	StringIteratorU8& operator--();
 
 
 	// Returns the codepoint at the current position. The iterator must be dereferenceable.
 	// Returns the codepoint at the current position. The iterator must be dereferenceable.

+ 1 - 1
Samples/basic/demo/data/demo.rml

@@ -125,7 +125,7 @@ button:focus {
 textarea {
 textarea {
 	font-size: 18px;
 	font-size: 18px;
 	font-effect: outline(2px #006600);
 	font-effect: outline(2px #006600);
-	color: #ccc;
+	color: #333;
 	
 	
 }
 }
 </style>
 </style>

+ 4 - 3
Samples/shell/include/win32/InputWin32.h

@@ -30,10 +30,12 @@
 #define RMLUIINPUTWIN32_H
 #define RMLUIINPUTWIN32_H
 
 
 #include <Input.h>
 #include <Input.h>
-#if !defined _WIN32_WINNT || _WIN32_WINNT < 0x0500
+#if !defined _WIN32_WINNT || _WIN32_WINNT < 0x0501
 #undef _WIN32_WINNT
 #undef _WIN32_WINNT
-#define _WIN32_WINNT 0x0500
+#define _WIN32_WINNT 0x0501
 #endif
 #endif
+#define UNICODE
+#define _UNICODE
 #include <windows.h>
 #include <windows.h>
 
 
 /**
 /**
@@ -50,7 +52,6 @@ public:
 
 
 	/// Process the Windows message.
 	/// Process the Windows message.
 	static void ProcessWindowsEvent(UINT message, WPARAM w_param, LPARAM l_param);
 	static void ProcessWindowsEvent(UINT message, WPARAM w_param, LPARAM l_param);
-private:
 };
 };
 
 
 #endif
 #endif

+ 34 - 8
Samples/shell/src/win32/InputWin32.cpp

@@ -29,6 +29,7 @@
 #include <win32/InputWin32.h>
 #include <win32/InputWin32.h>
 #include <RmlUi/Core/Context.h>
 #include <RmlUi/Core/Context.h>
 #include <RmlUi/Core/Input.h>
 #include <RmlUi/Core/Input.h>
+#include <RmlUi/Core/StringUtilities.h>
 #include <RmlUi/Debugger.h>
 #include <RmlUi/Debugger.h>
 #include <Shell.h>
 #include <Shell.h>
 
 
@@ -52,7 +53,7 @@ void InputWin32::ProcessWindowsEvent(UINT message, WPARAM w_param, LPARAM l_para
 {
 {
 	if (context == nullptr)
 	if (context == nullptr)
 		return;
 		return;
-
+	
 	// Process all mouse and keyboard events
 	// Process all mouse and keyboard events
 	switch (message)
 	switch (message)
 	{
 	{
@@ -104,15 +105,40 @@ void InputWin32::ProcessWindowsEvent(UINT message, WPARAM w_param, LPARAM l_para
 		}
 		}
 		break;
 		break;
 
 
+
 		case WM_CHAR:
 		case WM_CHAR:
 		{
 		{
-			// Only send through printable characters.
-			// TODO: Convert utf16 character to codepoint
-			if (w_param >= 32)
-				context->ProcessTextInput((Rml::Core::CodePoint) w_param);
-			// Or endlines - Windows sends them through as carriage returns.
-			else if (w_param == '\r')
-				context->ProcessTextInput((Rml::Core::CodePoint)'\n');
+			static wchar_t two_wide_char_first = 0;
+
+			wchar_t w = (wchar_t)w_param;
+			Rml::Core::CodePoint code_point = (Rml::Core::CodePoint)w;
+
+			// Windows sends two-wide characters as two messages.
+			if (w >= 0xD800 && w < 0xDC00)
+			{
+				// First 16-bit code unit of a two-wide character.
+				two_wide_char_first = w;
+			}
+			else
+			{
+				if (w >= 0xDC00 && w < 0xE000 && two_wide_char_first != 0)
+				{
+					// Second 16-bit code unit of a two-wide character.
+					Rml::Core::String utf8 = Rml::Core::StringUtilities::ToUTF8({ two_wide_char_first, w });
+					code_point = Rml::Core::StringUtilities::ToCodePoint(utf8.data());
+				}
+				else if (w == '\r')
+				{
+					// Windows sends new-lines as carriage returns; convert them to line feeds ('\n').
+					code_point = (Rml::Core::CodePoint)'\n';
+				}
+
+				two_wide_char_first = 0;
+
+				// Only send through printable characters.
+				if ((unsigned int)code_point >= 32 || code_point == (Rml::Core::CodePoint)'\n')
+					context->ProcessTextInput(code_point);
+			}
 		}
 		}
 		break;
 		break;
 
 

+ 19 - 18
Samples/shell/src/win32/ShellWin32.cpp

@@ -30,7 +30,6 @@
 #include <RmlUi/Core.h>
 #include <RmlUi/Core.h>
 #include <win32/InputWin32.h>
 #include <win32/InputWin32.h>
 #include "ShellFileInterface.h"
 #include "ShellFileInterface.h"
-#include <windows.h>
 #include <stdio.h>
 #include <stdio.h>
 #include <stdarg.h>
 #include <stdarg.h>
 
 
@@ -38,7 +37,7 @@ static LRESULT CALLBACK WindowProcedure(HWND window_handle, UINT message, WPARAM
 
 
 static bool activated = true;
 static bool activated = true;
 static bool running = false;
 static bool running = false;
-static const char* instance_name = nullptr;
+static Rml::Core::WString instance_name;
 static HWND window_handle = nullptr;
 static HWND window_handle = nullptr;
 static HINSTANCE instance_handle = nullptr;
 static HINSTANCE instance_handle = nullptr;
 
 
@@ -65,10 +64,10 @@ bool Shell::Initialise()
 	time_frequency = 1.0 / (double) time_ticks_per_second.QuadPart;
 	time_frequency = 1.0 / (double) time_ticks_per_second.QuadPart;
 
 
 	// Load cursors
 	// Load cursors
-	cursor_default = LoadCursorA(nullptr, IDC_ARROW);
-	cursor_move = LoadCursorA(nullptr, IDC_SIZEALL);
-	cursor_cross = LoadCursorA(nullptr, IDC_CROSS);
-	cursor_unavailable = LoadCursorA(nullptr, IDC_NO);
+	cursor_default = LoadCursor(nullptr, IDC_ARROW);
+	cursor_move = LoadCursor(nullptr, IDC_SIZEALL);
+	cursor_cross = LoadCursor(nullptr, IDC_CROSS);
+	cursor_unavailable = LoadCursor(nullptr, IDC_NO);
 
 
 	Rml::Core::String root = FindSamplesRoot();
 	Rml::Core::String root = FindSamplesRoot();
 	
 	
@@ -104,9 +103,11 @@ Rml::Core::String Shell::FindSamplesRoot()
 }
 }
 
 
 static ShellRenderInterfaceExtensions *shell_renderer = nullptr;
 static ShellRenderInterfaceExtensions *shell_renderer = nullptr;
-bool Shell::OpenWindow(const char* name, ShellRenderInterfaceExtensions *_shell_renderer, unsigned int width, unsigned int height, bool allow_resize)
+bool Shell::OpenWindow(const char* in_name, ShellRenderInterfaceExtensions *_shell_renderer, unsigned int width, unsigned int height, bool allow_resize)
 {
 {
-	WNDCLASS window_class;
+	WNDCLASSW window_class;
+
+	Rml::Core::WString name = Rml::Core::StringUtilities::ToUTF16(Rml::Core::String(in_name));
 
 
 	// Fill out the window class struct.
 	// Fill out the window class struct.
 	window_class.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC;
 	window_class.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC;
@@ -118,9 +119,9 @@ bool Shell::OpenWindow(const char* name, ShellRenderInterfaceExtensions *_shell_
 	window_class.hCursor = cursor_default;
 	window_class.hCursor = cursor_default;
 	window_class.hbrBackground = nullptr;
 	window_class.hbrBackground = nullptr;
 	window_class.lpszMenuName = nullptr;
 	window_class.lpszMenuName = nullptr;
-	window_class.lpszClassName = name;
+	window_class.lpszClassName = name.data();
 
 
-	if (!RegisterClass(&window_class))
+	if (!RegisterClassW(&window_class))
 	{
 	{
 		DisplayError("Could not register window class.");
 		DisplayError("Could not register window class.");
 
 
@@ -128,9 +129,9 @@ bool Shell::OpenWindow(const char* name, ShellRenderInterfaceExtensions *_shell_
 		return false;
 		return false;
 	}
 	}
 
 
-	window_handle = CreateWindowEx(WS_EX_APPWINDOW | WS_EX_WINDOWEDGE,
-								   name,	// Window class name.
-								   name,
+	window_handle = CreateWindowExW(WS_EX_APPWINDOW | WS_EX_WINDOWEDGE,
+								   name.data(),	// Window class name.
+								   name.data(),
 								   WS_CLIPSIBLINGS | WS_CLIPCHILDREN | WS_OVERLAPPEDWINDOW,
 								   WS_CLIPSIBLINGS | WS_CLIPCHILDREN | WS_OVERLAPPEDWINDOW,
 								   0, 0,	// Window position.
 								   0, 0,	// Window position.
 								   width, height,// Window size.
 								   width, height,// Window size.
@@ -190,7 +191,7 @@ void Shell::CloseWindow()
 	}
 	}
 
 
 	DestroyWindow(window_handle);  
 	DestroyWindow(window_handle);  
-	UnregisterClass(instance_name, instance_handle);
+	UnregisterClassW(instance_name.data(), instance_handle);
 }
 }
 
 
 // Returns a platform-dependent handle to the window.
 // Returns a platform-dependent handle to the window.
@@ -241,7 +242,7 @@ void Shell::DisplayError(const char* fmt, ...)
 	buffer[len + 1] = '\0';
 	buffer[len + 1] = '\0';
 	va_end(argument_list);
 	va_end(argument_list);
 
 
-	MessageBox(window_handle, buffer, "Shell Error", MB_OK);
+	MessageBox(window_handle, Rml::Core::StringUtilities::ToUTF16(buffer).c_str(), L"Shell Error", MB_OK);
 }
 }
 
 
 void Shell::Log(const char* fmt, ...)
 void Shell::Log(const char* fmt, ...)
@@ -261,7 +262,7 @@ void Shell::Log(const char* fmt, ...)
 	buffer[len + 1] = '\0';
 	buffer[len + 1] = '\0';
 	va_end(argument_list);
 	va_end(argument_list);
 
 
-	OutputDebugString(buffer);
+	OutputDebugString(Rml::Core::StringUtilities::ToUTF16(buffer).c_str());
 }
 }
 
 
 double Shell::GetElapsedTime() 
 double Shell::GetElapsedTime() 
@@ -371,8 +372,8 @@ static LRESULT CALLBACK WindowProcedure(HWND window_handle, UINT message, WPARAM
 
 
 		case WM_SIZE:
 		case WM_SIZE:
 		{
 		{
-			int width = LOWORD(l_param);;
-			int height = HIWORD(l_param);;
+			int width = LOWORD(l_param);
+			int height = HIWORD(l_param);
 			shell_renderer->SetViewport(width, height);
 			shell_renderer->SetViewport(width, height);
 		}
 		}
 		break;
 		break;

+ 1 - 1
Source/Controls/WidgetTextInput.cpp

@@ -164,7 +164,7 @@ int WidgetTextInput::GetMaxLength() const
 int WidgetTextInput::GetLength() const
 int WidgetTextInput::GetLength() const
 {
 {
 	Core::String value = GetElement()->GetAttribute< Core::String >("value", "");
 	Core::String value = GetElement()->GetAttribute< Core::String >("value", "");
-	size_t result = Core::StringUtilities::LengthU8(value);
+	size_t result = Core::StringUtilities::LengthUTF8(value);
 	return (int)result;
 	return (int)result;
 }
 }
 
 

+ 41 - 144
Source/Core/StringUtilities.cpp

@@ -48,7 +48,7 @@
 namespace Rml {
 namespace Rml {
 namespace Core {
 namespace Core {
 
 
-static bool UTF8toUCS2(const String& input, WString& output);
+static bool UTF8toUTF16(const String& input, WString& output);
 static bool UTF16toUTF8(const WString& input, String& output);
 static bool UTF16toUTF8(const WString& input, String& output);
 
 
 
 
@@ -106,23 +106,21 @@ String StringUtilities::ToLower(const String& string) {
 
 
 WString StringUtilities::ToUTF16(const String& str)
 WString StringUtilities::ToUTF16(const String& str)
 {
 {
-	// TODO: Convert to UTF16 instead of UCS2
 	WString result;
 	WString result;
-	if (!UTF8toUCS2(str, result))
-		Log::Message(Log::LT_WARNING, "Failed to convert UTF8 string to UTF16.");
+	if (!UTF8toUTF16(str, result))
+		Log::Message(Log::LT_WARNING, "Invalid characters encountered while converting UTF-8 string to UTF-16.");
 	return result;
 	return result;
 }
 }
 
 
 String StringUtilities::ToUTF8(const WString& wstr)
 String StringUtilities::ToUTF8(const WString& wstr)
 {
 {
-	/// TODO: Convert from UTF-16 instead.
 	String result;
 	String result;
 	if(!UTF16toUTF8(wstr, result))
 	if(!UTF16toUTF8(wstr, result))
-		Log::Message(Log::LT_WARNING, "Failed to convert UCS2 string to UTF8.");
+		Log::Message(Log::LT_WARNING, "Invalid characters encountered while converting UTF-16 string to UTF-8.");
 	return result;
 	return result;
 }
 }
 
 
-size_t StringUtilities::LengthU8(StringView string_view)
+size_t StringUtilities::LengthUTF8(StringView string_view)
 {
 {
 	const char* const p_end = string_view.end();
 	const char* const p_end = string_view.end();
 
 
@@ -392,151 +390,50 @@ bool StringUtilities::StringComparei::operator()(const String& lhs, const String
 }
 }
 
 
 
 
-// Defines, helper functions for the UTF8 / UCS2 conversion functions.
-constexpr int _NXT = 0x80;
-constexpr int _SEQ2 = 0xc0;
-constexpr int _SEQ3 = 0xe0;
-constexpr int _SEQ4 = 0xf0;
-constexpr int _SEQ5 = 0xf8;
-constexpr int _SEQ6 = 0xfc;
 
 
-constexpr int _BOM = 0xfeff;
-	
-static int __wchar_forbidden(unsigned int sym)
+// Converts a character array in UTF-8 encoding to a wide string in UTF-16 encoding.
+static bool UTF8toUTF16(const String& input, WString& output)
 {
 {
-	// Surrogate pairs
-	if (sym >= 0xd800 && sym <= 0xdfff)
-		return -1;
-	
-	return 0;
-}
+	if (input.empty())
+		return true;
 
 
-static int __utf8_forbidden(unsigned char octet)
-{
-	switch (octet)
-	{
-		case 0xc0:
-		case 0xc1:
-		case 0xf5:
-		case 0xff:
-			return -1;
-			
-		default:
-			return 0;
-	}
-}
+	std::vector<CodePoint> code_points;
+	code_points.reserve(input.size());
 
 
+	for (auto it = StringIteratorU8(input); it; ++it)
+		code_points.push_back(*it);
 
 
+	output.reserve(input.size());
 
 
-// Converts a character array in UTF-8 encoding to a vector of words.
-static bool UTF8toUCS2(const String& input, WString& output)
-{
-	if (input.empty())
-		return true;
+	bool valid_characters = true;
 
 
-	output.reserve(input.size());
-	
-	unsigned char* p = (unsigned char*) input.c_str();
-	unsigned char* end = p + input.size();
-	
-	// Skip the UTF-8 byte order marker if it exists.
-	if (input.substr(0, 3) == "\xEF\xBB\xBF")
-		p += 3;
-	
-	int num_bytes;
-	for (; p < end; p += num_bytes)
+	for (CodePoint code_point : code_points)
 	{
 	{
-		if (__utf8_forbidden(*p) != 0)
-			return false;
-		
-		// Get number of bytes for one wide character.
-		wchar_t high;
-		num_bytes = 1;
-		
-		if ((*p & 0x80) == 0)
-		{
-			high = (wchar_t)*p;
-		}
-		else if ((*p & 0xe0) == _SEQ2)
-		{
-			num_bytes = 2;
-			high = (wchar_t)(*p & 0x1f);
-		}
-		else if ((*p & 0xf0) == _SEQ3)
-		{
-			num_bytes = 3;
-			high = (wchar_t)(*p & 0x0f);
-		}
-		else if ((*p & 0xf8) == _SEQ4)
-		{
-			num_bytes = 4;
-			high = (wchar_t)(*p & 0x07);
-		}
-		else if ((*p & 0xfc) == _SEQ5)
+		unsigned int c = (unsigned int)code_point;
+
+		if (c <= 0xD7FF || (c >= 0xE000 && c <= 0xFFFF))
 		{
 		{
-			num_bytes = 5;
-			high = (wchar_t)(*p & 0x03);
+			// Single 16-bit code unit.
+			output += (wchar_t)c;
 		}
 		}
-		else if ((*p & 0xfe) == _SEQ6)
+		else if (c >= 0x10000 && c <= 0x10FFFF)
 		{
 		{
-			num_bytes = 6;
-			high = (wchar_t)(*p & 0x01);
+			// Encode as two 16-bit code units.
+			unsigned int c_shift = c - 0x10000;
+			wchar_t w1 = (0xD800 | ((c_shift >> 10) & 0x3FF));
+			wchar_t w2 = (0xDC00 | (c_shift & 0x3FF));
+			output += {w1, w2};
 		}
 		}
 		else
 		else
 		{
 		{
-			return false;
-		}
-		
-		// Does the sequence header tell us the truth about length?
-		if (end - p <= num_bytes - 1)
-		{
-			return false;
-		}
-		
-		// Validate the sequence. All symbols must have higher bits set to 10xxxxxx.
-		if (num_bytes > 1)
-		{
-			int i;
-			for (i = 1; i < num_bytes; i++)
-			{
-				if ((p[i] & 0b1100'0000) != _NXT)
-					break;
-			}
-			
-			if (i != num_bytes)
-			{
-				return false;
-			}
-		}
-		
-		// Make up a single UCS-4 (32-bit) character from the required number of UTF-8 tokens. The first byte has
-		// been determined earlier, the second and subsequent bytes contribute the first six of their bits into the
-		// final character code.
-		unsigned int ucs4_char = 0;
-		int num_bits = 0;
-		for (int i = 1; i < num_bytes; i++)
-		{
-			ucs4_char |= (wchar_t)(p[num_bytes - i] & 0x3f) << num_bits;
-			num_bits += 6;
-		}
-		ucs4_char |= high << num_bits;
-		
-		// Check for surrogate pairs.
-		if (__wchar_forbidden(ucs4_char) != 0)
-		{
-			return false;
+			valid_characters = false;
 		}
 		}
-		
-		// Only add the character to the output if it exists in the Basic Multilingual Plane (ie, fits in a single
-		// word).
-		if (ucs4_char <= 0xffff)
-			output.push_back((wchar_t) ucs4_char);
 	}
 	}
-	
-	return true;
+
+	return valid_characters;
 }
 }
 
 
-// Converts an array of words in UCS-2 encoding into a character array in UTF-8 encoding.
+// Converts a wide string in UTF-16 encoding into a string in UTF-8 encoding.
 static bool UTF16toUTF8(const WString& input, String& output)
 static bool UTF16toUTF8(const WString& input, String& output)
 {
 {
 	std::vector<CodePoint> code_points;
 	std::vector<CodePoint> code_points;
@@ -545,25 +442,23 @@ static bool UTF16toUTF8(const WString& input, String& output)
 	bool valid_input = true;
 	bool valid_input = true;
 	wchar_t w1 = 0;
 	wchar_t w1 = 0;
 
 
-	const wchar_t* w = input.data();
-	const wchar_t* wlim = w + input.size();
-	for (; w < wlim; w++)
+	for (wchar_t w : input)
 	{
 	{
-		if (*w <= 0xD7FF || *w >= 0xE000)
+		if (w <= 0xD7FF || w >= 0xE000)
 		{
 		{
 			// Single 16-bit code unit.
 			// Single 16-bit code unit.
-			code_points.push_back((CodePoint)(*w));
+			code_points.push_back((CodePoint)(w));
 		}
 		}
 		else 
 		else 
 		{
 		{
 			// Two 16-bit code units.
 			// Two 16-bit code units.
-			if (!w1 && *w < 0xDC00)
+			if (!w1 && w < 0xDC00)
 			{
 			{
-				w1 = *w;
+				w1 = w;
 			}
 			}
-			else if (w1 && *w >= 0xDC00)
+			else if (w1 && w >= 0xDC00)
 			{
 			{
-				code_points.push_back((CodePoint)(((((unsigned int)w1 & 0x3FF) << 10) | ((unsigned int)(*w) & 0x3FF)) + 0x10000u));
+				code_points.push_back((CodePoint)(((((unsigned int)w1 & 0x3FF) << 10) | ((unsigned int)(w) & 0x3FF)) + 0x10000u));
 				w1 = 0;
 				w1 = 0;
 			}
 			}
 			else
 			else
@@ -580,7 +475,9 @@ static bool UTF16toUTF8(const WString& input, String& output)
 }
 }
 
 
 StringView::StringView(const char* p_begin, const char* p_end) : p_begin(p_begin), p_end(p_end)
 StringView::StringView(const char* p_begin, const char* p_end) : p_begin(p_begin), p_end(p_end)
-{}
+{
+	RMLUI_ASSERT(p_end >= p_begin);
+}
 StringView::StringView(const String& string) : p_begin(string.data()), p_end(string.data() + string.size())
 StringView::StringView(const String& string) : p_begin(string.data()), p_end(string.data() + string.size())
 {}
 {}
 StringView::StringView(const String& string, size_t offset) : p_begin(string.data()), p_end(string.data() + string.size())
 StringView::StringView(const String& string, size_t offset) : p_begin(string.data()), p_end(string.data() + string.size())