BearishSun 8 лет назад
Родитель
Сommit
8a243ae52e

+ 1 - 12
Source/BansheeMono/BsMonoUtil.cpp

@@ -3,7 +3,6 @@
 #include "BsMonoUtil.h"
 #include "Debug/BsDebug.h"
 #include <mono/jit/jit.h>
-#include <codecvt>
 
 namespace bs
 {
@@ -42,17 +41,7 @@ namespace bs
 		if (sizeof(wchar_t) == 2) // Assuming UTF-16
 			return mono_string_from_utf16((mono_unichar2*)str.c_str());
 		else // Assuming UTF-32
-		{
-			const std::codecvt_mode convMode = (std::codecvt_mode)(std::little_endian);
-			typedef std::codecvt_utf16<UINT32, 1114111, convMode> utf16utf32;
-
-			std::wstring_convert<utf16utf32, UINT32> conversion("?");
-			UINT32* start = (UINT32*)str.data();
-			UINT32* end = (start + (str.size() - 1) / 4);
-
-			mono_unichar2* convertedStr = (mono_unichar2*)conversion.to_bytes(start, end).c_str();
-			return mono_string_from_utf16(convertedStr);
-		}
+			return mono_string_from_utf32((mono_unichar4*)str.c_str());
 	}
 
 	MonoString* MonoUtil::stringToMono(const String& str)

+ 2 - 0
Source/BansheeUtility/CMakeSources.cmake

@@ -43,6 +43,7 @@ set(BS_BANSHEEUTILITY_INC_STRING
 	"String/BsString.h"
 	"String/BsStringFormat.h"
 	"String/BsStringID.h"
+	"String/BsUnicode.h"
 )
 
 set(BS_BANSHEEUTILITY_SRC_IMAGE
@@ -244,6 +245,7 @@ set(BS_BANSHEEUTILITY_INC_SERIALIZATION
 set(BS_BANSHEEUTILITY_SRC_STRING
 	"String/BsStringID.cpp"
 	"String/BsString.cpp"
+	"String/BsUnicode.cpp"
 )
 
 set(BS_BANSHEEUTILITY_INC_REFLECTION

+ 61 - 133
Source/BansheeUtility/FileSystem/BsDataStream.cpp

@@ -2,7 +2,7 @@
 //**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
 #include "FileSystem/BsDataStream.h"
 #include "Debug/BsDebug.h"
-#include <codecvt>
+#include "String/BsUnicode.h"
 
 namespace bs 
 {
@@ -49,15 +49,19 @@ namespace bs
 	{
 		if (encoding == StringEncoding::UTF16)
 		{
-			const std::codecvt_mode convMode = (std::codecvt_mode)(std::generate_header);
-			typedef std::codecvt_utf8_utf16<char, 1114111, convMode> UTF8ToUTF16Conv;
-			std::wstring_convert<UTF8ToUTF16Conv, char> conversion("?");
+			// Write BOM
+			UINT8 bom[2] = { 0xFF, 0xFE };
+			write(bom, sizeof(bom));
 
-			std::string encodedString = conversion.from_bytes(string.c_str());
-			write(encodedString.data(), encodedString.length());
+			U16String u16string = UTF8::toUTF16(string);
+			write(u16string.data(), u16string.length() * sizeof(char16_t)); // write() takes a byte count; length() is in UTF-16 code units
 		}
 		else
 		{
+			// Write BOM
+			UINT8 bom[3] = { 0xEF, 0xBB, 0xBF };
+			write(bom, sizeof(bom));
+
 			write(string.data(), string.length());
 		}
 	}
@@ -66,21 +70,22 @@ namespace bs
 	{
 		if (encoding == StringEncoding::UTF16)
 		{
-			const std::codecvt_mode convMode = (std::codecvt_mode)(std::generate_header | std::little_endian);
-			typedef std::codecvt_utf16<wchar_t, 1114111, convMode> WCharToUTF16Conv;
-			std::wstring_convert<WCharToUTF16Conv, wchar_t> conversion("?");
+			// Write BOM
+			UINT8 bom[2] = { 0xFF, 0xFE };
+			write(bom, sizeof(bom));
 
-			std::string encodedString = conversion.to_bytes(string.c_str());
-			write(encodedString.data(), encodedString.length());
+			String u8string = UTF8::fromWide(string);
+			U16String u16string = UTF8::toUTF16(u8string);
+			write(u16string.data(), u16string.length() * sizeof(char16_t)); // write() takes a byte count; length() is in UTF-16 code units
 		}
 		else
 		{
-			const std::codecvt_mode convMode = (std::codecvt_mode)(std::generate_header);
-			typedef std::codecvt_utf8<wchar_t, 1114111, convMode> WCharToUTF8Conv;
-			std::wstring_convert<WCharToUTF8Conv, wchar_t> conversion("?");
+			// Write BOM
+			UINT8 bom[3] = { 0xEF, 0xBB, 0xBF };
+			write(bom, sizeof(bom));
 
-			std::string encodedString = conversion.to_bytes(string.c_str());
-			write(encodedString.data(), encodedString.length());
+			String u8string = UTF8::fromWide(string);
+			write(u8string.data(), u8string.length());
 		}
 	}
 
@@ -94,93 +99,38 @@ namespace bs
 		// Ensure read from begin of stream
 		seek(0);
 
-		std::stringstream result;
-		while (!eof())
-		{
-			size_t numReadBytes = read(tempBuffer, bufSize);
-			result.write(tempBuffer, numReadBytes);
-		}
+		// Try reading header
+		UINT8 headerBytes[4];
+		size_t numHeaderBytes = read(headerBytes, 4);
 
-		free(tempBuffer);
-		std::string string = result.str();
-
-		UINT32 readBytes = (UINT32)string.size();
-		if (readBytes >= 4)
+		size_t dataOffset = 0;
+		if(numHeaderBytes == 4)
 		{
-			if (isUTF32LE((UINT8*)string.data()))
+			if (isUTF32LE(headerBytes))
+				dataOffset = 4;
+			else if (isUTF32BE(headerBytes))
 			{
-				const std::codecvt_mode convMode = (std::codecvt_mode)(std::consume_header | std::little_endian);
-				typedef std::codecvt_utf8<UINT32, 1114111, convMode> utf8utf32;
-
-				std::wstring_convert<utf8utf32, UINT32> conversion("?");
-				UINT32* start = (UINT32*)string.data();
-				UINT32* end = (start + (string.size() - 1) / 4);
-
-				return conversion.to_bytes(start, end).c_str();
-			}
-			else if (isUTF32BE((UINT8*)string.data()))
-			{
-				const std::codecvt_mode convMode = (std::codecvt_mode)(std::consume_header);
-				typedef std::codecvt_utf8<UINT32, 1114111, convMode> utf8utf32;
-
-				std::wstring_convert<utf8utf32, UINT32> conversion("?");
-				UINT32* start = (UINT32*)string.data();
-				UINT32* end = (start + (string.size() - 1) / 4);
-
-				return conversion.to_bytes(start, end).c_str();
+				LOGWRN("UTF-32 big endian decoding not supported");
+				return u8"";
 			}			
 		}
-		
-		if (readBytes >= 3)
+		if(dataOffset == 0 && numHeaderBytes >= 3) // Not an else-branch: read() yields 4 bytes for any stream of 4+ bytes, so shorter BOMs must be checked independently
 		{
-			if (isUTF8((UINT8*)string.data()))
-			{
-				return string.c_str() + 3;
-			}
+			if (isUTF8(headerBytes))
+				dataOffset = 3;
 		}
-
-		if (readBytes >= 2)
+		if(dataOffset == 0 && numHeaderBytes >= 2) // Only reached when no longer BOM matched, so a UTF-32 LE header is never mistaken for UTF-16 LE
 		{
-			if (isUTF16LE((UINT8*)string.data()))
-			{
-				const std::codecvt_mode convMode = (std::codecvt_mode)(std::little_endian);
-				typedef std::codecvt_utf8_utf16<UINT16, 1114111, convMode> utf8utf16;
-
-				std::wstring_convert<utf8utf16, UINT16> conversion("?");
-				UINT16* start = (UINT16*)(string.c_str() + 2); // Bug?: std::consume_header seems to be ignored so I manually remove the header
-
-				return conversion.to_bytes(start).c_str();
-			}
-			else if (isUTF16BE((UINT8*)string.data()))
+			if (isUTF16LE(headerBytes))
+				dataOffset = 2;
+			else if (isUTF16BE(headerBytes))
 			{
-				const std::codecvt_mode convMode = (std::codecvt_mode)(0);
-				typedef std::codecvt_utf8_utf16<UINT16, 1114111, convMode> utf8utf16;
-
-				// Bug?: Regardless of not providing the std::little_endian flag it seems that is how the data is read
-				// so I manually flip it
-				UINT32 numChars = (UINT32)(string.size() - 2) / 2;
-				for (UINT32 i = 0; i < numChars; i++)
-					std::swap(string[i * 2 + 0], string[i * 2 + 1]);
-
-				std::wstring_convert<utf8utf16, UINT16> conversion("?");
-				UINT16* start = (UINT16*)(string.c_str() + 2); // Bug?: std::consume_header seems to be ignored so I manually remove the header
-
-				return conversion.to_bytes(start).c_str();
+				LOGWRN("UTF-16 big endian decoding not supported");
+				return u8"";
 			}
 		}
 
-		return string.c_str();
-	}
-
-	WString DataStream::getAsWString()
-	{
-		// Read the entire buffer - ideally in one read, but if the size of
-		// the buffer is unknown, do multiple fixed size reads.
-		size_t bufSize = (mSize > 0 ? mSize : 4096);
-		std::stringstream::char_type* tempBuffer = (std::stringstream::char_type*)bs_alloc((UINT32)bufSize);
-
-		// Ensure read from begin of stream
-		seek(0);
+		seek(dataOffset);
 
 		std::stringstream result;
 		while (!eof())
@@ -192,58 +142,36 @@ namespace bs
 		free(tempBuffer);
 		std::string string = result.str();
 
-		UINT32 readBytes = (UINT32)string.size();
-		if (readBytes >= 4)
-		{
-			if (isUTF32LE((UINT8*)string.data()))
-			{
-				// Not supported
-			}
-			else if (isUTF32BE((UINT8*)string.data()))
-			{
-				// Not supported
-			}
-		}
-
-		if (readBytes >= 3)
-		{
-			if (isUTF8((UINT8*)string.data()))
-			{
-				const std::codecvt_mode convMode = (std::codecvt_mode)(std::consume_header);
-				typedef std::codecvt_utf8<wchar_t, 1114111, convMode> wcharutf8;
-
-				std::wstring_convert<wcharutf8> conversion("?");
-				return conversion.from_bytes(string).c_str();
-			}
-		}
-
-		if (readBytes >= 2)
+		switch(dataOffset)
 		{
-			if (isUTF16LE((UINT8*)string.data()))
+		default:
+		case 0: // No BOM = assumed UTF-8
+		case 3: // UTF-8
+			return String(string.data(), string.length());
+		case 2: // UTF-16
 			{
-				const std::codecvt_mode convMode = (std::codecvt_mode)(std::consume_header | std::little_endian);
-				typedef std::codecvt_utf16<wchar_t, 1114111, convMode> wcharutf16;
+			UINT32 numElems = (UINT32)string.length() / 2;
 
-				std::wstring_convert<wcharutf16> conversion("?");
-				return conversion.from_bytes(string).c_str();
+			return UTF8::fromUTF16(U16String((char16_t*)string.data(), numElems));
 			}
-			else if (isUTF16BE((UINT8*)string.data()))
+		case 4: // UTF-32
 			{
-				const std::codecvt_mode convMode = (std::codecvt_mode)(std::consume_header);
-				typedef std::codecvt_utf16<wchar_t, 1114111, convMode> wcharutf16;
+			UINT32 numElems = (UINT32)string.length() / 4;
 
-				std::wstring_convert<wcharutf16> conversion("?");
-				return conversion.from_bytes(string).c_str();
+			return UTF8::fromUTF32(U32String((char32_t*)string.data(), numElems));
 			}
 		}
 
-		{
-			const std::codecvt_mode convMode = (std::codecvt_mode)(std::consume_header);
-			typedef std::codecvt_utf8<wchar_t, 1114111, convMode> wcharutf8;
+		// Note: Never assuming ANSI as there is no ideal way to check for it. If required I need to
+		// try reading the data and if all UTF encodings fail, assume it's ANSI. For now it should be
+		// fine as most files are UTF-8 encoded.
+	}
 
-			std::wstring_convert<wcharutf8> conversion("?");
-			return conversion.from_bytes(string).c_str();
-		}
+	WString DataStream::getAsWString()
+	{
+		// Decode the whole stream to UTF-8 first, then widen it to the platform's wchar_t encoding
+		String utf8Text = getAsString();
+		return UTF8::toWide(utf8Text);
 	}
 
 	MemoryDataStream::MemoryDataStream(size_t size)

+ 44 - 43
Source/BansheeUtility/FileSystem/BsDataStream.h

@@ -32,12 +32,12 @@ namespace bs
 		};
 
 	public:
-        /** Creates an unnamed stream. */
-        DataStream(UINT16 accessMode = READ) 
+		/** Creates an unnamed stream. */
+		DataStream(UINT16 accessMode = READ) 
 			:mSize(0), mAccess(accessMode) 
 		{ }
 
-        /** Creates a named stream. */
+		/** Creates a named stream. */
 		DataStream(const String& name, UINT16 accessMode = READ) 
 			:mName(name), mSize(0), mAccess(accessMode) {}
 
@@ -50,11 +50,12 @@ namespace bs
 		virtual bool isWriteable() const { return (mAccess & WRITE) != 0; }
 		virtual bool isFile() const = 0;
 
-        /** Reads data from the buffer and copies it to the specified value. */
-        template<typename T> DataStream& operator>>(T& val);
+		/** Reads data from the buffer and copies it to the specified value. */
+		template<typename T> DataStream& operator>>(T& val);
 
 		/**
-		 * Read the requisite number of bytes from the stream, stopping at the end of the file.
+		 * Read the requisite number of bytes from the stream, stopping at the end of the file. Advances
+		 * the read pointer.
 		 *
 		 * @param[in]	buf		Pre-allocated buffer to read the data into.
 		 * @param[in]	count	Number of bytes to read.
@@ -65,7 +66,7 @@ namespace bs
 		virtual size_t read(void* buf, size_t count) = 0;
 
 		/**
-		 * Write the requisite number of bytes to the stream.
+		 * Write the requisite number of bytes to the stream and advance the write pointer.
 		 *
 		 * @param[in]	buf		Buffer containing bytes to write.
 		 * @param[in]	count	Number of bytes to write.
@@ -90,27 +91,27 @@ namespace bs
 		 * 							wide characters.
 		 * @param[in]	encoding	Encoding to convert the string to before writing.
 		 */
-		virtual void writeString(const WString& string, StringEncoding encoding = StringEncoding::UTF16);
+		virtual void writeString(const WString& string, StringEncoding encoding = StringEncoding::UTF8);
 
-	    /**
-	     * Returns a string containing the entire stream.
-	     *
+		/**
+		 * Returns a string containing the entire stream.
+		 *
 		 * @return	String data encoded as UTF-8. 
 		 *
 		 * @note	This is a convenience method for text streams only, allowing you to retrieve a String object containing 
 		 *			all the data in the stream.
-	     */
-	    virtual String getAsString();
+		 */
+		virtual String getAsString();
 
-	    /**
-	     * Returns a wide string containing the entire stream.
-	     *
+		/**
+		 * Returns a wide string containing the entire stream.
+		 *
 		 * @return	Wide string encoded as specified by current platform.
 		 *
 		 * @note	This is a convenience method for text streams only, allowing you to retrieve a WString object 
 		 *			containing all the data in the stream.
-	     */
-	    virtual WString getAsWString();
+		 */
+		virtual WString getAsWString();
 
 		/**
 		 * Skip a defined number of bytes. This can also be a negative value, in which case the file pointer rewinds a 
@@ -118,17 +119,17 @@ namespace bs
 		 */
 		virtual void skip(size_t count) = 0;
 	
-	    /** Repositions the read point to a specified byte. */
-	    virtual void seek(size_t pos) = 0;
+		/** Repositions the read point to a specified byte. */
+		virtual void seek(size_t pos) = 0;
 		
-	    /** Returns the current byte offset from beginning. */
-	    virtual size_t tell() const = 0;
+		/** Returns the current byte offset from beginning. */
+		virtual size_t tell() const = 0;
 
-	    /** Returns true if the stream has reached the end. */
-	    virtual bool eof() const = 0;
+		/** Returns true if the stream has reached the end. */
+		virtual bool eof() const = 0;
 
-        /** Returns the total size of the data to be read from the stream, or 0 if this is indeterminate for this stream. */
-        size_t size() const { return mSize; }
+		/** Returns the total size of the data to be read from the stream, or 0 if this is indeterminate for this stream. */
+		size_t size() const { return mSize; }
 
 		/** 
 		 * Creates a copy of this stream. 
@@ -139,14 +140,14 @@ namespace bs
 		 */
 		virtual SPtr<DataStream> clone(bool copyData = true) const = 0;
 
-        /** Close the stream. This makes further operations invalid. */
-        virtual void close() = 0;
+		/** Close the stream. This makes further operations invalid. */
+		virtual void close() = 0;
 		
 	protected:
 		static const UINT32 StreamTempSize;
 
 		String mName;		
-        size_t mSize;
+		size_t mSize;
 		UINT16 mAccess;
 	};
 
@@ -196,28 +197,28 @@ namespace bs
 		/** Get a pointer to the current position in the memory block this stream holds. */
 		UINT8* getCurrentPtr() const { return mPos; }
 		
-        /** @copydoc DataStream::read */
+		/** @copydoc DataStream::read */
 		size_t read(void* buf, size_t count) override;
 
-        /** @copydoc DataStream::write */
+		/** @copydoc DataStream::write */
 		size_t write(const void* buf, size_t count) override;
 
-        /** @copydoc DataStream::skip */
+		/** @copydoc DataStream::skip */
 		void skip(size_t count) override;
 	
-        /** @copydoc DataStream::seek */
+		/** @copydoc DataStream::seek */
 		void seek(size_t pos) override;
 		
-        /** @copydoc DataStream::tell */
+		/** @copydoc DataStream::tell */
 		size_t tell() const override;
 
-        /** @copydoc DataStream::eof */
+		/** @copydoc DataStream::eof */
 		bool eof() const override;
 
 		/** @copydoc DataStream::clone */
 		SPtr<DataStream> clone(bool copyData = true) const override;
 
-        /** @copydoc DataStream::close */
+		/** @copydoc DataStream::close */
 		void close() override;
 
 	protected:
@@ -246,28 +247,28 @@ namespace bs
 
 		bool isFile() const override { return true; }
 
-        /** @copydoc DataStream::read */
+		/** @copydoc DataStream::read */
 		size_t read(void* buf, size_t count) override;
 
-        /** @copydoc DataStream::write */
+		/** @copydoc DataStream::write */
 		size_t write(const void* buf, size_t count) override;
 
-        /** @copydoc DataStream::skip */
+		/** @copydoc DataStream::skip */
 		void skip(size_t count) override;
 	
-        /** @copydoc DataStream::seek */
+		/** @copydoc DataStream::seek */
 		void seek(size_t pos) override;
 
-        /** @copydoc DataStream::tell */
+		/** @copydoc DataStream::tell */
 		size_t tell() const override;
 
-        /** @copydoc DataStream::eof */
+		/** @copydoc DataStream::eof */
 		bool eof() const override;
 
 		/** @copydoc DataStream::clone */
 		SPtr<DataStream> clone(bool copyData = true) const override;
 
-        /** @copydoc DataStream::close */
+		/** @copydoc DataStream::close */
 		void close() override;
 
 		/** Returns the path of the file opened by the stream. */

+ 285 - 273
Source/BansheeUtility/String/BsString.h

@@ -21,18 +21,30 @@ namespace bs
 	template <typename T>
 	using BasicStringStream = std::basic_stringstream < T, std::char_traits<T>, StdAlloc<T> > ;
 
-	/** Wide string used primarily for handling Unicode text. */
+	/** Wide string used primarily for handling Unicode text (UTF-32 on Linux, UTF-16 on Windows, generally). */
 	typedef BasicString<wchar_t> WString;
 
-	/** Narrow string used primarily for handling ASCII text. */
+	/** Narrow string used for handling narrow encoded text (either locale specific ANSI or UTF-8). */
 	typedef BasicString<char> String;
 
-	/** Wide string stream used for primarily for constructing strings consisting of Unicode text. */
+	/** String used for UTF-16 encoded text. */
+	typedef BasicString<char16_t> U16String;
+
+	/** String used for UTF-32 encoded text. */
+	typedef BasicString<char32_t> U32String;
+
+	/** Wide string stream used primarily for constructing wide strings. */
 	typedef BasicStringStream<wchar_t> WStringStream;
 
-	/** Wide string stream used for primarily for constructing strings consisting of ASCII text. */
+	/** Narrow string stream used primarily for constructing narrow strings. */
 	typedef BasicStringStream<char> StringStream;
 
+	/** String stream used primarily for constructing UTF-16 strings. */
+	typedef BasicStringStream<char16_t> U16StringStream;
+
+	/** String stream used primarily for constructing UTF-32 strings. */
+	typedef BasicStringStream<char32_t> U32StringStream;
+
 	/** Equivalent to String, except it avoids any dynamic allocations until the number of elements exceeds @p Count. */
 	template <int Count> 
 	using SmallString = std::basic_string <char, std::char_traits<char>, StdAlloc<char>>; // TODO: Currently equivalent to String, need to implement the allocator
@@ -48,21 +60,21 @@ namespace bs
 	 *  @{
 	 */
 
-    /** Utility class for manipulating Strings. */
-    class BS_UTILITY_EXPORT StringUtil
-    {
+	/** Utility class for manipulating Strings. */
+	class BS_UTILITY_EXPORT StringUtil
+	{
 	public:
-        /** Removes any whitespace characters from beginning or end of the string. */
-        static void trim(String& str, bool left = true, bool right = true);
+		/** Removes any whitespace characters from beginning or end of the string. */
+		static void trim(String& str, bool left = true, bool right = true);
 
-        /** @copydoc StringUtil::trim(String&, bool, bool) */
-        static void trim(WString& str, bool left = true, bool right = true);
+		/** @copydoc StringUtil::trim(String&, bool, bool) */
+		static void trim(WString& str, bool left = true, bool right = true);
 
-        /**	Removes specified characters from beginning or end of the string. */
-        static void trim(String& str, const String& delims, bool left = true, bool right = true);
+		/**	Removes specified characters from beginning or end of the string. */
+		static void trim(String& str, const String& delims, bool left = true, bool right = true);
 
 		/** @copydoc StringUtil::trim(String&, const String&, bool, bool) */
-        static void trim(WString& str, const WString& delims, bool left = true, bool right = true);
+		static void trim(WString& str, const WString& delims, bool left = true, bool right = true);
 
 		/**
 		 * Returns a vector of strings containing all the substrings delimited by the provided delimiter characters.
@@ -96,56 +108,56 @@ namespace bs
 		/** @copydoc StringUtil::tokenise(const String&, const String&, const String&, unsigned int) */
 		static Vector<WString> tokenise(const WString& str, const WString& delims = L"\t\n ", const WString& doubleDelims = L"\"", unsigned int maxSplits = 0);
 
-        /** Converts all the characters in the string to lower case. */
-        static void toLowerCase(String& str);
+		/** Converts all the characters in the string to lower case. */
+		static void toLowerCase(String& str);
 
-        /** Converts all the characters in the string to lower case. */
-        static void toLowerCase(WString& str);
+		/** Converts all the characters in the string to lower case. */
+		static void toLowerCase(WString& str);
 
-        /** Converts all the characters in the string to upper case. */
-        static void toUpperCase(String& str);
+		/** Converts all the characters in the string to upper case. */
+		static void toUpperCase(String& str);
 
-        /**	Converts all the characters in the string to upper case. */
-        static void toUpperCase(WString& str);
+		/**	Converts all the characters in the string to upper case. */
+		static void toUpperCase(WString& str);
 
-        /**
-         * Returns whether the string begins with the pattern passed in.
-         *
-         * @param[in]	str		 	String to compare.
-         * @param[in]	pattern		Pattern to compare with.
+		/**
+		 * Returns whether the string begins with the pattern passed in.
+		 *
+		 * @param[in]	str		 	String to compare.
+		 * @param[in]	pattern		Pattern to compare with.
 		 * @param[in]	lowerCase	(optional) If true, the start of the string will be lower cased before comparison, and 
 		 *							the pattern should also be in lower case.
-         */
-        static bool startsWith(const String& str, const String& pattern, bool lowerCase = true);
+		 */
+		static bool startsWith(const String& str, const String& pattern, bool lowerCase = true);
 
-        /** @copydoc startsWith(const String&, const String&, bool) */
-        static bool startsWith(const WString& str, const WString& pattern, bool lowerCase = true);
+		/** @copydoc startsWith(const String&, const String&, bool) */
+		static bool startsWith(const WString& str, const WString& pattern, bool lowerCase = true);
 
-        /**
-         * Returns whether the string end with the pattern passed in.
-         *
-         * @param[in]	str		 	String to compare.
-         * @param[in]	pattern		Pattern to compare with.
+		/**
+		 * Returns whether the string end with the pattern passed in.
+		 *
+		 * @param[in]	str		 	String to compare.
+		 * @param[in]	pattern		Pattern to compare with.
 		 * @param[in]	lowerCase	(optional) If true, the start of the string will be lower cased before comparison, and 
 		 *							the pattern should also be in lower case.
-         */
-        static bool endsWith(const String& str, const String& pattern, bool lowerCase = true);
+		 */
+		static bool endsWith(const String& str, const String& pattern, bool lowerCase = true);
 
-        /** @copydoc endsWith(const String&, const String&, bool) */
-        static bool endsWith(const WString& str, const WString& pattern, bool lowerCase = true);
+		/** @copydoc endsWith(const String&, const String&, bool) */
+		static bool endsWith(const WString& str, const WString& pattern, bool lowerCase = true);
 
-        /**
-         * Returns true if the string matches the provided pattern. Pattern may use a "*" wildcard for matching any 
+		/**
+		 * Returns true if the string matches the provided pattern. Pattern may use a "*" wildcard for matching any 
 		 * characters.
-         *
-         * @param[in]	str			 	The string to test.
-         * @param[in]	pattern		 	Patterns to look for.
-         * @param[in]	caseSensitive	(optional) Should the match be case sensitive or not.
-         */
-        static bool match(const String& str, const String& pattern, bool caseSensitive = true);
+		 *
+		 * @param[in]	str			 	The string to test.
+		 * @param[in]	pattern		 	Patterns to look for.
+		 * @param[in]	caseSensitive	(optional) Should the match be case sensitive or not.
+		 */
+		static bool match(const String& str, const String& pattern, bool caseSensitive = true);
 
 		/** @copydoc match(const String&, const String&, bool) */
-        static bool match(const WString& str, const WString& pattern, bool caseSensitive = true);
+		static bool match(const WString& str, const WString& pattern, bool caseSensitive = true);
 
 		/**
 		 * Replace all instances of a substring with a another substring.
@@ -200,7 +212,7 @@ namespace bs
 		}
 
 		/** Constant blank string, useful for returning by ref where local does not exist. */
-        static const String BLANK;
+		static const String BLANK;
 
 		/**	Constant blank wide string, useful for returning by ref where local does not exist. */
 		static const WString WBLANK;
@@ -428,7 +440,7 @@ namespace bs
 			}
 			return result;
 		}
-    };
+	};
 
 	/** Converts a narrow string to a wide string. */
 	BS_UTILITY_EXPORT WString toWString(const String& source);
@@ -437,9 +449,9 @@ namespace bs
 	BS_UTILITY_EXPORT WString toWString(const char* source);
 
 	/** Converts a float to a string. */
-    BS_UTILITY_EXPORT WString toWString(float val, unsigned short precision = 6, 
-        unsigned short width = 0, char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT WString toWString(float val, unsigned short precision = 6, 
+		unsigned short width = 0, char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/** Converts a double to a string. */
 	BS_UTILITY_EXPORT WString toWString(double val, unsigned short precision = 6, 
@@ -447,24 +459,24 @@ namespace bs
 		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/** Converts a Radian to a string. */
-    BS_UTILITY_EXPORT WString toWString(Radian val, unsigned short precision = 6, 
-        unsigned short width = 0, char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT WString toWString(Radian val, unsigned short precision = 6, 
+		unsigned short width = 0, char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/** Converts a Degree to a string. */
-    BS_UTILITY_EXPORT WString toWString(Degree val, unsigned short precision = 6, 
-        unsigned short width = 0, char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT WString toWString(Degree val, unsigned short precision = 6, 
+		unsigned short width = 0, char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts an int to a string. */
-    BS_UTILITY_EXPORT WString toWString(int val, unsigned short width = 0, 
-        char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT WString toWString(int val, unsigned short width = 0, 
+		char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts an unsigned int to a string. */
-    BS_UTILITY_EXPORT WString toWString(unsigned int val, 
-        unsigned short width = 0, char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT WString toWString(unsigned int val, 
+		unsigned short width = 0, char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts an 64bit integer to a string. */
 	BS_UTILITY_EXPORT WString toWString(INT64 val, 
@@ -486,72 +498,72 @@ namespace bs
 		unsigned short width = 0, char fill = ' ',
 		std::ios::fmtflags flags = std::ios::fmtflags(0));
 
-    /**
-     * Converts a boolean to a string.
-     *
-     * @param[in]	val  	Value to convert.
-     * @param[in]	yesNo	(optional) If set to true, result is "yes" or "no" instead of "true" or "false".
-     */
-    BS_UTILITY_EXPORT WString toWString(bool val, bool yesNo = false);
-
-    /**
-     * Converts a 2 dimensional vector to a string.
-     * 			
+	/**
+	 * Converts a boolean to a string.
+	 *
+	 * @param[in]	val  	Value to convert.
+	 * @param[in]	yesNo	(optional) If set to true, result is "yes" or "no" instead of "true" or "false".
+	 */
+	BS_UTILITY_EXPORT WString toWString(bool val, bool yesNo = false);
+
+	/**
+	 * Converts a 2 dimensional vector to a string.
+	 * 			
 	 * @note	Format is "x y".
-     */
-    BS_UTILITY_EXPORT WString toWString(const Vector2& val);
+	 */
+	BS_UTILITY_EXPORT WString toWString(const Vector2& val);
 
-    /**
-     * Converts a 2 dimensional integer vector to a string.
-     * 			
+	/**
+	 * Converts a 2 dimensional integer vector to a string.
+	 * 			
 	 * @note	Format is "x y".
-     */
-    BS_UTILITY_EXPORT WString toWString(const Vector2I& val);
+	 */
+	BS_UTILITY_EXPORT WString toWString(const Vector2I& val);
 
-    /**
-     * Converts a 3 dimensional vector to a string.
-     * 			
+	/**
+	 * Converts a 3 dimensional vector to a string.
+	 * 			
 	 * @note	Format is "x y z".
-     */
-    BS_UTILITY_EXPORT WString toWString(const Vector3& val);
+	 */
+	BS_UTILITY_EXPORT WString toWString(const Vector3& val);
 
-    /**
-     * Converts a 4 dimensional vector to a string.
-     * 			
+	/**
+	 * Converts a 4 dimensional vector to a string.
+	 * 			
 	 * @note	Format is "x y z w".
-     */
-    BS_UTILITY_EXPORT WString toWString(const Vector4& val);
+	 */
+	BS_UTILITY_EXPORT WString toWString(const Vector4& val);
 
-    /**
-     * Converts a 3x3 matrix to a string.
-     * 			
+	/**
+	 * Converts a 3x3 matrix to a string.
+	 * 			
 	 * @note	Format is "00 01 02 10 11 12 20 21 22".
-     */
-    BS_UTILITY_EXPORT WString toWString(const Matrix3& val);
+	 */
+	BS_UTILITY_EXPORT WString toWString(const Matrix3& val);
 
-    /**
-     * Converts a 4x4 matrix to a string.
-     * 			
+	/**
+	 * Converts a 4x4 matrix to a string.
+	 * 			
 	 * @note	Format is "00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33".
-     */
-    BS_UTILITY_EXPORT WString toWString(const Matrix4& val);
+	 */
+	BS_UTILITY_EXPORT WString toWString(const Matrix4& val);
 
-    /**
-     * Converts a Quaternion to a string.
-     * 			
+	/**
+	 * Converts a Quaternion to a string.
+	 * 			
 	 * @note	Format is "w x y z".
-     */
-    BS_UTILITY_EXPORT WString toWString(const Quaternion& val);
+	 */
+	BS_UTILITY_EXPORT WString toWString(const Quaternion& val);
 
-    /**
-     * Converts a color to a string.
-     * 			
+	/**
+	 * Converts a color to a string.
+	 * 			
 	 * @note	Format is "r g b a".
-     */
-    BS_UTILITY_EXPORT WString toWString(const Color& val);
+	 */
+	BS_UTILITY_EXPORT WString toWString(const Color& val);
 
-    /** Converts a vector of strings into a single string where the substrings are delimited by spaces. */
-    BS_UTILITY_EXPORT WString toWString(const Vector<bs::WString>& val);
+	/** Converts a vector of strings into a single string where the substrings are delimited by spaces. */
+	BS_UTILITY_EXPORT WString toWString(const Vector<bs::WString>& val);
 
 	/** Converts a wide string to a narrow string. */
 	BS_UTILITY_EXPORT String toString(const WString& source);
@@ -560,9 +572,9 @@ namespace bs
 	BS_UTILITY_EXPORT String toString(const wchar_t* source);
 
 	/**	Converts a float to a string. */
-    BS_UTILITY_EXPORT String toString(float val, unsigned short precision = 6, 
-        unsigned short width = 0, char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT String toString(float val, unsigned short precision = 6, 
+		unsigned short width = 0, char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts a double to a string. */
 	BS_UTILITY_EXPORT String toString(double val, unsigned short precision = 6, 
@@ -570,24 +582,24 @@ namespace bs
 		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts a Radian to a string. */
-    BS_UTILITY_EXPORT String toString(Radian val, unsigned short precision = 6, 
-        unsigned short width = 0, char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT String toString(Radian val, unsigned short precision = 6, 
+		unsigned short width = 0, char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts a Degree to a string. */
-    BS_UTILITY_EXPORT String toString(Degree val, unsigned short precision = 6, 
-        unsigned short width = 0, char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT String toString(Degree val, unsigned short precision = 6, 
+		unsigned short width = 0, char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts an int to a string. */
-    BS_UTILITY_EXPORT String toString(int val, unsigned short width = 0, 
-        char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT String toString(int val, unsigned short width = 0, 
+		char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts an unsigned int to a string. */
-    BS_UTILITY_EXPORT String toString(unsigned int val, 
-        unsigned short width = 0, char fill = ' ', 
-        std::ios::fmtflags flags = std::ios::fmtflags(0) );
+	BS_UTILITY_EXPORT String toString(unsigned int val, 
+		unsigned short width = 0, char fill = ' ', 
+		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
 	/**	Converts a 64bit int to a string. */
 	BS_UTILITY_EXPORT String toString(INT64 val, 
@@ -599,168 +611,168 @@ namespace bs
 		unsigned short width = 0, char fill = ' ', 
 		std::ios::fmtflags flags = std::ios::fmtflags(0) );
 
-    /**
-     * Converts a boolean to a string.
-     *
-     * @param[in]	val  	true to value.
-     * @param[in]	yesNo	(optional) If set to true, result is "yes" or "no" instead of "true" or "false".
-     */
-    BS_UTILITY_EXPORT String toString(bool val, bool yesNo = false);
-
-    /**
-     * Converts a 2 dimensional vector to a string.
-     * 			
+	/**
+	 * Converts a boolean to a string.
+	 *
+	 * @param[in]	val  	true to value.
+	 * @param[in]	yesNo	(optional) If set to true, result is "yes" or "no" instead of "true" or "false".
+	 */
+	BS_UTILITY_EXPORT String toString(bool val, bool yesNo = false);
+
+	/**
+	 * Converts a 2 dimensional vector to a string.
+	 * 			
 	 * @note	Format is "x y".
-     */
-    BS_UTILITY_EXPORT String toString(const Vector2& val);
+	 */
+	BS_UTILITY_EXPORT String toString(const Vector2& val);
 
-    /**
-     * Converts a 2 dimensional integer vector to a string.
-     * 			
+	/**
+	 * Converts a 2 dimensional integer vector to a string.
+	 * 			
 	 * @note	Format is "x y".
-     */
-    BS_UTILITY_EXPORT String toString(const Vector2I& val);
+	 */
+	BS_UTILITY_EXPORT String toString(const Vector2I& val);
 
-    /**
-     * Converts a 3 dimensional vector to a string.
-     * 			
+	/**
+	 * Converts a 3 dimensional vector to a string.
+	 * 			
 	 * @note	Format is "x y z".
-     */
-    BS_UTILITY_EXPORT String toString(const Vector3& val);
+	 */
+	BS_UTILITY_EXPORT String toString(const Vector3& val);
 
-    /**
-     * Converts a 4 dimensional vector to a string.
-     * 			
+	/**
+	 * Converts a 4 dimensional vector to a string.
+	 * 			
 	 * @note	Format is "x y z w".
-     */
-    BS_UTILITY_EXPORT String toString(const Vector4& val);
+	 */
+	BS_UTILITY_EXPORT String toString(const Vector4& val);
 
-    /**
-     * Converts a 3x3 matrix to a string.
-     * 			
+	/**
+	 * Converts a 3x3 matrix to a string.
+	 * 			
 	 * @note	Format is "00 01 02 10 11 12 20 21 22".
-     */
-    BS_UTILITY_EXPORT String toString(const Matrix3& val);
+	 */
+	BS_UTILITY_EXPORT String toString(const Matrix3& val);
 
-    /**
-     * Converts a 4x4 matrix to a string.
-     * 			
+	/**
+	 * Converts a 4x4 matrix to a string.
+	 * 			
 	 * @note	Format is "00 01 02 03 10 11 12 13 20 21 22 23 30 31 32 33".
-     */
-    BS_UTILITY_EXPORT String toString(const Matrix4& val);
+	 */
+	BS_UTILITY_EXPORT String toString(const Matrix4& val);
 
-    /**
-     * Converts a Quaternion to a string.
-     * 			
+	/**
+	 * Converts a Quaternion to a string.
+	 * 			
 	 * @note	Format is "w x y z".
-     */
-    BS_UTILITY_EXPORT String toString(const Quaternion& val);
+	 */
+	BS_UTILITY_EXPORT String toString(const Quaternion& val);
 
-    /**
-     * Converts a color to a string.
-     * 			
+	/**
+	 * Converts a color to a string.
+	 * 			
 	 * @note	Format is "r g b a".
-     */
-    BS_UTILITY_EXPORT String toString(const Color& val);
-
-    /**
-     * Converts a vector of strings into a single string where the substrings are delimited by spaces.
-     */
-    BS_UTILITY_EXPORT String toString(const Vector<bs::String>& val);
-
-    /**
-     * Converts a String to a float.
-     *
-     * @note	0.0f if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT float parseFloat(const String& val, float defaultValue = 0);
-
-    /**
-     * Converts a String to a whole number.
-     *
-     * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT INT32 parseINT32(const String& val, INT32 defaultValue = 0);
-
-    /**
-     * Converts a String to a whole number.
-     *
-     * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT UINT32 parseUINT32(const String& val, UINT32 defaultValue = 0);
-
-    /**
-     * Converts a String to a whole number.
-     *
-     * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT INT64 parseINT64(const String& val, INT64 defaultValue = 0);
-
-    /**
-     * Converts a String to a whole number.
-     *
-     * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT UINT64 parseUINT64(const String& val, UINT64 defaultValue = 0);
-
-    /**
-     * Converts a String to a boolean.
-     *
+	 */
+	BS_UTILITY_EXPORT String toString(const Color& val);
+
+	/**
+	 * Converts a vector of strings into a single string where the substrings are delimited by spaces.
+	 */
+	BS_UTILITY_EXPORT String toString(const Vector<bs::String>& val);
+
+	/**
+	 * Converts a String to a float.
+	 *
+	 * @note	0.0f if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT float parseFloat(const String& val, float defaultValue = 0);
+
+	/**
+	 * Converts a String to a whole number.
+	 *
+	 * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT INT32 parseINT32(const String& val, INT32 defaultValue = 0);
+
+	/**
+	 * Converts a String to a whole number.
+	 *
+	 * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT UINT32 parseUINT32(const String& val, UINT32 defaultValue = 0);
+
+	/**
+	 * Converts a String to a whole number.
+	 *
+	 * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT INT64 parseINT64(const String& val, INT64 defaultValue = 0);
+
+	/**
+	 * Converts a String to a whole number.
+	 *
+	 * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT UINT64 parseUINT64(const String& val, UINT64 defaultValue = 0);
+
+	/**
+	 * Converts a String to a boolean.
+	 *
 	 * @note	Returns true if case-insensitive match of the start of the string matches "true", "yes" or "1", 
 	 *			false otherwise.
-     */
-    BS_UTILITY_EXPORT bool parseBool(const String& val, bool defaultValue = 0);
-
-    /** Checks the String is a valid number value. */
-    BS_UTILITY_EXPORT bool isNumber(const String& val);
-
-    /**
-     * Converts a WString to a float.
-     *
-     * @note	0.0f if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT float parseFloat(const WString& val, float defaultValue = 0);
-
-    /**
-     * Converts a WString to a whole number.
-     *
-     * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT INT32 parseINT32(const WString& val, INT32 defaultValue = 0);
-
-    /**
-     * Converts a WString to a whole number.
-     *
-     * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT UINT32 parseUINT32(const WString& val, UINT32 defaultValue = 0);
-
-    /**
-     * Converts a WString to a whole number.
-     *
-     * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT INT64 parseINT64(const WString& val, INT64 defaultValue = 0);
-
-    /**
-     * Converts a WString to a whole number.
-     *
-     * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
-     */
-    BS_UTILITY_EXPORT UINT64 parseUINT64(const WString& val, UINT64 defaultValue = 0);
-
-    /**
-     * Converts a WString to a boolean.
-     *
+	 */
+	BS_UTILITY_EXPORT bool parseBool(const String& val, bool defaultValue = 0);
+
+	/** Checks the String is a valid number value. */
+	BS_UTILITY_EXPORT bool isNumber(const String& val);
+
+	/**
+	 * Converts a WString to a float.
+	 *
+	 * @note	0.0f if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT float parseFloat(const WString& val, float defaultValue = 0);
+
+	/**
+	 * Converts a WString to a whole number.
+	 *
+	 * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT INT32 parseINT32(const WString& val, INT32 defaultValue = 0);
+
+	/**
+	 * Converts a WString to a whole number.
+	 *
+	 * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT UINT32 parseUINT32(const WString& val, UINT32 defaultValue = 0);
+
+	/**
+	 * Converts a WString to a whole number.
+	 *
+	 * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT INT64 parseINT64(const WString& val, INT64 defaultValue = 0);
+
+	/**
+	 * Converts a WString to a whole number.
+	 *
+	 * @note	0 if the value could not be parsed, otherwise the numeric version of the string.
+	 */
+	BS_UTILITY_EXPORT UINT64 parseUINT64(const WString& val, UINT64 defaultValue = 0);
+
+	/**
+	 * Converts a WString to a boolean.
+	 *
 	 * @note	Returns true if case-insensitive match of the start of the string
 	 *			matches "true", "yes" or "1", false otherwise.
-     */
-    BS_UTILITY_EXPORT bool parseBool(const WString& val, bool defaultValue = 0);
+	 */
+	BS_UTILITY_EXPORT bool parseBool(const WString& val, bool defaultValue = 0);
 
-    /**
-     * Checks the WString is a valid number value.
-     */
-    BS_UTILITY_EXPORT bool isNumber(const WString& val);
+	/**
+	 * Checks the WString is a valid number value.
+	 */
+	BS_UTILITY_EXPORT bool isNumber(const WString& val);
 
 	/** @name Internal 
 	 *  @{

+ 394 - 0
Source/BansheeUtility/String/BsUnicode.cpp

@@ -0,0 +1,394 @@
+//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
+//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
+#include "BsUnicode.h"
+
+namespace bs
+{
+	/**
+	 * Decodes a single (possibly multibyte) UTF-8 encoded character into an UTF-32 character.
+	 *
+	 * @param[in]	begin		Iterator pointing to the first byte of the character. Must be a random access iterator.
+	 * @param[in]	end			Iterator pointing past the last available byte.
+	 * @param[out]	output		Decoded character, or @p invalidChar if fewer bytes are available than the leading byte
+	 *							calls for. Not modified if the input range is empty.
+	 * @param[in]	invalidChar	Value to output when the character cannot be decoded.
+	 * @return					Iterator pointing past the decoded character, or @p end if the character was truncated.
+	 *
+	 * @note	Only the leading byte is inspected to determine the length, continuation bytes are not validated.
+	 */
+	template<typename T>
+	T UTF8To32(T begin, T end, char32_t& output, char32_t invalidChar = 0)
+	{
+		// Nothing to parse
+		if (begin >= end)
+			return begin;
+
+		// Determine the number of bytes used by the character, based on the leading byte
+		UINT32 numBytes;
+
+		UINT8 firstByte = (UINT8)*begin;
+		if (firstByte < 192)
+			numBytes = 1;
+		else if (firstByte < 224)
+			numBytes = 2;
+		else if (firstByte < 240)
+			numBytes = 3;
+		else if (firstByte < 248)
+			numBytes = 4;
+		else if (firstByte < 252)
+			numBytes = 5;
+		else // < 256
+			numBytes = 6;
+
+		// Not enough bytes were provided, invalid character. The remaining distance is compared instead of advancing
+		// the iterator, because forming an iterator past 'end' is undefined (and asserts with checked iterators).
+		if ((size_t)(end - begin) < numBytes)
+		{
+			output = invalidChar;
+			return end;
+		}
+
+		// Decode the character. Cases intentionally fall through so that exactly 'numBytes' bytes are accumulated.
+		output = 0;
+		switch(numBytes)
+		{
+		case 6: output += (UINT8)(*begin); ++begin; output <<= 6; // Fall through
+		case 5: output += (UINT8)(*begin); ++begin; output <<= 6; // Fall through
+		case 4: output += (UINT8)(*begin); ++begin; output <<= 6; // Fall through
+		case 3: output += (UINT8)(*begin); ++begin; output <<= 6; // Fall through
+		case 2: output += (UINT8)(*begin); ++begin; output <<= 6; // Fall through
+		case 1: output += (UINT8)(*begin); ++begin;
+		default: break;
+		}
+
+		// Subtract the accumulated header/continuation marker bits, leaving just the character value
+		constexpr UINT32 offsets[6] = { 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };
+		output -= offsets[numBytes - 1];
+
+		return begin;
+	}	
+
+	/**
+	 * Encodes an UTF-32 character as an (possibly multibyte) UTF-8 character.
+	 *
+	 * @param[in]	input		Character to encode.
+	 * @param[in]	output		Output iterator the encoded bytes are written to.
+	 * @param[in]	maxElems	Number of elements that may be written to @p output. Nothing is written if zero.
+	 * @param[in]	invalidChar	Single byte written in place of the character if @p input is not a valid character
+	 *							(larger than 0x10FFFF or a surrogate), or if its encoding doesn't fit in @p maxElems.
+	 * @return					Iterator pointing past the last written element.
+	 */
+	template<typename T>
+	T UTF32To8(char32_t input, T output, UINT32 maxElems, char invalidChar = 0)
+	{
+		// No place to write the character
+		if (maxElems == 0)
+			return output;
+
+		// Check if character is valid. The whole surrogate range (leading and trailing) is reserved for UTF-16 and
+		// must never be encoded as UTF-8.
+		if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDFFF)))
+		{
+			*output = invalidChar;
+			++output;
+
+			return output;
+		}
+
+		// Determine the number of bytes used by the character
+		UINT32 numBytes;
+		if (input <  0x80)
+			numBytes = 1;
+		else if (input < 0x800)
+			numBytes = 2;
+		else if (input < 0x10000) 
+			numBytes = 3;
+		else // <= 0x0010FFFF 
+			numBytes = 4;
+
+		// Check if we have enough space
+		if(numBytes > maxElems)
+		{
+			*output = invalidChar;
+			++output;
+
+			return output;
+		}
+
+		// Encode the character. Header bits for the leading byte are indexed by the total byte count.
+		constexpr UINT8 headers[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+		// Bytes are produced last-to-first, cases intentionally fall through
+		char bytes[4];
+		switch (numBytes)
+		{
+			case 4: bytes[3] = (char)((input | 0x80) & 0xBF); input >>= 6; // Fall through
+			case 3: bytes[2] = (char)((input | 0x80) & 0xBF); input >>= 6; // Fall through
+			case 2: bytes[1] = (char)((input | 0x80) & 0xBF); input >>= 6; // Fall through
+			case 1: bytes[0] = (char)(input | headers[numBytes]);
+			default: break;
+		}
+
+		output = std::copy(bytes, bytes + numBytes, output);
+		return output;
+	}
+
+	/**
+	 * Decodes a single UTF-16 encoded character (one element, or a surrogate pair) into an UTF-32 character.
+	 *
+	 * @param[in]	begin		Iterator pointing to the first element of the character.
+	 * @param[in]	end			Iterator pointing past the last available element.
+	 * @param[out]	output		Decoded character, or @p invalidChar if the input is an unpaired surrogate. Not modified
+	 *							if the input range is empty.
+	 * @param[in]	invalidChar	Value to output when the character cannot be decoded.
+	 * @return					Iterator pointing past the consumed elements.
+	 */
+	template<typename T>
+	T UTF16To32(T begin, T end, char32_t& output, char32_t invalidChar = 0)
+	{
+		// Nothing to parse
+		if (begin >= end)
+			return begin;
+
+		char16_t firstElem = (char16_t)*begin;
+		++begin;
+
+		// Trailing surrogate without a preceding leading surrogate, not a valid character on its own
+		if ((firstElem >= 0xDC00) && (firstElem <= 0xDFFF))
+		{
+			output = invalidChar;
+			return begin;
+		}
+
+		// Not a surrogate, the element is the character
+		if ((firstElem < 0xD800) || (firstElem > 0xDBFF))
+		{
+			output = (char32_t)firstElem;
+			return begin;
+		}
+
+		// Leading surrogate with nothing following it
+		if (begin >= end)
+		{
+			output = invalidChar;
+			return end;
+		}
+
+		char32_t secondElem = (char32_t)*begin;
+		if ((secondElem >= 0xDC00) && (secondElem <= 0xDFFF))
+		{
+			++begin;
+			output = (char32_t)(((firstElem - 0xD800) << 10) + (secondElem - 0xDC00) + 0x0010000);
+		}
+		else
+		{
+			// Invalid pair. The second element is left unconsumed so it gets decoded on its own by the next call,
+			// instead of being silently dropped.
+			output = invalidChar;
+		}
+
+		return begin;
+	}	
+
+	/**
+	 * Encodes an UTF-32 character as an UTF-16 character (a single element, or a surrogate pair).
+	 *
+	 * @param[in]	input		Character to encode.
+	 * @param[in]	output		Output iterator the encoded elements are written to.
+	 * @param[in]	maxElems	Number of elements that may be written to @p output. Nothing is written if zero.
+	 * @param[in]	invalidChar	Single element written in place of the character if @p input is larger than 0x10FFFF,
+	 *							lies in the surrogate range, or requires a pair while only one element is available.
+	 * @return					Iterator pointing past the last written element.
+	 */
+	template<typename T>
+	T UTF32To16(char32_t input, T output, UINT32 maxElems, char16_t invalidChar = 0)
+	{
+		// No room for even a single element
+		if (maxElems == 0)
+			return output;
+
+		const bool outOfRange = input > 0x0010FFFF;
+		const bool isSurrogate = (input >= 0xD800) && (input <= 0xDFFF);
+		const bool needsPair = input > 0xFFFF;
+
+		// All failure cases emit exactly one replacement element
+		if (outOfRange || isSurrogate || (needsPair && (maxElems < 2)))
+		{
+			*output = invalidChar;
+			++output;
+
+			return output;
+		}
+
+		// Fits in a single element
+		if (!needsPair)
+		{
+			*output = (char16_t)input;
+			++output;
+
+			return output;
+		}
+
+		// Split the 20 bits above 0x10000 into a leading and a trailing surrogate
+		const char32_t offset = input - 0x0010000;
+
+		*output = (char16_t)((offset >> 10) + 0xD800);
+		++output;
+
+		*output = (char16_t)((offset & 0x3FFUL) + 0xDC00);
+		++output;
+
+		return output;
+	}
+
+	/**
+	 * Decodes a single character from a system-specific wide encoding into an UTF-32 character. The encoding is
+	 * assumed to be UTF-32 if wchar_t is four bytes in size, and UTF-16 otherwise.
+	 *
+	 * @param[in]	begin		Iterator pointing to the first element of the character.
+	 * @param[in]	end			Iterator pointing past the last available element.
+	 * @param[out]	output		Decoded character. Not modified if the input range is empty.
+	 * @param[in]	invalidChar	Value to output when an UTF-16 character cannot be decoded.
+	 * @return					Iterator pointing past the consumed elements.
+	 */
+	template<typename T>
+	T wideToUTF32(T begin, T end, char32_t& output, char32_t invalidChar = 0)
+	{
+		// Nothing to parse. Checked here so the UTF-32 path never dereferences 'end'.
+		if (begin >= end)
+			return begin;
+
+		if (sizeof(wchar_t) == 4) // Assuming UTF-32 (i.e. Unix)
+		{
+			output = (char32_t)*begin;
+			++begin;
+
+			return begin;
+		}
+		else // Assuming UTF-16 (i.e. Windows)
+			return UTF16To32(begin, end, output, invalidChar);
+	}
+
+	/**
+	 * Converts a single ANSI character, interpreted according to @p locale, into an UTF-32 character. The character
+	 * is first widened using the locale's wide character facet, and the result is then decoded as a wide character.
+	 */
+	char32_t ANSIToUTF32(char input, const std::locale& locale = std::locale())
+	{
+		// Note: Not exactly valid on Windows, since the input character could require a surrogate pair.
+		//       Consider improving this if it ever becomes an issue.
+		wchar_t widened = std::use_facet<std::ctype<wchar_t>>(locale).widen(input);
+
+		// Decode the one-element range holding the widened character
+		char32_t decoded;
+		wideToUTF32(&widened, &widened + 1, decoded);
+
+		return decoded;
+	}
+
+	/**
+	 * Encodes an UTF-32 character into a system-specific wide encoding. The encoding is assumed to be UTF-32 if
+	 * wchar_t is four bytes in size (the character is written as-is), and UTF-16 otherwise.
+	 *
+	 * @param[in]	input		Character to encode.
+	 * @param[in]	output		Output iterator the encoded elements are written to.
+	 * @param[in]	maxElems	Number of elements that may be written. Only used by the UTF-16 path.
+	 * @param[in]	invalidChar	Element written if the character cannot be encoded. Only used by the UTF-16 path.
+	 * @return					Iterator pointing past the last written element.
+	 */
+	template<typename T>
+	T UTF32ToWide(char32_t input, T output, UINT32 maxElems, wchar_t invalidChar = 0)
+	{
+		// Assuming UTF-16 (i.e. Windows)
+		if (sizeof(wchar_t) != 4)
+			return UTF32To16(input, output, maxElems, invalidChar);
+
+		// Assuming UTF-32 (i.e. Unix), a plain copy is enough
+		*output = (wchar_t)input;
+		++output;
+
+		return output;
+	}
+
+	/**
+	 * Converts an UTF-32 character into an ANSI character in the code page determined by @p locale.
+	 *
+	 * @param[in]	input		Character to convert.
+	 * @param[in]	invalidChar	Character returned if @p input cannot be represented.
+	 * @param[in]	locale		Locale whose wide character facet performs the narrowing.
+	 * @return					Narrowed character, or @p invalidChar.
+	 */
+	char UTF32ToANSI(char32_t input, char invalidChar = 0, const std::locale& locale = std::locale())
+	{
+		// narrow() accepts a single wchar_t, so when wchar_t is 16-bit a character outside of the basic multilingual
+		// plane cannot be passed to it. Casting would silently truncate it into an unrelated character.
+		if ((sizeof(wchar_t) == 2) && (input > 0xFFFF))
+			return invalidChar;
+
+		const std::ctype<wchar_t>& facet = std::use_facet<std::ctype<wchar_t>>(locale);
+
+		return facet.narrow((wchar_t)input, invalidChar);
+	}
+
+	String UTF8::fromANSI(const String& input, const std::locale& locale)
+	{
+		String output;
+		output.reserve(input.size());
+
+		// Each ANSI character is widened to UTF-32 and then appended as (up to four bytes of) UTF-8
+		for (char ansiChar : input)
+			UTF32To8(ANSIToUTF32(ansiChar, locale), std::back_inserter(output), 4);
+
+		return output;
+	}
+
+	String UTF8::toANSI(const String& input, const std::locale& locale, char invalidChar)
+	{
+		String output;
+
+		// Decode one UTF-8 character at a time and narrow it into the locale's code page
+		const auto inputEnd = input.end();
+		for (auto cursor = input.begin(); cursor != inputEnd; )
+		{
+			char32_t decoded;
+			cursor = UTF8To32(cursor, inputEnd, decoded, invalidChar);
+
+			output.push_back(UTF32ToANSI(decoded, invalidChar, locale));
+		}
+
+		return output;
+	}
+
+	String UTF8::fromWide(const WString& input)
+	{
+		String output;
+		output.reserve(input.size());
+
+		// Decode one wide character at a time and append its UTF-8 encoding (up to four bytes)
+		const auto inputEnd = input.end();
+		for (auto cursor = input.begin(); cursor != inputEnd; )
+		{
+			char32_t decoded;
+			cursor = wideToUTF32(cursor, inputEnd, decoded);
+
+			UTF32To8(decoded, std::back_inserter(output), 4);
+		}
+
+		return output;
+	}
+
+	WString UTF8::toWide(const String& input) 
+	{
+		WString output;
+
+		// Decode one UTF-8 character at a time and append its wide encoding (up to two elements)
+		const auto inputEnd = input.end();
+		for (auto cursor = input.begin(); cursor != inputEnd; )
+		{
+			char32_t decoded;
+			cursor = UTF8To32(cursor, inputEnd, decoded);
+
+			UTF32ToWide(decoded, std::back_inserter(output), 2);
+		}
+
+		return output;
+	}
+
+	String UTF8::fromUTF16(const U16String& input)
+	{
+		String output;
+		output.reserve(input.size());
+
+		// Decode one UTF-16 character at a time and append its UTF-8 encoding (up to four bytes)
+		const auto inputEnd = input.end();
+		for (auto cursor = input.begin(); cursor != inputEnd; )
+		{
+			char32_t decoded;
+			cursor = UTF16To32(cursor, inputEnd, decoded);
+
+			UTF32To8(decoded, std::back_inserter(output), 4);
+		}
+
+		return output;
+	}
+
+	U16String UTF8::toUTF16(const String& input) 
+	{
+		U16String output;
+
+		// Decode one UTF-8 character at a time and append its UTF-16 encoding (up to two elements)
+		const auto inputEnd = input.end();
+		for (auto cursor = input.begin(); cursor != inputEnd; )
+		{
+			char32_t decoded;
+			cursor = UTF8To32(cursor, inputEnd, decoded);
+
+			UTF32To16(decoded, std::back_inserter(output), 2);
+		}
+
+		return output;
+	}
+
+	String UTF8::fromUTF32(const U32String& input)
+	{
+		String output;
+		output.reserve(input.size());
+
+		// Every element is already a full character, append its UTF-8 encoding (up to four bytes)
+		for (char32_t character : input)
+			UTF32To8(character, std::back_inserter(output), 4);
+
+		return output;
+	}
+
+	U32String UTF8::toUTF32(const String& input) 
+	{
+		U32String output;
+
+		// Decode one UTF-8 character at a time, each becomes a single output element
+		const auto inputEnd = input.end();
+		for (auto cursor = input.begin(); cursor != inputEnd; )
+		{
+			char32_t decoded;
+			cursor = UTF8To32(cursor, inputEnd, decoded);
+
+			output.push_back(decoded);
+		}
+
+		return output;
+	}
+}

+ 82 - 0
Source/BansheeUtility/String/BsUnicode.h

@@ -0,0 +1,82 @@
+//********************************** Banshee Engine (www.banshee3d.com) **************************************************//
+//**************** Copyright (c) 2016 Marko Pintera ([email protected]). All rights reserved. **********************//
+#pragma once
+
+#include "Prerequisites/BsPrerequisitesUtil.h"
+
+namespace bs
+{
+	/** Provides methods for converting between UTF-8 character encoding and other popular encodings. */
+	class BS_UTILITY_EXPORT UTF8
+	{
+	public:
+		/**
+		 * Converts from an ANSI encoding in the specified locale into UTF-8.
+		 * 
+		 * @param[in]	input	Narrow string encoded as ANSI characters. Characters are expected to be in the code page
+		 *						specified by @p locale.
+		 * @param[in]	locale	Locale that determines how the ANSI characters are interpreted.
+		 * @return				UTF-8 encoded string.
+		 */
+		static String fromANSI(const String& input, const std::locale& locale = std::locale());
+
+		/**
+		 * Converts from an UTF-8 encoding into ANSI encoding in the specified locale.
+		 * 
+		 * @param[in]	input		Narrow string encoded as UTF-8 characters.
+		 * @param[in]	locale		Locale that determines from which code page to generate the ANSI characters.
+		 * @param[in]	invalidChar	Character to output in place of any character that cannot be decoded from the
+		 *							input, or that cannot be represented in the target code page.
+		 * @return					ANSI encoded string in the specified locale.
+		 */
+		static String toANSI(const String& input, const std::locale& locale = std::locale(), char invalidChar = 0);
+
+		/**
+		 * Converts from a system-specific wide character encoding into UTF-8.
+		 * 
+		 * @param[in]	input	Wide string to convert. Actual encoding is system specific but can be assumed to be UTF-16
+		 *						on Windows and UTF-32 on Unix.
+		 * @return				UTF-8 encoded string.
+		 */
+		static String fromWide(const WString& input);
+
+		/**
+		 * Converts from an UTF-8 encoding into system-specific wide character encoding.
+		 * 
+		 * @param[in]	input	Narrow string encoded as UTF-8 characters.
+		 * @return				Wide string encoded in a system-specific manner. Actual encoding can be assumed to be UTF-16 
+		 *						on Windows and UTF-32 on Unix.
+		 */
+		static WString toWide(const String& input);
+
+		/**
+		 * Converts from an UTF-16 encoding into UTF-8.
+		 * 
+		 * @param[in]	input	String encoded as UTF-16.
+		 * @return				UTF-8 encoded string.
+		 */
+		static String fromUTF16(const U16String& input);
+
+		/**
+		 * Converts from an UTF-8 encoding into UTF-16.
+		 * 
+		 * @param[in]	input	String encoded as UTF-8.
+		 * @return				UTF-16 encoded string.
+		 */
+		static U16String toUTF16(const String& input);
+
+		/**
+		 * Converts from an UTF-32 encoding into UTF-8.
+		 * 
+		 * @param[in]	input	String encoded as UTF-32.
+		 * @return				UTF-8 encoded string.
+		 */
+		static String fromUTF32(const U32String& input);
+
+		/**
+		 * Converts from an UTF-8 encoding into UTF-32.
+		 * 
+		 * @param[in]	input	String encoded as UTF-8.
+		 * @return				UTF-32 encoded string.
+		 */
+		static U32String toUTF32(const String& input);
+	};
+}