Browse Source

Completed exhaustive testing of valid character codes.

David Piuva 5 years ago
parent
commit
a7b163141a
3 changed files with 180 additions and 67 deletions
  1. 43 50
      Source/DFPSR/base/text.cpp
  2. 15 6
      Source/DFPSR/base/text.h
  3. 122 11
      Source/test/tests/TextEncodingTest.cpp

+ 43 - 50
Source/DFPSR/base/text.cpp

@@ -343,7 +343,7 @@ using UTF32WriterFunction = std::function<void(DsrChar character)>;
 
 
 // Filter out unwanted characters for improved portability
 // Filter out unwanted characters for improved portability
 static void feedCharacter(const UTF32WriterFunction &reciever, DsrChar character) {
 static void feedCharacter(const UTF32WriterFunction &reciever, DsrChar character) {
-	if (character != U'\r') {
+	if (character != U'\0' && character != U'\r') {
 		reciever(character);
 		reciever(character);
 	}
 	}
 }
 }
@@ -352,30 +352,28 @@ static void feedCharacter(const UTF32WriterFunction &reciever, DsrChar character
 static void feedStringFromFileBuffer_Latin1(const UTF32WriterFunction &reciever, const uint8_t* buffer, int64_t fileLength) {
 static void feedStringFromFileBuffer_Latin1(const UTF32WriterFunction &reciever, const uint8_t* buffer, int64_t fileLength) {
 	for (int64_t i = 0; i < fileLength; i++) {
 	for (int64_t i = 0; i < fileLength; i++) {
 		DsrChar character = (DsrChar)(buffer[i]);
 		DsrChar character = (DsrChar)(buffer[i]);
-		if (character != U'\r') {
-			feedCharacter(reciever, character);
-		}
+		feedCharacter(reciever, character);
 	}
 	}
 }
 }
 // Appends the content of buffer as a BOM-free UTF-8 file into target
 // Appends the content of buffer as a BOM-free UTF-8 file into target
 static void feedStringFromFileBuffer_UTF8(const UTF32WriterFunction &reciever, const uint8_t* buffer, int64_t fileLength) {
 static void feedStringFromFileBuffer_UTF8(const UTF32WriterFunction &reciever, const uint8_t* buffer, int64_t fileLength) {
 	for (int64_t i = 0; i < fileLength; i++) {
 	for (int64_t i = 0; i < fileLength; i++) {
 		uint8_t byteA = buffer[i];
 		uint8_t byteA = buffer[i];
-		if (byteA < 0b10000000) {
+		if (byteA < (uint32_t)0b10000000) {
 			// Single byte (0xxxxxxx)
 			// Single byte (0xxxxxxx)
 			feedCharacter(reciever, (DsrChar)byteA);
 			feedCharacter(reciever, (DsrChar)byteA);
 		} else {
 		} else {
 			uint32_t character = 0;
 			uint32_t character = 0;
 			int extraBytes = 0;
 			int extraBytes = 0;
-			if (byteA >= 0b11000000) { // At least two leading ones
-				if (byteA < 0b11100000) { // Less than three leading ones
-					character = byteA & 0b00011111;
+			if (byteA >= (uint32_t)0b11000000) { // At least two leading ones
+				if (byteA < (uint32_t)0b11100000) { // Less than three leading ones
+					character = byteA & (uint32_t)0b00011111;
 					extraBytes = 1;
 					extraBytes = 1;
-				} else if (byteA < 0b11110000) { // Less than four leading ones
-					character = byteA & 0b00001111;
+				} else if (byteA < (uint32_t)0b11110000) { // Less than four leading ones
+					character = byteA & (uint32_t)0b00001111;
 					extraBytes = 2;
 					extraBytes = 2;
-				} else if (byteA < 0b11111000) { // Less than five leading ones
-					character = byteA & 0b00000111;
+				} else if (byteA < (uint32_t)0b11111000) { // Less than five leading ones
+					character = byteA & (uint32_t)0b00000111;
 					extraBytes = 3;
 					extraBytes = 3;
 				} else {
 				} else {
 					// Invalid UTF-8 format
 					// Invalid UTF-8 format
@@ -422,9 +420,9 @@ static void feedStringFromFileBuffer_UTF16(const UTF32WriterFunction &reciever,
 			// The given range was reserved and therefore using 32 bits
 			// The given range was reserved and therefore using 32 bits
 			i += 2;
 			i += 2;
 			uint16_t wordB = read16bits<LittleEndian>(buffer, i);
 			uint16_t wordB = read16bits<LittleEndian>(buffer, i);
-			uint32_t higher10Bits = wordA & 0b1111111111;
-			uint32_t lower10Bits = wordB & 0b1111111111;
-			feedCharacter(reciever, (DsrChar)(((higher10Bits << 10) | lower10Bits) + 0x10000));
+			uint32_t higher10Bits = wordA & (uint32_t)0b1111111111;
+			uint32_t lower10Bits  = wordB & (uint32_t)0b1111111111;
+			feedCharacter(reciever, (DsrChar)(((higher10Bits << 10) | lower10Bits) + (uint32_t)0x10000));
 		}
 		}
 	}
 	}
 }
 }
@@ -461,7 +459,7 @@ static void feedStringFromFileBuffer(const UTF32WriterFunction &reciever, const
 	}
 	}
 }
 }
 
 
-String dsr::string_loadFromMemory(const Buffer &fileContent) {
+String dsr::string_loadFromMemory(Buffer fileContent) {
 	String result;
 	String result;
 	// Measure the size of the result by scanning the content in advance
 	// Measure the size of the result by scanning the content in advance
 	int64_t characterCount = 0;
 	int64_t characterCount = 0;
@@ -517,42 +515,40 @@ String dsr::string_load(const ReadableString& filename, bool mustExist) {
 	}
 	}
 }
 }
 
 
-#define AT_MOST_BITS(BIT_COUNT) if (character >= 1 << BIT_COUNT) { character = U'?'; }
-
 template <CharacterEncoding characterEncoding>
 template <CharacterEncoding characterEncoding>
 static void encodeCharacter(const ByteWriterFunction &receiver, DsrChar character) {
 static void encodeCharacter(const ByteWriterFunction &receiver, DsrChar character) {
 	if (characterEncoding == CharacterEncoding::Raw_Latin1) {
 	if (characterEncoding == CharacterEncoding::Raw_Latin1) {
 		// Replace any illegal characters with questionmarks
 		// Replace any illegal characters with questionmarks
-		AT_MOST_BITS(8);
+		if (character > 255) { character = U'?'; }
 		receiver(character);
 		receiver(character);
 	} else if (characterEncoding == CharacterEncoding::BOM_UTF8) {
 	} else if (characterEncoding == CharacterEncoding::BOM_UTF8) {
 		// Replace any illegal characters with questionmarks
 		// Replace any illegal characters with questionmarks
-		AT_MOST_BITS(21);
+		if (character > 0x10FFFF) { character = U'?'; }
 		if (character < (1 << 7)) {
 		if (character < (1 << 7)) {
 			// 0xxxxxxx
 			// 0xxxxxxx
 			receiver(character);
 			receiver(character);
 		} else if (character < (1 << 11)) {
 		} else if (character < (1 << 11)) {
 			// 110xxxxx 10xxxxxx
 			// 110xxxxx 10xxxxxx
-			receiver(0b11000000 | ((character & (0b11111 << 6)) >> 6));
-			receiver(0b10000000 | (character & 0b111111));
+			receiver((uint32_t)0b11000000 | ((character & ((uint32_t)0b11111 << 6)) >> 6));
+			receiver((uint32_t)0b10000000 |  (character &  (uint32_t)0b111111));
 		} else if (character < (1 << 16)) {
 		} else if (character < (1 << 16)) {
 			// 1110xxxx 10xxxxxx 10xxxxxx
 			// 1110xxxx 10xxxxxx 10xxxxxx
-			receiver(0b11100000 | ((character & (0b1111 << 12)) >> 12));
-			receiver(0b10000000 | ((character & (0b111111 << 6)) >> 6));
-			receiver(0b10000000 | (character & 0b111111));
+			receiver((uint32_t)0b11100000 | ((character & ((uint32_t)0b1111 << 12)) >> 12));
+			receiver((uint32_t)0b10000000 | ((character & ((uint32_t)0b111111 << 6)) >> 6));
+			receiver((uint32_t)0b10000000 |  (character &  (uint32_t)0b111111));
 		} else if (character < (1 << 21)) {
 		} else if (character < (1 << 21)) {
 			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-			receiver(0b11110000 | ((character & (0b111 << 18)) >> 18));
-			receiver(0b10000000 | ((character & (0b111111 << 12)) >> 12));
-			receiver(0b10000000 | ((character & (0b111111 << 6)) >> 6));
-			receiver(0b10000000 | (character & 0b111111));
+			receiver((uint32_t)0b11110000 | ((character & ((uint32_t)0b111 << 18)) >> 18));
+			receiver((uint32_t)0b10000000 | ((character & ((uint32_t)0b111111 << 12)) >> 12));
+			receiver((uint32_t)0b10000000 | ((character & ((uint32_t)0b111111 << 6)) >> 6));
+			receiver((uint32_t)0b10000000 |  (character &  (uint32_t)0b111111));
 		}
 		}
 	} else { // Assuming UTF-16
 	} else { // Assuming UTF-16
-		AT_MOST_BITS(20);
+		if (character > 0x10FFFF) { character = U'?'; }
 		if (character <= 0xD7FF || (character >= 0xE000 && character <= 0xFFFF)) {
 		if (character <= 0xD7FF || (character >= 0xE000 && character <= 0xFFFF)) {
 			// xxxxxxxx xxxxxxxx (Limited range)
 			// xxxxxxxx xxxxxxxx (Limited range)
-			uint32_t higher8Bits = (character & 0b1111111100000000) >> 8;
-			uint32_t lower8Bits  =  character & 0b0000000011111111;
+			uint32_t higher8Bits = (character & (uint32_t)0b1111111100000000) >> 8;
+			uint32_t lower8Bits  =  character & (uint32_t)0b0000000011111111;
 			if (characterEncoding == CharacterEncoding::BOM_UTF16BE) {
 			if (characterEncoding == CharacterEncoding::BOM_UTF16BE) {
 				receiver(higher8Bits);
 				receiver(higher8Bits);
 				receiver(lower8Bits);
 				receiver(lower8Bits);
@@ -562,13 +558,11 @@ static void encodeCharacter(const ByteWriterFunction &receiver, DsrChar characte
 			}
 			}
 		} else if (character >= 0x010000 && character <= 0x10FFFF) {
 		} else if (character >= 0x010000 && character <= 0x10FFFF) {
 			// 110110xxxxxxxxxx 110111xxxxxxxxxx
 			// 110110xxxxxxxxxx 110111xxxxxxxxxx
-			uint32_t code = character - 0x10000;
-			uint32_t higher10Bits = (code & 0b11111111110000000000) >> 10;
-			uint32_t lower10Bits  =  code & 0b00000000001111111111;
-			uint32_t byteA = (0b110110 << 2) | ((higher10Bits & (0b11 << 8)) >> 8);
-			uint32_t byteB = higher10Bits & 0b11111111;
-			uint32_t byteC = (0b110111 << 2) | ((lower10Bits & (0b11 << 8)) >> 8);
-			uint32_t byteD = lower10Bits & 0b11111111;
+			uint32_t code = character - (uint32_t)0x10000;
+			uint32_t byteA = ((code & (uint32_t)0b11000000000000000000) >> 18) | (uint32_t)0b11011000;
+			uint32_t byteB =  (code & (uint32_t)0b00111111110000000000) >> 10;
+			uint32_t byteC = ((code & (uint32_t)0b00000000001100000000) >> 8)  | (uint32_t)0b11011100;
+			uint32_t byteD =   code & (uint32_t)0b00000000000011111111;
 			if (characterEncoding == CharacterEncoding::BOM_UTF16BE) {
 			if (characterEncoding == CharacterEncoding::BOM_UTF16BE) {
 				receiver(byteA);
 				receiver(byteA);
 				receiver(byteB);
 				receiver(byteB);
@@ -659,22 +653,21 @@ void dsr::string_save(const ReadableString& filename, const ReadableString& cont
 	}
 	}
 }
 }
 
 
-/*
-void dsr::string_saveToMemory(Buffer &target, const ReadableString& content,
-  CharacterEncoding characterEncoding = CharacterEncoding::BOM_UTF8,
-  LineEncoding lineEncoding = LineEncoding::CrLf) {
+Buffer dsr::string_saveToMemory(const ReadableString& content, CharacterEncoding characterEncoding, LineEncoding lineEncoding) {
 	int64_t byteCount = 0;
 	int64_t byteCount = 0;
-	ByteWriterFunction counter = [&fileStream](uint8_t value) {
+	ByteWriterFunction counter = [&byteCount](uint8_t value) {
 		byteCount++;
 		byteCount++;
 	};
 	};
-	
-	ENCODE_TEXT(receiver, content, characterEncoding, lineEncoding);
-	ByteWriterFunction receiver = [&fileStream](uint8_t value) {
-		fileStream.write((const char*)&value, 1);
+	ENCODE_TEXT(counter, content, characterEncoding, lineEncoding);
+	Buffer result = buffer_create(byteCount);
+	SafePointer<uint8_t> byteWriter = buffer_getSafeData<uint8_t>(result, "Buffer for string encoding");
+	ByteWriterFunction receiver = [&byteWriter](uint8_t value) {
+		*byteWriter = value;
+		byteWriter += 1;
 	};
 	};
-
+	ENCODE_TEXT(receiver, content, characterEncoding, lineEncoding);
+	return result;
 }
 }
-*/
 
 
 const char32_t* dsr::file_separator() {
 const char32_t* dsr::file_separator() {
 	#ifdef _WIN32
 	#ifdef _WIN32

+ 15 - 6
Source/DFPSR/base/text.h

@@ -42,8 +42,8 @@ using DsrChar = char32_t;
 enum class CharacterEncoding {
 enum class CharacterEncoding {
 	Raw_Latin1,  // U+00 to U+FF
 	Raw_Latin1,  // U+00 to U+FF
 	BOM_UTF8,    // U+00000000 to U+0010FFFF
 	BOM_UTF8,    // U+00000000 to U+0010FFFF
-	BOM_UTF16BE, // U+00000000 to U+0000D7FF, U+0000E000 to U+0000FFFF, U+00010000 to U+0010FFFF
-	BOM_UTF16LE  // U+00000000 to U+0000D7FF, U+0000E000 to U+0000FFFF, U+00010000 to U+0010FFFF
+	BOM_UTF16BE, // U+00000000 to U+0000D7FF, U+0000E000 to U+0010FFFF
+	BOM_UTF16LE  // U+00000000 to U+0000D7FF, U+0000E000 to U+0010FFFF
 };
 };
 
 
 // Carriage-return is removed when loading text files to prevent getting double lines
 // Carriage-return is removed when loading text files to prevent getting double lines
@@ -301,7 +301,7 @@ double string_toDouble(const ReadableString& source);
 String string_load(const ReadableString& filename, bool mustExist = true);
 String string_load(const ReadableString& filename, bool mustExist = true);
 // A version loading the text from a binary representation of the file's content instead of the filename.
 // A version loading the text from a binary representation of the file's content instead of the filename.
 //   Makes it easier to test character encoding and load arbitrary files from archives.
 //   Makes it easier to test character encoding and load arbitrary files from archives.
-String string_loadFromMemory(const Buffer &fileContent);
+String string_loadFromMemory(Buffer fileContent);
 
 
 // Side-effect: Saves content to filename using the selected character and line encodings.
 // Side-effect: Saves content to filename using the selected character and line encodings.
 // Do not add carriage return characters yourself into strings, for these will be added automatically in the CrLf mode.
 // Do not add carriage return characters yourself into strings, for these will be added automatically in the CrLf mode.
@@ -312,12 +312,11 @@ void string_save(const ReadableString& filename, const ReadableString& content,
   CharacterEncoding characterEncoding = CharacterEncoding::BOM_UTF8,
   CharacterEncoding characterEncoding = CharacterEncoding::BOM_UTF8,
   LineEncoding lineEncoding = LineEncoding::CrLf
   LineEncoding lineEncoding = LineEncoding::CrLf
 );
 );
-/*
-void string_saveToMemory(Buffer &target, const ReadableString& content,
+// A version encoding the text to a new buffer
+Buffer string_saveToMemory(const ReadableString& content,
   CharacterEncoding characterEncoding = CharacterEncoding::BOM_UTF8,
   CharacterEncoding characterEncoding = CharacterEncoding::BOM_UTF8,
   LineEncoding lineEncoding = LineEncoding::CrLf
   LineEncoding lineEncoding = LineEncoding::CrLf
 );
 );
-*/
 
 
 // Post-condition: Returns true iff strings a and b are exactly equal.
 // Post-condition: Returns true iff strings a and b are exactly equal.
 bool string_match(const ReadableString& a, const ReadableString& b);
 bool string_match(const ReadableString& a, const ReadableString& b);
@@ -342,6 +341,16 @@ String string_mangleQuote(const ReadableString &rawText);
 // Post-condition: Returns mangledText with quotes removed and escape tokens interpreted.
 // Post-condition: Returns mangledText with quotes removed and escape tokens interpreted.
 String string_unmangleQuote(const ReadableString& mangledText);
 String string_unmangleQuote(const ReadableString& mangledText);
 
 
+// Ensures safely that at least minimumLength characters can be held in the buffer
+inline void string_reserve(String& target, int32_t minimumLength) {
+	target.reserve(minimumLength);
+}
+
+// Append/push one character (to avoid integer to string conversion)
+inline void string_appendChar(String& target, DsrChar value) {
+	target.appendChar(value);
+}
+
 // Append one element
 // Append one element
 template<typename TYPE>
 template<typename TYPE>
 inline void string_append(String& target, TYPE value) {
 inline void string_append(String& target, TYPE value) {

+ 122 - 11
Source/test/tests/TextEncodingTest.cpp

@@ -48,9 +48,9 @@ String expected_utf8 = unicodeContent + U"\nThis is UTF-8";
 String expected_utf16le = unicodeContent + U"\nThis is UTF-16 Little Endian";
 String expected_utf16le = unicodeContent + U"\nThis is UTF-16 Little Endian";
 String expected_utf16be = unicodeContent + U"\nThis is UTF-16 Big Endian";
 String expected_utf16be = unicodeContent + U"\nThis is UTF-16 Big Endian";
 
 
-void printCharacterCode(uint32_t value) {
-	for (int i = 0; i < 32; i++) {
-		if (value & 0b10000000000000000000000000000000) {
+void printBinary(uint32_t value, int maxBits) {
+	for (int i = 0; i < maxBits; i++) {
+		if (value & (uint32_t)0b1 << (maxBits - 1)) {
 			printText(U"1");
 			printText(U"1");
 		} else {
 		} else {
 			printText(U"0");
 			printText(U"0");
@@ -59,35 +59,45 @@ void printCharacterCode(uint32_t value) {
 	}
 	}
 }
 }
 
 
+void printBuffer(Buffer buffer) {
+	int length = buffer_getSize(buffer);
+	SafePointer<uint8_t> data = buffer_getSafeData<uint8_t>(buffer, "Generic buffer");
+	printText(U"Buffer of length ", length, U":\n");
+	for (int i = 0; i < length; i++) {
+		printBinary(data[i], 8);
+		printText(U" @", i, U"\n");
+	}
+}
+
 // Method for printing the character codes of a string for debugging
 // Method for printing the character codes of a string for debugging
 void compareCharacterCodes(String textA, String textB) {
 void compareCharacterCodes(String textA, String textB) {
 	int lengthA = string_length(textA);
 	int lengthA = string_length(textA);
 	int lengthB = string_length(textB);
 	int lengthB = string_length(textB);
 	int minLength = lengthA < lengthB ? lengthA : lengthB;
 	int minLength = lengthA < lengthB ? lengthA : lengthB;
-	printText("Character codes for strings of length ", lengthA, U" and ", lengthB, U":\n");
+	printText(U"Character codes for strings of length ", lengthA, U" and ", lengthB, U":\n");
 	for (int i = 0; i < minLength; i++) {
 	for (int i = 0; i < minLength; i++) {
 		uint32_t codeA = (uint32_t)textA[i];
 		uint32_t codeA = (uint32_t)textA[i];
 		uint32_t codeB = (uint32_t)textB[i];
 		uint32_t codeB = (uint32_t)textB[i];
-		printCharacterCode(codeA);
+		printBinary(codeA, 32);
 		if (codeA == codeB) {
 		if (codeA == codeB) {
 			printText(U" == ");
 			printText(U" == ");
 		} else {
 		} else {
 			printText(U" != ");
 			printText(U" != ");
 		}
 		}
-		printCharacterCode(codeB);
+		printBinary(codeB, 32);
 		printText(U" (", textA[i], U") (", textB[i], U")\n");
 		printText(U" (", textA[i], U") (", textB[i], U")\n");
 	}
 	}
 	if (lengthA > lengthB) {
 	if (lengthA > lengthB) {
 		for (int i = minLength; i < lengthA; i++) {
 		for (int i = minLength; i < lengthA; i++) {
 			uint32_t codeA = (uint32_t)textA[i];
 			uint32_t codeA = (uint32_t)textA[i];
-			printCharacterCode(codeA);
+			printBinary(codeA, 32);
 			printText(U" (", textA[i], U")\n");
 			printText(U" (", textA[i], U")\n");
 		}
 		}
 	} else {
 	} else {
 		printText(U"                                    ");
 		printText(U"                                    ");
 		for (int i = minLength; i < lengthB; i++) {
 		for (int i = minLength; i < lengthB; i++) {
 			uint32_t codeB = (uint32_t)textB[i];
 			uint32_t codeB = (uint32_t)textB[i];
-			printCharacterCode(codeB);
+			printBinary(codeB, 32);
 			printText(U" (", textB[i], U")\n");
 			printText(U" (", textB[i], U")\n");
 		}
 		}
 	}
 	}
@@ -96,7 +106,108 @@ void compareCharacterCodes(String textA, String textB) {
 START_TEST(TextEncoding)
 START_TEST(TextEncoding)
 	String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
 	String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
 	{ // Text encodings stored in memory
 	{ // Text encodings stored in memory
-		// TODO: Test string_loadFromMemory using random character codes from the extended 0x10000..0x10FFFF range
+		// Run these tests for all line encodings
+		for (int l = 0; l <= 1; l++) {
+			LineEncoding lineEncoding = (l == 0) ? LineEncoding::CrLf : LineEncoding::Lf;
+			// \r is not saved to files for cross-platform compatibility
+			// \0 is not saved to files because files have a known size and don't need them
+			{ // Latin-1 up to U+FF excluding \r and \0
+				String originalLatin1;
+				string_reserve(originalLatin1, 0xFF);
+				for (DsrChar c = 0x1; c <= 0xFF; c++) {
+					if (c != U'\r') {
+						string_appendChar(originalLatin1, c);
+					}
+				}
+				Buffer encoded = string_saveToMemory(originalLatin1, CharacterEncoding::Raw_Latin1, lineEncoding);
+				String decodedLatin1 = string_loadFromMemory(encoded);
+				//compareCharacterCodes(originalLatin1, decodedLatin1);
+				ASSERT_MATCH(originalLatin1, decodedLatin1);
+			}
+			{ // UTF-8 up to U+10FFFF excluding \r and \0
+				String originalUTF8;
+				string_reserve(originalUTF8, 0x10FFFF);
+				for (DsrChar c = 0x1; c <= 0x10FFFF; c++) {
+					if (c != U'\r') {
+						string_appendChar(originalUTF8, c);
+					}
+				}
+				Buffer encoded = string_saveToMemory(originalUTF8, CharacterEncoding::BOM_UTF8, lineEncoding);
+				String decodedUTF8 = string_loadFromMemory(encoded);
+				ASSERT_MATCH(originalUTF8, decodedUTF8);
+			}
+			// Selected cases for UTF-16
+			for (int e = 0; e <= 1; e++) {
+				CharacterEncoding characterEncoding = (e == 0) ? CharacterEncoding::BOM_UTF16BE : CharacterEncoding::BOM_UTF16LE;
+				String originalUTF16;
+				// 20-bit test cases
+				string_appendChar(originalUTF16, 0b00000000000000000001);
+				string_appendChar(originalUTF16, 0b00000000000000000010);
+				string_appendChar(originalUTF16, 0b00000000000000000011);
+				string_appendChar(originalUTF16, 0b00000000000000000100);
+				string_appendChar(originalUTF16, 0b00000000000000000111);
+				string_appendChar(originalUTF16, 0b00000000000000001000);
+				string_appendChar(originalUTF16, 0b00000000000000001111);
+				string_appendChar(originalUTF16, 0b00000000000000010000);
+				string_appendChar(originalUTF16, 0b00000000000000011111);
+				string_appendChar(originalUTF16, 0b00000000000000100000);
+				string_appendChar(originalUTF16, 0b00000000000000111111);
+				string_appendChar(originalUTF16, 0b00000000000001000000);
+				string_appendChar(originalUTF16, 0b00000000000001111111);
+				string_appendChar(originalUTF16, 0b00000000000010000000);
+				string_appendChar(originalUTF16, 0b00000000000011111111);
+				string_appendChar(originalUTF16, 0b00000000000100000000);
+				string_appendChar(originalUTF16, 0b00000000000111111111);
+				string_appendChar(originalUTF16, 0b00000000001000000000);
+				string_appendChar(originalUTF16, 0b00000000001111111111);
+				string_appendChar(originalUTF16, 0b00000000010000000000);
+				string_appendChar(originalUTF16, 0b00000000011111111111);
+				string_appendChar(originalUTF16, 0b00000000100000000000);
+				string_appendChar(originalUTF16, 0b00000000111111111111);
+				string_appendChar(originalUTF16, 0b00000001000000000000);
+				string_appendChar(originalUTF16, 0b00000001111111111111);
+				string_appendChar(originalUTF16, 0b00000010000000000000);
+				string_appendChar(originalUTF16, 0b00000011111111111111);
+				string_appendChar(originalUTF16, 0b00000100000000000000);
+				string_appendChar(originalUTF16, 0b00000111111111111111);
+				string_appendChar(originalUTF16, 0b00001000000000000000);
+				string_appendChar(originalUTF16, 0b00001111111111111111);
+				string_appendChar(originalUTF16, 0b00010000000000000000);
+				string_appendChar(originalUTF16, 0b00011111111111111111);
+				string_appendChar(originalUTF16, 0b00100000000000000000);
+				string_appendChar(originalUTF16, 0b00111111111111111111);
+				string_appendChar(originalUTF16, 0b01000000000000000000);
+				string_appendChar(originalUTF16, 0b01111111111111111111);
+				string_appendChar(originalUTF16, 0b10000000000000000000);
+				string_appendChar(originalUTF16, 0b11111111111111111111);
+				// 21-bit test cases exploiting the high range offset
+				string_appendChar(originalUTF16, 0x100000); // Using the 21:st bit
+				string_appendChar(originalUTF16, 0x10FFFF); // Maximum range for UTF
+				Buffer encoded = string_saveToMemory(originalUTF16, characterEncoding, lineEncoding);
+				String decoded = string_loadFromMemory(encoded);
+				//printBuffer(encoded);
+				//compareCharacterCodes(originalUTF16, decoded);
+				ASSERT_MATCH(originalUTF16, decoded);
+			}
+			// All UTF-16 characters excluding \r and \0
+			for (int e = 0; e <= 1; e++) {
+				CharacterEncoding characterEncoding = (e == 0) ? CharacterEncoding::BOM_UTF16BE : CharacterEncoding::BOM_UTF16LE;
+				String original;
+				string_reserve(original, 0x10FFFF);
+				for (DsrChar c = 0x1; c <= 0xD7FF; c++) {
+					if (c != U'\r') {
+						string_appendChar(original, c);
+					}
+				}
+				// 0xD800 to 0xDFFF is reserved for UTF-16 surrogates, so that range is skipped
+				for (DsrChar c = 0xE000; c <= 0x10FFFF; c++) {
+					string_appendChar(original, c);
+				}
+				Buffer encoded = string_saveToMemory(original, characterEncoding, lineEncoding);
+				String decoded = string_loadFromMemory(encoded);
+				ASSERT_MATCH(original, decoded);
+			}
+		}
 	}
 	}
 	{ // Loading strings of different encodings
 	{ // Loading strings of different encodings
 		String fileLatin1 = string_load(folderPath + U"Latin1.txt", true);
 		String fileLatin1 = string_load(folderPath + U"Latin1.txt", true);
@@ -119,8 +230,8 @@ START_TEST(TextEncoding)
 		String originalContent = U"Hello my friend\n你好我的朋友\n𐐷𤭢\n";
 		String originalContent = U"Hello my friend\n你好我的朋友\n𐐷𤭢\n";
 		String latin1Expected = U"Hello my friend\n??????\n??\n";
 		String latin1Expected = U"Hello my friend\n??????\n??\n";
 		String tempPath = folderPath + U"Temporary.txt";
 		String tempPath = folderPath + U"Temporary.txt";
-		for (int i = 0; i < 2; i++) {
-			LineEncoding lineEncoding = (i == 0) ? LineEncoding::CrLf : LineEncoding::Lf;
+		for (int l = 0; l < 2; l++) {
+			LineEncoding lineEncoding = (l == 0) ? LineEncoding::CrLf : LineEncoding::Lf;
 
 
 			// Latin-1 should store up to 8 bits correctly, and write ? for complex characters
 			// Latin-1 should store up to 8 bits correctly, and write ? for complex characters
 			string_save(tempPath, originalContent, CharacterEncoding::Raw_Latin1, lineEncoding);
 			string_save(tempPath, originalContent, CharacterEncoding::Raw_Latin1, lineEncoding);