Browse Source

Added the missing 0x10000 offset when decoding and encoding UTF-16 surrogate pairs; the omission was noticed while writing the documentation.

David Piuva 5 years ago
parent
commit
e996293769

+ 4 - 3
Source/DFPSR/base/text.cpp

@@ -419,7 +419,7 @@ static void AppendStringFromFileBuffer_UTF16(String &target, const uint8_t* buff
 			uint16_t wordB = read16bits<LittleEndian>(buffer, i);
 			uint16_t wordB = read16bits<LittleEndian>(buffer, i);
 			uint32_t higher10Bits = wordA & 0b1111111111;
 			uint32_t higher10Bits = wordA & 0b1111111111;
 			uint32_t lower10Bits = wordB & 0b1111111111;
 			uint32_t lower10Bits = wordB & 0b1111111111;
-			feedCharacterFromFile(target, (DsrChar)((higher10Bits << 10) | lower10Bits));
+			feedCharacterFromFile(target, (DsrChar)(((higher10Bits << 10) | lower10Bits) + 0x10000));
 		}
 		}
 	}
 	}
 }
 }
@@ -538,8 +538,9 @@ static void encodeCharacterToStream(std::ostream &target, DsrChar character) {
 			}
 			}
 		} else if (character >= 0x010000 && character <= 0x10FFFF) {
 		} else if (character >= 0x010000 && character <= 0x10FFFF) {
 			// 110110xxxxxxxxxx 110111xxxxxxxxxx
 			// 110110xxxxxxxxxx 110111xxxxxxxxxx
-			uint32_t higher10Bits = (character & 0b11111111110000000000) >> 10;
-			uint32_t lower10Bits  =  character & 0b00000000001111111111;
+			uint32_t code = character - 0x10000;
+			uint32_t higher10Bits = (code & 0b11111111110000000000) >> 10;
+			uint32_t lower10Bits  =  code & 0b00000000001111111111;
 			uint32_t byteA = (0b110110 << 2) | ((higher10Bits & (0b11 << 8)) >> 8);
 			uint32_t byteA = (0b110110 << 2) | ((higher10Bits & (0b11 << 8)) >> 8);
 			uint32_t byteB = higher10Bits & 0b11111111;
 			uint32_t byteB = higher10Bits & 0b11111111;
 			uint32_t byteC = (0b110111 << 2) | ((lower10Bits & (0b11 << 8)) >> 8);
 			uint32_t byteC = (0b110111 << 2) | ((lower10Bits & (0b11 << 8)) >> 8);

+ 4 - 1
Source/DFPSR/base/text.h

@@ -40,7 +40,10 @@ using DsrChar = char32_t;
 
 
 // Text files support loading UTF-8/16 BE/LE with BOM or Latin-1 without BOM
 // Text files support loading UTF-8/16 BE/LE with BOM or Latin-1 without BOM
 enum class CharacterEncoding {
 enum class CharacterEncoding {
-	Raw_Latin1, BOM_UTF8, BOM_UTF16BE, BOM_UTF16LE
+	Raw_Latin1,  // U+00 to U+FF
+	BOM_UTF8,    // U+00000000 to U+0010FFFF
+	BOM_UTF16BE, // U+00000000 to U+0000D7FF, U+0000E000 to U+0000FFFF, U+00010000 to U+0010FFFF
+	BOM_UTF16LE  // U+00000000 to U+0000D7FF, U+0000E000 to U+0000FFFF, U+00010000 to U+0010FFFF
 };
 };
 
 
 // Carriage-return is removed when loading text files to prevent getting double lines
 // Carriage-return is removed when loading text files to prevent getting double lines

+ 1 - 3
Source/test/tests/TextEncodingTest.cpp

@@ -95,9 +95,7 @@ void compareCharacterCodes(String textA, String textB) {
 START_TEST(TextEncoding)
 START_TEST(TextEncoding)
 	String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
 	String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
 	{ // Text encodings stored in memory
 	{ // Text encodings stored in memory
-		// TODO: Test string_loadFromMemory
-		
-		
+		// TODO: Test string_loadFromMemory using random character codes from the extended 0x10000..0x10FFFF range
 	}
 	}
 	{ // Loading strings of different encodings
 	{ // Loading strings of different encodings
 		String fileLatin1 = string_load(folderPath + U"Latin1.txt", true);
 		String fileLatin1 = string_load(folderPath + U"Latin1.txt", true);