Browse Source

Made the text encoding test work for Latin-1 and UTF-8.

David Piuva 5 years ago
parent
commit
b5db59c02a

+ 0 - 33
Source/DFPSR/base/text.cpp

@@ -333,33 +333,6 @@ static void doubleToString_arabic(String& target, double value) {
 	} \
 	TARGET[SOURCE.length()] = '\0';
 
-/*
-String dsr::string_load(const ReadableString& filename, bool mustExist) {
-	// TODO: Load files using Unicode filenames
-	TO_RAW_ASCII(asciiFilename, filename);
-	std::ifstream inputFile(asciiFilename);
-	if (inputFile.is_open()) {
-		std::stringstream outputBuffer;
-		// TODO: Feed directly to String
-		outputBuffer << inputFile.rdbuf();
-		std::string content = outputBuffer.str();
-		String result;
-		result.reserve(content.size());
-		for (int i = 0; i < (int)(content.size()); i++) {
-			result.appendChar(content[i]);
-		}
-		inputFile.close();
-		return result;
-	} else {
-		if (mustExist) {
-			throwError("Failed to load ", filename, "\n");
-		}
-		// If the file cound not be found and opened, a null string is returned
-		return String();
-	}
-}
-*/
-
 // TODO: Give as a lambda with target captured, so that pre-allocation can measure the
 //       needed space exactly using a lambda that increases a character counter instead.
 // Interprets a character's value and appends it to the string.
@@ -496,12 +469,6 @@ void dsr::string_save(const ReadableString& filename, const ReadableString& cont
 }
 /*
 // TODO: Choose how to encode characters and line endings using enums
-class enum textEncoding {
-	UTF1, UTF7, UTF8, UTF16BE, UTF16LE, UTF32BE, UTF32LE, UTF-EBCDIC, SCSU, BOCU1, GB18030
-};
-* class enum lineEncoding {
-	UTF1, UTF7, UTF8, UTF16BE, UTF16LE, UTF32BE, UTF32LE, UTF-EBCDIC, SCSU, BOCU1, GB18030
-};
 void dsr::string_save(const ReadableString& filename, const ReadableString& content) {
 	// TODO: Save files using Unicode filenames
 	TO_RAW_ASCII(asciiFilename, filename);

+ 5 - 0
Source/DFPSR/base/text.h

@@ -38,6 +38,11 @@ namespace dsr {
 
 using DsrChar = char32_t;
 
+// Text files support loading UTF-8/16 BE/LE with BOM or Latin-1 without BOM
+enum class CharacterEncoding {
+	Raw_Latin1, UTF8, UTF16BE, UTF16LE
+};
+
 class ReadableString {
 protected:
 	// A local pointer to the sub-allocation

+ 31 - 10
Source/test/tests/TextEncodingTest.cpp

@@ -1,18 +1,23 @@
-
+
 #include "../testTools.h"
 
+// These tests will fail if the source code document or stored files change their encoding of line breaks.
+
 String expected_latin1 =
-R"QUOTE(Hello my friend.
+R"QUOTE(Hello my friend
 Hej min vän
 Halló, vinur minn
 Hei ystäväni
 Hola mi amigo
 Ciao amico
+)QUOTE";
 
-This is Latin-1)QUOTE";
-
+// Warning!
+//   String literals containing characters above value 255 must be written explicitly as Unicode literals using U"" instead of "",
+//   because string literals do not begin with a byte order mark saying which encoding is being used.
+//   Also make sure to save the source code document with a byte order mark, so that the C++ compiler receives the correct symbols.
 String unicodeContent =
-R"QUOTE(Hello my friend.
+UR"QUOTE(Hello my friend
 Hej min vän
 Halló, vinur minn
 Hei ystäväni
@@ -71,24 +76,35 @@ void compareCharacterCodes(String textA, String textB) {
 		printCharacterCode(codeB);
 		printText(U" (", textA[i], U") (", textB[i], U")\n");
 	}
+	if (lengthA > lengthB) {
+		for (int i = minLength; i < lengthA; i++) {
+			uint32_t codeA = (uint32_t)textA[i];
+			printCharacterCode(codeA);
+			printText(U" (", textA[i], U")\n");
+		}
+	} else {
+		printText(U"                                    ");
+		for (int i = minLength; i < lengthB; i++) {
+			uint32_t codeB = (uint32_t)textB[i];
+			printCharacterCode(codeB);
+			printText(U" (", textB[i], U")\n");
+		}
+	}
 }
 
 START_TEST(TextEncoding)
+	String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
 	{ // Text encodings stored in memory
 		// TODO: Test string_loadFromMemory
 		
 		
 	}
 	{ // Loading strings of different encodings
-		String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
-
 		String fileLatin1 = string_load(folderPath + U"Latin1.txt", true);
-		printText("Latin1.txt contains:\n", fileLatin1, "\n");
-		compareCharacterCodes(fileLatin1, expected_latin1);
+		//compareCharacterCodes(fileLatin1, expected_latin1);
 		ASSERT_MATCH(fileLatin1, expected_latin1);
 
 		String fileUTF8 = string_load(folderPath + U"BomUtf8.txt", true);
-		printText("BomUtf8.txt contains:\n", fileUTF8, "\n");
 		compareCharacterCodes(fileUTF8, expected_utf8);
 		ASSERT_MATCH(fileUTF8, expected_utf8);
 
@@ -100,4 +116,9 @@ START_TEST(TextEncoding)
 		//printText("BomUtf16Be.txt contains:\n", fileUTF16BE, "\n");
 		//ASSERT_MATCH(fileUTF16BE, expected_utf16be);
 	}
+	{ // Saving text to files
+		String originalContent = U"你好我的朋友";
+		
+		
+	}
 END_TEST

BIN
Source/test/tests/resources/BomUtf16Be.txt


BIN
Source/test/tests/resources/BomUtf16Le.txt


+ 1 - 1
Source/test/tests/resources/BomUtf8.txt

@@ -1,4 +1,4 @@
-Hello my friend.
+Hello my friend
 Hej min vän
 Halló, vinur minn
 Hei ystäväni

+ 1 - 3
Source/test/tests/resources/Latin1.txt

@@ -1,8 +1,6 @@
-Hello my friend.
+Hello my friend
 Hej min vän
 Halló, vinur minn
 Hei ystäväni
 Hola mi amigo
 Ciao amico
-
-This is Latin-1