5 years ago · b5db59c02a
--- a/Source/DFPSR/base/text.cpp
+++ b/Source/DFPSR/base/text.cpp
@@ -333,33 +333,6 @@ static void doubleToString_arabic(String& target, double value) {
 
				 	} \
			
 
				 	TARGET[SOURCE.length()] = '\0';
			
 
				 
			
 
				-/*
			
 
				-String dsr::string_load(const ReadableString& filename, bool mustExist) {
			
 
				-	// TODO: Load files using Unicode filenames
			
 
				-	TO_RAW_ASCII(asciiFilename, filename);
			
 
				-	std::ifstream inputFile(asciiFilename);
			
 
				-	if (inputFile.is_open()) {
			
 
				-		std::stringstream outputBuffer;
			
 
				-		// TODO: Feed directly to String
			
 
				-		outputBuffer << inputFile.rdbuf();
			
 
				-		std::string content = outputBuffer.str();
			
 
				-		String result;
			
 
				-		result.reserve(content.size());
			
 
				-		for (int i = 0; i < (int)(content.size()); i++) {
			
 
				-			result.appendChar(content[i]);
			
 
				-		}
			
 
				-		inputFile.close();
			
 
				-		return result;
			
 
				-	} else {
			
 
				-		if (mustExist) {
			
 
				-			throwError("Failed to load ", filename, "\n");
			
 
				-		}
			
 
				-		// If the file could not be found and opened, a null string is returned
			
 
				-		return String();
			
 
				-	}
			
 
				-}
			
 
				-*/
			
 
				-
			
 
				 // TODO: Give as a lambda with target captured, so that pre-allocation can measure the
			
 
				 //       needed space exactly using a lambda that increases a character counter instead.
			
 
				 // Interprets a character's value and appends it to the string.
			
@@ -496,12 +469,6 @@ void dsr::string_save(const ReadableString& filename, const ReadableString& cont
 
				 }
			
 
				 /*
			
 
				 // TODO: Choose how to encode characters and line endings using enums
			
 
				-class enum textEncoding {
			
 
				-	UTF1, UTF7, UTF8, UTF16BE, UTF16LE, UTF32BE, UTF32LE, UTF-EBCDIC, SCSU, BOCU1, GB18030
			
 
				-};
			
 
				-* class enum lineEncoding {
			
 
				-	UTF1, UTF7, UTF8, UTF16BE, UTF16LE, UTF32BE, UTF32LE, UTF-EBCDIC, SCSU, BOCU1, GB18030
			
 
				-};
			
 
				 void dsr::string_save(const ReadableString& filename, const ReadableString& content) {
			
 
				 	// TODO: Save files using Unicode filenames
			
 
				 	TO_RAW_ASCII(asciiFilename, filename);
			
--- a/Source/DFPSR/base/text.h
+++ b/Source/DFPSR/base/text.h
@@ -38,6 +38,11 @@ namespace dsr {
 
				 
			
 
				 using DsrChar = char32_t;
			
 
				 
			
 
				+// Text files support loading UTF-8/16 BE/LE with BOM or Latin-1 without BOM
			
 
				+enum class CharacterEncoding {
			
 
				+	Raw_Latin1, UTF8, UTF16BE, UTF16LE
			
 
				+};
			
 
				+
			
 
				 class ReadableString {
			
 
				 protected:
			
 
				 	// A local pointer to the sub-allocation
			
--- a/Source/test/tests/TextEncodingTest.cpp
+++ b/Source/test/tests/TextEncodingTest.cpp
@@ -1,18 +1,23 @@
 
				-
			
 
				+
			
 
				 #include "../testTools.h"
			
 
				 
			
 
				+// These tests will fail if the source code document or stored files change their encoding of line breaks.
			
 
				+
			
 
				 String expected_latin1 =
			
 
				-R"QUOTE(Hello my friend.
			
 
				+R"QUOTE(Hello my friend
			
 
				 Hej min vän
			
 
				 Halló, vinur minn
			
 
				 Hei ystäväni
			
 
				 Hola mi amigo
			
 
				 Ciao amico
			
 
				+)QUOTE";
			
 
				 
			
 
				-This is Latin-1)QUOTE";
			
 
				-
			
 
				+// Warning!
			
 
				+//   String literals containing characters above value 255 must be stored explicitly in Unicode literals using U"" instead of "".
			
 
				+//   Because string literals do not begin with a byte order mark to say which encoding is being used.
			
 
				+//   Also make sure to save the source code document using a byte order mark so that the C++ compiler receives the correct symbol.
			
 
				 String unicodeContent =
			
 
				-R"QUOTE(Hello my friend.
			
 
				+UR"QUOTE(Hello my friend
			
 
				 Hej min vän
			
 
				 Halló, vinur minn
			
 
				 Hei ystäväni
			
@@ -71,24 +76,35 @@ void compareCharacterCodes(String textA, String textB) {
 
				 		printCharacterCode(codeB);
			
 
				 		printText(U" (", textA[i], U") (", textB[i], U")\n");
			
 
				 	}
			
 
				+	if (lengthA > lengthB) {
			
 
				+		for (int i = minLength; i < lengthA; i++) {
			
 
				+			uint32_t codeA = (uint32_t)textA[i];
			
 
				+			printCharacterCode(codeA);
			
 
				+			printText(U" (", textA[i], U")\n");
			
 
				+		}
			
 
				+	} else {
			
 
				+		printText(U"                                    ");
			
 
				+		for (int i = minLength; i < lengthB; i++) {
			
 
				+			uint32_t codeB = (uint32_t)textB[i];
			
 
				+			printCharacterCode(codeB);
			
 
				+			printText(U" (", textB[i], U")\n");
			
 
				+		}
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				 START_TEST(TextEncoding)
			
 
				+	String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
			
 
				 	{ // Text encodings stored in memory
			
 
				 		// TODO: Test string_loadFromMemory
			
 
				 		
			
 
				 		
			
 
				 	}
			
 
				 	{ // Loading strings of different encodings
			
 
				-		String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
			
 
				-
			
 
				 		String fileLatin1 = string_load(folderPath + U"Latin1.txt", true);
			
 
				-		printText("Latin1.txt contains:\n", fileLatin1, "\n");
			
 
				-		compareCharacterCodes(fileLatin1, expected_latin1);
			
 
				+		//compareCharacterCodes(fileLatin1, expected_latin1);
			
 
				 		ASSERT_MATCH(fileLatin1, expected_latin1);
			
 
				 
			
 
				 		String fileUTF8 = string_load(folderPath + U"BomUtf8.txt", true);
			
 
				-		printText("BomUtf8.txt contains:\n", fileUTF8, "\n");
			
 
				 		compareCharacterCodes(fileUTF8, expected_utf8);
			
 
				 		ASSERT_MATCH(fileUTF8, expected_utf8);
			
 
				 
			
@@ -100,4 +116,9 @@ START_TEST(TextEncoding)
 
				 		//printText("BomUtf16Be.txt contains:\n", fileUTF16BE, "\n");
			
 
				 		//ASSERT_MATCH(fileUTF16BE, expected_utf16be);
			
 
				 	}
			
 
				+	{ // Saving text to files
			
 
				+		String originalContent = U"你好我的朋友";
			
 
				+		
			
 
				+		
			
 
				+	}
			
 
				 END_TEST
			
--- a/Source/test/tests/resources/BomUtf16Be.txt
+++ b/Source/test/tests/resources/BomUtf16Be.txt
--- a/Source/test/tests/resources/BomUtf16Le.txt
+++ b/Source/test/tests/resources/BomUtf16Le.txt
--- a/Source/test/tests/resources/BomUtf8.txt
+++ b/Source/test/tests/resources/BomUtf8.txt
@@ -1,4 +1,4 @@
 
				-Hello my friend.
			
 
				+Hello my friend
			
 
				 Hej min vän
			
 
				 Halló, vinur minn
			
 
				 Hei ystäväni
			
--- a/Source/test/tests/resources/Latin1.txt
+++ b/Source/test/tests/resources/Latin1.txt
@@ -1,8 +1,6 @@
 
				-Hello my friend.
			
 
				+Hello my friend
			
 
				 Hej min vän
			
 
				 Halló, vinur minn
			
 
				 Hei ystäväni
			
 
				 Hola mi amigo
			
 
				 Ciao amico
			
 
				-
			
 
				-This is Latin-1