Преглед на файлове

Implementing UTF8 to ISO-8859-1 conversion to improve chances of finding files in zip archives

Léo Terziman преди 11 години
родител
ревизия
e2676ec176
променени са 2 файла, в които са добавени 46 реда и са изтрити 0 реда
  1. 37 0
      code/BaseImporter.cpp
  2. 9 0
      code/BaseImporter.h

+ 37 - 0
code/BaseImporter.cpp

@@ -379,6 +379,43 @@ void BaseImporter::ConvertToUTF8(std::vector<char>& data)
 	}
 	}
 }
 }
 
 
+// ------------------------------------------------------------------------------------------------
+// Convert UTF-8 data to ISO-8859-1, in place
+//
+// ISO-8859-1 covers exactly the code points U+0000..U+00FF. In UTF-8 these are
+// either a single byte below 0x80, or a two-byte sequence whose lead byte is
+// 0xC2 (U+0080..U+00BF) or 0xC3 (U+00C0..U+00FF) followed by one continuation
+// byte of the form 10xxxxxx. Everything else cannot be represented: it is
+// reported through the logger and the offending bytes are copied unchanged.
+// The string is shrunk to the converted length before returning.
+void BaseImporter::ConvertUTF8toISO8859_1(std::string& data)
+{
+	// Use the string's own size type: narrowing to unsigned int would truncate very large buffers.
+	const std::string::size_type size = data.size();
+	// i is the read position, j the write position; j <= i always holds, so the
+	// conversion can safely overwrite the input as it goes.
+	std::string::size_type i = 0, j = 0;
+
+	while(i < size) {
+		const unsigned char lead = static_cast<unsigned char>(data[i]);
+
+		if(lead < 0x80) {
+			// Plain ASCII maps to itself.
+			data[j] = data[i];
+		} else if(i + 1 < size) {
+			const unsigned char next = static_cast<unsigned char>(data[i + 1]);
+			// Only accept a genuine continuation byte; otherwise the input is not the
+			// two-byte sequence we know how to convert and must not be silently mangled.
+			const bool isContinuation = (next & 0xC0) == 0x80;
+
+			if(lead == 0xC2 && isContinuation) {
+				// U+0080..U+00BF: the continuation byte already is the ISO-8859-1 value.
+				data[j] = data[++i];
+			} else if(lead == 0xC3 && isContinuation) {
+				// U+00C0..U+00FF: continuation byte 0x80..0xBF shifted up by 0x40.
+				data[j] = static_cast<char>(next + 0x40);
+				++i;
+			} else {
+				std::stringstream stream;
+
+				// Cast to an integer type so std::hex applies; streaming a char would
+				// write the raw (possibly unprintable) byte into the log instead.
+				stream << "UTF8 code 0x" << std::hex << static_cast<unsigned int>(lead)
+					<< " 0x" << static_cast<unsigned int>(next)
+					<< " can not be converted into ISO-8859-1.";
+
+				DefaultLogger::get()->error(stream.str());
+
+				// Keep both bytes as they are; the loop epilogue advances past the second one.
+				data[j++] = data[i++];
+				data[j] = data[i];
+			}
+		} else {
+			// A non-ASCII byte at the very end of the input has no second byte to pair with.
+			DefaultLogger::get()->error("UTF8 code but only one character remaining");
+
+			data[j] = data[i];
+		}
+
+		i++; j++;
+	}
+
+	data.resize(j);
+}
+
 // ------------------------------------------------------------------------------------------------
 // ------------------------------------------------------------------------------------------------
 void BaseImporter::TextFileToBuffer(IOStream* stream,
 void BaseImporter::TextFileToBuffer(IOStream* stream,
 	std::vector<char>& data)
 	std::vector<char>& data)

+ 9 - 0
code/BaseImporter.h

@@ -331,6 +331,15 @@ public: // static utilities
 	static void ConvertToUTF8(
 	static void ConvertToUTF8(
 		std::vector<char>& data);
 		std::vector<char>& data);
 
 
+	// -------------------------------------------------------------------
+	/** A utility that converts a string from UTF8 back to ISO-8859-1,
+	 *   in place. Byte sequences that are not converted are reported
+	 *   through the logger and copied unchanged; errors are otherwise ignored.
+	 *
+	 *  @param data String to be converted from UTF8 to ISO-8859-1. It is
+	 *  modified in place and resized to the converted length. */
+	static void ConvertUTF8toISO8859_1(
+		std::string& data);
+
 	// -------------------------------------------------------------------
 	// -------------------------------------------------------------------
 	/** Utility for text file loaders which copies the contents of the
 	/** Utility for text file loaders which copies the contents of the
 	 *  file into a memory buffer and converts it to our UTF8
 	 *  file into a memory buffer and converts it to our UTF8