Browse Source

Merge pull request #51234 from akien-mga/tests-file-get_csv_line

Tests: Improve coverage for `File::get_csv_line()`
Rémi Verschelde 4 years ago
parent
commit
faad8833fe
4 changed files with 57 additions and 25 deletions
  1. 19 18
      core/io/file_access.cpp
  2. 9 2
      doc/classes/File.xml
  3. 5 0
      tests/data/translations.csv
  4. 24 5
      tests/test_file_access.h

+ 19 - 18
core/io/file_access.cpp

@@ -316,52 +316,53 @@ String FileAccess::get_line() const {
 }
 }
 
 
 Vector<String> FileAccess::get_csv_line(const String &p_delim) const {
 Vector<String> FileAccess::get_csv_line(const String &p_delim) const {
-	ERR_FAIL_COND_V(p_delim.length() != 1, Vector<String>());
+	ERR_FAIL_COND_V_MSG(p_delim.length() != 1, Vector<String>(), "Only single character delimiters are supported to parse CSV lines.");
+	ERR_FAIL_COND_V_MSG(p_delim[0] == '"', Vector<String>(), "The double quotation mark character (\") is not supported as a delimiter for CSV lines.");
 
 
-	String l;
+	String line;
+
+	// CSV can support entries with line breaks as long as they are enclosed
+	// in double quotes. So our "line" might be more than a single line in the
+	// text file.
 	int qc = 0;
 	int qc = 0;
 	do {
 	do {
 		if (eof_reached()) {
 		if (eof_reached()) {
 			break;
 			break;
 		}
 		}
-
-		l += get_line() + "\n";
+		line += get_line() + "\n";
 		qc = 0;
 		qc = 0;
-		for (int i = 0; i < l.length(); i++) {
-			if (l[i] == '"') {
+		for (int i = 0; i < line.length(); i++) {
+			if (line[i] == '"') {
 				qc++;
 				qc++;
 			}
 			}
 		}
 		}
-
 	} while (qc % 2);
 	} while (qc % 2);
 
 
-	l = l.substr(0, l.length() - 1);
+	// Remove the extraneous newline we've added above.
+	line = line.substr(0, line.length() - 1);
 
 
 	Vector<String> strings;
 	Vector<String> strings;
 
 
 	bool in_quote = false;
 	bool in_quote = false;
 	String current;
 	String current;
-	for (int i = 0; i < l.length(); i++) {
-		char32_t c = l[i];
-		char32_t s[2] = { 0, 0 };
-
+	for (int i = 0; i < line.length(); i++) {
+		char32_t c = line[i];
+		// A delimiter ends the current entry, unless it's in a quoted string.
 		if (!in_quote && c == p_delim[0]) {
 		if (!in_quote && c == p_delim[0]) {
 			strings.push_back(current);
 			strings.push_back(current);
 			current = String();
 			current = String();
 		} else if (c == '"') {
 		} else if (c == '"') {
-			if (l[i + 1] == '"' && in_quote) {
-				s[0] = '"';
-				current += s;
+			// Doubled quotes are escapes for intentional quotes in the string.
+			if (line[i + 1] == '"' && in_quote) {
+				current += '"';
 				i++;
 				i++;
 			} else {
 			} else {
 				in_quote = !in_quote;
 				in_quote = !in_quote;
 			}
 			}
 		} else {
 		} else {
-			s[0] = c;
-			current += s;
+			current += c;
 		}
 		}
 	}
 	}
-
 	strings.push_back(current);
 	strings.push_back(current);
 
 
 	return strings;
 	return strings;

+ 9 - 2
doc/classes/File.xml

@@ -119,8 +119,15 @@
 			<return type="PackedStringArray" />
 			<return type="PackedStringArray" />
 			<argument index="0" name="delim" type="String" default="&quot;,&quot;" />
 			<argument index="0" name="delim" type="String" default="&quot;,&quot;" />
 			<description>
 			<description>
-				Returns the next value of the file in CSV (Comma-Separated Values) format. You can pass a different delimiter [code]delim[/code] to use other than the default [code]","[/code] (comma). This delimiter must be one-character long.
-				Text is interpreted as being UTF-8 encoded.
+				Returns the next value of the file in CSV (Comma-Separated Values) format. You can pass a different delimiter [code]delim[/code] to use other than the default [code]","[/code] (comma). This delimiter must be one-character long, and cannot be a double quotation mark.
+				Text is interpreted as being UTF-8 encoded. Text values must be enclosed in double quotes if they include the delimiter character. Double quotes within a text value can be escaped by doubling their occurrence.
+				For example, the following CSV lines are valid and will be properly parsed as two strings each:
+				[codeblock]
+				Alice,"Hello, Bob!"
+				Bob,Alice! What a surprise!
+				Alice,"I thought you'd reply with ""Hello, world""."
+				[/codeblock]
+				Note how the second line can omit the enclosing quotes as it does not include the delimiter. However it [i]could[/i] very well use quotes, it was only written without for demonstration purposes. The third line must use [code]""[/code] for each quotation mark that needs to be interpreted as such instead of the end of a text value.
 			</description>
 			</description>
 		</method>
 		</method>
 		<method name="get_double" qualifiers="const">
 		<method name="get_double" qualifiers="const">

+ 5 - 0
tests/data/translations.csv

@@ -1,3 +1,8 @@
 keys,en,de
 keys,en,de
 GOOD_MORNING,"Good Morning","Guten Morgen"
 GOOD_MORNING,"Good Morning","Guten Morgen"
 GOOD_EVENING,"Good Evening",""
 GOOD_EVENING,"Good Evening",""
+Without quotes,"With, comma","With ""inner"" quotes","With ""inner"", quotes"","" and comma","With ""inner
+split"" quotes and
+line breaks","With \nnewline chars"
+Some other~delimiter~should still work, shouldn't it?
+What about	tab separated	lines, good?

+ 24 - 5
tests/test_file_access.h

@@ -37,12 +37,12 @@
 namespace TestFileAccess {
 namespace TestFileAccess {
 
 
 TEST_CASE("[FileAccess] CSV read") {
 TEST_CASE("[FileAccess] CSV read") {
-	FileAccess *f = FileAccess::open(TestUtils::get_data_path("translations.csv"), FileAccess::READ);
+	FileAccessRef f = FileAccess::open(TestUtils::get_data_path("translations.csv"), FileAccess::READ);
 
 
-	Vector<String> header = f->get_csv_line(); // Default delimiter: ","
+	Vector<String> header = f->get_csv_line(); // Default delimiter: ",".
 	REQUIRE(header.size() == 3);
 	REQUIRE(header.size() == 3);
 
 
-	Vector<String> row1 = f->get_csv_line(",");
+	Vector<String> row1 = f->get_csv_line(","); // Explicit delimiter, should be the same.
 	REQUIRE(row1.size() == 3);
 	REQUIRE(row1.size() == 3);
 	CHECK(row1[0] == "GOOD_MORNING");
 	CHECK(row1[0] == "GOOD_MORNING");
 	CHECK(row1[1] == "Good Morning");
 	CHECK(row1[1] == "Good Morning");
@@ -53,12 +53,31 @@ TEST_CASE("[FileAccess] CSV read") {
 	CHECK(row2[0] == "GOOD_EVENING");
 	CHECK(row2[0] == "GOOD_EVENING");
 	CHECK(row2[1] == "Good Evening");
 	CHECK(row2[1] == "Good Evening");
 	CHECK(row2[2] == ""); // Use case: not yet translated!
 	CHECK(row2[2] == ""); // Use case: not yet translated!
-
 	// https://github.com/godotengine/godot/issues/44269
 	// https://github.com/godotengine/godot/issues/44269
 	CHECK_MESSAGE(row2[2] != "\"", "Should not parse empty string as a single double quote.");
 	CHECK_MESSAGE(row2[2] != "\"", "Should not parse empty string as a single double quote.");
 
 
+	Vector<String> row3 = f->get_csv_line();
+	REQUIRE(row3.size() == 6);
+	CHECK(row3[0] == "Without quotes");
+	CHECK(row3[1] == "With, comma");
+	CHECK(row3[2] == "With \"inner\" quotes");
+	CHECK(row3[3] == "With \"inner\", quotes\",\" and comma");
+	CHECK(row3[4] == "With \"inner\nsplit\" quotes and\nline breaks");
+	CHECK(row3[5] == "With \\nnewline chars"); // Escaped, not an actual newline.
+
+	Vector<String> row4 = f->get_csv_line("~"); // Custom delimiter, makes inline commas easier.
+	REQUIRE(row4.size() == 3);
+	CHECK(row4[0] == "Some other");
+	CHECK(row4[1] == "delimiter");
+	CHECK(row4[2] == "should still work, shouldn't it?");
+
+	Vector<String> row5 = f->get_csv_line("\t"); // Tab separated variables.
+	REQUIRE(row5.size() == 3);
+	CHECK(row5[0] == "What about");
+	CHECK(row5[1] == "tab separated");
+	CHECK(row5[2] == "lines, good?");
+
 	f->close();
 	f->close();
-	memdelete(f);
 }
 }
 } // namespace TestFileAccess
 } // namespace TestFileAccess