Browse Source

Improve documentation and simplify code for `File::get_csv_line()`

Also forbid using double quotes as a delimiter.

(cherry picked from commit b8c08ba5add1406783cec2333d6ad7011a29e01f)
Rémi Verschelde 4 years ago
parent
commit
b3c555504f
2 changed files with 28 additions and 20 deletions
  1. 19 18
      core/os/file_access.cpp
  2. 9 2
      doc/classes/File.xml

+ 19 - 18
core/os/file_access.cpp

@@ -315,52 +315,53 @@ String FileAccess::get_line() const {
 }
 
 Vector<String> FileAccess::get_csv_line(const String &p_delim) const {
-	ERR_FAIL_COND_V(p_delim.length() != 1, Vector<String>());
+	ERR_FAIL_COND_V_MSG(p_delim.length() != 1, Vector<String>(), "Only single character delimiters are supported to parse CSV lines.");
+	ERR_FAIL_COND_V_MSG(p_delim[0] == '"', Vector<String>(), "The double quotation mark character (\") is not supported as a delimiter for CSV lines.");
 
-	String l;
+	String line;
+
+	// CSV can support entries with line breaks as long as they are enclosed
+	// in double quotes. So our "line" might be more than a single line in the
+	// text file.
 	int qc = 0;
 	do {
 		if (eof_reached()) {
 			break;
 		}
-
-		l += get_line() + "\n";
+		line += get_line() + "\n";
 		qc = 0;
-		for (int i = 0; i < l.length(); i++) {
-			if (l[i] == '"') {
+		for (int i = 0; i < line.length(); i++) {
+			if (line[i] == '"') {
 				qc++;
 			}
 		}
-
 	} while (qc % 2);
 
-	l = l.substr(0, l.length() - 1);
+	// Remove the extraneous newline we've added above.
+	line = line.substr(0, line.length() - 1);
 
 	Vector<String> strings;
 
 	bool in_quote = false;
 	String current;
-	for (int i = 0; i < l.length(); i++) {
-		CharType c = l[i];
-		CharType s[2] = { 0, 0 };
-
+	for (int i = 0; i < line.length(); i++) {
+		CharType c = line[i];
+		// A delimiter ends the current entry, unless it's in a quoted string.
 		if (!in_quote && c == p_delim[0]) {
 			strings.push_back(current);
 			current = String();
 		} else if (c == '"') {
-			if (l[i + 1] == '"' && in_quote) {
-				s[0] = '"';
-				current += s;
+			// Doubled quotes are escapes for intentional quotes in the string.
+			if (line[i + 1] == '"' && in_quote) {
+				current += '"';
 				i++;
 			} else {
 				in_quote = !in_quote;
 			}
 		} else {
-			s[0] = c;
-			current += s;
+			current += c;
 		}
 	}
-
 	strings.push_back(current);
 
 	return strings;

+ 9 - 2
doc/classes/File.xml

@@ -99,8 +99,15 @@
 			<return type="PoolStringArray" />
 			<argument index="0" name="delim" type="String" default="&quot;,&quot;" />
 			<description>
-				Returns the next value of the file in CSV (Comma-Separated Values) format. You can pass a different delimiter [code]delim[/code] to use other than the default [code]","[/code] (comma). This delimiter must be one-character long.
-				Text is interpreted as being UTF-8 encoded.
+				Returns the next value of the file in CSV (Comma-Separated Values) format. You can pass a different delimiter [code]delim[/code] to use other than the default [code]","[/code] (comma). This delimiter must be one character long, and cannot be a double quotation mark.
+				Text is interpreted as being UTF-8 encoded. Text values must be enclosed in double quotes if they include the delimiter character. Double quotes within a text value can be escaped by doubling their occurrence.
+				For example, the following CSV lines are valid and will be properly parsed as two strings each:
+				[codeblock]
+				Alice,"Hello, Bob!"
+				Bob,Alice! What a surprise!
+				Alice,"I thought you'd reply with ""Hello, world""."
+				[/codeblock]
+				Note how the second line can omit the enclosing quotes as it does not include the delimiter. However, it [i]could[/i] very well use quotes; it was only written without them for demonstration purposes. The third line must use [code]""[/code] for each quotation mark that needs to be interpreted as such instead of the end of a text value.
 			</description>
 		</method>
 		<method name="get_double" qualifiers="const">