Bladeren bron

Merge pull request #63745 from akien-mga/3.x-file-get_as_text-skip-CR

[3.x] File: Re-add support to skip CR (`\r`) in `File::get_as_text`
Rémi Verschelde 3 jaar geleden
bovenliggende
commit
9d0d9ff424

+ 3 - 3
core/bind/core_bind.cpp

@@ -2088,13 +2088,13 @@ PoolVector<uint8_t> _File::get_buffer(int64_t p_length) const {
 	return data;
 	return data;
 }
 }
 
 
-String _File::get_as_text() const {
+String _File::get_as_text(bool p_skip_cr) const {
 	ERR_FAIL_COND_V_MSG(!f, String(), "File must be opened before use.");
 	ERR_FAIL_COND_V_MSG(!f, String(), "File must be opened before use.");
 
 
 	uint64_t original_pos = f->get_position();
 	uint64_t original_pos = f->get_position();
 	f->seek(0);
 	f->seek(0);
 
 
-	String text = f->get_as_utf8_string();
+	String text = f->get_as_utf8_string(p_skip_cr);
 
 
 	f->seek(original_pos);
 	f->seek(original_pos);
 
 
@@ -2286,7 +2286,7 @@ void _File::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_buffer", "len"), &_File::get_buffer);
 	ClassDB::bind_method(D_METHOD("get_buffer", "len"), &_File::get_buffer);
 	ClassDB::bind_method(D_METHOD("get_line"), &_File::get_line);
 	ClassDB::bind_method(D_METHOD("get_line"), &_File::get_line);
 	ClassDB::bind_method(D_METHOD("get_csv_line", "delim"), &_File::get_csv_line, DEFVAL(","));
 	ClassDB::bind_method(D_METHOD("get_csv_line", "delim"), &_File::get_csv_line, DEFVAL(","));
-	ClassDB::bind_method(D_METHOD("get_as_text"), &_File::get_as_text);
+	ClassDB::bind_method(D_METHOD("get_as_text", "skip_cr"), &_File::get_as_text, DEFVAL(true));
 	ClassDB::bind_method(D_METHOD("get_md5", "path"), &_File::get_md5);
 	ClassDB::bind_method(D_METHOD("get_md5", "path"), &_File::get_md5);
 	ClassDB::bind_method(D_METHOD("get_sha256", "path"), &_File::get_sha256);
 	ClassDB::bind_method(D_METHOD("get_sha256", "path"), &_File::get_sha256);
 	ClassDB::bind_method(D_METHOD("get_endian_swap"), &_File::get_endian_swap);
 	ClassDB::bind_method(D_METHOD("get_endian_swap"), &_File::get_endian_swap);

+ 1 - 1
core/bind/core_bind.h

@@ -551,7 +551,7 @@ public:
 	PoolVector<uint8_t> get_buffer(int64_t p_length) const; // Get an array of bytes.
 	PoolVector<uint8_t> get_buffer(int64_t p_length) const; // Get an array of bytes.
 	String get_line() const;
 	String get_line() const;
 	Vector<String> get_csv_line(const String &p_delim = ",") const;
 	Vector<String> get_csv_line(const String &p_delim = ",") const;
-	String get_as_text() const;
+	String get_as_text(bool p_skip_cr = true) const; // Skip CR by default for compat.
 	String get_md5(const String &p_path) const;
 	String get_md5(const String &p_path) const;
 	String get_sha256(const String &p_path) const;
 	String get_sha256(const String &p_path) const;
 
 

+ 2 - 2
core/os/file_access.cpp

@@ -378,7 +378,7 @@ uint64_t FileAccess::get_buffer(uint8_t *p_dst, uint64_t p_length) const {
 	return i;
 	return i;
 }
 }
 
 
-String FileAccess::get_as_utf8_string() const {
+String FileAccess::get_as_utf8_string(bool p_skip_cr) const {
 	PoolVector<uint8_t> sourcef;
 	PoolVector<uint8_t> sourcef;
 	uint64_t len = get_len();
 	uint64_t len = get_len();
 	sourcef.resize(len + 1);
 	sourcef.resize(len + 1);
@@ -389,7 +389,7 @@ String FileAccess::get_as_utf8_string() const {
 	w[len] = 0;
 	w[len] = 0;
 
 
 	String s;
 	String s;
-	if (s.parse_utf8((const char *)w.ptr())) {
+	if (s.parse_utf8((const char *)w.ptr(), -1, p_skip_cr)) {
 		return String();
 		return String();
 	}
 	}
 	return s;
 	return s;

+ 1 - 1
core/os/file_access.h

@@ -114,7 +114,7 @@ public:
 	virtual String get_line() const;
 	virtual String get_line() const;
 	virtual String get_token() const;
 	virtual String get_token() const;
 	virtual Vector<String> get_csv_line(const String &p_delim = ",") const;
 	virtual Vector<String> get_csv_line(const String &p_delim = ",") const;
-	virtual String get_as_utf8_string() const;
+	virtual String get_as_utf8_string(bool p_skip_cr = true) const; // Skip CR by default for compat.
 
 
 	/**< use this for files WRITTEN in _big_ endian machines (ie, amiga/mac)
 	/**< use this for files WRITTEN in _big_ endian machines (ie, amiga/mac)
 	 * It's not about the current CPU type but file formats.
 	 * It's not about the current CPU type but file formats.

+ 11 - 1
core/ustring.cpp

@@ -1430,7 +1430,7 @@ String String::utf8(const char *p_utf8, int p_len) {
 	return ret;
 	return ret;
 };
 };
 
 
-bool String::parse_utf8(const char *p_utf8, int p_len) {
+bool String::parse_utf8(const char *p_utf8, int p_len, bool p_skip_cr) {
 #define _UNICERROR(m_err) print_line("Unicode error: " + String(m_err));
 #define _UNICERROR(m_err) print_line("Unicode error: " + String(m_err));
 
 
 	if (!p_utf8) {
 	if (!p_utf8) {
@@ -1462,6 +1462,11 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 			if (skip == 0) {
 			if (skip == 0) {
 				uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
 				uint8_t c = *ptrtmp >= 0 ? *ptrtmp : uint8_t(256 + *ptrtmp);
 
 
+				if (p_skip_cr && c == '\r') {
+					ptrtmp++;
+					continue;
+				}
+
 				/* Determine the number of characters in sequence */
 				/* Determine the number of characters in sequence */
 				if ((c & 0x80) == 0) {
 				if ((c & 0x80) == 0) {
 					skip = 0;
 					skip = 0;
@@ -1519,6 +1524,11 @@ bool String::parse_utf8(const char *p_utf8, int p_len) {
 	while (cstr_size) {
 	while (cstr_size) {
 		int len = 0;
 		int len = 0;
 
 
+		if (p_skip_cr && *p_utf8 == '\r') {
+			p_utf8++;
+			continue;
+		}
+
 		/* Determine the number of characters in sequence */
 		/* Determine the number of characters in sequence */
 		if ((*p_utf8 & 0x80) == 0) {
 		if ((*p_utf8 & 0x80) == 0) {
 			len = 1;
 			len = 1;

+ 1 - 1
core/ustring.h

@@ -301,7 +301,7 @@ public:
 
 
 	CharString ascii(bool p_allow_extended = false) const;
 	CharString ascii(bool p_allow_extended = false) const;
 	CharString utf8() const;
 	CharString utf8() const;
-	bool parse_utf8(const char *p_utf8, int p_len = -1); //return true on error
+	bool parse_utf8(const char *p_utf8, int p_len = -1, bool p_skip_cr = false); //return true on error
 	static String utf8(const char *p_utf8, int p_len = -1);
 	static String utf8(const char *p_utf8, int p_len = -1);
 
 
 	static uint32_t hash(const CharType *p_cstr, int p_len); /* hash the string */
 	static uint32_t hash(const CharType *p_cstr, int p_len); /* hash the string */

+ 3 - 2
doc/classes/File.xml

@@ -87,9 +87,10 @@
 		</method>
 		</method>
 		<method name="get_as_text" qualifiers="const">
 		<method name="get_as_text" qualifiers="const">
 			<return type="String" />
 			<return type="String" />
+			<argument index="0" name="skip_cr" type="bool" default="true" />
 			<description>
 			<description>
-				Returns the whole file as a [String].
-				Text is interpreted as being UTF-8 encoded.
+				Returns the whole file as a [String]. Text is interpreted as being UTF-8 encoded.
+				If [code]skip_cr[/code] is [code]true[/code], carriage return characters ([code]\r[/code], CR) will be ignored when parsing the UTF-8, so that only line feed characters ([code]\n[/code], LF) represent a new line (Unix convention).
 			</description>
 			</description>
 		</method>
 		</method>
 		<method name="get_buffer" qualifiers="const">
 		<method name="get_buffer" qualifiers="const">

+ 4 - 2
platform/android/file_access_filesystem_jandroid.cpp

@@ -29,8 +29,10 @@
 /*************************************************************************/
 /*************************************************************************/
 
 
 #include "file_access_filesystem_jandroid.h"
 #include "file_access_filesystem_jandroid.h"
+
 #include "core/os/os.h"
 #include "core/os/os.h"
 #include "thread_jandroid.h"
 #include "thread_jandroid.h"
+
 #include <unistd.h>
 #include <unistd.h>
 
 
 jobject FileAccessFilesystemJAndroid::file_access_handler = nullptr;
 jobject FileAccessFilesystemJAndroid::file_access_handler = nullptr;
@@ -197,7 +199,7 @@ String FileAccessFilesystemJAndroid::get_line() const {
 			if (elem == '\n' || elem == '\0') {
 			if (elem == '\n' || elem == '\0') {
 				// Found the end of the line
 				// Found the end of the line
 				const_cast<FileAccessFilesystemJAndroid *>(this)->seek(start_position + line_buffer_position + 1);
 				const_cast<FileAccessFilesystemJAndroid *>(this)->seek(start_position + line_buffer_position + 1);
-				if (result.parse_utf8((const char *)line_buffer.ptr(), line_buffer_position)) {
+				if (result.parse_utf8((const char *)line_buffer.ptr(), line_buffer_position, true)) {
 					return String();
 					return String();
 				}
 				}
 				return result;
 				return result;
@@ -205,7 +207,7 @@ String FileAccessFilesystemJAndroid::get_line() const {
 		}
 		}
 	}
 	}
 
 
-	if (result.parse_utf8((const char *)line_buffer.ptr(), line_buffer_position)) {
+	if (result.parse_utf8((const char *)line_buffer.ptr(), line_buffer_position, true)) {
 		return String();
 		return String();
 	}
 	}
 	return result;
 	return result;