Browse Source

Add methods to decode/encode multibyte encodings.

bruvzg 11 months ago
parent
commit
48bfe13e4f

+ 8 - 0
core/os/os.cpp

@@ -199,6 +199,14 @@ void OS::set_stderr_enabled(bool p_enabled) {
 	_stderr_enabled = p_enabled;
 }
 
+String OS::multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const {
+	return String();
+}
+
+PackedByteArray OS::string_to_multibyte(const String &p_encoding, const String &p_string) const {
+	return PackedByteArray();
+}
+
 int OS::get_exit_code() const {
 	return _exit_code;
 }

+ 3 - 0
core/os/os.h

@@ -265,6 +265,9 @@ public:
 	virtual void set_crash_handler_silent() { _silent_crash_handler = true; }
 	virtual bool is_crash_handler_silent() { return _silent_crash_handler; }
 
+	virtual String multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const;
+	virtual PackedByteArray string_to_multibyte(const String &p_encoding, const String &p_string) const;
+
 	virtual void disable_crash_handler() {}
 	virtual bool is_disable_crash_handler() const { return false; }
 	virtual void initialize_debugging() {}

+ 6 - 0
core/string/ustring.cpp

@@ -34,6 +34,8 @@
 #include "core/math/color.h"
 #include "core/math/math_funcs.h"
 #include "core/object/object.h"
+#include "core/os/memory.h"
+#include "core/os/os.h"
 #include "core/string/print_string.h"
 #include "core/string/string_name.h"
 #include "core/string/translation_server.h"
@@ -5987,6 +5989,10 @@ Vector<uint8_t> String::to_wchar_buffer() const {
 #endif
 }
 
+Vector<uint8_t> String::to_multibyte_char_buffer(const String &p_encoding) const {
+	return OS::get_singleton()->string_to_multibyte(p_encoding, *this);
+}
+
 #ifdef TOOLS_ENABLED
 /**
  * "Tools TRanslate". Performs string replacement for internationalization

+ 1 - 0
core/string/ustring.h

@@ -618,6 +618,7 @@ public:
 	Vector<uint8_t> to_utf16_buffer() const;
 	Vector<uint8_t> to_utf32_buffer() const;
 	Vector<uint8_t> to_wchar_buffer() const;
+	Vector<uint8_t> to_multibyte_char_buffer(const String &p_encoding = String()) const;
 
 	// Constructors for NULL terminated C strings.
 	String(const char *p_cstr) {

+ 11 - 1
core/variant/variant_call.cpp

@@ -734,6 +734,14 @@ struct _VariantCall {
 		return s;
 	}
 
+	static String func_PackedByteArray_get_string_from_multibyte_char(PackedByteArray *p_instance, const String &p_encoding) {
+		String s;
+		if (p_instance->size() > 0) {
+			s = OS::get_singleton()->multibyte_to_string(p_encoding, *p_instance);
+		}
+		return s;
+	}
+
 	static PackedByteArray func_PackedByteArray_compress(PackedByteArray *p_instance, int p_mode) {
 		PackedByteArray compressed;
 
@@ -1815,8 +1823,9 @@ static void _register_variant_builtin_methods_string() {
 	bind_string_method(to_utf8_buffer, sarray(), varray());
 	bind_string_method(to_utf16_buffer, sarray(), varray());
 	bind_string_method(to_utf32_buffer, sarray(), varray());
-	bind_string_method(hex_decode, sarray(), varray());
 	bind_string_method(to_wchar_buffer, sarray(), varray());
+	bind_string_method(to_multibyte_char_buffer, sarray("encoding"), varray(String()));
+	bind_string_method(hex_decode, sarray(), varray());
 
 	bind_static_method(String, num_scientific, sarray("number"), varray());
 	bind_static_method(String, num, sarray("number", "decimals"), varray(-1));
@@ -2458,6 +2467,7 @@ static void _register_variant_builtin_methods_array() {
 	bind_function(PackedByteArray, get_string_from_utf16, _VariantCall::func_PackedByteArray_get_string_from_utf16, sarray(), varray());
 	bind_function(PackedByteArray, get_string_from_utf32, _VariantCall::func_PackedByteArray_get_string_from_utf32, sarray(), varray());
 	bind_function(PackedByteArray, get_string_from_wchar, _VariantCall::func_PackedByteArray_get_string_from_wchar, sarray(), varray());
+	bind_function(PackedByteArray, get_string_from_multibyte_char, _VariantCall::func_PackedByteArray_get_string_from_multibyte_char, sarray("encoding"), varray(String()));
 	bind_function(PackedByteArray, hex_encode, _VariantCall::func_PackedByteArray_hex_encode, sarray(), varray());
 	bind_function(PackedByteArray, compress, _VariantCall::func_PackedByteArray_compress, sarray("compression_mode"), varray(0));
 	bind_function(PackedByteArray, decompress, _VariantCall::func_PackedByteArray_decompress, sarray("buffer_size", "compression_mode"), varray(0));

+ 10 - 0
doc/classes/PackedByteArray.xml

@@ -327,6 +327,16 @@
 				Converts ASCII/Latin-1 encoded array to [String]. Fast alternative to [method get_string_from_utf8] if the content is ASCII/Latin-1 only. Unlike the UTF-8 function this function maps every byte to a character in the array. Multibyte sequences will not be interpreted correctly. For parsing user input always use [method get_string_from_utf8]. This is the inverse of [method String.to_ascii_buffer].
 			</description>
 		</method>
+		<method name="get_string_from_multibyte_char" qualifiers="const">
+			<return type="String" />
+			<param index="0" name="encoding" type="String" default="&quot;&quot;" />
+			<description>
+				Converts system multibyte code page encoded array to [String]. If conversion fails, empty string is returned. This is the inverse of [method String.to_multibyte_char_buffer].
+				The values permitted for [param encoding] are system dependent. If [param encoding] is empty string, system default encoding is used.
+				- For Windows, see [url=https://learn.microsoft.com/en-us/windows/win32/Intl/code-page-identifiers]Code Page Identifiers[/url] .NET names.
+				- For macOS and Linux/BSD, see [code]libiconv[/code] library documentation and [code]iconv --list[/code] for a list of supported encodings.
+			</description>
+		</method>
 		<method name="get_string_from_utf8" qualifiers="const">
 			<return type="String" />
 			<description>

+ 10 - 0
doc/classes/String.xml

@@ -1030,6 +1030,16 @@
 				Returns the string converted to [code]lowercase[/code].
 			</description>
 		</method>
+		<method name="to_multibyte_char_buffer" qualifiers="const">
+			<return type="PackedByteArray" />
+			<param index="0" name="encoding" type="String" default="&quot;&quot;" />
+			<description>
+				Converts the string to system multibyte code page encoded [PackedByteArray]. If conversion fails, empty array is returned.
+				The values permitted for [param encoding] are system dependent. If [param encoding] is empty string, system default encoding is used.
+				- For Windows, see [url=https://learn.microsoft.com/en-us/windows/win32/Intl/code-page-identifiers]Code Page Identifiers[/url] .NET names.
+				- For macOS and Linux/BSD, see [code]libiconv[/code] library documentation and [code]iconv --list[/code] for a list of supported encodings.
+			</description>
+		</method>
 		<method name="to_pascal_case" qualifiers="const">
 			<return type="String" />
 			<description>

+ 10 - 0
doc/classes/StringName.xml

@@ -938,6 +938,16 @@
 				Returns the string converted to [code]lowercase[/code].
 			</description>
 		</method>
+		<method name="to_multibyte_char_buffer" qualifiers="const">
+			<return type="PackedByteArray" />
+			<param index="0" name="encoding" type="String" default="&quot;&quot;" />
+			<description>
+				Converts the string to system multibyte code page encoded [PackedByteArray]. If conversion fails, empty array is returned.
+				The values permitted for [param encoding] are system dependent. If [param encoding] is empty string, system default encoding is used.
+				- For Windows, see [url=https://learn.microsoft.com/en-us/windows/win32/Intl/code-page-identifiers]Code Page Identifiers[/url] .NET names.
+				- For macOS and Linux/BSD, see [code]libiconv[/code] library documentation and [code]iconv --list[/code] for a list of supported encodings.
+			</description>
+		</method>
 		<method name="to_pascal_case" qualifiers="const">
 			<return type="String" />
 			<description>

+ 124 - 0
drivers/unix/os_unix.cpp

@@ -580,6 +580,126 @@ Dictionary OS_Unix::get_memory_info() const {
 	return meminfo;
 }
 
+#ifndef __GLIBC__
+void OS_Unix::_load_iconv() {
+#if defined(MACOS_ENABLED) || defined(IOS_ENABLED)
+	String iconv_lib_aliases[] = { "/usr/lib/libiconv.2.dylib" };
+	String iconv_func_aliases[] = { "iconv" };
+	String charset_lib_aliases[] = { "/usr/lib/libcharset.1.dylib" };
+#else
+	String iconv_lib_aliases[] = { "", "libiconv.2.so", "libiconv.so" };
+	String iconv_func_aliases[] = { "libiconv", "iconv", "bsd_iconv", "rpl_iconv" };
+	String charset_lib_aliases[] = { "", "libcharset.1.so", "libcharset.so" };
+#endif
+
+	for (size_t i = 0; i < sizeof(iconv_lib_aliases) / sizeof(iconv_lib_aliases[0]); i++) {
+		void *iconv_lib = iconv_lib_aliases[i].is_empty() ? RTLD_NEXT : dlopen(iconv_lib_aliases[i].utf8().get_data(), RTLD_NOW);
+		if (iconv_lib) {
+			for (size_t j = 0; j < sizeof(iconv_func_aliases) / sizeof(iconv_func_aliases[0]); j++) {
+				gd_iconv_open = (PIConvOpen)dlsym(iconv_lib, (iconv_func_aliases[j] + "_open").utf8().get_data());
+				gd_iconv = (PIConv)dlsym(iconv_lib, (iconv_func_aliases[j]).utf8().get_data());
+				gd_iconv_close = (PIConvClose)dlsym(iconv_lib, (iconv_func_aliases[j] + "_close").utf8().get_data());
+				if (gd_iconv_open && gd_iconv && gd_iconv_close) {
+					break;
+				}
+			}
+			if (gd_iconv_open && gd_iconv && gd_iconv_close) {
+				break;
+			}
+			if (!iconv_lib_aliases[i].is_empty()) {
+				dlclose(iconv_lib);
+			}
+		}
+	}
+
+	for (size_t i = 0; i < sizeof(charset_lib_aliases) / sizeof(charset_lib_aliases[0]); i++) {
+		void *cs_lib = charset_lib_aliases[i].is_empty() ? RTLD_NEXT : dlopen(charset_lib_aliases[i].utf8().get_data(), RTLD_NOW);
+		if (cs_lib) {
+			gd_locale_charset = (PIConvLocaleCharset)dlsym(cs_lib, "locale_charset");
+			if (gd_locale_charset) {
+				break;
+			}
+			if (!charset_lib_aliases[i].is_empty()) {
+				dlclose(cs_lib);
+			}
+		}
+	}
+	_iconv_ok = gd_iconv_open && gd_iconv && gd_iconv_close && gd_locale_charset;
+}
+#endif
+
+String OS_Unix::multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const {
+	ERR_FAIL_COND_V_MSG(!_iconv_ok, String(), "Conversion failed: Unable to load libiconv");
+
+	LocalVector<char> chars;
+#ifdef __GLIBC__
+	gd_iconv_t ctx = gd_iconv_open("UTF-8", p_encoding.is_empty() ? nl_langinfo(CODESET) : p_encoding.utf8().get_data());
+#else
+	gd_iconv_t ctx = gd_iconv_open("UTF-8", p_encoding.is_empty() ? gd_locale_charset() : p_encoding.utf8().get_data());
+#endif
+	ERR_FAIL_COND_V_MSG(ctx == (gd_iconv_t)(-1), String(), "Conversion failed: Unknown encoding");
+
+	char *in_ptr = (char *)p_array.ptr();
+	size_t in_size = p_array.size();
+
+	chars.resize(in_size);
+	char *out_ptr = (char *)chars.ptr();
+	size_t out_size = chars.size();
+
+	while (gd_iconv(ctx, &in_ptr, &in_size, &out_ptr, &out_size) == (size_t)-1) {
+		if (errno != E2BIG) {
+			gd_iconv_close(ctx);
+			ERR_FAIL_V_MSG(String(), vformat("Conversion failed: %d - %s", errno, strerror(errno)));
+		}
+		int64_t rate = (chars.size()) / (p_array.size() - in_size);
+		size_t oldpos = chars.size() - out_size;
+		chars.resize(chars.size() + in_size * rate);
+		out_ptr = (char *)chars.ptr() + oldpos;
+		out_size = chars.size() - oldpos;
+	}
+	chars.resize(chars.size() - out_size);
+	gd_iconv_close(ctx);
+
+	return String::utf8((const char *)chars.ptr(), chars.size());
+}
+
+PackedByteArray OS_Unix::string_to_multibyte(const String &p_encoding, const String &p_string) const {
+	ERR_FAIL_COND_V_MSG(!_iconv_ok, PackedByteArray(), "Conversion failed: Unable to load libiconv");
+
+	CharString charstr = p_string.utf8();
+
+	PackedByteArray ret;
+#ifdef __GLIBC__
+	gd_iconv_t ctx = gd_iconv_open(p_encoding.is_empty() ? nl_langinfo(CODESET) : p_encoding.utf8().get_data(), "UTF-8");
+#else
+	gd_iconv_t ctx = gd_iconv_open(p_encoding.is_empty() ? gd_locale_charset() : p_encoding.utf8().get_data(), "UTF-8");
+#endif
+	ERR_FAIL_COND_V_MSG(ctx == (gd_iconv_t)(-1), PackedByteArray(), "Conversion failed: Unknown encoding");
+
+	char *in_ptr = (char *)charstr.ptr();
+	size_t in_size = charstr.size();
+
+	ret.resize(in_size);
+	char *out_ptr = (char *)ret.ptrw();
+	size_t out_size = ret.size();
+
+	while (gd_iconv(ctx, &in_ptr, &in_size, &out_ptr, &out_size) == (size_t)-1) {
+		if (errno != E2BIG) {
+			gd_iconv_close(ctx);
+			ERR_FAIL_V_MSG(PackedByteArray(), vformat("Conversion failed: %d - %s", errno, strerror(errno)));
+		}
+		int64_t rate = (ret.size()) / (charstr.size() - in_size);
+		size_t oldpos = ret.size() - out_size;
+		ret.resize(ret.size() + in_size * rate);
+		out_ptr = (char *)ret.ptrw() + oldpos;
+		out_size = ret.size() - oldpos;
+	}
+	ret.resize(ret.size() - out_size);
+	gd_iconv_close(ctx);
+
+	return ret;
+}
+
 Dictionary OS_Unix::execute_with_pipe(const String &p_path, const List<String> &p_arguments, bool p_blocking) {
 #define CLEAN_PIPES           \
 	if (pipe_in[0] >= 0) {    \
@@ -1082,6 +1202,10 @@ void UnixTerminalLogger::log_error(const char *p_function, const char *p_file, i
 UnixTerminalLogger::~UnixTerminalLogger() {}
 
 OS_Unix::OS_Unix() {
+#ifndef __GLIBC__
+	_load_iconv();
+#endif
+
 	Vector<Logger *> loggers;
 	loggers.push_back(memnew(UnixTerminalLogger));
 	_set_logger(memnew(CompositeLogger(loggers)));

+ 31 - 0
drivers/unix/os_unix.h

@@ -35,6 +35,21 @@
 #include "core/os/os.h"
 #include "drivers/unix/ip_unix.h"
 
+#ifdef __GLIBC__
+#include <iconv.h>
+#include <langinfo.h>
+#define gd_iconv_t iconv_t
+#define gd_iconv_open iconv_open
+#define gd_iconv iconv
+#define gd_iconv_close iconv_close
+#else
+typedef void *gd_iconv_t;
+typedef gd_iconv_t (*PIConvOpen)(const char *, const char *);
+typedef size_t (*PIConv)(gd_iconv_t, char **, size_t *, char **, size_t *);
+typedef int (*PIConvClose)(gd_iconv_t);
+typedef const char *(*PIConvLocaleCharset)(void);
+#endif
+
 class OS_Unix : public OS {
 	struct ProcessInfo {
 		mutable bool is_running = true;
@@ -43,6 +58,19 @@ class OS_Unix : public OS {
 	HashMap<ProcessID, ProcessInfo> *process_map = nullptr;
 	Mutex process_map_mutex;
 
+#ifdef __GLIBC__
+	bool _iconv_ok = true;
+#else
+	bool _iconv_ok = false;
+
+	PIConvOpen gd_iconv_open = nullptr;
+	PIConv gd_iconv = nullptr;
+	PIConvClose gd_iconv_close = nullptr;
+	PIConvLocaleCharset gd_locale_charset = nullptr;
+
+	void _load_iconv();
+#endif
+
 protected:
 	// UNIX only handles the core functions.
 	// inheriting platforms under unix (eg. X11) should handle the rest
@@ -87,6 +115,9 @@ public:
 
 	virtual Dictionary get_memory_info() const override;
 
+	virtual String multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const override;
+	virtual PackedByteArray string_to_multibyte(const String &p_encoding, const String &p_string) const override;
+
 	virtual Error execute(const String &p_path, const List<String> &p_arguments, String *r_pipe = nullptr, int *r_exitcode = nullptr, bool read_stderr = false, Mutex *p_pipe_mutex = nullptr, bool p_open_console = false) override;
 	virtual Dictionary execute_with_pipe(const String &p_path, const List<String> &p_arguments, bool p_blocking = true) override;
 	virtual Error create_process(const String &p_path, const List<String> &p_arguments, ProcessID *r_child_id = nullptr, bool p_open_console = false) override;

+ 200 - 0
platform/windows/os_windows.cpp

@@ -973,6 +973,204 @@ static void _append_to_pipe(char *p_bytes, int p_size, String *r_pipe, Mutex *p_
 	}
 }
 
+void OS_Windows::_init_encodings() {
+	encodings[""] = 0;
+	encodings["CP_ACP"] = 0;
+	encodings["CP_OEMCP"] = 1;
+	encodings["CP_MACCP"] = 2;
+	encodings["CP_THREAD_ACP"] = 3;
+	encodings["CP_SYMBOL"] = 42;
+	encodings["IBM037"] = 37;
+	encodings["IBM437"] = 437;
+	encodings["IBM500"] = 500;
+	encodings["ASMO-708"] = 708;
+	encodings["ASMO-449"] = 709;
+	encodings["DOS-710"] = 710;
+	encodings["DOS-720"] = 720;
+	encodings["IBM737"] = 737;
+	encodings["IBM775"] = 775;
+	encodings["IBM850"] = 850;
+	encodings["IBM852"] = 852;
+	encodings["IBM855"] = 855;
+	encodings["IBM857"] = 857;
+	encodings["IBM00858"] = 858;
+	encodings["IBM860"] = 860;
+	encodings["IBM861"] = 861;
+	encodings["DOS-862"] = 862;
+	encodings["IBM863"] = 863;
+	encodings["IBM864"] = 864;
+	encodings["IBM865"] = 865;
+	encodings["CP866"] = 866;
+	encodings["IBM869"] = 869;
+	encodings["IBM870"] = 870;
+	encodings["WINDOWS-874"] = 874;
+	encodings["CP875"] = 875;
+	encodings["SHIFT_JIS"] = 932;
+	encodings["GB2312"] = 936;
+	encodings["KS_C_5601-1987"] = 949;
+	encodings["BIG5"] = 950;
+	encodings["IBM1026"] = 1026;
+	encodings["IBM01047"] = 1047;
+	encodings["IBM01140"] = 1140;
+	encodings["IBM01141"] = 1141;
+	encodings["IBM01142"] = 1142;
+	encodings["IBM01143"] = 1143;
+	encodings["IBM01144"] = 1144;
+	encodings["IBM01145"] = 1145;
+	encodings["IBM01146"] = 1146;
+	encodings["IBM01147"] = 1147;
+	encodings["IBM01148"] = 1148;
+	encodings["IBM01149"] = 1149;
+	encodings["UTF-16"] = 1200;
+	encodings["UNICODEFFFE"] = 1201;
+	encodings["WINDOWS-1250"] = 1250;
+	encodings["WINDOWS-1251"] = 1251;
+	encodings["WINDOWS-1252"] = 1252;
+	encodings["WINDOWS-1253"] = 1253;
+	encodings["WINDOWS-1254"] = 1254;
+	encodings["WINDOWS-1255"] = 1255;
+	encodings["WINDOWS-1256"] = 1256;
+	encodings["WINDOWS-1257"] = 1257;
+	encodings["WINDOWS-1258"] = 1258;
+	encodings["JOHAB"] = 1361;
+	encodings["MACINTOSH"] = 10000;
+	encodings["X-MAC-JAPANESE"] = 10001;
+	encodings["X-MAC-CHINESETRAD"] = 10002;
+	encodings["X-MAC-KOREAN"] = 10003;
+	encodings["X-MAC-ARABIC"] = 10004;
+	encodings["X-MAC-HEBREW"] = 10005;
+	encodings["X-MAC-GREEK"] = 10006;
+	encodings["X-MAC-CYRILLIC"] = 10007;
+	encodings["X-MAC-CHINESESIMP"] = 10008;
+	encodings["X-MAC-ROMANIAN"] = 10010;
+	encodings["X-MAC-UKRAINIAN"] = 10017;
+	encodings["X-MAC-THAI"] = 10021;
+	encodings["X-MAC-CE"] = 10029;
+	encodings["X-MAC-ICELANDIC"] = 10079;
+	encodings["X-MAC-TURKISH"] = 10081;
+	encodings["X-MAC-CROATIAN"] = 10082;
+	encodings["UTF-32"] = 12000;
+	encodings["UTF-32BE"] = 12001;
+	encodings["X-CHINESE_CNS"] = 20000;
+	encodings["X-CP20001"] = 20001;
+	encodings["X_CHINESE-ETEN"] = 20002;
+	encodings["X-CP20003"] = 20003;
+	encodings["X-CP20004"] = 20004;
+	encodings["X-CP20005"] = 20005;
+	encodings["X-IA5"] = 20105;
+	encodings["X-IA5-GERMAN"] = 20106;
+	encodings["X-IA5-SWEDISH"] = 20107;
+	encodings["X-IA5-NORWEGIAN"] = 20108;
+	encodings["US-ASCII"] = 20127;
+	encodings["X-CP20261"] = 20261;
+	encodings["X-CP20269"] = 20269;
+	encodings["IBM273"] = 20273;
+	encodings["IBM277"] = 20277;
+	encodings["IBM278"] = 20278;
+	encodings["IBM280"] = 20280;
+	encodings["IBM284"] = 20284;
+	encodings["IBM285"] = 20285;
+	encodings["IBM290"] = 20290;
+	encodings["IBM297"] = 20297;
+	encodings["IBM420"] = 20420;
+	encodings["IBM423"] = 20423;
+	encodings["IBM424"] = 20424;
+	encodings["X-EBCDIC-KOREANEXTENDED"] = 20833;
+	encodings["IBM-THAI"] = 20838;
+	encodings["KOI8-R"] = 20866;
+	encodings["IBM871"] = 20871;
+	encodings["IBM880"] = 20880;
+	encodings["IBM905"] = 20905;
+	encodings["IBM00924"] = 20924;
+	encodings["EUC-JP"] = 20932;
+	encodings["X-CP20936"] = 20936;
+	encodings["X-CP20949"] = 20949;
+	encodings["CP1025"] = 21025;
+	encodings["KOI8-U"] = 21866;
+	encodings["ISO-8859-1"] = 28591;
+	encodings["ISO-8859-2"] = 28592;
+	encodings["ISO-8859-3"] = 28593;
+	encodings["ISO-8859-4"] = 28594;
+	encodings["ISO-8859-5"] = 28595;
+	encodings["ISO-8859-6"] = 28596;
+	encodings["ISO-8859-7"] = 28597;
+	encodings["ISO-8859-8"] = 28598;
+	encodings["ISO-8859-9"] = 28599;
+	encodings["ISO-8859-13"] = 28603;
+	encodings["ISO-8859-15"] = 28605;
+	encodings["X-EUROPA"] = 29001;
+	encodings["ISO-8859-8-I"] = 38598;
+	encodings["ISO-2022-JP"] = 50220;
+	encodings["CSISO2022JP"] = 50221;
+	encodings["ISO-2022-JP"] = 50222;
+	encodings["ISO-2022-KR"] = 50225;
+	encodings["X-CP50227"] = 50227;
+	encodings["EBCDIC-JP"] = 50930;
+	encodings["EBCDIC-US-JP"] = 50931;
+	encodings["EBCDIC-KR"] = 50933;
+	encodings["EBCDIC-CN-eXT"] = 50935;
+	encodings["EBCDIC-CN"] = 50936;
+	encodings["EBCDIC-US-CN"] = 50937;
+	encodings["EBCDIC-JP-EXT"] = 50939;
+	encodings["EUC-JP"] = 51932;
+	encodings["EUC-CN"] = 51936;
+	encodings["EUC-KR"] = 51949;
+	encodings["HZ-GB-2312"] = 52936;
+	encodings["GB18030"] = 54936;
+	encodings["X-ISCII-DE"] = 57002;
+	encodings["X-ISCII-BE"] = 57003;
+	encodings["X-ISCII-TA"] = 57004;
+	encodings["X-ISCII-TE"] = 57005;
+	encodings["X-ISCII-AS"] = 57006;
+	encodings["X-ISCII-OR"] = 57007;
+	encodings["X-ISCII-KA"] = 57008;
+	encodings["X-ISCII-MA"] = 57009;
+	encodings["X-ISCII-GU"] = 57010;
+	encodings["X-ISCII-PA"] = 57011;
+	encodings["UTF-7"] = 65000;
+	encodings["UTF-8"] = 65001;
+}
+
+String OS_Windows::multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const {
+	const int *encoding = encodings.getptr(p_encoding.to_upper());
+	ERR_FAIL_NULL_V_MSG(encoding, String(), "Conversion failed: Unknown encoding");
+
+	LocalVector<wchar_t> wchars;
+	int total_wchars = MultiByteToWideChar(*encoding, 0, (const char *)p_array.ptr(), p_array.size(), nullptr, 0);
+	if (total_wchars == 0) {
+		DWORD err_code = GetLastError();
+		ERR_FAIL_V_MSG(String(), vformat("Conversion failed: %s", format_error_message(err_code)));
+	}
+	wchars.resize(total_wchars);
+	if (MultiByteToWideChar(*encoding, 0, (const char *)p_array.ptr(), p_array.size(), wchars.ptr(), total_wchars) == 0) {
+		DWORD err_code = GetLastError();
+		ERR_FAIL_V_MSG(String(), vformat("Conversion failed: %s", format_error_message(err_code)));
+	}
+
+	return String::utf16((const char16_t *)wchars.ptr(), wchars.size());
+}
+
+PackedByteArray OS_Windows::string_to_multibyte(const String &p_encoding, const String &p_string) const {
+	const int *encoding = encodings.getptr(p_encoding.to_upper());
+	ERR_FAIL_NULL_V_MSG(encoding, PackedByteArray(), "Conversion failed: Unknown encoding");
+
+	Char16String charstr = p_string.utf16();
+	PackedByteArray ret;
+	int total_mbchars = WideCharToMultiByte(*encoding, 0, (const wchar_t *)charstr.ptr(), charstr.size(), nullptr, 0, nullptr, nullptr);
+	if (total_mbchars == 0) {
+		DWORD err_code = GetLastError();
+		ERR_FAIL_V_MSG(PackedByteArray(), vformat("Conversion failed: %s", format_error_message(err_code)));
+	}
+
+	ret.resize(total_mbchars);
+	if (WideCharToMultiByte(*encoding, 0, (const wchar_t *)charstr.ptr(), charstr.size(), (char *)ret.ptrw(), ret.size(), nullptr, nullptr) == 0) {
+		DWORD err_code = GetLastError();
+		ERR_FAIL_V_MSG(PackedByteArray(), vformat("Conversion failed: %s", format_error_message(err_code)));
+	}
+
+	return ret;
+}
+
 Dictionary OS_Windows::get_memory_info() const {
 	Dictionary meminfo;
 
@@ -2463,6 +2661,8 @@ bool OS_Windows::_test_create_rendering_device_and_gl(const String &p_display_dr
 OS_Windows::OS_Windows(HINSTANCE _hInstance) {
 	hInstance = _hInstance;
 
+	_init_encodings();
+
 	// Reset CWD to ensure long path is used.
 	Char16String current_dir_name;
 	size_t str_len = GetCurrentDirectoryW(0, nullptr);

+ 6 - 0
platform/windows/os_windows.h

@@ -136,6 +136,9 @@ class OS_Windows : public OS {
 
 	bool is_using_con_wrapper() const;
 
+	HashMap<String, int> encodings;
+	void _init_encodings();
+
 	// functions used by main to initialize/deinitialize the OS
 protected:
 	virtual void initialize() override;
@@ -241,6 +244,9 @@ public:
 
 	virtual bool _check_internal_feature_support(const String &p_feature) override;
 
+	virtual String multibyte_to_string(const String &p_encoding, const PackedByteArray &p_array) const override;
+	virtual PackedByteArray string_to_multibyte(const String &p_encoding, const String &p_string) const override;
+
 	virtual void disable_crash_handler() override;
 	virtual bool is_disable_crash_handler() const override;
 	virtual void initialize_debugging() override;