소스 검색

Merge pull request #98743 from dbnicholson/improve-compare-locales

Improve locale comparison
Thaddeus Crews 9 달 전
부모
커밋
27b394c52a
3개의 변경된 파일, 148개의 추가 그리고 65개의 삭제
  1. 76 61
      core/string/translation_server.cpp
  2. 18 0
      core/string/translation_server.h
  3. 54 4
      tests/core/string/test_translation_server.h

+ 76 - 61
core/string/translation_server.cpp

@@ -118,36 +118,45 @@ void TranslationServer::init_locale_info() {
 	}
 }
 
-String TranslationServer::standardize_locale(const String &p_locale) const {
-	return _standardize_locale(p_locale, false);
+TranslationServer::Locale::operator String() const {
+	String out = language;
+	if (!script.is_empty()) {
+		out = out + "_" + script;
+	}
+	if (!country.is_empty()) {
+		out = out + "_" + country;
+	}
+	if (!variant.is_empty()) {
+		out = out + "_" + variant;
+	}
+	return out;
 }
 
-String TranslationServer::_standardize_locale(const String &p_locale, bool p_add_defaults) const {
+TranslationServer::Locale::Locale(const TranslationServer &p_server, const String &p_locale, bool p_add_defaults) {
 	// Replaces '-' with '_' for macOS style locales.
 	String univ_locale = p_locale.replace("-", "_");
 
 	// Extract locale elements.
-	String lang_name, script_name, country_name, variant_name;
 	Vector<String> locale_elements = univ_locale.get_slice("@", 0).split("_");
-	lang_name = locale_elements[0];
+	language = locale_elements[0];
 	if (locale_elements.size() >= 2) {
 		if (locale_elements[1].length() == 4 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_lower_case(locale_elements[1][1]) && is_ascii_lower_case(locale_elements[1][2]) && is_ascii_lower_case(locale_elements[1][3])) {
-			script_name = locale_elements[1];
+			script = locale_elements[1];
 		}
 		if (locale_elements[1].length() == 2 && is_ascii_upper_case(locale_elements[1][0]) && is_ascii_upper_case(locale_elements[1][1])) {
-			country_name = locale_elements[1];
+			country = locale_elements[1];
 		}
 	}
 	if (locale_elements.size() >= 3) {
 		if (locale_elements[2].length() == 2 && is_ascii_upper_case(locale_elements[2][0]) && is_ascii_upper_case(locale_elements[2][1])) {
-			country_name = locale_elements[2];
-		} else if (variant_map.has(locale_elements[2].to_lower()) && variant_map[locale_elements[2].to_lower()] == lang_name) {
-			variant_name = locale_elements[2].to_lower();
+			country = locale_elements[2];
+		} else if (p_server.variant_map.has(locale_elements[2].to_lower()) && p_server.variant_map[locale_elements[2].to_lower()] == language) {
+			variant = locale_elements[2].to_lower();
 		}
 	}
 	if (locale_elements.size() >= 4) {
-		if (variant_map.has(locale_elements[3].to_lower()) && variant_map[locale_elements[3].to_lower()] == lang_name) {
-			variant_name = locale_elements[3].to_lower();
+		if (p_server.variant_map.has(locale_elements[3].to_lower()) && p_server.variant_map[locale_elements[3].to_lower()] == language) {
+			variant = locale_elements[3].to_lower();
 		}
 	}
 
@@ -155,71 +164,62 @@ String TranslationServer::_standardize_locale(const String &p_locale, bool p_add
 	Vector<String> script_extra = univ_locale.get_slice("@", 1).split(";");
 	for (int i = 0; i < script_extra.size(); i++) {
 		if (script_extra[i].to_lower() == "cyrillic") {
-			script_name = "Cyrl";
+			script = "Cyrl";
 			break;
 		} else if (script_extra[i].to_lower() == "latin") {
-			script_name = "Latn";
+			script = "Latn";
 			break;
 		} else if (script_extra[i].to_lower() == "devanagari") {
-			script_name = "Deva";
+			script = "Deva";
 			break;
-		} else if (variant_map.has(script_extra[i].to_lower()) && variant_map[script_extra[i].to_lower()] == lang_name) {
-			variant_name = script_extra[i].to_lower();
+		} else if (p_server.variant_map.has(script_extra[i].to_lower()) && p_server.variant_map[script_extra[i].to_lower()] == language) {
+			variant = script_extra[i].to_lower();
 		}
 	}
 
 	// Handles known non-ISO language names used e.g. on Windows.
-	if (locale_rename_map.has(lang_name)) {
-		lang_name = locale_rename_map[lang_name];
+	if (p_server.locale_rename_map.has(language)) {
+		language = p_server.locale_rename_map[language];
 	}
 
 	// Handle country renames.
-	if (country_rename_map.has(country_name)) {
-		country_name = country_rename_map[country_name];
+	if (p_server.country_rename_map.has(country)) {
+		country = p_server.country_rename_map[country];
 	}
 
 	// Remove unsupported script codes.
-	if (!script_map.has(script_name)) {
-		script_name = "";
+	if (!p_server.script_map.has(script)) {
+		script = "";
 	}
 
 	// Add script code based on language and country codes for some ambiguous cases.
 	if (p_add_defaults) {
-		if (script_name.is_empty()) {
-			for (int i = 0; i < locale_script_info.size(); i++) {
-				const LocaleScriptInfo &info = locale_script_info[i];
-				if (info.name == lang_name) {
-					if (country_name.is_empty() || info.supported_countries.has(country_name)) {
-						script_name = info.script;
+		if (script.is_empty()) {
+			for (int i = 0; i < p_server.locale_script_info.size(); i++) {
+				const LocaleScriptInfo &info = p_server.locale_script_info[i];
+				if (info.name == language) {
+					if (country.is_empty() || info.supported_countries.has(country)) {
+						script = info.script;
 						break;
 					}
 				}
 			}
 		}
-		if (!script_name.is_empty() && country_name.is_empty()) {
+		if (!script.is_empty() && country.is_empty()) {
 			// Add country code based on script for some ambiguous cases.
-			for (int i = 0; i < locale_script_info.size(); i++) {
-				const LocaleScriptInfo &info = locale_script_info[i];
-				if (info.name == lang_name && info.script == script_name) {
-					country_name = info.default_country;
+			for (int i = 0; i < p_server.locale_script_info.size(); i++) {
+				const LocaleScriptInfo &info = p_server.locale_script_info[i];
+				if (info.name == language && info.script == script) {
+					country = info.default_country;
 					break;
 				}
 			}
 		}
 	}
+}
 
-	// Combine results.
-	String out = lang_name;
-	if (!script_name.is_empty()) {
-		out = out + "_" + script_name;
-	}
-	if (!country_name.is_empty()) {
-		out = out + "_" + country_name;
-	}
-	if (!variant_name.is_empty()) {
-		out = out + "_" + variant_name;
-	}
-	return out;
+String TranslationServer::standardize_locale(const String &p_locale) const {
+	return Locale(*this, p_locale, false).operator String();
 }
 
 int TranslationServer::compare_locales(const String &p_locale_a, const String &p_locale_b) const {
@@ -234,8 +234,8 @@ int TranslationServer::compare_locales(const String &p_locale_a, const String &p
 		return *cached_result;
 	}
 
-	String locale_a = _standardize_locale(p_locale_a, true);
-	String locale_b = _standardize_locale(p_locale_b, true);
+	Locale locale_a = Locale(*this, p_locale_a, true);
+	Locale locale_b = Locale(*this, p_locale_b, true);
 
 	if (locale_a == locale_b) {
 		// Exact match.
@@ -243,26 +243,41 @@ int TranslationServer::compare_locales(const String &p_locale_a, const String &p
 		return 10;
 	}
 
-	Vector<String> locale_a_elements = locale_a.split("_");
-	Vector<String> locale_b_elements = locale_b.split("_");
-	if (locale_a_elements[0] != locale_b_elements[0]) {
+	if (locale_a.language != locale_b.language) {
 		// No match.
 		locale_compare_cache.insert(cache_key, 0);
 		return 0;
 	}
 
-	// Matching language, both locales have extra parts.
-	// Return number of matching elements.
-	int matching_elements = 1;
-	for (int i = 1; i < locale_a_elements.size(); i++) {
-		for (int j = 1; j < locale_b_elements.size(); j++) {
-			if (locale_a_elements[i] == locale_b_elements[j]) {
-				matching_elements++;
-			}
+	// Matching language, both locales have extra parts. Compare the
+	// remaining elements. If both elements are non-empty, check the
+	// match to increase or decrease the score. If either element or
+	// both are empty, leave the score as is.
+	int score = 5;
+	if (!locale_a.script.is_empty() && !locale_b.script.is_empty()) {
+		if (locale_a.script == locale_b.script) {
+			score++;
+		} else {
+			score--;
 		}
 	}
-	locale_compare_cache.insert(cache_key, matching_elements);
-	return matching_elements;
+	if (!locale_a.country.is_empty() && !locale_b.country.is_empty()) {
+		if (locale_a.country == locale_b.country) {
+			score++;
+		} else {
+			score--;
+		}
+	}
+	if (!locale_a.variant.is_empty() && !locale_b.variant.is_empty()) {
+		if (locale_a.variant == locale_b.variant) {
+			score++;
+		} else {
+			score--;
+		}
+	}
+
+	locale_compare_cache.insert(cache_key, score);
+	return score;
 }
 
 String TranslationServer::get_locale_name(const String &p_locale) const {

+ 18 - 0
core/string/translation_server.h

@@ -64,6 +64,24 @@ class TranslationServer : public Object {
 	};
 	static Vector<LocaleScriptInfo> locale_script_info;
 
+	struct Locale {
+		String language;
+		String script;
+		String country;
+		String variant;
+
+		bool operator==(const Locale &p_locale) const {
+			return (p_locale.language == language) &&
+					(p_locale.script == script) &&
+					(p_locale.country == country) &&
+					(p_locale.variant == variant);
+		}
+
+		operator String() const;
+
+		Locale(const TranslationServer &p_server, const String &p_locale, bool p_add_defaults);
+	};
+
 	static HashMap<String, String> language_map;
 	static HashMap<String, String> script_map;
 	static HashMap<String, String> locale_rename_map;

+ 54 - 4
tests/core/string/test_translation_server.h

@@ -120,18 +120,50 @@ TEST_CASE("[TranslationServer] Comparing locales") {
 	locale_a = "sr-Latn-CS";
 	locale_b = "sr-Latn-RS";
 
-	// Two elements from locales match.
+	// Script matches (+1) but country doesn't (-1).
 	res = ts->compare_locales(locale_a, locale_b);
 
-	CHECK(res == 2);
+	CHECK(res == 5);
 
 	locale_a = "uz-Cyrl-UZ";
 	locale_b = "uz-Latn-UZ";
 
-	// Two elements match, but they are not sequentual.
+	// Country matches (+1) but script doesn't (-1).
 	res = ts->compare_locales(locale_a, locale_b);
 
-	CHECK(res == 2);
+	CHECK(res == 5);
+
+	locale_a = "aa-Latn-ER";
+	locale_b = "aa-Latn-ER-saaho";
+
+	// Script and country match (+2) with variant on one locale (+0).
+	res = ts->compare_locales(locale_a, locale_b);
+
+	CHECK(res == 7);
+
+	locale_a = "uz-Cyrl-UZ";
+	locale_b = "uz-Latn-KG";
+
+	// Both script and country mismatched (-2).
+	res = ts->compare_locales(locale_a, locale_b);
+
+	CHECK(res == 3);
+
+	locale_a = "es-ES";
+	locale_b = "es-AR";
+
+	// Mismatched country (-1).
+	res = ts->compare_locales(locale_a, locale_b);
+
+	CHECK(res == 4);
+
+	locale_a = "es";
+	locale_b = "es-AR";
+
+	// No country for one locale (+0).
+	res = ts->compare_locales(locale_a, locale_b);
+
+	CHECK(res == 5);
 
 	locale_a = "es-EC";
 	locale_b = "fr-LU";
@@ -140,6 +172,24 @@ TEST_CASE("[TranslationServer] Comparing locales") {
 	res = ts->compare_locales(locale_a, locale_b);
 
 	CHECK(res == 0);
+
+	locale_a = "zh-HK";
+	locale_b = "zh";
+
+	// In full standardization, zh-HK becomes zh_Hant_HK and zh becomes
+	// zh_Hans_CN. Both script and country mismatch (-2).
+	res = ts->compare_locales(locale_a, locale_b);
+
+	CHECK(res == 3);
+
+	locale_a = "zh-CN";
+	locale_b = "zh";
+
+	// In full standardization, zh and zh-CN both become zh_Hans_CN for an
+	// exact match.
+	res = ts->compare_locales(locale_a, locale_b);
+
+	CHECK(res == 10);
 }
 } // namespace TestTranslationServer