Browse Source

Move plural rules logic into a separate class

- Extracts plural rules logic in `TranslationPO` into a new `PluralRules` class.
- Replaces the single last-used plural index cache in `TranslationPO` with an LRU cache in `PluralRules`.
- Adds tests for `PluralRules`.
Haoyu Qiu 5 tháng trước cách đây
mục cha
commit
ebb96e2303

+ 167 - 0
core/string/plural_rules.cpp

@@ -0,0 +1,167 @@
+/**************************************************************************/
+/*  plural_rules.cpp                                                      */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "plural_rules.h"
+
+#include "core/math/expression.h"
+
+// Walks the decision tree starting at `p_node`, evaluating each node's expression with
+// `p_input_val` bound to the variable `n`. A truthy result continues with `left`, a falsy
+// one with `right`. When there is no node left to visit, the last computed value (or
+// `p_result` if `p_node` itself is null) is returned as the plural index.
+// Returns 0 if an expression cannot be parsed or executed.
+int PluralRules::_eq_test(const Array &p_input_val, const Ref<EQNode> &p_node, const Variant &p_result) const {
+	Variant outcome = p_result;
+
+	for (const EQNode *node = p_node.ptr(); node != nullptr;) {
+		static const Vector<String> variable_names = { "n" };
+
+		const Error parse_err = expr->parse(node->regex, variable_names);
+		ERR_FAIL_COND_V_MSG(parse_err != OK, 0, vformat("Cannot parse expression \"%s\". Error: %s", node->regex, expr->get_error_text()));
+
+		outcome = expr->execute(p_input_val);
+		ERR_FAIL_COND_V_MSG(expr->has_execute_failed(), 0, vformat("Cannot evaluate expression \"%s\".", node->regex));
+
+		// Pick the branch matching the truthiness of this node's result.
+		const Ref<EQNode> &next = bool(outcome) ? node->left : node->right;
+		node = next.ptr();
+	}
+
+	return outcome;
+}
+
+// Returns the index of the first occurrence of `p_chr` in `p_src` that is not enclosed
+// in parentheses, or -1 if there is none. Parentheses may be nested: a character only
+// counts as unenclosed once every '(' seen so far has been closed.
+int PluralRules::_find_unquoted(const String &p_src, char32_t p_chr) const {
+	const int len = p_src.length();
+	if (len == 0) {
+		return -1;
+	}
+
+	const char32_t *src = p_src.get_data();
+	// Number of currently open parentheses. A plain in/out flag would treat the first
+	// ')' as closing the whole group and expose the rest of a nested group to the search.
+	// A stray ')' outside any group is not counted, so the depth never goes negative.
+	int depth = 0;
+	for (int i = 0; i < len; i++) {
+		const char32_t c = src[i];
+		if (depth > 0) {
+			if (c == '(') {
+				depth++;
+			} else if (c == ')') {
+				depth--;
+			}
+		} else if (c == '(') {
+			depth = 1;
+		} else if (c == p_chr) {
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+// Recursively splits a C-style ternary chain into a tree of `EQNode`s rooted at `p_node`.
+// For "cond ? a : b", `p_node->regex` receives "cond", `p_node->left` the tree for "a" and
+// `p_node->right` the tree for "b". A rule without an unparenthesized '?' becomes a leaf
+// whose `regex` is the whole rule. `p_node` must already be instantiated by the caller.
+void PluralRules::_cache_plural_tests(const String &p_plural_rule, Ref<EQNode> &p_node) {
+	// Some examples of p_plural_rule passed in can have the form:
+	// "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5" (Arabic)
+	// "n >= 2" (French) // No ternary at all: the value of the expression itself is used as the plural index.
+	// "n != 1" (English)
+
+	// Drop one pair of enclosing parentheses, but only when the first '(' is really
+	// matched by the last ')' (true for "(a ? b : c)", false for "(a) ? (b) : (c)").
+	String rule = p_plural_rule;
+	if (rule.begins_with("(") && rule.ends_with(")")) {
+		int bcount = 0;
+		for (int i = 1; i < rule.length() - 1 && bcount >= 0; i++) {
+			if (rule[i] == '(') {
+				bcount++;
+			} else if (rule[i] == ')') {
+				bcount--;
+			}
+		}
+		if (bcount == 0) {
+			rule = rule.substr(1, rule.length() - 2);
+		}
+	}
+
+	int first_ques_mark = _find_unquoted(rule, '?');
+	int first_colon = _find_unquoted(rule, ':');
+
+	if (first_ques_mark == -1) {
+		p_node->regex = rule.strip_edges();
+		return;
+	}
+
+	// Every '?' needs a ':' after it. Without this check a missing ':' makes
+	// `first_colon + 1` equal to 0, so the "else" branch would be the whole rule again
+	// and the recursion would never terminate. Keep the text as a leaf and stop.
+	if (first_colon < first_ques_mark) {
+		p_node->regex = rule.strip_edges();
+		ERR_FAIL_MSG(vformat("Invalid plural expression \"%s\". Expected ':' after '?'.", rule));
+	}
+
+	p_node->regex = rule.substr(0, first_ques_mark).strip_edges();
+
+	p_node->left.instantiate();
+	_cache_plural_tests(rule.substr(first_ques_mark + 1, first_colon - first_ques_mark - 1).strip_edges(), p_node->left);
+	p_node->right.instantiate();
+	_cache_plural_tests(rule.substr(first_colon + 1).strip_edges(), p_node->right);
+}
+
+// Returns the plural form index for the count `p_n`. The LRU cache is consulted first;
+// on a miss the rule's decision tree is evaluated and the result is stored in the cache.
+int PluralRules::evaluate(int p_n) const {
+	if (const int *hit = cache.getptr(p_n)) {
+		return *hit;
+	}
+
+	// The expression has a single input, `n`.
+	Array args;
+	args.push_back(p_n);
+
+	const int plural_index = _eq_test(args, equi_tests, 0);
+	cache.insert(p_n, plural_index);
+	return plural_index;
+}
+
+// Stores the number of plural forms and the raw expression, then prepares what
+// `evaluate()` needs: the expression evaluator and the decision tree built from `plural`.
+PluralRules::PluralRules(int p_nplurals, const String &p_plural) :
+		nplurals(p_nplurals),
+		plural(p_plural) {
+	expr.instantiate();
+
+	// Split the expression once up front so it is not re-split on every evaluation.
+	equi_tests.instantiate();
+	_cache_plural_tests(plural, equi_tests);
+}
+
+// Parses a plural rules string and returns a newly allocated `PluralRules`, which the
+// caller must release with `memdelete()`. Returns `nullptr` (after printing an error)
+// when the string is malformed: missing '=' or ';' for `nplurals`, a non-integer or
+// non-positive `nplurals`, a missing '=' for `plural`, or an empty `plural` expression.
+// A missing ';' after the `plural` expression is tolerated with a warning.
+PluralRules *PluralRules::parse(const String &p_rules) {
+	// `p_rules` should be in the format "nplurals=<N>; plural=<Expression>;".
+
+	const int nplurals_eq = p_rules.find_char('=');
+	ERR_FAIL_COND_V_MSG(nplurals_eq == -1, nullptr, "Invalid plural rules format. Missing equal sign for `nplurals`.");
+
+	const int nplurals_semi_col = p_rules.find_char(';', nplurals_eq);
+	ERR_FAIL_COND_V_MSG(nplurals_semi_col == -1, nullptr, "Invalid plural rules format. Missing semicolon for `nplurals`.");
+
+	const String nplurals_str = p_rules.substr(nplurals_eq + 1, nplurals_semi_col - (nplurals_eq + 1)).strip_edges();
+	ERR_FAIL_COND_V_MSG(!nplurals_str.is_valid_int(), nullptr, "Invalid plural rules format. `nplurals` should be an integer.");
+
+	const int nplurals = nplurals_str.to_int();
+	ERR_FAIL_COND_V_MSG(nplurals < 1, nullptr, "Invalid plural rules format. `nplurals` should be at least 1.");
+
+	const int expression_eq = p_rules.find_char('=', nplurals_semi_col + 1);
+	ERR_FAIL_COND_V_MSG(expression_eq == -1, nullptr, "Invalid plural rules format. Missing equal sign for `plural`.");
+
+	// The last ';' in the string is at least the one that terminates `nplurals`, so the
+	// search can never come back empty here. The terminating ';' of the expression is
+	// missing exactly when the last ';' lies before the '=' of `plural`.
+	int expression_end = p_rules.rfind_char(';');
+	if (expression_end < expression_eq) {
+		WARN_PRINT("Invalid plural rules format. Missing semicolon at the end of `plural` expression. Assuming ends at the end of the string.");
+		expression_end = p_rules.length();
+	}
+
+	const int expression_start = expression_eq + 1;
+	ERR_FAIL_COND_V_MSG(expression_end <= expression_start, nullptr, "Invalid plural rules format. `plural` expression is empty.");
+
+	const String plural = p_rules.substr(expression_start, expression_end - expression_start).strip_edges();
+	return memnew(PluralRules(nplurals, plural));
+}

+ 72 - 0
core/string/plural_rules.h

@@ -0,0 +1,72 @@
+/**************************************************************************/
+/*  plural_rules.h                                                        */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#pragma once
+
+#include "core/object/ref_counted.h"
+#include "core/templates/lru.h"
+
+class Expression;
+
+class PluralRules : public Object { // Plural form selector built from a "nplurals=<N>; plural=<Expression>;" rule string.
+	GDSOFTCLASS(PluralRules, Object);
+
+	mutable LRUCache<int, int> cache; // Maps `n` to its plural index; mutable so the const `evaluate()` can fill it.
+
+	// These two fields are initialized in the constructor and never change afterwards.
+	const int nplurals; // Number of plural forms; `parse()` only accepts values of at least 1.
+	const String plural; // Plural expression text, as returned by `get_plural()`.
+
+	// Decision tree built once from `plural` in the constructor, so that `evaluate()` does not have to re-split the expression on every call.
+	class EQNode : public RefCounted {
+		GDSOFTCLASS(EQNode, RefCounted);
+
+	public:
+		String regex; // Expression text evaluated at this node (handed to `Expression`, not a regular expression).
+		Ref<EQNode> left; // Visited next when `regex` evaluates to a truthy value.
+		Ref<EQNode> right; // Visited next when `regex` evaluates to a falsy value.
+	};
+	Ref<EQNode> equi_tests; // Root of the decision tree.
+	Ref<Expression> expr; // Evaluator used to parse and execute each node's expression.
+
+	int _find_unquoted(const String &p_src, char32_t p_chr) const; // Index of the first `p_chr` outside parentheses, or -1.
+	int _eq_test(const Array &p_input_val, const Ref<EQNode> &p_node, const Variant &p_result) const; // Evaluates the tree from `p_node`; `p_result` is returned when `p_node` is null.
+	void _cache_plural_tests(const String &p_plural_rule, Ref<EQNode> &p_node); // Fills `p_node` with the tree for `p_plural_rule`.
+
+	PluralRules(int p_nplurals, const String &p_plural); // Private: instances are created through `parse()`.
+
+public:
+	int evaluate(int p_n) const; // Returns the plural form index for the count `p_n`.
+
+	int get_nplurals() const { return nplurals; }
+	String get_plural() const { return plural; }
+
+	static PluralRules *parse(const String &p_rules); // Returns a new instance to be freed with `memdelete()`, or nullptr if `p_rules` is invalid.
+};

+ 19 - 121
core/string/translation_po.cpp

@@ -30,6 +30,8 @@
 
 #include "translation_po.h"
 
+#include "core/string/plural_rules.h"
+
 #ifdef DEBUG_TRANSLATION_PO
 #include "core/io/file_access.h"
 
@@ -129,112 +131,11 @@ Vector<String> TranslationPO::_get_message_list() const {
 	return v;
 }
 
-int TranslationPO::_get_plural_index(int p_n) const {
-	// Get a number between [0;number of plural forms).
-
-	input_val.clear();
-	input_val.push_back(p_n);
-
-	return _eq_test(equi_tests, 0);
-}
-
-int TranslationPO::_eq_test(const Ref<EQNode> &p_node, const Variant &p_result) const {
-	if (p_node.is_valid()) {
-		Error err = expr->parse(p_node->regex, input_name);
-		ERR_FAIL_COND_V_MSG(err != OK, 0, vformat("Cannot parse expression \"%s\". Error: %s", p_node->regex, expr->get_error_text()));
-
-		Variant result = expr->execute(input_val);
-		ERR_FAIL_COND_V_MSG(expr->has_execute_failed(), 0, vformat("Cannot evaluate expression \"%s\".", p_node->regex));
-
-		if (bool(result)) {
-			return _eq_test(p_node->left, result);
-		} else {
-			return _eq_test(p_node->right, result);
-		}
-	} else {
-		return p_result;
-	}
-}
-
-int TranslationPO::_find_unquoted(const String &p_src, char32_t p_chr) const {
-	const int len = p_src.length();
-	if (len == 0) {
-		return -1;
-	}
-
-	const char32_t *src = p_src.get_data();
-	bool in_quote = false;
-	for (int i = 0; i < len; i++) {
-		if (in_quote) {
-			if (src[i] == ')') {
-				in_quote = false;
-			}
-		} else {
-			if (src[i] == '(') {
-				in_quote = true;
-			} else if (src[i] == p_chr) {
-				return i;
-			}
-		}
-	}
-
-	return -1;
-}
-
-void TranslationPO::_cache_plural_tests(const String &p_plural_rule, Ref<EQNode> &p_node) {
-	// Some examples of p_plural_rule passed in can have the form:
-	// "n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5" (Arabic)
-	// "n >= 2" (French) // When evaluating the last, especially careful with this one.
-	// "n != 1" (English)
-
-	String rule = p_plural_rule;
-	if (rule.begins_with("(") && rule.ends_with(")")) {
-		int bcount = 0;
-		for (int i = 1; i < rule.length() - 1 && bcount >= 0; i++) {
-			if (rule[i] == '(') {
-				bcount++;
-			} else if (rule[i] == ')') {
-				bcount--;
-			}
-		}
-		if (bcount == 0) {
-			rule = rule.substr(1, rule.length() - 2);
-		}
-	}
-
-	int first_ques_mark = _find_unquoted(rule, '?');
-	int first_colon = _find_unquoted(rule, ':');
-
-	if (first_ques_mark == -1) {
-		p_node->regex = rule.strip_edges();
-		return;
-	}
-
-	p_node->regex = rule.substr(0, first_ques_mark).strip_edges();
-
-	p_node->left.instantiate();
-	_cache_plural_tests(rule.substr(first_ques_mark + 1, first_colon - first_ques_mark - 1).strip_edges(), p_node->left);
-	p_node->right.instantiate();
-	_cache_plural_tests(rule.substr(first_colon + 1).strip_edges(), p_node->right);
-}
-
 void TranslationPO::set_plural_rule(const String &p_plural_rule) {
-	// Set plural_forms and plural_rule.
-	// p_plural_rule passed in has the form "Plural-Forms: nplurals=2; plural=(n >= 2);".
-
-	int first_semi_col = p_plural_rule.find_char(';');
-	plural_forms = p_plural_rule.substr(p_plural_rule.find_char('=') + 1, first_semi_col - (p_plural_rule.find_char('=') + 1)).to_int();
-
-	int expression_start = p_plural_rule.find_char('=', first_semi_col) + 1;
-	int second_semi_col = p_plural_rule.rfind_char(';');
-	plural_rule = p_plural_rule.substr(expression_start, second_semi_col - expression_start).strip_edges();
-
-	// Setup the cache to make evaluating plural rule faster later on.
-	equi_tests.instantiate();
-	_cache_plural_tests(plural_rule, equi_tests);
-
-	expr.instantiate();
-	input_name.push_back("n");
+	if (plural_rules) {
+		memdelete(plural_rules);
+	}
+	plural_rules = PluralRules::parse(p_plural_rule);
 }
 
 void TranslationPO::add_message(const StringName &p_src_text, const StringName &p_xlated_text, const StringName &p_context) {
@@ -249,7 +150,8 @@ void TranslationPO::add_message(const StringName &p_src_text, const StringName &
 }
 
 void TranslationPO::add_plural_message(const StringName &p_src_text, const Vector<String> &p_plural_xlated_texts, const StringName &p_context) {
-	ERR_FAIL_COND_MSG(p_plural_xlated_texts.size() != plural_forms, vformat("Trying to add plural texts that don't match the required number of plural forms for locale \"%s\".", get_locale()));
+	ERR_FAIL_NULL_MSG(plural_rules, "Plural rules are not set. Please call set_plural_rule() before calling add_plural_message().");
+	ERR_FAIL_COND_MSG(p_plural_xlated_texts.size() != plural_rules->get_nplurals(), vformat("Trying to add plural texts that don't match the required number of plural forms for locale \"%s\".", get_locale()));
 
 	HashMap<StringName, Vector<StringName>> &map_id_str = translation_map[p_context];
 
@@ -264,11 +166,11 @@ void TranslationPO::add_plural_message(const StringName &p_src_text, const Vecto
 }
 
 int TranslationPO::get_plural_forms() const {
-	return plural_forms;
+	return plural_rules ? plural_rules->get_nplurals() : 0;
 }
 
 String TranslationPO::get_plural_rule() const {
-	return plural_rule;
+	return plural_rules ? plural_rules->get_plural() : String();
 }
 
 StringName TranslationPO::get_message(const StringName &p_src_text, const StringName &p_context) const {
@@ -282,27 +184,16 @@ StringName TranslationPO::get_message(const StringName &p_src_text, const String
 
 StringName TranslationPO::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const {
 	ERR_FAIL_COND_V_MSG(p_n < 0, StringName(), "N passed into translation to get a plural message should not be negative. For negative numbers, use singular translation please. Search \"gettext PO Plural Forms\" online for the documentation on translating negative numbers.");
-
-	// If the query is the same as last time, return the cached result.
-	if (p_n == last_plural_n && p_context == last_plural_context && p_src_text == last_plural_key) {
-		return translation_map[p_context][p_src_text][last_plural_mapped_index];
-	}
+	ERR_FAIL_NULL_V_MSG(plural_rules, StringName(), "Plural rules are not set. Please call set_plural_rule() before calling get_plural_message().");
 
 	if (!translation_map.has(p_context) || !translation_map[p_context].has(p_src_text)) {
 		return StringName();
 	}
 	ERR_FAIL_COND_V_MSG(translation_map[p_context][p_src_text].is_empty(), StringName(), vformat("Source text \"%s\" is registered but doesn't have a translation. Please report this bug.", String(p_src_text)));
 
-	int plural_index = _get_plural_index(p_n);
+	int plural_index = plural_rules->evaluate(p_n);
 	ERR_FAIL_COND_V_MSG(plural_index < 0 || translation_map[p_context][p_src_text].size() < plural_index + 1, StringName(), "Plural index returned or number of plural translations is not valid. Please report this bug.");
 
-	// Cache result so that if the next entry is the same, we can return directly.
-	// _get_plural_index(p_n) can get very costly, especially when evaluating long plural-rule (Arabic)
-	last_plural_key = p_src_text;
-	last_plural_context = p_context;
-	last_plural_n = p_n;
-	last_plural_mapped_index = plural_index;
-
 	return translation_map[p_context][p_src_text][plural_index];
 }
 
@@ -343,3 +234,10 @@ void TranslationPO::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_plural_forms"), &TranslationPO::get_plural_forms);
 	ClassDB::bind_method(D_METHOD("get_plural_rule"), &TranslationPO::get_plural_rule);
 }
+
+// Releases the plural rules object created by `set_plural_rule()`, if any.
+TranslationPO::~TranslationPO() {
+	if (plural_rules == nullptr) {
+		return;
+	}
+
+	memdelete(plural_rules);
+	plural_rules = nullptr;
+}

+ 4 - 29
core/string/translation_po.h

@@ -32,9 +32,10 @@
 
 //#define DEBUG_TRANSLATION_PO
 
-#include "core/math/expression.h"
 #include "core/string/translation.h"
 
+class PluralRules;
+
 class TranslationPO : public Translation {
 	GDCLASS(TranslationPO, Translation);
 
@@ -45,33 +46,7 @@ class TranslationPO : public Translation {
 	// Strings without context have "" as first key.
 	HashMap<StringName, HashMap<StringName, Vector<StringName>>> translation_map;
 
-	int plural_forms = 0; // 0 means no "Plural-Forms" is given in the PO header file. The min for all languages is 1.
-	String plural_rule;
-
-	// Cache temporary variables related to _get_plural_index() to make it faster
-	class EQNode : public RefCounted {
-		GDSOFTCLASS(EQNode, RefCounted);
-
-	public:
-		String regex;
-		Ref<EQNode> left;
-		Ref<EQNode> right;
-	};
-	Ref<EQNode> equi_tests;
-
-	int _find_unquoted(const String &p_src, char32_t p_chr) const;
-	int _eq_test(const Ref<EQNode> &p_node, const Variant &p_result) const;
-
-	Vector<String> input_name;
-	mutable Ref<Expression> expr;
-	mutable Array input_val;
-	mutable StringName last_plural_key;
-	mutable StringName last_plural_context;
-	mutable int last_plural_n = -1; // Set it to an impossible value at the beginning.
-	mutable int last_plural_mapped_index = 0;
-
-	void _cache_plural_tests(const String &p_plural_rule, Ref<EQNode> &p_node);
-	int _get_plural_index(int p_n) const;
+	PluralRules *plural_rules = nullptr;
 
 	Vector<String> _get_message_list() const override;
 	Dictionary _get_messages() const override;
@@ -98,5 +73,5 @@ public:
 	void print_translation_map();
 #endif
 
-	TranslationPO() {}
+	~TranslationPO();
 };

+ 49 - 0
tests/core/string/test_translation.h

@@ -31,6 +31,7 @@
 #pragma once
 
 #include "core/string/optimized_translation.h"
+#include "core/string/plural_rules.h"
 #include "core/string/translation.h"
 #include "core/string/translation_po.h"
 #include "core/string/translation_server.h"
@@ -129,6 +130,54 @@ TEST_CASE("[TranslationPO] Plural messages") {
 	CHECK(vformat(translation->get_plural_message("There are %d apples", "", 2), 2) == "Il y a 2 pommes");
 }
 
+TEST_CASE("[TranslationPO] Plural rules parsing") {
+	// Malformed rule strings: every one of them must be rejected by the parser.
+	const char *const rejected_rules[] = {
+		"",
+
+		// Problems in the `nplurals` part.
+		"plurals=(n != 1);",
+		"nplurals; plurals=(n != 1);",
+		"nplurals=; plurals=(n != 1);",
+		"nplurals=0; plurals=(n != 1);",
+		"nplurals=-1; plurals=(n != 1);",
+
+		// Problems in the expression part.
+		"nplurals=2;",
+		"nplurals=2; plurals;",
+		"nplurals=2; plurals=;",
+	};
+
+	ERR_PRINT_OFF;
+	for (const char *rule : rejected_rules) {
+		CHECK(PluralRules::parse(rule) == nullptr);
+	}
+	ERR_PRINT_ON;
+
+	// Chained ternary with three plural forms.
+	{
+		PluralRules *rules = PluralRules::parse("nplurals=3; plural=(n==0 ? 0 : n==1 ? 1 : 2);");
+		REQUIRE(rules != nullptr);
+
+		CHECK(rules->get_nplurals() == 3);
+		CHECK(rules->get_plural() == "(n==0 ? 0 : n==1 ? 1 : 2)");
+
+		const int expected[] = { 0, 1, 2, 2 };
+		for (int n = 0; n < 4; n++) {
+			CHECK(rules->evaluate(n) == expected[n]);
+		}
+
+		memdelete(rules);
+	}
+
+	// Constant expression with a single plural form.
+	{
+		PluralRules *rules = PluralRules::parse("nplurals=1; plural=0;");
+		REQUIRE(rules != nullptr);
+
+		CHECK(rules->get_nplurals() == 1);
+		CHECK(rules->get_plural() == "0");
+
+		for (int n = 0; n < 4; n++) {
+			CHECK(rules->evaluate(n) == 0);
+		}
+
+		memdelete(rules);
+	}
+}
+
 #ifdef TOOLS_ENABLED
 TEST_CASE("[OptimizedTranslation] Generate from Translation and read messages") {
 	Ref<Translation> translation = memnew(Translation);