Ver Fonte

Merge pull request #74794 from lawnjelly/gdscript_parser_hashtable

[3.x] Use hash table for GDScript parsing
Rémi Verschelde há 2 anos atrás
pai
commit
26a584179c

+ 4 - 0
modules/gdscript/gdscript.cpp

@@ -2095,6 +2095,8 @@ GDScriptWarning::Code GDScriptWarning::get_code_from_name(const String &p_name)
 #endif // DEBUG_ENABLED
 
 GDScriptLanguage::GDScriptLanguage() {
+	GDScriptTokenizer::initialize();
+
 	calls = 0;
 	ERR_FAIL_COND(singleton);
 	singleton = this;
@@ -2139,6 +2141,8 @@ GDScriptLanguage::GDScriptLanguage() {
 }
 
 GDScriptLanguage::~GDScriptLanguage() {
+	GDScriptTokenizer::terminate();
+
 	if (_call_stack) {
 		memdelete_arr(_call_stack);
 	}

+ 96 - 59
modules/gdscript/gdscript_tokenizer.cpp

@@ -35,6 +35,8 @@
 #include "core/print_string.h"
 #include "gdscript_functions.h"
 
+OAHashMap<String, int> *GDScriptTokenizer::token_hashtable = nullptr;
+
 const char *GDScriptTokenizer::token_names[TK_MAX] = {
 	"Empty",
 	"Identifier",
@@ -235,6 +237,96 @@ static const _kws _keyword_list[] = {
 	{ GDScriptTokenizer::TK_ERROR, nullptr }
 };
 
+// Builds the shared name -> id table consulted by
+// GDScriptTokenizerText::_parse_identifier(). Ids are handed out in consecutive
+// ranges (constants, _type_list entries, built-in functions, _keyword_list
+// entries), so the id alone tells which source table an entry came from.
+// Calling this again while a table already exists keeps the existing table.
+void GDScriptTokenizer::initialize() {
+	// A repeated call would otherwise overwrite the pointer and leak the old table.
+	if (token_hashtable) {
+		return;
+	}
+
+	token_hashtable = memnew((OAHashMap<String, int>));
+
+	// Constants use the ids below TOKEN_HASH_TABLE_TYPE_START.
+	token_hashtable->insert("null", 0);
+	token_hashtable->insert("true", 1);
+	token_hashtable->insert("false", 2);
+
+	// _type_list (terminated by an entry with a null text pointer)
+	int id = TOKEN_HASH_TABLE_TYPE_START;
+	int idx = 0;
+	while (_type_list[idx].text) {
+		token_hashtable->insert(_type_list[idx].text, id++);
+		idx++;
+	}
+	// Type ids must not spill into the built-in function range, otherwise
+	// _parse_identifier() would decode them as functions.
+	DEV_ASSERT(id <= TOKEN_HASH_TABLE_BUILTIN_START);
+
+	// built in funcs
+	id = TOKEN_HASH_TABLE_BUILTIN_START;
+	for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {
+		token_hashtable->insert(GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j)), id++);
+	}
+
+	// keywords (terminated by an entry with a null text pointer)
+	id = TOKEN_HASH_TABLE_KEYWORD_START;
+	idx = 0;
+	while (_keyword_list[idx].text) {
+		token_hashtable->insert(_keyword_list[idx].text, id++);
+		idx++;
+	}
+}
+
+// Frees the table created by initialize() and resets the pointer.
+// Does nothing when no table exists.
+void GDScriptTokenizer::terminate() {
+	if (!token_hashtable) {
+		return;
+	}
+
+	memdelete(token_hashtable);
+	token_hashtable = nullptr;
+}
+
+// Looks p_str up in the preset table (constants, built-in types, built-in
+// functions, keywords) and, on a hit, calls the matching _make_*() helper.
+// Returns true when p_str was found in the table, false otherwise (the caller
+// then handles it as a plain identifier).
+bool GDScriptTokenizerText::_parse_identifier(const String &p_str) {
+	// GDScriptTokenizer::initialize() must have been called before using this
+	// function; report an error instead of dereferencing a null table.
+	ERR_FAIL_NULL_V(token_hashtable, false);
+
+	const int *found = token_hashtable->lookup_ptr(p_str);
+	if (!found) {
+		// not found
+		return false;
+	}
+
+	const int id = *found;
+
+	// Ids below TOKEN_HASH_TABLE_TYPE_START are "null", "true" and "false".
+	if (id < TOKEN_HASH_TABLE_TYPE_START) {
+		switch (id) {
+			case 0: {
+				_make_constant(Variant());
+			} break;
+			case 1: {
+				_make_constant(true);
+			} break;
+			case 2: {
+				_make_constant(false);
+			} break;
+			default: {
+				DEV_ASSERT(0);
+			} break;
+		}
+		return true;
+	}
+
+	// type list
+	if (id < TOKEN_HASH_TABLE_BUILTIN_START) {
+		const int idx = id - TOKEN_HASH_TABLE_TYPE_START;
+		_make_type(_type_list[idx].type);
+		return true;
+	}
+
+	// built in func
+	if (id < TOKEN_HASH_TABLE_KEYWORD_START) {
+		const int idx = id - TOKEN_HASH_TABLE_BUILTIN_START;
+		_make_built_in_func(GDScriptFunctions::Function(idx));
+		return true;
+	}
+
+	// keyword
+	const int idx = id - TOKEN_HASH_TABLE_KEYWORD_START;
+	_make_token(_keyword_list[idx].token);
+	return true;
+}
+
 const char *GDScriptTokenizer::get_token_name(Token p_token) {
 	ERR_FAIL_INDEX_V(p_token, TK_MAX, "<error>");
 	return token_names[p_token];
@@ -977,68 +1069,13 @@ void GDScriptTokenizerText::_advance() {
 						i++;
 					}
 
-					bool identifier = false;
-
-					if (str == "null") {
-						_make_constant(Variant());
-
-					} else if (str == "true") {
-						_make_constant(true);
-
-					} else if (str == "false") {
-						_make_constant(false);
-					} else {
-						bool found = false;
-
-						{
-							int idx = 0;
-
-							while (_type_list[idx].text) {
-								if (str == _type_list[idx].text) {
-									_make_type(_type_list[idx].type);
-									found = true;
-									break;
-								}
-								idx++;
-							}
-						}
-
-						if (!found) {
-							//built in func?
-
-							for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {
-								if (str == GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j))) {
-									_make_built_in_func(GDScriptFunctions::Function(j));
-									found = true;
-									break;
-								}
-							}
-						}
-
-						if (!found) {
-							//keyword
+					// Detect preset keywords / functions using hashtable.
+					bool found = _parse_identifier(str);
 
-							int idx = 0;
-							found = false;
-
-							while (_keyword_list[idx].text) {
-								if (str == _keyword_list[idx].text) {
-									_make_token(_keyword_list[idx].token);
-									found = true;
-									break;
-								}
-								idx++;
-							}
-						}
-
-						if (!found) {
-							identifier = true;
-						}
-					}
-
-					if (identifier) {
+					if (!found) {
 						_make_identifier(str);
 					}
+
 					INCPOS(str.length());
 					return;
 				}

+ 14 - 1
modules/gdscript/gdscript_tokenizer.h

@@ -31,6 +31,7 @@
 #ifndef GDSCRIPT_TOKENIZER_H
 #define GDSCRIPT_TOKENIZER_H
 
+#include "core/oa_hash_map.h"
 #include "core/pair.h"
 #include "core/string_name.h"
 #include "core/ustring.h"
@@ -154,9 +155,20 @@ protected:
 
 	static const char *token_names[TK_MAX];
 
+	enum { // First id of each range stored as a value in token_hashtable.
+		TOKEN_HASH_TABLE_TYPE_START = 3, // Ids 0, 1 and 2 are taken by "null", "true" and "false".
+		TOKEN_HASH_TABLE_BUILTIN_START = TOKEN_HASH_TABLE_TYPE_START + Variant::VARIANT_MAX, // Leaves Variant::VARIANT_MAX ids for type names.
+		TOKEN_HASH_TABLE_KEYWORD_START = TOKEN_HASH_TABLE_BUILTIN_START + GDScriptFunctions::FUNC_MAX, // Leaves FUNC_MAX ids for built-in functions; keyword ids follow.
+	};
+
+	static OAHashMap<String, int> *token_hashtable;
+
 public:
 	static const char *get_token_name(Token p_token);
 
+	static void initialize();
+	static void terminate();
+
 	bool is_token_literal(int p_offset = 0, bool variable_safe = false) const;
 	StringName get_token_literal(int p_offset = 0) const;
 
@@ -177,7 +189,7 @@ public:
 	virtual bool is_ignoring_warnings() const = 0;
 #endif // DEBUG_ENABLED
 
-	virtual ~GDScriptTokenizer(){};
+	virtual ~GDScriptTokenizer() {}
 };
 
 class GDScriptTokenizerText : public GDScriptTokenizer {
@@ -230,6 +242,7 @@ class GDScriptTokenizerText : public GDScriptTokenizer {
 #endif // DEBUG_ENABLED
 
 	void _advance();
+	bool _parse_identifier(const String &p_str);
 
 public:
 	void set_code(const String &p_code);