1 year ago · 0ba0be27c7
--- a/SConstruct
+++ b/SConstruct
@@ -861,7 +861,7 @@ else:  # GCC, Clang
 
				             if cc_version_major >= 11:  # Broke on MethodBind templates before GCC 11.
			
 
				                 env.Append(CCFLAGS=["-Wlogical-op"])
			
 
				         elif methods.using_clang(env) or methods.using_emcc(env):
			
 
				-            env.Append(CCFLAGS=["-Wimplicit-fallthrough"])
			
 
				+            env.Append(CCFLAGS=["-Wimplicit-fallthrough", "-Wno-undefined-var-template"])
			
 
				     elif env["warnings"] == "all":
			
 
				         env.Append(CCFLAGS=["-Wall"] + common_warnings)
			
 
				     elif env["warnings"] == "moderate":
			
--- a/core/io/resource_loader.cpp
+++ b/core/io/resource_loader.cpp
@@ -207,34 +207,53 @@ void ResourceFormatLoader::_bind_methods() {
 
				 
			
 
				 ///////////////////////////////////
			
 
				 
			
 
				+// These are used before and after a wait for a WorkerThreadPool task
			
 
				+// because that can lead to another load started in the same thread,
			
 
				+// something we must treat as a different stack for the purposes
			
 
				+// of tracking nesting.
			
 
				+
			
 
				+#define PREPARE_FOR_WTP_WAIT                                                   \
			
 
				+	int load_nesting_backup = ResourceLoader::load_nesting;                    \
			
 
				+	Vector<String> load_paths_stack_backup = ResourceLoader::load_paths_stack; \
			
 
				+	ResourceLoader::load_nesting = 0;                                          \
			
 
				+	ResourceLoader::load_paths_stack.clear();
			
 
				+
			
 
				+#define RESTORE_AFTER_WTP_WAIT                                  \
			
 
				+	DEV_ASSERT(ResourceLoader::load_nesting == 0);              \
			
 
				+	DEV_ASSERT(ResourceLoader::load_paths_stack.is_empty());    \
			
 
				+	ResourceLoader::load_nesting = load_nesting_backup;         \
			
 
				+	ResourceLoader::load_paths_stack = load_paths_stack_backup; \
			
 
				+	load_paths_stack_backup.clear();
			
 
				+
			
 
				 // This should be robust enough to be called redundantly without issues.
			
 
				 void ResourceLoader::LoadToken::clear() {
			
 
				 	thread_load_mutex.lock();
			
 
				 
			
 
				 	WorkerThreadPool::TaskID task_to_await = 0;
			
 
				 
			
 
				+	// User-facing tokens shouldn't be deleted until completely claimed.
			
 
				+	DEV_ASSERT(user_rc == 0 && user_path.is_empty());
			
 
				+
			
 
				 	if (!local_path.is_empty()) { // Empty is used for the special case where the load task is not registered.
			
 
				 		DEV_ASSERT(thread_load_tasks.has(local_path));
			
 
				 		ThreadLoadTask &load_task = thread_load_tasks[local_path];
			
 
				-		if (!load_task.awaited) {
			
 
				+		if (load_task.task_id && !load_task.awaited) {
			
 
				 			task_to_await = load_task.task_id;
			
 
				-			load_task.awaited = true;
			
 
				 		}
			
 
				+		// Removing a task which is still in progress would be catastrophic.
			
 
				+		// Tokens must be alive until the task thread function is done.
			
 
				+		DEV_ASSERT(load_task.status == THREAD_LOAD_FAILED || load_task.status == THREAD_LOAD_LOADED);
			
 
				 		thread_load_tasks.erase(local_path);
			
 
				 		local_path.clear();
			
 
				 	}
			
 
				 
			
 
				-	if (!user_path.is_empty()) {
			
 
				-		DEV_ASSERT(user_load_tokens.has(user_path));
			
 
				-		user_load_tokens.erase(user_path);
			
 
				-		user_path.clear();
			
 
				-	}
			
 
				-
			
 
				 	thread_load_mutex.unlock();
			
 
				 
			
 
				 	// If task is unused, await it here, locally, now the token data is consistent.
			
 
				 	if (task_to_await) {
			
 
				+		PREPARE_FOR_WTP_WAIT
			
 
				 		WorkerThreadPool::get_singleton()->wait_for_task_completion(task_to_await);
			
 
				+		RESTORE_AFTER_WTP_WAIT
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -295,11 +314,11 @@ Ref<Resource> ResourceLoader::_load(const String &p_path, const String &p_origin
 
				 	ERR_FAIL_V_MSG(Ref<Resource>(), vformat("No loader found for resource: %s (expected type: %s)", p_path, p_type_hint));
			
 
				 }
			
 
				 
			
 
				-void ResourceLoader::_thread_load_function(void *p_userdata) {
			
 
				+// This implementation must allow re-entrancy for a task that started awaiting in a deeper stack frame.
			
 
				+void ResourceLoader::_run_load_task(void *p_userdata) {
			
 
				 	ThreadLoadTask &load_task = *(ThreadLoadTask *)p_userdata;
			
 
				 
			
 
				 	thread_load_mutex.lock();
			
 
				-	caller_task_id = load_task.task_id;
			
 
				 	if (cleaning_tasks) {
			
 
				 		load_task.status = THREAD_LOAD_FAILED;
			
 
				 		thread_load_mutex.unlock();
			
@@ -322,8 +341,10 @@ void ResourceLoader::_thread_load_function(void *p_userdata) {
 
				 	}
			
 
				 	// --
			
 
				 
			
 
				+	bool xl_remapped = false;
			
 
				+	const String &remapped_path = _path_remap(load_task.local_path, &xl_remapped);
			
 
				 	Error load_err = OK;
			
 
				-	Ref<Resource> res = _load(load_task.remapped_path, load_task.remapped_path != load_task.local_path ? load_task.local_path : String(), load_task.type_hint, load_task.cache_mode, &load_err, load_task.use_sub_threads, &load_task.progress);
			
 
				+	Ref<Resource> res = _load(remapped_path, remapped_path != load_task.local_path ? load_task.local_path : String(), load_task.type_hint, load_task.cache_mode, &load_err, load_task.use_sub_threads, &load_task.progress);
			
 
				 	if (MessageQueue::get_singleton() != MessageQueue::get_main_singleton()) {
			
 
				 		MessageQueue::get_singleton()->flush();
			
 
				 	}
			
@@ -356,27 +377,40 @@ void ResourceLoader::_thread_load_function(void *p_userdata) {
 
				 		unlock_pending = false;
			
 
				 
			
 
				 		if (!ignoring) {
			
 
				-			if (replacing) {
			
 
				-				Ref<Resource> old_res = ResourceCache::get_ref(load_task.local_path);
			
 
				-				if (old_res.is_valid() && old_res != load_task.resource) {
			
 
				-					// If resource is already loaded, only replace its data, to avoid existing invalidating instances.
			
 
				-					old_res->copy_from(load_task.resource);
			
 
				+			ResourceCache::lock.lock(); // Check and operations must happen atomically.
			
 
				+			bool pending_unlock = true;
			
 
				+			Ref<Resource> old_res = ResourceCache::get_ref(load_task.local_path);
			
 
				+			if (old_res.is_valid()) {
			
 
				+				if (old_res != load_task.resource) {
			
 
				+					// Resource can already exists at this point for two reasons:
			
 
				+					// a) The load uses replace mode.
			
 
				+					// b) There were more than one load in flight for the same path because of deadlock prevention.
			
 
				+					// Either case, we want to keep the resource that was already there.
			
 
				+					ResourceCache::lock.unlock();
			
 
				+					pending_unlock = false;
			
 
				+					if (replacing) {
			
 
				+						old_res->copy_from(load_task.resource);
			
 
				+					}
			
 
				 					load_task.resource = old_res;
			
 
				 				}
			
 
				+			} else {
			
 
				+				load_task.resource->set_path(load_task.local_path);
			
 
				+			}
			
 
				+			if (pending_unlock) {
			
 
				+				ResourceCache::lock.unlock();
			
 
				 			}
			
 
				-			load_task.resource->set_path(load_task.local_path, replacing);
			
 
				 		} else {
			
 
				 			load_task.resource->set_path_cache(load_task.local_path);
			
 
				 		}
			
 
				 
			
 
				-		if (load_task.xl_remapped) {
			
 
				+		if (xl_remapped) {
			
 
				 			load_task.resource->set_as_translation_remapped(true);
			
 
				 		}
			
 
				 
			
 
				 #ifdef TOOLS_ENABLED
			
 
				 		load_task.resource->set_edited(false);
			
 
				 		if (timestamp_on_load) {
			
 
				-			uint64_t mt = FileAccess::get_modified_time(load_task.remapped_path);
			
 
				+			uint64_t mt = FileAccess::get_modified_time(remapped_path);
			
 
				 			//printf("mt %s: %lli\n",remapped_path.utf8().get_data(),mt);
			
 
				 			load_task.resource->set_last_modified_time(mt);
			
 
				 		}
			
@@ -426,36 +460,44 @@ static String _validate_local_path(const String &p_path) {
 
				 }
			
 
				 
			
 
				 Error ResourceLoader::load_threaded_request(const String &p_path, const String &p_type_hint, bool p_use_sub_threads, ResourceFormatLoader::CacheMode p_cache_mode) {
			
 
				-	thread_load_mutex.lock();
			
 
				-	if (user_load_tokens.has(p_path)) {
			
 
				-		print_verbose("load_threaded_request(): Another threaded load for resource path '" + p_path + "' has been initiated. Not an error.");
			
 
				-		user_load_tokens[p_path]->reference(); // Additional request.
			
 
				-		thread_load_mutex.unlock();
			
 
				-		return OK;
			
 
				-	}
			
 
				-	user_load_tokens[p_path] = nullptr;
			
 
				-	thread_load_mutex.unlock();
			
 
				+	Ref<ResourceLoader::LoadToken> token = _load_start(p_path, p_type_hint, p_use_sub_threads ? LOAD_THREAD_DISTRIBUTE : LOAD_THREAD_SPAWN_SINGLE, p_cache_mode, true);
			
 
				+	return token.is_valid() ? OK : FAILED;
			
 
				+}
			
 
				 
			
 
				-	Ref<ResourceLoader::LoadToken> token = _load_start(p_path, p_type_hint, p_use_sub_threads ? LOAD_THREAD_DISTRIBUTE : LOAD_THREAD_SPAWN_SINGLE, p_cache_mode);
			
 
				-	if (token.is_valid()) {
			
 
				-		thread_load_mutex.lock();
			
 
				-		token->user_path = p_path;
			
 
				-		token->reference(); // First request.
			
 
				-		user_load_tokens[p_path] = token.ptr();
			
 
				-		print_lt("REQUEST: user load tokens: " + itos(user_load_tokens.size()));
			
 
				-		thread_load_mutex.unlock();
			
 
				-		return OK;
			
 
				+ResourceLoader::LoadToken *ResourceLoader::_load_threaded_request_reuse_user_token(const String &p_path) {
			
 
				+	HashMap<String, LoadToken *>::Iterator E = user_load_tokens.find(p_path);
			
 
				+	if (E) {
			
 
				+		print_verbose("load_threaded_request(): Another threaded load for resource path '" + p_path + "' has been initiated. Not an error.");
			
 
				+		LoadToken *token = E->value;
			
 
				+		token->user_rc++;
			
 
				+		return token;
			
 
				 	} else {
			
 
				-		return FAILED;
			
 
				+		return nullptr;
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				+void ResourceLoader::_load_threaded_request_setup_user_token(LoadToken *p_token, const String &p_path) {
			
 
				+	p_token->user_path = p_path;
			
 
				+	p_token->reference(); // Extra RC until all user requests have been gotten.
			
 
				+	p_token->user_rc = 1;
			
 
				+	user_load_tokens[p_path] = p_token;
			
 
				+	print_lt("REQUEST: user load tokens: " + itos(user_load_tokens.size()));
			
 
				+}
			
 
				+
			
 
				 Ref<Resource> ResourceLoader::load(const String &p_path, const String &p_type_hint, ResourceFormatLoader::CacheMode p_cache_mode, Error *r_error) {
			
 
				 	if (r_error) {
			
 
				 		*r_error = OK;
			
 
				 	}
			
 
				 
			
 
				-	Ref<LoadToken> load_token = _load_start(p_path, p_type_hint, LOAD_THREAD_FROM_CURRENT, p_cache_mode);
			
 
				+	LoadThreadMode thread_mode = LOAD_THREAD_FROM_CURRENT;
			
 
				+	if (WorkerThreadPool::get_singleton()->get_caller_task_id() != WorkerThreadPool::INVALID_TASK_ID) {
			
 
				+		// If user is initiating a single-threaded load from a WorkerThreadPool task,
			
 
				+		// we instead spawn a new task so there's a precondition that a load in a pool task
			
 
				+		// is always initiated by the engine. That makes certain aspects simpler, such as
			
 
				+		// cyclic load detection and awaiting.
			
 
				+		thread_mode = LOAD_THREAD_SPAWN_SINGLE;
			
 
				+	}
			
 
				+	Ref<LoadToken> load_token = _load_start(p_path, p_type_hint, thread_mode, p_cache_mode);
			
 
				 	if (!load_token.is_valid()) {
			
 
				 		if (r_error) {
			
 
				 			*r_error = FAILED;
			
@@ -467,7 +509,7 @@ Ref<Resource> ResourceLoader::load(const String &p_path, const String &p_type_hi
 
				 	return res;
			
 
				 }
			
 
				 
			
 
				-Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path, const String &p_type_hint, LoadThreadMode p_thread_mode, ResourceFormatLoader::CacheMode p_cache_mode) {
			
 
				+Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path, const String &p_type_hint, LoadThreadMode p_thread_mode, ResourceFormatLoader::CacheMode p_cache_mode, bool p_for_user) {
			
 
				 	String local_path = _validate_local_path(p_path);
			
 
				 
			
 
				 	bool ignoring_cache = p_cache_mode == ResourceFormatLoader::CACHE_MODE_IGNORE || p_cache_mode == ResourceFormatLoader::CACHE_MODE_IGNORE_DEEP;
			
@@ -480,6 +522,13 @@ Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path,
 
				 	{
			
 
				 		MutexLock thread_load_lock(thread_load_mutex);
			
 
				 
			
 
				+		if (p_for_user) {
			
 
				+			LoadToken *existing_token = _load_threaded_request_reuse_user_token(p_path);
			
 
				+			if (existing_token) {
			
 
				+				return Ref<LoadToken>(existing_token);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				 		if (!ignoring_cache && thread_load_tasks.has(local_path)) {
			
 
				 			load_token = Ref<LoadToken>(thread_load_tasks[local_path].load_token);
			
 
				 			if (load_token.is_valid()) {
			
@@ -493,12 +542,14 @@ Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path,
 
				 
			
 
				 		load_token.instantiate();
			
 
				 		load_token->local_path = local_path;
			
 
				+		if (p_for_user) {
			
 
				+			_load_threaded_request_setup_user_token(load_token.ptr(), p_path);
			
 
				+		}
			
 
				 
			
 
				 		//create load task
			
 
				 		{
			
 
				 			ThreadLoadTask load_task;
			
 
				 
			
 
				-			load_task.remapped_path = _path_remap(local_path, &load_task.xl_remapped);
			
 
				 			load_task.load_token = load_token.ptr();
			
 
				 			load_task.local_path = local_path;
			
 
				 			load_task.type_hint = p_type_hint;
			
@@ -511,6 +562,7 @@ Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path,
 
				 					load_task.resource = existing;
			
 
				 					load_task.status = THREAD_LOAD_LOADED;
			
 
				 					load_task.progress = 1.0;
			
 
				+					DEV_ASSERT(!thread_load_tasks.has(local_path));
			
 
				 					thread_load_tasks[local_path] = load_task;
			
 
				 					return load_token;
			
 
				 				}
			
@@ -532,14 +584,20 @@ Ref<ResourceLoader::LoadToken> ResourceLoader::_load_start(const String &p_path,
 
				 		run_on_current_thread = must_not_register || p_thread_mode == LOAD_THREAD_FROM_CURRENT;
			
 
				 
			
 
				 		if (run_on_current_thread) {
			
 
				-			load_task_ptr->thread_id = Thread::get_caller_id();
			
 
				+			// The current thread may happen to be a thread from the pool.
			
 
				+			WorkerThreadPool::TaskID tid = WorkerThreadPool::get_singleton()->get_caller_task_id();
			
 
				+			if (tid != WorkerThreadPool::INVALID_TASK_ID) {
			
 
				+				load_task_ptr->task_id = tid;
			
 
				+			} else {
			
 
				+				load_task_ptr->thread_id = Thread::get_caller_id();
			
 
				+			}
			
 
				 		} else {
			
 
				-			load_task_ptr->task_id = WorkerThreadPool::get_singleton()->add_native_task(&ResourceLoader::_thread_load_function, load_task_ptr);
			
 
				+			load_task_ptr->task_id = WorkerThreadPool::get_singleton()->add_native_task(&ResourceLoader::_run_load_task, load_task_ptr);
			
 
				 		}
			
 
				-	}
			
 
				+	} // MutexLock(thread_load_mutex).
			
 
				 
			
 
				 	if (run_on_current_thread) {
			
 
				-		_thread_load_function(load_task_ptr);
			
 
				+		_run_load_task(load_task_ptr);
			
 
				 		if (must_not_register) {
			
 
				 			load_token->res_if_unregistered = load_task_ptr->resource;
			
 
				 		}
			
@@ -626,13 +684,7 @@ Ref<Resource> ResourceLoader::load_threaded_get(const String &p_path, Error *r_e
 
				 		}
			
 
				 
			
 
				 		LoadToken *load_token = user_load_tokens[p_path];
			
 
				-		if (!load_token) {
			
 
				-			// This happens if requested from one thread and rapidly querying from another.
			
 
				-			if (r_error) {
			
 
				-				*r_error = ERR_BUSY;
			
 
				-			}
			
 
				-			return Ref<Resource>();
			
 
				-		}
			
 
				+		DEV_ASSERT(load_token->user_rc >= 1);
			
 
				 
			
 
				 		// Support userland requesting on the main thread before the load is reported to be complete.
			
 
				 		if (Thread::is_main_thread() && !load_token->local_path.is_empty()) {
			
@@ -649,8 +701,15 @@ Ref<Resource> ResourceLoader::load_threaded_get(const String &p_path, Error *r_e
 
				 		}
			
 
				 
			
 
				 		res = _load_complete_inner(*load_token, r_error, thread_load_lock);
			
 
				-		if (load_token->unreference()) {
			
 
				-			memdelete(load_token);
			
 
				+
			
 
				+		load_token->user_rc--;
			
 
				+		if (load_token->user_rc == 0) {
			
 
				+			load_token->user_path.clear();
			
 
				+			user_load_tokens.erase(p_path);
			
 
				+			if (load_token->unreference()) {
			
 
				+				memdelete(load_token);
			
 
				+				load_token = nullptr;
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -682,7 +741,7 @@ Ref<Resource> ResourceLoader::_load_complete_inner(LoadToken &p_load_token, Erro
 
				 		if (load_task.status == THREAD_LOAD_IN_PROGRESS) {
			
 
				 			DEV_ASSERT((load_task.task_id == 0) != (load_task.thread_id == 0));
			
 
				 
			
 
				-			if ((load_task.task_id != 0 && load_task.task_id == caller_task_id) ||
			
 
				+			if ((load_task.task_id != 0 && load_task.task_id == WorkerThreadPool::get_singleton()->get_caller_task_id()) ||
			
 
				 					(load_task.thread_id != 0 && load_task.thread_id == Thread::get_caller_id())) {
			
 
				 				// Load is in progress, but it's precisely this thread the one in charge.
			
 
				 				// That means this is a cyclic load.
			
@@ -693,55 +752,45 @@ Ref<Resource> ResourceLoader::_load_complete_inner(LoadToken &p_load_token, Erro
 
				 			}
			
 
				 
			
 
				 			bool loader_is_wtp = load_task.task_id != 0;
			
 
				-			Error wtp_task_err = FAILED;
			
 
				 			if (loader_is_wtp) {
			
 
				 				// Loading thread is in the worker pool.
			
 
				-				load_task.awaited = true;
			
 
				 				thread_load_mutex.unlock();
			
 
				-				wtp_task_err = WorkerThreadPool::get_singleton()->wait_for_task_completion(load_task.task_id);
			
 
				-			}
			
 
				 
			
 
				-			if (load_task.status == THREAD_LOAD_IN_PROGRESS) { // If early errored, awaiting would deadlock.
			
 
				-				if (loader_is_wtp) {
			
 
				-					if (wtp_task_err == ERR_BUSY) {
			
 
				-						// The WorkerThreadPool has reported that the current task wants to await on an older one.
			
 
				-						// That't not allowed for safety, to avoid deadlocks. Fortunately, though, in the context of
			
 
				-						// resource loading that means that the task to wait for can be restarted here to break the
			
 
				-						// cycle, with as much recursion into this process as needed.
			
 
				-						// When the stack is eventually unrolled, the original load will have been notified to go on.
			
 
				-						// CACHE_MODE_IGNORE is needed because, otherwise, the new request would just see there's
			
 
				-						// an ongoing load for that resource and wait for it again. This value forces a new load.
			
 
				-						Ref<ResourceLoader::LoadToken> token = _load_start(load_task.local_path, load_task.type_hint, LOAD_THREAD_DISTRIBUTE, ResourceFormatLoader::CACHE_MODE_IGNORE);
			
 
				-						Ref<Resource> resource = _load_complete(*token.ptr(), &wtp_task_err);
			
 
				-						if (r_error) {
			
 
				-							*r_error = wtp_task_err;
			
 
				-						}
			
 
				-						thread_load_mutex.lock();
			
 
				-						return resource;
			
 
				-					} else {
			
 
				-						DEV_ASSERT(wtp_task_err == OK);
			
 
				-						thread_load_mutex.lock();
			
 
				-					}
			
 
				-				} else if (load_task.need_wait) {
			
 
				-					// Loading thread is main or user thread.
			
 
				-					if (!load_task.cond_var) {
			
 
				-						load_task.cond_var = memnew(ConditionVariable);
			
 
				-					}
			
 
				-					load_task.awaiters_count++;
			
 
				-					do {
			
 
				-						load_task.cond_var->wait(p_thread_load_lock);
			
 
				-						DEV_ASSERT(thread_load_tasks.has(p_load_token.local_path) && p_load_token.get_reference_count());
			
 
				-					} while (load_task.need_wait);
			
 
				-					load_task.awaiters_count--;
			
 
				-					if (load_task.awaiters_count == 0) {
			
 
				-						memdelete(load_task.cond_var);
			
 
				-						load_task.cond_var = nullptr;
			
 
				-					}
			
 
				+				PREPARE_FOR_WTP_WAIT
			
 
				+				Error wait_err = WorkerThreadPool::get_singleton()->wait_for_task_completion(load_task.task_id);
			
 
				+				RESTORE_AFTER_WTP_WAIT
			
 
				+
			
 
				+				DEV_ASSERT(!wait_err || wait_err == ERR_BUSY);
			
 
				+				if (wait_err == ERR_BUSY) {
			
 
				+					// The WorkerThreadPool has reported that the current task wants to await on an older one.
			
 
				+					// That't not allowed for safety, to avoid deadlocks. Fortunately, though, in the context of
			
 
				+					// resource loading that means that the task to wait for can be restarted here to break the
			
 
				+					// cycle, with as much recursion into this process as needed.
			
 
				+					// When the stack is eventually unrolled, the original load will have been notified to go on.
			
 
				+					_run_load_task(&load_task);
			
 
				 				}
			
 
				-			} else {
			
 
				-				if (loader_is_wtp) {
			
 
				-					thread_load_mutex.lock();
			
 
				+
			
 
				+				thread_load_mutex.lock();
			
 
				+				load_task.awaited = true;
			
 
				+
			
 
				+				DEV_ASSERT(load_task.status == THREAD_LOAD_FAILED || load_task.status == THREAD_LOAD_LOADED);
			
 
				+			} else if (load_task.need_wait) {
			
 
				+				// Loading thread is main or user thread.
			
 
				+				if (!load_task.cond_var) {
			
 
				+					load_task.cond_var = memnew(ConditionVariable);
			
 
				+				}
			
 
				+				load_task.awaiters_count++;
			
 
				+				do {
			
 
				+					load_task.cond_var->wait(p_thread_load_lock);
			
 
				+					DEV_ASSERT(thread_load_tasks.has(p_load_token.local_path) && p_load_token.get_reference_count());
			
 
				+				} while (load_task.need_wait);
			
 
				+				load_task.awaiters_count--;
			
 
				+				if (load_task.awaiters_count == 0) {
			
 
				+					memdelete(load_task.cond_var);
			
 
				+					load_task.cond_var = nullptr;
			
 
				 				}
			
 
				+
			
 
				+				DEV_ASSERT(load_task.status == THREAD_LOAD_FAILED || load_task.status == THREAD_LOAD_LOADED);
			
 
				 			}
			
 
				 		}
			
 
				 
			
@@ -1181,10 +1230,13 @@ void ResourceLoader::clear_thread_load_tasks() {
 
				 	}
			
 
				 
			
 
				 	while (user_load_tokens.begin()) {
			
 
				-		// User load tokens remove themselves from the map on destruction.
			
 
				-		memdelete(user_load_tokens.begin()->value);
			
 
				+		LoadToken *user_token = user_load_tokens.begin()->value;
			
 
				+		user_load_tokens.remove(user_load_tokens.begin());
			
 
				+		DEV_ASSERT(user_token->user_rc > 0 && !user_token->user_path.is_empty());
			
 
				+		user_token->user_path.clear();
			
 
				+		user_token->user_rc = 0;
			
 
				+		user_token->unreference();
			
 
				 	}
			
 
				-	user_load_tokens.clear();
			
 
				 
			
 
				 	thread_load_tasks.clear();
			
 
				 
			
@@ -1302,12 +1354,15 @@ bool ResourceLoader::abort_on_missing_resource = true;
 
				 bool ResourceLoader::timestamp_on_load = false;
			
 
				 
			
 
				 thread_local int ResourceLoader::load_nesting = 0;
			
 
				-thread_local WorkerThreadPool::TaskID ResourceLoader::caller_task_id = 0;
			
 
				 thread_local Vector<String> ResourceLoader::load_paths_stack;
			
 
				 thread_local HashMap<int, HashMap<String, Ref<Resource>>> ResourceLoader::res_ref_overrides;
			
 
				 
			
 
				+SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG> &_get_res_loader_mutex() {
			
 
				+	return ResourceLoader::thread_load_mutex;
			
 
				+}
			
 
				+
			
 
				 template <>
			
 
				-thread_local uint32_t SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG>::count = 0;
			
 
				+thread_local SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG>::TLSData SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG>::tls_data(_get_res_loader_mutex());
			
 
				 SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG> ResourceLoader::thread_load_mutex;
			
 
				 HashMap<String, ResourceLoader::ThreadLoadTask> ResourceLoader::thread_load_tasks;
			
 
				 bool ResourceLoader::cleaning_tasks = false;
			
--- a/core/io/resource_loader.h
+++ b/core/io/resource_loader.h
@@ -100,6 +100,8 @@ typedef Error (*ResourceLoaderImport)(const String &p_path);
 
				 typedef void (*ResourceLoadedCallback)(Ref<Resource> p_resource, const String &p_path);
			
 
				 
			
 
				 class ResourceLoader {
			
 
				+	friend class LoadToken;
			
 
				+
			
 
				 	enum {
			
 
				 		MAX_LOADERS = 64
			
 
				 	};
			
@@ -121,6 +123,7 @@ public:
 
				 	struct LoadToken : public RefCounted {
			
 
				 		String local_path;
			
 
				 		String user_path;
			
 
				+		uint32_t user_rc = 0; // Having user RC implies regular RC incremented in one, until the user RC reaches zero.
			
 
				 		Ref<Resource> res_if_unregistered;
			
 
				 
			
 
				 		void clear();
			
@@ -130,10 +133,13 @@ public:
 
				 
			
 
				 	static const int BINARY_MUTEX_TAG = 1;
			
 
				 
			
 
				-	static Ref<LoadToken> _load_start(const String &p_path, const String &p_type_hint, LoadThreadMode p_thread_mode, ResourceFormatLoader::CacheMode p_cache_mode);
			
 
				+	static Ref<LoadToken> _load_start(const String &p_path, const String &p_type_hint, LoadThreadMode p_thread_mode, ResourceFormatLoader::CacheMode p_cache_mode, bool p_for_user = false);
			
 
				 	static Ref<Resource> _load_complete(LoadToken &p_load_token, Error *r_error);
			
 
				 
			
 
				 private:
			
 
				+	static LoadToken *_load_threaded_request_reuse_user_token(const String &p_path);
			
 
				+	static void _load_threaded_request_setup_user_token(LoadToken *p_token, const String &p_path);
			
 
				+
			
 
				 	static Ref<Resource> _load_complete_inner(LoadToken &p_load_token, Error *r_error, MutexLock<SafeBinaryMutex<BINARY_MUTEX_TAG>> &p_thread_load_lock);
			
 
				 
			
 
				 	static Ref<ResourceFormatLoader> loader[MAX_LOADERS];
			
@@ -171,7 +177,6 @@ private:
 
				 		bool need_wait = true;
			
 
				 		LoadToken *load_token = nullptr;
			
 
				 		String local_path;
			
 
				-		String remapped_path;
			
 
				 		String type_hint;
			
 
				 		float progress = 0.0f;
			
 
				 		float max_reported_progress = 0.0f;
			
@@ -180,18 +185,19 @@ private:
 
				 		ResourceFormatLoader::CacheMode cache_mode = ResourceFormatLoader::CACHE_MODE_REUSE;
			
 
				 		Error error = OK;
			
 
				 		Ref<Resource> resource;
			
 
				-		bool xl_remapped = false;
			
 
				 		bool use_sub_threads = false;
			
 
				 		HashSet<String> sub_tasks;
			
 
				 	};
			
 
				 
			
 
				-	static void _thread_load_function(void *p_userdata);
			
 
				+	static void _run_load_task(void *p_userdata);
			
 
				 
			
 
				 	static thread_local int load_nesting;
			
 
				-	static thread_local WorkerThreadPool::TaskID caller_task_id;
			
 
				 	static thread_local HashMap<int, HashMap<String, Ref<Resource>>> res_ref_overrides; // Outermost key is nesting level.
			
 
				 	static thread_local Vector<String> load_paths_stack;
			
 
				+
			
 
				 	static SafeBinaryMutex<BINARY_MUTEX_TAG> thread_load_mutex;
			
 
				+	friend SafeBinaryMutex<BINARY_MUTEX_TAG> &_get_res_loader_mutex();
			
 
				+
			
 
				 	static HashMap<String, ThreadLoadTask> thread_load_tasks;
			
 
				 	static bool cleaning_tasks;
			
 
				 
			
--- a/core/object/worker_thread_pool.cpp
+++ b/core/object/worker_thread_pool.cpp
@@ -32,6 +32,7 @@
 
				 
			
 
				 #include "core/object/script_language.h"
			
 
				 #include "core/os/os.h"
			
 
				+#include "core/os/safe_binary_mutex.h"
			
 
				 #include "core/os/thread_safe.h"
			
 
				 
			
 
				 WorkerThreadPool::Task *const WorkerThreadPool::ThreadData::YIELDING = (Task *)1;
			
@@ -46,7 +47,7 @@ void WorkerThreadPool::Task::free_template_userdata() {
 
				 WorkerThreadPool *WorkerThreadPool::singleton = nullptr;
			
 
				 
			
 
				 #ifdef THREADS_ENABLED
			
 
				-thread_local uintptr_t WorkerThreadPool::unlockable_mutexes[MAX_UNLOCKABLE_MUTEXES] = {};
			
 
				+thread_local WorkerThreadPool::UnlockableLocks WorkerThreadPool::unlockable_locks[MAX_UNLOCKABLE_LOCKS];
			
 
				 #endif
			
 
				 
			
 
				 void WorkerThreadPool::_process_task(Task *p_task) {
			
@@ -428,13 +429,9 @@ Error WorkerThreadPool::wait_for_task_completion(TaskID p_task_id) {
 
				 
			
 
				 void WorkerThreadPool::_lock_unlockable_mutexes() {
			
 
				 #ifdef THREADS_ENABLED
			
 
				-	for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) {
			
 
				-		if (unlockable_mutexes[i]) {
			
 
				-			if ((((uintptr_t)unlockable_mutexes[i]) & 1) == 0) {
			
 
				-				((Mutex *)unlockable_mutexes[i])->lock();
			
 
				-			} else {
			
 
				-				((BinaryMutex *)(unlockable_mutexes[i] & ~1))->lock();
			
 
				-			}
			
 
				+	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
			
 
				+		if (unlockable_locks[i].ulock) {
			
 
				+			unlockable_locks[i].ulock->lock();
			
 
				 		}
			
 
				 	}
			
 
				 #endif
			
@@ -442,13 +439,9 @@ void WorkerThreadPool::_lock_unlockable_mutexes() {
 
				 
			
 
				 void WorkerThreadPool::_unlock_unlockable_mutexes() {
			
 
				 #ifdef THREADS_ENABLED
			
 
				-	for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) {
			
 
				-		if (unlockable_mutexes[i]) {
			
 
				-			if ((((uintptr_t)unlockable_mutexes[i]) & 1) == 0) {
			
 
				-				((Mutex *)unlockable_mutexes[i])->unlock();
			
 
				-			} else {
			
 
				-				((BinaryMutex *)(unlockable_mutexes[i] & ~1))->unlock();
			
 
				-			}
			
 
				+	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
			
 
				+		if (unlockable_locks[i].ulock) {
			
 
				+			unlockable_locks[i].ulock->unlock();
			
 
				 		}
			
 
				 	}
			
 
				 #endif
			
@@ -665,38 +658,38 @@ int WorkerThreadPool::get_thread_index() {
 
				 	return singleton->thread_ids.has(tid) ? singleton->thread_ids[tid] : -1;
			
 
				 }
			
 
				 
			
 
				-#ifdef THREADS_ENABLED
			
 
				-uint32_t WorkerThreadPool::thread_enter_unlock_allowance_zone(Mutex *p_mutex) {
			
 
				-	return _thread_enter_unlock_allowance_zone(p_mutex, false);
			
 
				-}
			
 
				-
			
 
				-uint32_t WorkerThreadPool::thread_enter_unlock_allowance_zone(BinaryMutex *p_mutex) {
			
 
				-	return _thread_enter_unlock_allowance_zone(p_mutex, true);
			
 
				+WorkerThreadPool::TaskID WorkerThreadPool::get_caller_task_id() {
			
 
				+	int th_index = get_thread_index();
			
 
				+	if (th_index != -1 && singleton->threads[th_index].current_task) {
			
 
				+		return singleton->threads[th_index].current_task->self;
			
 
				+	} else {
			
 
				+		return INVALID_TASK_ID;
			
 
				+	}
			
 
				 }
			
 
				 
			
 
				-uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(void *p_mutex, bool p_is_binary) {
			
 
				-	for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) {
			
 
				-		if (unlikely((unlockable_mutexes[i] & ~1) == (uintptr_t)p_mutex)) {
			
 
				+#ifdef THREADS_ENABLED
			
 
				+uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &p_ulock) {
			
 
				+	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
			
 
				+		DEV_ASSERT((bool)unlockable_locks[i].ulock == (bool)unlockable_locks[i].rc);
			
 
				+		if (unlockable_locks[i].ulock == &p_ulock) {
			
 
				 			// Already registered in the current thread.
			
 
				-			return UINT32_MAX;
			
 
				-		}
			
 
				-		if (!unlockable_mutexes[i]) {
			
 
				-			unlockable_mutexes[i] = (uintptr_t)p_mutex;
			
 
				-			if (p_is_binary) {
			
 
				-				unlockable_mutexes[i] |= 1;
			
 
				-			}
			
 
				+			unlockable_locks[i].rc++;
			
 
				+			return i;
			
 
				+		} else if (!unlockable_locks[i].ulock) {
			
 
				+			unlockable_locks[i].ulock = &p_ulock;
			
 
				+			unlockable_locks[i].rc = 1;
			
 
				 			return i;
			
 
				 		}
			
 
				 	}
			
 
				-	ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable mutex slots available. Engine bug.");
			
 
				+	ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable lock slots available. Engine bug.");
			
 
				 }
			
 
				 
			
 
				 void WorkerThreadPool::thread_exit_unlock_allowance_zone(uint32_t p_zone_id) {
			
 
				-	if (p_zone_id == UINT32_MAX) {
			
 
				-		return;
			
 
				+	DEV_ASSERT(unlockable_locks[p_zone_id].ulock && unlockable_locks[p_zone_id].rc);
			
 
				+	unlockable_locks[p_zone_id].rc--;
			
 
				+	if (unlockable_locks[p_zone_id].rc == 0) {
			
 
				+		unlockable_locks[p_zone_id].ulock = nullptr;
			
 
				 	}
			
 
				-	DEV_ASSERT(unlockable_mutexes[p_zone_id]);
			
 
				-	unlockable_mutexes[p_zone_id] = 0;
			
 
				 }
			
 
				 #endif
			
 
				 
			
--- a/core/object/worker_thread_pool.h
+++ b/core/object/worker_thread_pool.h
@@ -162,8 +162,12 @@ private:
 
				 	static WorkerThreadPool *singleton;
			
 
				 
			
 
				 #ifdef THREADS_ENABLED
			
 
				-	static const uint32_t MAX_UNLOCKABLE_MUTEXES = 2;
			
 
				-	static thread_local uintptr_t unlockable_mutexes[MAX_UNLOCKABLE_MUTEXES];
			
 
				+	static const uint32_t MAX_UNLOCKABLE_LOCKS = 2;
			
 
				+	struct UnlockableLocks {
			
 
				+		THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> *ulock = nullptr;
			
 
				+		uint32_t rc = 0;
			
 
				+	};
			
 
				+	static thread_local UnlockableLocks unlockable_locks[MAX_UNLOCKABLE_LOCKS];
			
 
				 #endif
			
 
				 
			
 
				 	TaskID _add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description);
			
@@ -192,7 +196,7 @@ private:
 
				 	void _wait_collaboratively(ThreadData *p_caller_pool_thread, Task *p_task);
			
 
				 
			
 
				 #ifdef THREADS_ENABLED
			
 
				-	static uint32_t _thread_enter_unlock_allowance_zone(void *p_mutex, bool p_is_binary);
			
 
				+	static uint32_t _thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &p_ulock);
			
 
				 #endif
			
 
				 
			
 
				 	void _lock_unlockable_mutexes();
			
@@ -239,13 +243,17 @@ public:
 
				 
			
 
				 	static WorkerThreadPool *get_singleton() { return singleton; }
			
 
				 	static int get_thread_index();
			
 
				+	static TaskID get_caller_task_id();
			
 
				 
			
 
				 #ifdef THREADS_ENABLED
			
 
				-	static uint32_t thread_enter_unlock_allowance_zone(Mutex *p_mutex);
			
 
				-	static uint32_t thread_enter_unlock_allowance_zone(BinaryMutex *p_mutex);
			
 
				+	_ALWAYS_INLINE_ static uint32_t thread_enter_unlock_allowance_zone(const MutexLock<BinaryMutex> &p_lock) { return _thread_enter_unlock_allowance_zone(p_lock._get_lock()); }
			
 
				+	template <int Tag>
			
 
				+	_ALWAYS_INLINE_ static uint32_t thread_enter_unlock_allowance_zone(const SafeBinaryMutex<Tag> &p_mutex) { return _thread_enter_unlock_allowance_zone(p_mutex._get_lock()); }
			
 
				 	static void thread_exit_unlock_allowance_zone(uint32_t p_zone_id);
			
 
				 #else
			
 
				-	static uint32_t thread_enter_unlock_allowance_zone(void *p_mutex) { return UINT32_MAX; }
			
 
				+	static uint32_t thread_enter_unlock_allowance_zone(const MutexLock<BinaryMutex> &p_lock) { return UINT32_MAX; }
			
 
				+	template <int Tag>
			
 
				+	static uint32_t thread_enter_unlock_allowance_zone(const SafeBinaryMutex<Tag> &p_mutex) { return UINT32_MAX; }
			
 
				 	static void thread_exit_unlock_allowance_zone(uint32_t p_zone_id) {}
			
 
				 #endif
			
 
				 
			
--- a/core/os/condition_variable.h
+++ b/core/os/condition_variable.h
@@ -32,6 +32,7 @@
 
				 #define CONDITION_VARIABLE_H
			
 
				 
			
 
				 #include "core/os/mutex.h"
			
 
				+#include "core/os/safe_binary_mutex.h"
			
 
				 
			
 
				 #ifdef THREADS_ENABLED
			
 
				 
			
@@ -56,7 +57,12 @@ class ConditionVariable {
 
				 public:
			
 
				 	template <typename BinaryMutexT>
			
 
				 	_ALWAYS_INLINE_ void wait(const MutexLock<BinaryMutexT> &p_lock) const {
			
 
				-		condition.wait(const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(p_lock.lock));
			
 
				+		condition.wait(const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(p_lock._get_lock()));
			
 
				+	}
			
 
				+
			
 
				+	template <int Tag>
			
 
				+	_ALWAYS_INLINE_ void wait(const MutexLock<SafeBinaryMutex<Tag>> &p_lock) const {
			
 
				+		condition.wait(const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(p_lock.mutex._get_lock()));
			
 
				 	}
			
 
				 
			
 
				 	_ALWAYS_INLINE_ void notify_one() const {
			
--- a/core/os/mutex.h
+++ b/core/os/mutex.h
@@ -72,13 +72,18 @@ public:
 
				 
			
 
				 template <typename MutexT>
			
 
				 class MutexLock {
			
 
				-	friend class ConditionVariable;
			
 
				-
			
 
				 	THREADING_NAMESPACE::unique_lock<typename MutexT::StdMutexType> lock;
			
 
				 
			
 
				 public:
			
 
				 	explicit MutexLock(const MutexT &p_mutex) :
			
 
				 			lock(p_mutex.mutex) {}
			
 
				+
			
 
				+	// Clarification: all the funny syntax is needed so this function exists only for binary mutexes.
			
 
				+	template <typename T = MutexT>
			
 
				+	_ALWAYS_INLINE_ THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &_get_lock(
			
 
				+			typename std::enable_if<std::is_same<T, THREADING_NAMESPACE::mutex>::value> * = nullptr) const {
			
 
				+		return const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(lock);
			
 
				+	}
			
 
				 };
			
 
				 
			
 
				 using Mutex = MutexImpl<THREADING_NAMESPACE::recursive_mutex>; // Recursive, for general use
			
--- a/core/os/safe_binary_mutex.h
+++ b/core/os/safe_binary_mutex.h
@@ -47,76 +47,76 @@
 
				 // Also, don't forget to declare the thread_local variable on each use.
			
 
				 template <int Tag>
			
 
				 class SafeBinaryMutex {
			
 
				-	friend class MutexLock<SafeBinaryMutex>;
			
 
				+	friend class MutexLock<SafeBinaryMutex<Tag>>;
			
 
				 
			
 
				 	using StdMutexType = THREADING_NAMESPACE::mutex;
			
 
				 
			
 
				 	mutable THREADING_NAMESPACE::mutex mutex;
			
 
				-	static thread_local uint32_t count;
			
 
				+
			
 
				+	struct TLSData {
			
 
				+		mutable THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> lock;
			
 
				+		uint32_t count = 0;
			
 
				+
			
 
				+		TLSData(SafeBinaryMutex<Tag> &p_mutex) :
			
 
				+				lock(p_mutex.mutex, THREADING_NAMESPACE::defer_lock) {}
			
 
				+	};
			
 
				+	static thread_local TLSData tls_data;
			
 
				 
			
 
				 public:
			
 
				 	_ALWAYS_INLINE_ void lock() const {
			
 
				-		if (++count == 1) {
			
 
				-			mutex.lock();
			
 
				+		if (++tls_data.count == 1) {
			
 
				+			tls_data.lock.lock();
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				 	_ALWAYS_INLINE_ void unlock() const {
			
 
				-		DEV_ASSERT(count);
			
 
				-		if (--count == 0) {
			
 
				-			mutex.unlock();
			
 
				+		DEV_ASSERT(tls_data.count);
			
 
				+		if (--tls_data.count == 0) {
			
 
				+			tls_data.lock.unlock();
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	_ALWAYS_INLINE_ bool try_lock() const {
			
 
				-		if (count) {
			
 
				-			count++;
			
 
				-			return true;
			
 
				-		} else {
			
 
				-			if (mutex.try_lock()) {
			
 
				-				count++;
			
 
				-				return true;
			
 
				-			} else {
			
 
				-				return false;
			
 
				-			}
			
 
				-		}
			
 
				+	_ALWAYS_INLINE_ THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &_get_lock() const {
			
 
				+		return const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(tls_data.lock);
			
 
				 	}
			
 
				 
			
 
				-	~SafeBinaryMutex() {
			
 
				-		DEV_ASSERT(!count);
			
 
				+	_ALWAYS_INLINE_ SafeBinaryMutex() {
			
 
				+	}
			
 
				+
			
 
				+	_ALWAYS_INLINE_ ~SafeBinaryMutex() {
			
 
				+		DEV_ASSERT(!tls_data.count);
			
 
				 	}
			
 
				 };
			
 
				 
			
 
				-// This specialization is needed so manual locking and MutexLock can be used
			
 
				-// at the same time on a SafeBinaryMutex.
			
 
				 template <int Tag>
			
 
				 class MutexLock<SafeBinaryMutex<Tag>> {
			
 
				 	friend class ConditionVariable;
			
 
				 
			
 
				-	THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> lock;
			
 
				+	const SafeBinaryMutex<Tag> &mutex;
			
 
				 
			
 
				 public:
			
 
				-	_ALWAYS_INLINE_ explicit MutexLock(const SafeBinaryMutex<Tag> &p_mutex) :
			
 
				-			lock(p_mutex.mutex) {
			
 
				-		SafeBinaryMutex<Tag>::count++;
			
 
				-	};
			
 
				-	_ALWAYS_INLINE_ ~MutexLock() {
			
 
				-		SafeBinaryMutex<Tag>::count--;
			
 
				-	};
			
 
				+	explicit MutexLock(const SafeBinaryMutex<Tag> &p_mutex) :
			
 
				+			mutex(p_mutex) {
			
 
				+		mutex.lock();
			
 
				+	}
			
 
				+
			
 
				+	~MutexLock() {
			
 
				+		mutex.unlock();
			
 
				+	}
			
 
				 };
			
 
				 
			
 
				 #else // No threads.
			
 
				 
			
 
				 template <int Tag>
			
 
				-class SafeBinaryMutex : public MutexImpl {
			
 
				-	static thread_local uint32_t count;
			
 
				-};
			
 
				+class SafeBinaryMutex {
			
 
				+	struct TLSData {
			
 
				+		TLSData(SafeBinaryMutex<Tag> &p_mutex) {}
			
 
				+	};
			
 
				+	static thread_local TLSData tls_data;
			
 
				 
			
 
				-template <int Tag>
			
 
				-class MutexLock<SafeBinaryMutex<Tag>> {
			
 
				 public:
			
 
				-	MutexLock(const SafeBinaryMutex<Tag> &p_mutex) {}
			
 
				-	~MutexLock() {}
			
 
				+	void lock() const {}
			
 
				+	void unlock() const {}
			
 
				 };
			
 
				 
			
 
				 #endif // THREADS_ENABLED
			
--- a/core/templates/command_queue_mt.cpp
+++ b/core/templates/command_queue_mt.cpp
@@ -33,14 +33,6 @@
 
				 #include "core/config/project_settings.h"
			
 
				 #include "core/os/os.h"
			
 
				 
			
 
				-void CommandQueueMT::lock() {
			
 
				-	mutex.lock();
			
 
				-}
			
 
				-
			
 
				-void CommandQueueMT::unlock() {
			
 
				-	mutex.unlock();
			
 
				-}
			
 
				-
			
 
				 CommandQueueMT::CommandQueueMT() {
			
 
				 	command_mem.reserve(DEFAULT_COMMAND_MEM_SIZE_KB * 1024);
			
 
				 }
			
--- a/core/templates/command_queue_mt.h
+++ b/core/templates/command_queue_mt.h
@@ -362,23 +362,24 @@ class CommandQueueMT {
 
				 			return;
			
 
				 		}
			
 
				 
			
 
				-		lock();
			
 
				+		MutexLock lock(mutex);
			
 
				 
			
 
				-		uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(&mutex);
			
 
				 		while (flush_read_ptr < command_mem.size()) {
			
 
				 			uint64_t size = *(uint64_t *)&command_mem[flush_read_ptr];
			
 
				 			flush_read_ptr += 8;
			
 
				 			CommandBase *cmd = reinterpret_cast<CommandBase *>(&command_mem[flush_read_ptr]);
			
 
				+			uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(lock);
			
 
				 			cmd->call();
			
 
				+			WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id);
			
 
				 
			
 
				 			// Handle potential realloc due to the command and unlock allowance.
			
 
				 			cmd = reinterpret_cast<CommandBase *>(&command_mem[flush_read_ptr]);
			
 
				 
			
 
				 			if (unlikely(cmd->sync)) {
			
 
				 				sync_head++;
			
 
				-				unlock(); // Give an opportunity to awaiters right away.
			
 
				+				lock.~MutexLock(); // Give an opportunity to awaiters right away.
			
 
				 				sync_cond_var.notify_all();
			
 
				-				lock();
			
 
				+				new (&lock) MutexLock(mutex);
			
 
				 				// Handle potential realloc happened during unlock.
			
 
				 				cmd = reinterpret_cast<CommandBase *>(&command_mem[flush_read_ptr]);
			
 
				 			}
			
@@ -387,14 +388,11 @@ class CommandQueueMT {
 
				 
			
 
				 			flush_read_ptr += size;
			
 
				 		}
			
 
				-		WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id);
			
 
				 
			
 
				 		command_mem.clear();
			
 
				 		flush_read_ptr = 0;
			
 
				 
			
 
				 		_prevent_sync_wraparound();
			
 
				-
			
 
				-		unlock();
			
 
				 	}
			
 
				 
			
 
				 	_FORCE_INLINE_ void _wait_for_sync(MutexLock<BinaryMutex> &p_lock) {
			
@@ -410,9 +408,6 @@ class CommandQueueMT {
 
				 	void _no_op() {}
			
 
				 
			
 
				 public:
			
 
				-	void lock();
			
 
				-	void unlock();
			
 
				-
			
 
				 	/* NORMAL PUSH COMMANDS */
			
 
				 	DECL_PUSH(0)
			
 
				 	SPACE_SEP_LIST(DECL_PUSH, 15)
			
@@ -446,9 +441,8 @@ public:
 
				 	}
			
 
				 
			
 
				 	void set_pump_task_id(WorkerThreadPool::TaskID p_task_id) {
			
 
				-		lock();
			
 
				+		MutexLock lock(mutex);
			
 
				 		pump_task_id = p_task_id;
			
 
				-		unlock();
			
 
				 	}
			
 
				 
			
 
				 	CommandQueueMT();
			
--- a/modules/gdscript/gdscript_cache.cpp
+++ b/modules/gdscript/gdscript_cache.cpp
@@ -144,6 +144,14 @@ GDScriptParserRef::~GDScriptParserRef() {
 
				 
			
 
				 GDScriptCache *GDScriptCache::singleton = nullptr;
			
 
				 
			
 
				+SafeBinaryMutex<GDScriptCache::BINARY_MUTEX_TAG> &_get_gdscript_cache_mutex() {
			
 
				+	return GDScriptCache::mutex;
			
 
				+}
			
 
				+
			
 
				+template <>
			
 
				+thread_local SafeBinaryMutex<GDScriptCache::BINARY_MUTEX_TAG>::TLSData SafeBinaryMutex<GDScriptCache::BINARY_MUTEX_TAG>::tls_data(_get_gdscript_cache_mutex());
			
 
				+SafeBinaryMutex<GDScriptCache::BINARY_MUTEX_TAG> GDScriptCache::mutex;
			
 
				+
			
 
				 void GDScriptCache::move_script(const String &p_from, const String &p_to) {
			
 
				 	if (singleton == nullptr || p_from == p_to) {
			
 
				 		return;
			
@@ -369,7 +377,7 @@ Ref<GDScript> GDScriptCache::get_full_script(const String &p_path, Error &r_erro
 
				 
			
 
				 	// Allowing lifting the lock might cause a script to be reloaded multiple times,
			
 
				 	// which, as a last resort deadlock prevention strategy, is a good tradeoff.
			
 
				-	uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(&singleton->mutex);
			
 
				+	uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(singleton->mutex);
			
 
				 	r_error = script->reload(true);
			
 
				 	WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id);
			
 
				 	if (r_error) {
			
--- a/modules/gdscript/gdscript_cache.h
+++ b/modules/gdscript/gdscript_cache.h
@@ -34,7 +34,7 @@
 
				 #include "gdscript.h"
			
 
				 
			
 
				 #include "core/object/ref_counted.h"
			
 
				-#include "core/os/mutex.h"
			
 
				+#include "core/os/safe_binary_mutex.h"
			
 
				 #include "core/templates/hash_map.h"
			
 
				 #include "core/templates/hash_set.h"
			
 
				 
			
@@ -95,7 +95,12 @@ class GDScriptCache {
 
				 
			
 
				 	bool cleared = false;
			
 
				 
			
 
				-	Mutex mutex;
			
 
				+public:
			
 
				+	static const int BINARY_MUTEX_TAG = 2;
			
 
				+
			
 
				+private:
			
 
				+	static SafeBinaryMutex<BINARY_MUTEX_TAG> mutex;
			
 
				+	friend SafeBinaryMutex<BINARY_MUTEX_TAG> &_get_gdscript_cache_mutex();
			
 
				 
			
 
				 public:
			
 
				 	static void move_script(const String &p_from, const String &p_to);