Browse Source

WorkerThreadPool (plus friends): Overhaul unlock allowance zones

This fixes a rare but possible deadlock, maybe due to undefined behavior. The new implementation is safer, at the cost of some added boilerplate.

(cherry picked from commit f4d76853b9d921e3645295f9bebc39eb73661e67)
Pedro J. Estébanez 1 year ago
parent
commit
c75c50ecac

+ 1 - 1
SConstruct

@@ -856,7 +856,7 @@ else:  # GCC, Clang
             if cc_version_major >= 11:  # Broke on MethodBind templates before GCC 11.
                 env.Append(CCFLAGS=["-Wlogical-op"])
         elif methods.using_clang(env) or methods.using_emcc(env):
-            env.Append(CCFLAGS=["-Wimplicit-fallthrough"])
+            env.Append(CCFLAGS=["-Wimplicit-fallthrough", "-Wno-undefined-var-template"])
     elif env["warnings"] == "all":
         env.Append(CCFLAGS=["-Wall"] + common_warnings)
     elif env["warnings"] == "moderate":

+ 5 - 1
core/io/resource_loader.cpp

@@ -1363,8 +1363,12 @@ thread_local int ResourceLoader::load_nesting = 0;
 thread_local Vector<String> ResourceLoader::load_paths_stack;
 thread_local HashMap<int, HashMap<String, Ref<Resource>>> ResourceLoader::res_ref_overrides;
 
+SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG> &_get_res_loader_mutex() {
+	return ResourceLoader::thread_load_mutex;
+}
+
 template <>
-thread_local uint32_t SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG>::count = 0;
+thread_local SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG>::TLSData SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG>::tls_data(_get_res_loader_mutex());
 SafeBinaryMutex<ResourceLoader::BINARY_MUTEX_TAG> ResourceLoader::thread_load_mutex;
 HashMap<String, ResourceLoader::ThreadLoadTask> ResourceLoader::thread_load_tasks;
 bool ResourceLoader::cleaning_tasks = false;

+ 3 - 0
core/io/resource_loader.h

@@ -194,7 +194,10 @@ private:
 	static thread_local int load_nesting;
 	static thread_local HashMap<int, HashMap<String, Ref<Resource>>> res_ref_overrides; // Outermost key is nesting level.
 	static thread_local Vector<String> load_paths_stack;
+
 	static SafeBinaryMutex<BINARY_MUTEX_TAG> thread_load_mutex;
+	friend SafeBinaryMutex<BINARY_MUTEX_TAG> &_get_res_loader_mutex();
+
 	static HashMap<String, ThreadLoadTask> thread_load_tasks;
 	static bool cleaning_tasks;
 

+ 22 - 38
core/object/worker_thread_pool.cpp

@@ -32,6 +32,7 @@
 
 #include "core/object/script_language.h"
 #include "core/os/os.h"
+#include "core/os/safe_binary_mutex.h"
 #include "core/os/thread_safe.h"
 
 WorkerThreadPool::Task *const WorkerThreadPool::ThreadData::YIELDING = (Task *)1;
@@ -46,7 +47,7 @@ void WorkerThreadPool::Task::free_template_userdata() {
 WorkerThreadPool *WorkerThreadPool::singleton = nullptr;
 
 #ifdef THREADS_ENABLED
-thread_local uintptr_t WorkerThreadPool::unlockable_mutexes[MAX_UNLOCKABLE_MUTEXES] = {};
+thread_local WorkerThreadPool::UnlockableLocks WorkerThreadPool::unlockable_locks[MAX_UNLOCKABLE_LOCKS];
 #endif
 
 void WorkerThreadPool::_process_task(Task *p_task) {
@@ -428,13 +429,9 @@ Error WorkerThreadPool::wait_for_task_completion(TaskID p_task_id) {
 
 void WorkerThreadPool::_lock_unlockable_mutexes() {
 #ifdef THREADS_ENABLED
-	for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) {
-		if (unlockable_mutexes[i]) {
-			if ((((uintptr_t)unlockable_mutexes[i]) & 1) == 0) {
-				((Mutex *)unlockable_mutexes[i])->lock();
-			} else {
-				((BinaryMutex *)(unlockable_mutexes[i] & ~1))->lock();
-			}
+	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
+		if (unlockable_locks[i].ulock) {
+			unlockable_locks[i].ulock->lock();
 		}
 	}
 #endif
@@ -442,13 +439,9 @@ void WorkerThreadPool::_lock_unlockable_mutexes() {
 
 void WorkerThreadPool::_unlock_unlockable_mutexes() {
 #ifdef THREADS_ENABLED
-	for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) {
-		if (unlockable_mutexes[i]) {
-			if ((((uintptr_t)unlockable_mutexes[i]) & 1) == 0) {
-				((Mutex *)unlockable_mutexes[i])->unlock();
-			} else {
-				((BinaryMutex *)(unlockable_mutexes[i] & ~1))->unlock();
-			}
+	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
+		if (unlockable_locks[i].ulock) {
+			unlockable_locks[i].ulock->unlock();
 		}
 	}
 #endif
@@ -675,37 +668,28 @@ WorkerThreadPool::TaskID WorkerThreadPool::get_caller_task_id() {
 }
 
 #ifdef THREADS_ENABLED
-uint32_t WorkerThreadPool::thread_enter_unlock_allowance_zone(Mutex *p_mutex) {
-	return _thread_enter_unlock_allowance_zone(p_mutex, false);
-}
-
-uint32_t WorkerThreadPool::thread_enter_unlock_allowance_zone(BinaryMutex *p_mutex) {
-	return _thread_enter_unlock_allowance_zone(p_mutex, true);
-}
-
-uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(void *p_mutex, bool p_is_binary) {
-	for (uint32_t i = 0; i < MAX_UNLOCKABLE_MUTEXES; i++) {
-		if (unlikely((unlockable_mutexes[i] & ~1) == (uintptr_t)p_mutex)) {
+uint32_t WorkerThreadPool::_thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &p_ulock) {
+	for (uint32_t i = 0; i < MAX_UNLOCKABLE_LOCKS; i++) {
+		DEV_ASSERT((bool)unlockable_locks[i].ulock == (bool)unlockable_locks[i].rc);
+		if (unlockable_locks[i].ulock == &p_ulock) {
 			// Already registered in the current thread.
-			return UINT32_MAX;
-		}
-		if (!unlockable_mutexes[i]) {
-			unlockable_mutexes[i] = (uintptr_t)p_mutex;
-			if (p_is_binary) {
-				unlockable_mutexes[i] |= 1;
-			}
+			unlockable_locks[i].rc++;
+			return i;
+		} else if (!unlockable_locks[i].ulock) {
+			unlockable_locks[i].ulock = &p_ulock;
+			unlockable_locks[i].rc = 1;
 			return i;
 		}
 	}
-	ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable mutex slots available. Engine bug.");
+	ERR_FAIL_V_MSG(UINT32_MAX, "No more unlockable lock slots available. Engine bug.");
 }
 
 void WorkerThreadPool::thread_exit_unlock_allowance_zone(uint32_t p_zone_id) {
-	if (p_zone_id == UINT32_MAX) {
-		return;
+	DEV_ASSERT(unlockable_locks[p_zone_id].ulock && unlockable_locks[p_zone_id].rc);
+	unlockable_locks[p_zone_id].rc--;
+	if (unlockable_locks[p_zone_id].rc == 0) {
+		unlockable_locks[p_zone_id].ulock = nullptr;
 	}
-	DEV_ASSERT(unlockable_mutexes[p_zone_id]);
-	unlockable_mutexes[p_zone_id] = 0;
 }
 #endif
 

+ 13 - 6
core/object/worker_thread_pool.h

@@ -162,8 +162,12 @@ private:
 	static WorkerThreadPool *singleton;
 
 #ifdef THREADS_ENABLED
-	static const uint32_t MAX_UNLOCKABLE_MUTEXES = 2;
-	static thread_local uintptr_t unlockable_mutexes[MAX_UNLOCKABLE_MUTEXES];
+	static const uint32_t MAX_UNLOCKABLE_LOCKS = 2;
+	struct UnlockableLocks {
+		THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> *ulock = nullptr;
+		uint32_t rc = 0;
+	};
+	static thread_local UnlockableLocks unlockable_locks[MAX_UNLOCKABLE_LOCKS];
 #endif
 
 	TaskID _add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description);
@@ -192,7 +196,7 @@ private:
 	void _wait_collaboratively(ThreadData *p_caller_pool_thread, Task *p_task);
 
 #ifdef THREADS_ENABLED
-	static uint32_t _thread_enter_unlock_allowance_zone(void *p_mutex, bool p_is_binary);
+	static uint32_t _thread_enter_unlock_allowance_zone(THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &p_ulock);
 #endif
 
 	void _lock_unlockable_mutexes();
@@ -242,11 +246,14 @@ public:
 	static TaskID get_caller_task_id();
 
 #ifdef THREADS_ENABLED
-	static uint32_t thread_enter_unlock_allowance_zone(Mutex *p_mutex);
-	static uint32_t thread_enter_unlock_allowance_zone(BinaryMutex *p_mutex);
+	_ALWAYS_INLINE_ static uint32_t thread_enter_unlock_allowance_zone(const MutexLock<BinaryMutex> &p_lock) { return _thread_enter_unlock_allowance_zone(p_lock._get_lock()); }
+	template <int Tag>
+	_ALWAYS_INLINE_ static uint32_t thread_enter_unlock_allowance_zone(const SafeBinaryMutex<Tag> &p_mutex) { return _thread_enter_unlock_allowance_zone(p_mutex._get_lock()); }
 	static void thread_exit_unlock_allowance_zone(uint32_t p_zone_id);
 #else
-	static uint32_t thread_enter_unlock_allowance_zone(void *p_mutex) { return UINT32_MAX; }
+	static uint32_t thread_enter_unlock_allowance_zone(const MutexLock<BinaryMutex> &p_lock) { return UINT32_MAX; }
+	template <int Tag>
+	static uint32_t thread_enter_unlock_allowance_zone(const SafeBinaryMutex<Tag> &p_mutex) { return UINT32_MAX; }
 	static void thread_exit_unlock_allowance_zone(uint32_t p_zone_id) {}
 #endif
 

+ 7 - 1
core/os/condition_variable.h

@@ -32,6 +32,7 @@
 #define CONDITION_VARIABLE_H
 
 #include "core/os/mutex.h"
+#include "core/os/safe_binary_mutex.h"
 
 #ifdef THREADS_ENABLED
 
@@ -56,7 +57,12 @@ class ConditionVariable {
 public:
 	template <typename BinaryMutexT>
 	_ALWAYS_INLINE_ void wait(const MutexLock<BinaryMutexT> &p_lock) const {
-		condition.wait(const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(p_lock.lock));
+		condition.wait(const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(p_lock._get_lock()));
+	}
+
+	template <int Tag>
+	_ALWAYS_INLINE_ void wait(const MutexLock<SafeBinaryMutex<Tag>> &p_lock) const {
+		condition.wait(const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(p_lock.mutex._get_lock()));
 	}
 
 	_ALWAYS_INLINE_ void notify_one() const {

+ 7 - 2
core/os/mutex.h

@@ -72,13 +72,18 @@ public:
 
 template <typename MutexT>
 class MutexLock {
-	friend class ConditionVariable;
-
 	THREADING_NAMESPACE::unique_lock<typename MutexT::StdMutexType> lock;
 
 public:
 	explicit MutexLock(const MutexT &p_mutex) :
 			lock(p_mutex.mutex) {}
+
+	// Clarification: all the funny syntax is needed so this function exists only for binary mutexes.
+	template <typename T = MutexT>
+	_ALWAYS_INLINE_ THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &_get_lock(
+			typename std::enable_if<std::is_same<T, THREADING_NAMESPACE::mutex>::value> * = nullptr) const {
+		return const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(lock);
+	}
 };
 
 using Mutex = MutexImpl<THREADING_NAMESPACE::recursive_mutex>; // Recursive, for general use

+ 38 - 38
core/os/safe_binary_mutex.h

@@ -47,76 +47,76 @@
 // Also, don't forget to declare the thread_local variable on each use.
 template <int Tag>
 class SafeBinaryMutex {
-	friend class MutexLock<SafeBinaryMutex>;
+	friend class MutexLock<SafeBinaryMutex<Tag>>;
 
 	using StdMutexType = THREADING_NAMESPACE::mutex;
 
 	mutable THREADING_NAMESPACE::mutex mutex;
-	static thread_local uint32_t count;
+
+	struct TLSData {
+		mutable THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> lock;
+		uint32_t count = 0;
+
+		TLSData(SafeBinaryMutex<Tag> &p_mutex) :
+				lock(p_mutex.mutex, THREADING_NAMESPACE::defer_lock) {}
+	};
+	static thread_local TLSData tls_data;
 
 public:
 	_ALWAYS_INLINE_ void lock() const {
-		if (++count == 1) {
-			mutex.lock();
+		if (++tls_data.count == 1) {
+			tls_data.lock.lock();
 		}
 	}
 
 	_ALWAYS_INLINE_ void unlock() const {
-		DEV_ASSERT(count);
-		if (--count == 0) {
-			mutex.unlock();
+		DEV_ASSERT(tls_data.count);
+		if (--tls_data.count == 0) {
+			tls_data.lock.unlock();
 		}
 	}
 
-	_ALWAYS_INLINE_ bool try_lock() const {
-		if (count) {
-			count++;
-			return true;
-		} else {
-			if (mutex.try_lock()) {
-				count++;
-				return true;
-			} else {
-				return false;
-			}
-		}
+	_ALWAYS_INLINE_ THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &_get_lock() const {
+		return const_cast<THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> &>(tls_data.lock);
 	}
 
-	~SafeBinaryMutex() {
-		DEV_ASSERT(!count);
+	_ALWAYS_INLINE_ SafeBinaryMutex() {
+	}
+
+	_ALWAYS_INLINE_ ~SafeBinaryMutex() {
+		DEV_ASSERT(!tls_data.count);
 	}
 };
 
-// This specialization is needed so manual locking and MutexLock can be used
-// at the same time on a SafeBinaryMutex.
 template <int Tag>
 class MutexLock<SafeBinaryMutex<Tag>> {
 	friend class ConditionVariable;
 
-	THREADING_NAMESPACE::unique_lock<THREADING_NAMESPACE::mutex> lock;
+	const SafeBinaryMutex<Tag> &mutex;
 
 public:
-	_ALWAYS_INLINE_ explicit MutexLock(const SafeBinaryMutex<Tag> &p_mutex) :
-			lock(p_mutex.mutex) {
-		SafeBinaryMutex<Tag>::count++;
-	};
-	_ALWAYS_INLINE_ ~MutexLock() {
-		SafeBinaryMutex<Tag>::count--;
-	};
+	explicit MutexLock(const SafeBinaryMutex<Tag> &p_mutex) :
+			mutex(p_mutex) {
+		mutex.lock();
+	}
+
+	~MutexLock() {
+		mutex.unlock();
+	}
 };
 
 #else // No threads.
 
 template <int Tag>
-class SafeBinaryMutex : public MutexImpl {
-	static thread_local uint32_t count;
-};
+class SafeBinaryMutex {
+	struct TLSData {
+		TLSData(SafeBinaryMutex<Tag> &p_mutex) {}
+	};
+	static thread_local TLSData tls_data;
 
-template <int Tag>
-class MutexLock<SafeBinaryMutex<Tag>> {
 public:
-	MutexLock(const SafeBinaryMutex<Tag> &p_mutex) {}
-	~MutexLock() {}
+	void lock() const {}
+	void unlock() const {}
 };
 
 #endif // THREADS_ENABLED

+ 0 - 8
core/templates/command_queue_mt.cpp

@@ -33,14 +33,6 @@
 #include "core/config/project_settings.h"
 #include "core/os/os.h"
 
-void CommandQueueMT::lock() {
-	mutex.lock();
-}
-
-void CommandQueueMT::unlock() {
-	mutex.unlock();
-}
-
 CommandQueueMT::CommandQueueMT() {
 	command_mem.reserve(DEFAULT_COMMAND_MEM_SIZE_KB * 1024);
 }

+ 6 - 12
core/templates/command_queue_mt.h

@@ -362,23 +362,24 @@ class CommandQueueMT {
 			return;
 		}
 
-		lock();
+		MutexLock lock(mutex);
 
-		uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(&mutex);
 		while (flush_read_ptr < command_mem.size()) {
 			uint64_t size = *(uint64_t *)&command_mem[flush_read_ptr];
 			flush_read_ptr += 8;
 			CommandBase *cmd = reinterpret_cast<CommandBase *>(&command_mem[flush_read_ptr]);
+			uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(lock);
 			cmd->call();
+			WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id);
 
 			// Handle potential realloc due to the command and unlock allowance.
 			cmd = reinterpret_cast<CommandBase *>(&command_mem[flush_read_ptr]);
 
 			if (unlikely(cmd->sync)) {
 				sync_head++;
-				unlock(); // Give an opportunity to awaiters right away.
+				lock.~MutexLock(); // Give an opportunity to awaiters right away.
 				sync_cond_var.notify_all();
-				lock();
+				new (&lock) MutexLock(mutex);
 				// Handle potential realloc happened during unlock.
 				cmd = reinterpret_cast<CommandBase *>(&command_mem[flush_read_ptr]);
 			}
@@ -387,14 +388,11 @@ class CommandQueueMT {
 
 			flush_read_ptr += size;
 		}
-		WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id);
 
 		command_mem.clear();
 		flush_read_ptr = 0;
 
 		_prevent_sync_wraparound();
-
-		unlock();
 	}
 
 	_FORCE_INLINE_ void _wait_for_sync(MutexLock<BinaryMutex> &p_lock) {
@@ -410,9 +408,6 @@ class CommandQueueMT {
 	void _no_op() {}
 
 public:
-	void lock();
-	void unlock();
-
 	/* NORMAL PUSH COMMANDS */
 	DECL_PUSH(0)
 	SPACE_SEP_LIST(DECL_PUSH, 15)
@@ -446,9 +441,8 @@ public:
 	}
 
 	void set_pump_task_id(WorkerThreadPool::TaskID p_task_id) {
-		lock();
+		MutexLock lock(mutex);
 		pump_task_id = p_task_id;
-		unlock();
 	}
 
 	CommandQueueMT();

+ 9 - 1
modules/gdscript/gdscript_cache.cpp

@@ -144,6 +144,14 @@ GDScriptParserRef::~GDScriptParserRef() {
 
 GDScriptCache *GDScriptCache::singleton = nullptr;
 
+SafeBinaryMutex<GDScriptCache::BINARY_MUTEX_TAG> &_get_gdscript_cache_mutex() {
+	return GDScriptCache::mutex;
+}
+
+template <>
+thread_local SafeBinaryMutex<GDScriptCache::BINARY_MUTEX_TAG>::TLSData SafeBinaryMutex<GDScriptCache::BINARY_MUTEX_TAG>::tls_data(_get_gdscript_cache_mutex());
+SafeBinaryMutex<GDScriptCache::BINARY_MUTEX_TAG> GDScriptCache::mutex;
+
 void GDScriptCache::move_script(const String &p_from, const String &p_to) {
 	if (singleton == nullptr || p_from == p_to) {
 		return;
@@ -369,7 +377,7 @@ Ref<GDScript> GDScriptCache::get_full_script(const String &p_path, Error &r_erro
 
 	// Allowing lifting the lock might cause a script to be reloaded multiple times,
 	// which, as a last resort deadlock prevention strategy, is a good tradeoff.
-	uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(&singleton->mutex);
+	uint32_t allowance_id = WorkerThreadPool::thread_enter_unlock_allowance_zone(singleton->mutex);
 	r_error = script->reload(true);
 	WorkerThreadPool::thread_exit_unlock_allowance_zone(allowance_id);
 	if (r_error) {

+ 7 - 2
modules/gdscript/gdscript_cache.h

@@ -34,7 +34,7 @@
 #include "gdscript.h"
 
 #include "core/object/ref_counted.h"
-#include "core/os/mutex.h"
+#include "core/os/safe_binary_mutex.h"
 #include "core/templates/hash_map.h"
 #include "core/templates/hash_set.h"
 
@@ -95,7 +95,12 @@ class GDScriptCache {
 
 	bool cleared = false;
 
-	Mutex mutex;
+public:
+	static const int BINARY_MUTEX_TAG = 2;
+
+private:
+	static SafeBinaryMutex<BINARY_MUTEX_TAG> mutex;
+	friend SafeBinaryMutex<BINARY_MUTEX_TAG> &_get_gdscript_cache_mutex();
 
 public:
 	static void move_script(const String &p_from, const String &p_to);