3 years ago · 18159a6574
--- a/core/object/worker_thread_pool.cpp
+++ b/core/object/worker_thread_pool.cpp
@@ -32,6 +32,13 @@
 
				 
			
 
				 #include "core/os/os.h"
			
 
				 
			
 
				+void WorkerThreadPool::Task::free_template_userdata() {
			
 
				+	ERR_FAIL_COND(!template_userdata);
			
 
				+	ERR_FAIL_COND(native_func_userdata == nullptr);
			
 
				+	BaseTemplateUserdata *btu = (BaseTemplateUserdata *)native_func_userdata;
			
 
				+	memdelete(btu);
			
 
				+}
			
 
				+
			
 
				 WorkerThreadPool *WorkerThreadPool::singleton = nullptr;
			
 
				 
			
 
				 void WorkerThreadPool::_process_task_queue() {
			
@@ -48,30 +55,36 @@ void WorkerThreadPool::_process_task(Task *p_task) {
 
				 	if (p_task->group) {
			
 
				 		// Handling a group
			
 
				 		bool do_post = false;
			
 
				-		if (p_task->native_group_func) {
			
 
				-			while (true) {
			
 
				-				uint32_t work_index = p_task->group->index.postincrement();
			
 
				-				if (work_index >= p_task->group->max) {
			
 
				-					do_post = work_index == p_task->group->max; // First one reaching max handles semaphore and clean-up.
			
 
				-					break;
			
 
				-				}
			
 
				-				p_task->native_group_func(p_task->native_func_userdata, work_index);
			
 
				-			}
			
 
				+		Callable::CallError ce;
			
 
				+		Variant ret;
			
 
				+		Variant arg;
			
 
				+		Variant *argptr = &arg;
			
 
				 
			
 
				-		} else {
			
 
				-			Callable::CallError ce;
			
 
				-			Variant ret;
			
 
				-			Variant arg;
			
 
				-			Variant *argptr = &arg;
			
 
				-			while (true) {
			
 
				-				uint32_t work_index = p_task->group->index.postincrement();
			
 
				-				if (work_index >= p_task->group->max) {
			
 
				-					do_post = work_index == p_task->group->max; // First one reaching max handles semaphore and clean-up.
			
 
				-					break;
			
 
				-				}
			
 
				+		while (true) {
			
 
				+			uint32_t work_index = p_task->group->index.postincrement();
			
 
				+
			
 
				+			if (work_index >= p_task->group->max) {
			
 
				+				break;
			
 
				+			}
			
 
				+			if (p_task->native_group_func) {
			
 
				+				p_task->native_group_func(p_task->native_func_userdata, work_index);
			
 
				+			} else if (p_task->template_userdata) {
			
 
				+				p_task->template_userdata->callback_indexed(work_index);
			
 
				+			} else {
			
 
				 				arg = work_index;
			
 
				 				p_task->callable.call((const Variant **)&argptr, 1, ret, ce);
			
 
				 			}
			
 
				+
			
 
				+			// This is the only way to ensure posting is done when all tasks are really complete.
			
 
				+			uint32_t completed_amount = p_task->group->completed_index.increment();
			
 
				+
			
 
				+			if (completed_amount == p_task->group->max) {
			
 
				+				do_post = true;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		if (do_post && p_task->template_userdata) {
			
 
				+			memdelete(p_task->template_userdata); // This is no longer needed at this point, so get rid of it.
			
 
				 		}
			
 
				 
			
 
				 		if (low_priority && use_native_low_priority_threads) {
			
@@ -104,6 +117,9 @@ void WorkerThreadPool::_process_task(Task *p_task) {
 
				 	} else {
			
 
				 		if (p_task->native_func) {
			
 
				 			p_task->native_func(p_task->native_func_userdata);
			
 
				+		} else if (p_task->template_userdata) {
			
 
				+			p_task->template_userdata->callback();
			
 
				+			memdelete(p_task->template_userdata);
			
 
				 		} else {
			
 
				 			Callable::CallError ce;
			
 
				 			Variant ret;
			
@@ -171,13 +187,19 @@ void WorkerThreadPool::_post_task(Task *p_task, bool p_high_priority) {
 
				 }
			
 
				 
			
 
				 WorkerThreadPool::TaskID WorkerThreadPool::add_native_task(void (*p_func)(void *), void *p_userdata, bool p_high_priority, const String &p_description) {
			
 
				+	return _add_task(Callable(), p_func, p_userdata, nullptr, p_high_priority, p_description);
			
 
				+}
			
 
				+
			
 
				+WorkerThreadPool::TaskID WorkerThreadPool::_add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description) {
			
 
				 	task_mutex.lock();
			
 
				 	// Get a free task
			
 
				 	Task *task = task_allocator.alloc();
			
 
				 	TaskID id = last_task++;
			
 
				+	task->callable = p_callable;
			
 
				 	task->native_func = p_func;
			
 
				 	task->native_func_userdata = p_userdata;
			
 
				 	task->description = p_description;
			
 
				+	task->template_userdata = p_template_userdata;
			
 
				 	tasks.insert(id, task);
			
 
				 	task_mutex.unlock();
			
 
				 
			
@@ -187,18 +209,7 @@ WorkerThreadPool::TaskID WorkerThreadPool::add_native_task(void (*p_func)(void *
 
				 }
			
 
				 
			
 
				 WorkerThreadPool::TaskID WorkerThreadPool::add_task(const Callable &p_action, bool p_high_priority, const String &p_description) {
			
 
				-	task_mutex.lock();
			
 
				-	// Get a free task
			
 
				-	Task *task = task_allocator.alloc();
			
 
				-	TaskID id = last_task++;
			
 
				-	task->callable = p_action;
			
 
				-	task->description = p_description;
			
 
				-	tasks.insert(id, task);
			
 
				-	task_mutex.unlock();
			
 
				-
			
 
				-	_post_task(task, p_high_priority);
			
 
				-
			
 
				-	return id;
			
 
				+	return _add_task(p_action, nullptr, nullptr, nullptr, p_high_priority, p_description);
			
 
				 }
			
 
				 
			
 
				 bool WorkerThreadPool::is_task_completed(TaskID p_task_id) const {
			
@@ -269,8 +280,8 @@ void WorkerThreadPool::wait_for_task_completion(TaskID p_task_id) {
 
				 	task_mutex.unlock();
			
 
				 }
			
 
				 
			
 
				-WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)(void *, uint32_t), void *p_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
			
 
				-	ERR_FAIL_COND_V(p_elements <= 0, INVALID_TASK_ID);
			
 
				+WorkerThreadPool::GroupID WorkerThreadPool::_add_group_task(const Callable &p_callable, void (*p_func)(void *, uint32_t), void *p_userdata, BaseTemplateUserdata *p_template_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
			
 
				+	ERR_FAIL_COND_V(p_elements < 0, INVALID_TASK_ID);
			
 
				 	if (p_tasks < 0) {
			
 
				 		p_tasks = threads.size();
			
 
				 	}
			
@@ -280,17 +291,34 @@ WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)
 
				 	GroupID id = last_task++;
			
 
				 	group->max = p_elements;
			
 
				 	group->self = id;
			
 
				-	group->tasks_used = p_tasks;
			
 
				-	Task **tasks_posted = (Task **)alloca(sizeof(Task *) * p_tasks);
			
 
				-	for (int i = 0; i < p_tasks; i++) {
			
 
				-		Task *task = task_allocator.alloc();
			
 
				-		task->native_group_func = p_func;
			
 
				-		task->native_func_userdata = p_userdata;
			
 
				-		task->description = p_description;
			
 
				-		task->group = group;
			
 
				-		tasks_posted[i] = task;
			
 
				-		// No task ID is used.
			
 
				+
			
 
				+	Task **tasks_posted = nullptr;
			
 
				+	if (p_elements == 0) {
			
 
				+		// Should really not be called with zero elements, but at least it should work.
			
 
				+		group->completed.set_to(true);
			
 
				+		group->done_semaphore.post();
			
 
				+		group->tasks_used = 0;
			
 
				+		p_tasks = 0;
			
 
				+		if (p_template_userdata) {
			
 
				+			memdelete(p_template_userdata);
			
 
				+		}
			
 
				+
			
 
				+	} else {
			
 
				+		group->tasks_used = p_tasks;
			
 
				+		tasks_posted = (Task **)alloca(sizeof(Task *) * p_tasks);
			
 
				+		for (int i = 0; i < p_tasks; i++) {
			
 
				+			Task *task = task_allocator.alloc();
			
 
				+			task->native_group_func = p_func;
			
 
				+			task->native_func_userdata = p_userdata;
			
 
				+			task->description = p_description;
			
 
				+			task->group = group;
			
 
				+			task->callable = p_callable;
			
 
				+			task->template_userdata = p_template_userdata;
			
 
				+			tasks_posted[i] = task;
			
 
				+			// No task ID is used.
			
 
				+		}
			
 
				 	}
			
 
				+
			
 
				 	groups[id] = group;
			
 
				 	task_mutex.unlock();
			
 
				 
			
@@ -308,43 +336,25 @@ WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)
 
				 	return id;
			
 
				 }
			
 
				 
			
 
				+WorkerThreadPool::GroupID WorkerThreadPool::add_native_group_task(void (*p_func)(void *, uint32_t), void *p_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
			
 
				+	return _add_group_task(Callable(), p_func, p_userdata, nullptr, p_elements, p_tasks, p_high_priority, p_description);
			
 
				+}
			
 
				+
			
 
				 WorkerThreadPool::GroupID WorkerThreadPool::add_group_task(const Callable &p_action, int p_elements, int p_tasks, bool p_high_priority, const String &p_description) {
			
 
				-	ERR_FAIL_COND_V(p_elements <= 0, INVALID_TASK_ID);
			
 
				-	if (p_tasks < 0) {
			
 
				-		p_tasks = threads.size();
			
 
				-	}
			
 
				+	return _add_group_task(p_action, nullptr, nullptr, nullptr, p_elements, p_tasks, p_high_priority, p_description);
			
 
				+}
			
 
				 
			
 
				+uint32_t WorkerThreadPool::get_group_processed_element_count(GroupID p_group) const {
			
 
				 	task_mutex.lock();
			
 
				-	Group *group = group_allocator.alloc();
			
 
				-	GroupID id = last_task++;
			
 
				-	group->max = p_elements;
			
 
				-	group->self = id;
			
 
				-	group->tasks_used = p_tasks;
			
 
				-	Task **tasks_posted = (Task **)alloca(sizeof(Task *) * p_tasks);
			
 
				-	for (int i = 0; i < p_tasks; i++) {
			
 
				-		Task *task = task_allocator.alloc();
			
 
				-		task->callable = p_action;
			
 
				-		task->description = p_description;
			
 
				-		task->group = group;
			
 
				-		tasks_posted[i] = task;
			
 
				-		// No task ID is used.
			
 
				+	const Group *const *groupp = groups.getptr(p_group);
			
 
				+	if (!groupp) {
			
 
				+		task_mutex.unlock();
			
 
				+		ERR_FAIL_V_MSG(0, "Invalid Group ID");
			
 
				 	}
			
 
				-	groups[id] = group;
			
 
				+	uint32_t elements = (*groupp)->completed_index.get();
			
 
				 	task_mutex.unlock();
			
 
				-
			
 
				-	if (!p_high_priority && use_native_low_priority_threads) {
			
 
				-		group->low_priority_native_tasks.resize(p_tasks);
			
 
				-	}
			
 
				-
			
 
				-	for (int i = 0; i < p_tasks; i++) {
			
 
				-		_post_task(tasks_posted[i], p_high_priority);
			
 
				-		if (!p_high_priority && use_native_low_priority_threads) {
			
 
				-			group->low_priority_native_tasks[i] = tasks_posted[i];
			
 
				-		}
			
 
				-	}
			
 
				-	return id;
			
 
				+	return elements;
			
 
				 }
			
 
				-
			
 
				 bool WorkerThreadPool::is_group_task_completed(GroupID p_group) const {
			
 
				 	task_mutex.lock();
			
 
				 	const Group *const *groupp = groups.getptr(p_group);
			
@@ -451,6 +461,7 @@ void WorkerThreadPool::_bind_methods() {
 
				 
			
 
				 	ClassDB::bind_method(D_METHOD("add_group_task", "action", "elements", "tasks_needed", "high_priority", "description"), &WorkerThreadPool::add_group_task, DEFVAL(-1), DEFVAL(false), DEFVAL(String()));
			
 
				 	ClassDB::bind_method(D_METHOD("is_group_task_completed", "group_id"), &WorkerThreadPool::is_group_task_completed);
			
 
				+	ClassDB::bind_method(D_METHOD("get_group_processed_element_count", "group_id"), &WorkerThreadPool::get_group_processed_element_count);
			
 
				 	ClassDB::bind_method(D_METHOD("wait_for_group_task_completion", "group_id"), &WorkerThreadPool::wait_for_group_task_completion);
			
 
				 }
			
 
				 
			
--- a/core/object/worker_thread_pool.h
+++ b/core/object/worker_thread_pool.h
@@ -53,9 +53,16 @@ public:
 
				 private:
			
 
				 	struct Task;
			
 
				 
			
 
				+	struct BaseTemplateUserdata {
			
 
				+		virtual void callback() {}
			
 
				+		virtual void callback_indexed(uint32_t p_index) {}
			
 
				+		virtual ~BaseTemplateUserdata() {}
			
 
				+	};
			
 
				+
			
 
				 	struct Group {
			
 
				 		GroupID self;
			
 
				 		SafeNumeric<uint32_t> index;
			
 
				+		SafeNumeric<uint32_t> completed_index;
			
 
				 		uint32_t max = 0;
			
 
				 		Semaphore done_semaphore;
			
 
				 		SafeFlag completed;
			
@@ -76,7 +83,10 @@ private:
 
				 		SelfList<Task> task_elem;
			
 
				 		bool waiting = false; // Waiting for completion
			
 
				 		bool low_priority = false;
			
 
				+		BaseTemplateUserdata *template_userdata = nullptr;
			
 
				 		Thread *low_priority_thread = nullptr;
			
 
				+
			
 
				+		void free_template_userdata();
			
 
				 		Task() :
			
 
				 				task_elem(this) {}
			
 
				 	};
			
@@ -119,18 +129,60 @@ private:
 
				 
			
 
				 	static WorkerThreadPool *singleton;
			
 
				 
			
 
				+	TaskID _add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description);
			
 
				+	GroupID _add_group_task(const Callable &p_callable, void (*p_func)(void *, uint32_t), void *p_userdata, BaseTemplateUserdata *p_template_userdata, int p_elements, int p_tasks, bool p_high_priority, const String &p_description);
			
 
				+
			
 
				+	template <class C, class M, class U>
			
 
				+	struct TaskUserData : public BaseTemplateUserdata {
			
 
				+		C *instance;
			
 
				+		M method;
			
 
				+		U userdata;
			
 
				+		virtual void callback() override {
			
 
				+			(instance->*method)(userdata);
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				+	template <class C, class M, class U>
			
 
				+	struct GroupUserData : public BaseTemplateUserdata {
			
 
				+		C *instance;
			
 
				+		M method;
			
 
				+		U userdata;
			
 
				+		virtual void callback_indexed(uint32_t p_index) override {
			
 
				+			(instance->*method)(p_index, userdata);
			
 
				+		}
			
 
				+	};
			
 
				+
			
 
				 protected:
			
 
				 	static void _bind_methods();
			
 
				 
			
 
				 public:
			
 
				+	template <class C, class M, class U>
			
 
				+	TaskID add_template_task(C *p_instance, M p_method, U p_userdata, bool p_high_priority = false, const String &p_description = String()) {
			
 
				+		typedef TaskUserData<C, M, U> TUD;
			
 
				+		TUD *ud = memnew(TUD);
			
 
				+		ud->instance = p_instance;
			
 
				+		ud->method = p_method;
			
 
				+		ud->userdata = p_userdata;
			
 
				+		return _add_task(Callable(), nullptr, nullptr, ud, p_high_priority, p_description);
			
 
				+	}
			
 
				 	TaskID add_native_task(void (*p_func)(void *), void *p_userdata, bool p_high_priority = false, const String &p_description = String());
			
 
				 	TaskID add_task(const Callable &p_action, bool p_high_priority = false, const String &p_description = String());
			
 
				 
			
 
				 	bool is_task_completed(TaskID p_task_id) const;
			
 
				 	void wait_for_task_completion(TaskID p_task_id);
			
 
				 
			
 
				+	template <class C, class M, class U>
			
 
				+	GroupID add_template_group_task(C *p_instance, M p_method, U p_userdata, int p_elements, int p_tasks = -1, bool p_high_priority = false, const String &p_description = String()) {
			
 
				+		typedef GroupUserData<C, M, U> GUD;
			
 
				+		GUD *ud = memnew(GUD);
			
 
				+		ud->instance = p_instance;
			
 
				+		ud->method = p_method;
			
 
				+		ud->userdata = p_userdata;
			
 
				+		return _add_group_task(Callable(), nullptr, nullptr, ud, p_elements, p_tasks, p_high_priority, p_description);
			
 
				+	}
			
 
				 	GroupID add_native_group_task(void (*p_func)(void *, uint32_t), void *p_userdata, int p_elements, int p_tasks = -1, bool p_high_priority = false, const String &p_description = String());
			
 
				 	GroupID add_group_task(const Callable &p_action, int p_elements, int p_tasks = -1, bool p_high_priority = false, const String &p_description = String());
			
 
				+	uint32_t get_group_processed_element_count(GroupID p_group) const;
			
 
				 	bool is_group_task_completed(GroupID p_group) const;
			
 
				 	void wait_for_group_task_completion(GroupID p_group);
			
 
				 
			
--- a/core/templates/thread_work_pool.cpp
+++ b/core/templates/thread_work_pool.cpp
@@ -1,81 +0,0 @@
 
				-/*************************************************************************/
			
 
				-/*  thread_work_pool.cpp                                                 */
			
 
				-/*************************************************************************/
			
 
				-/*                       This file is part of:                           */
			
 
				-/*                           GODOT ENGINE                                */
			
 
				-/*                      https://godotengine.org                          */
			
 
				-/*************************************************************************/
			
 
				-/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
			
 
				-/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
			
 
				-/*                                                                       */
			
 
				-/* Permission is hereby granted, free of charge, to any person obtaining */
			
 
				-/* a copy of this software and associated documentation files (the       */
			
 
				-/* "Software"), to deal in the Software without restriction, including   */
			
 
				-/* without limitation the rights to use, copy, modify, merge, publish,   */
			
 
				-/* distribute, sublicense, and/or sell copies of the Software, and to    */
			
 
				-/* permit persons to whom the Software is furnished to do so, subject to */
			
 
				-/* the following conditions:                                             */
			
 
				-/*                                                                       */
			
 
				-/* The above copyright notice and this permission notice shall be        */
			
 
				-/* included in all copies or substantial portions of the Software.       */
			
 
				-/*                                                                       */
			
 
				-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
			
 
				-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
			
 
				-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
			
 
				-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
			
 
				-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
			
 
				-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
			
 
				-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
			
 
				-/*************************************************************************/
			
 
				-
			
 
				-#include "thread_work_pool.h"
			
 
				-
			
 
				-#include "core/os/os.h"
			
 
				-
			
 
				-void ThreadWorkPool::_thread_function(void *p_user) {
			
 
				-	ThreadData *thread = static_cast<ThreadData *>(p_user);
			
 
				-	while (true) {
			
 
				-		thread->start.wait();
			
 
				-		if (thread->exit.load()) {
			
 
				-			break;
			
 
				-		}
			
 
				-		thread->work->work();
			
 
				-		thread->completed.post();
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void ThreadWorkPool::init(int p_thread_count) {
			
 
				-	ERR_FAIL_COND(threads != nullptr);
			
 
				-	if (p_thread_count < 0) {
			
 
				-		p_thread_count = OS::get_singleton()->get_default_thread_pool_size();
			
 
				-	}
			
 
				-
			
 
				-	thread_count = p_thread_count;
			
 
				-	threads = memnew_arr(ThreadData, thread_count);
			
 
				-
			
 
				-	for (uint32_t i = 0; i < thread_count; i++) {
			
 
				-		threads[i].exit.store(false);
			
 
				-		threads[i].thread.start(&ThreadWorkPool::_thread_function, &threads[i]);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				-void ThreadWorkPool::finish() {
			
 
				-	if (threads == nullptr) {
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	for (uint32_t i = 0; i < thread_count; i++) {
			
 
				-		threads[i].exit.store(true);
			
 
				-		threads[i].start.post();
			
 
				-	}
			
 
				-	for (uint32_t i = 0; i < thread_count; i++) {
			
 
				-		threads[i].thread.wait_to_finish();
			
 
				-	}
			
 
				-
			
 
				-	memdelete_arr(threads);
			
 
				-	threads = nullptr;
			
 
				-}
			
 
				-
			
 
				-ThreadWorkPool::~ThreadWorkPool() {
			
 
				-	finish();
			
 
				-}
			
--- a/core/templates/thread_work_pool.h
+++ b/core/templates/thread_work_pool.h
@@ -1,157 +0,0 @@
 
				-/*************************************************************************/
			
 
				-/*  thread_work_pool.h                                                   */
			
 
				-/*************************************************************************/
			
 
				-/*                       This file is part of:                           */
			
 
				-/*                           GODOT ENGINE                                */
			
 
				-/*                      https://godotengine.org                          */
			
 
				-/*************************************************************************/
			
 
				-/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
			
 
				-/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
			
 
				-/*                                                                       */
			
 
				-/* Permission is hereby granted, free of charge, to any person obtaining */
			
 
				-/* a copy of this software and associated documentation files (the       */
			
 
				-/* "Software"), to deal in the Software without restriction, including   */
			
 
				-/* without limitation the rights to use, copy, modify, merge, publish,   */
			
 
				-/* distribute, sublicense, and/or sell copies of the Software, and to    */
			
 
				-/* permit persons to whom the Software is furnished to do so, subject to */
			
 
				-/* the following conditions:                                             */
			
 
				-/*                                                                       */
			
 
				-/* The above copyright notice and this permission notice shall be        */
			
 
				-/* included in all copies or substantial portions of the Software.       */
			
 
				-/*                                                                       */
			
 
				-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
			
 
				-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
			
 
				-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
			
 
				-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
			
 
				-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
			
 
				-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
			
 
				-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
			
 
				-/*************************************************************************/
			
 
				-
			
 
				-#ifndef THREAD_WORK_POOL_H
			
 
				-#define THREAD_WORK_POOL_H
			
 
				-
			
 
				-#include "core/os/memory.h"
			
 
				-#include "core/os/semaphore.h"
			
 
				-#include "core/os/thread.h"
			
 
				-
			
 
				-#include <atomic>
			
 
				-
			
 
				-class ThreadWorkPool {
			
 
				-	std::atomic<uint32_t> index;
			
 
				-
			
 
				-	struct BaseWork {
			
 
				-		std::atomic<uint32_t> *index = nullptr;
			
 
				-		uint32_t max_elements = 0;
			
 
				-		virtual void work() = 0;
			
 
				-		virtual ~BaseWork() = default;
			
 
				-	};
			
 
				-
			
 
				-	template <class C, class M, class U>
			
 
				-	struct Work : public BaseWork {
			
 
				-		C *instance;
			
 
				-		M method;
			
 
				-		U userdata;
			
 
				-		virtual void work() override {
			
 
				-			while (true) {
			
 
				-				uint32_t work_index = index->fetch_add(1, std::memory_order_relaxed);
			
 
				-				if (work_index >= max_elements) {
			
 
				-					break;
			
 
				-				}
			
 
				-				(instance->*method)(work_index, userdata);
			
 
				-			}
			
 
				-		}
			
 
				-	};
			
 
				-
			
 
				-	struct ThreadData {
			
 
				-		Thread thread;
			
 
				-		Semaphore start;
			
 
				-		Semaphore completed;
			
 
				-		std::atomic<bool> exit;
			
 
				-		BaseWork *work = nullptr;
			
 
				-	};
			
 
				-
			
 
				-	ThreadData *threads = nullptr;
			
 
				-	uint32_t thread_count = 0;
			
 
				-	uint32_t threads_working = 0;
			
 
				-	BaseWork *current_work = nullptr;
			
 
				-
			
 
				-	static void _thread_function(void *p_user);
			
 
				-
			
 
				-public:
			
 
				-	template <class C, class M, class U>
			
 
				-	void begin_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
			
 
				-		ERR_FAIL_COND(!threads); //never initialized
			
 
				-		ERR_FAIL_COND(current_work != nullptr);
			
 
				-
			
 
				-		index.store(0, std::memory_order_release);
			
 
				-
			
 
				-		Work<C, M, U> *w = memnew((Work<C, M, U>));
			
 
				-		w->instance = p_instance;
			
 
				-		w->userdata = p_userdata;
			
 
				-		w->method = p_method;
			
 
				-		w->index = &index;
			
 
				-		w->max_elements = p_elements;
			
 
				-
			
 
				-		current_work = w;
			
 
				-
			
 
				-		threads_working = MIN(p_elements, thread_count);
			
 
				-
			
 
				-		for (uint32_t i = 0; i < threads_working; i++) {
			
 
				-			threads[i].work = w;
			
 
				-			threads[i].start.post();
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	bool is_working() const {
			
 
				-		return current_work != nullptr;
			
 
				-	}
			
 
				-
			
 
				-	bool is_done_dispatching() const {
			
 
				-		ERR_FAIL_COND_V(current_work == nullptr, true);
			
 
				-		return index.load(std::memory_order_acquire) >= current_work->max_elements;
			
 
				-	}
			
 
				-
			
 
				-	uint32_t get_work_index() const {
			
 
				-		ERR_FAIL_COND_V(current_work == nullptr, 0);
			
 
				-		uint32_t idx = index.load(std::memory_order_acquire);
			
 
				-		return MIN(idx, current_work->max_elements);
			
 
				-	}
			
 
				-
			
 
				-	void end_work() {
			
 
				-		ERR_FAIL_COND(current_work == nullptr);
			
 
				-		for (uint32_t i = 0; i < threads_working; i++) {
			
 
				-			threads[i].completed.wait();
			
 
				-			threads[i].work = nullptr;
			
 
				-		}
			
 
				-
			
 
				-		threads_working = 0;
			
 
				-		memdelete(current_work);
			
 
				-		current_work = nullptr;
			
 
				-	}
			
 
				-
			
 
				-	template <class C, class M, class U>
			
 
				-	void do_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
			
 
				-		switch (p_elements) {
			
 
				-			case 0:
			
 
				-				// Nothing to do, so do nothing.
			
 
				-				break;
			
 
				-			case 1:
			
 
				-				// No value in pushing the work to another thread if it's a single job
			
 
				-				// and we're going to wait for it to finish. Just run it right here.
			
 
				-				(p_instance->*p_method)(0, p_userdata);
			
 
				-				break;
			
 
				-			default:
			
 
				-				// Multiple jobs to do; commence threaded business.
			
 
				-				begin_work(p_elements, p_instance, p_method, p_userdata);
			
 
				-				end_work();
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	_FORCE_INLINE_ int get_thread_count() const { return thread_count; }
			
 
				-	void init(int p_thread_count = -1);
			
 
				-	void finish();
			
 
				-	~ThreadWorkPool();
			
 
				-};
			
 
				-
			
 
				-#endif // THREAD_WORK_POOL_H
			
--- a/doc/classes/WorkerThreadPool.xml
+++ b/doc/classes/WorkerThreadPool.xml
@@ -25,6 +25,12 @@
 
				 			<description>
			
 
				 			</description>
			
 
				 		</method>
			
 
				+		<method name="get_group_processed_element_count" qualifiers="const">
			
 
				+			<return type="int" />
			
 
				+			<argument index="0" name="group_id" type="int" />
			
 
				+			<description>
			
 
				+			</description>
			
 
				+		</method>
			
 
				 		<method name="is_group_task_completed" qualifiers="const">
			
 
				 			<return type="bool" />
			
 
				 			<argument index="0" name="group_id" type="int" />
			
--- a/editor/editor_file_system.cpp
+++ b/editor/editor_file_system.cpp
@@ -36,6 +36,7 @@
 
				 #include "core/io/resource_importer.h"
			
 
				 #include "core/io/resource_loader.h"
			
 
				 #include "core/io/resource_saver.h"
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "core/os/os.h"
			
 
				 #include "core/variant/variant_parser.h"
			
 
				 #include "editor/editor_node.h"
			
@@ -2137,7 +2138,7 @@ void EditorFileSystem::reimport_files(const Vector<String> &p_files) {
 
				 					data.reimport_from = from;
			
 
				 					data.reimport_files = reimport_files.ptr();
			
 
				 
			
 
				-					import_threads.begin_work(i - from + 1, this, &EditorFileSystem::_reimport_thread, &data);
			
 
				+					WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &EditorFileSystem::_reimport_thread, &data, i - from + 1, -1, false, vformat(TTR("Import resources of type: %s"), reimport_files[from].importer));
			
 
				 					int current_index = from - 1;
			
 
				 					do {
			
 
				 						if (current_index < data.max_index) {
			
@@ -2145,9 +2146,9 @@ void EditorFileSystem::reimport_files(const Vector<String> &p_files) {
 
				 							pr.step(reimport_files[current_index].path.get_file(), current_index);
			
 
				 						}
			
 
				 						OS::get_singleton()->delay_usec(1);
			
 
				-					} while (!import_threads.is_done_dispatching());
			
 
				+					} while (!WorkerThreadPool::get_singleton()->is_group_task_completed(group_task));
			
 
				 
			
 
				-					import_threads.end_work();
			
 
				+					WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 
			
 
				 					importer->import_threaded_end();
			
 
				 				}
			
@@ -2430,12 +2431,10 @@ EditorFileSystem::EditorFileSystem() {
 
				 
			
 
				 	scan_total = 0;
			
 
				 	update_script_classes_queued.clear();
			
 
				-	import_threads.init();
			
 
				 	ResourceUID::get_singleton()->clear(); //will be updated on scan
			
 
				 	ResourceSaver::set_get_resource_id_for_path(_resource_saver_get_resource_id_for_path);
			
 
				 }
			
 
				 
			
 
				 EditorFileSystem::~EditorFileSystem() {
			
 
				-	import_threads.finish();
			
 
				 	ResourceSaver::set_get_resource_id_for_path(nullptr);
			
 
				 }
			
--- a/editor/editor_file_system.h
+++ b/editor/editor_file_system.h
@@ -36,7 +36,6 @@
 
				 #include "core/os/thread_safe.h"
			
 
				 #include "core/templates/hash_set.h"
			
 
				 #include "core/templates/safe_refcount.h"
			
 
				-#include "core/templates/thread_work_pool.h"
			
 
				 #include "scene/main/node.h"
			
 
				 
			
 
				 class FileAccess;
			
@@ -275,8 +274,6 @@ class EditorFileSystem : public Node {
 
				 
			
 
				 	HashSet<String> group_file_cache;
			
 
				 
			
 
				-	ThreadWorkPool import_threads;
			
 
				-
			
 
				 	struct ImportThreadData {
			
 
				 		const ImportFile *reimport_files;
			
 
				 		int reimport_from;
			
--- a/modules/navigation/nav_map.cpp
+++ b/modules/navigation/nav_map.cpp
@@ -30,9 +30,9 @@
 
				 
			
 
				 #include "nav_map.h"
			
 
				 
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "nav_region.h"
			
 
				 #include "rvo_agent.h"
			
 
				-
			
 
				 #include <algorithm>
			
 
				 
			
 
				 #define THREE_POINTS_CROSS_PRODUCT(m_a, m_b, m_c) (((m_c) - (m_a)).cross((m_b) - (m_a)))
			
@@ -683,14 +683,8 @@ void NavMap::compute_single_step(uint32_t index, RvoAgent **agent) {
 
				 void NavMap::step(real_t p_deltatime) {
			
 
				 	deltatime = p_deltatime;
			
 
				 	if (controlled_agents.size() > 0) {
			
 
				-		if (step_work_pool.get_thread_count() == 0) {
			
 
				-			step_work_pool.init();
			
 
				-		}
			
 
				-		step_work_pool.do_work(
			
 
				-				controlled_agents.size(),
			
 
				-				this,
			
 
				-				&NavMap::compute_single_step,
			
 
				-				controlled_agents.data());
			
 
				+		WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &NavMap::compute_single_step, controlled_agents.data(), controlled_agents.size(), -1, true, SNAME("NavigationMapAgents"));
			
 
				+		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -736,5 +730,4 @@ NavMap::NavMap() {
 
				 }
			
 
				 
			
 
				 NavMap::~NavMap() {
			
 
				-	step_work_pool.finish();
			
 
				 }
			
--- a/modules/navigation/nav_map.h
+++ b/modules/navigation/nav_map.h
@@ -34,8 +34,8 @@
 
				 #include "nav_rid.h"
			
 
				 
			
 
				 #include "core/math/math_defs.h"
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "core/templates/rb_map.h"
			
 
				-#include "core/templates/thread_work_pool.h"
			
 
				 #include "nav_utils.h"
			
 
				 
			
 
				 #include <KdTree.h>
			
@@ -81,9 +81,6 @@ class NavMap : public NavRid {
 
				 	/// Change the id each time the map is updated.
			
 
				 	uint32_t map_update_id = 0;
			
 
				 
			
 
				-	/// Pooled threads for computing steps
			
 
				-	ThreadWorkPool step_work_pool;
			
 
				-
			
 
				 public:
			
 
				 	NavMap();
			
 
				 	~NavMap();
			
--- a/modules/raycast/raycast_occlusion_cull.cpp
+++ b/modules/raycast/raycast_occlusion_cull.cpp
@@ -30,6 +30,7 @@
 
				 
			
 
				 #include "raycast_occlusion_cull.h"
			
 
				 #include "core/config/project_settings.h"
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "core/templates/local_vector.h"
			
 
				 
			
 
				 #ifdef __SSE2__
			
@@ -78,9 +79,9 @@ void RaycastOcclusionCull::RaycastHZBuffer::resize(const Size2i &p_size) {
 
				 	memset(camera_ray_masks.ptr(), ~0, camera_rays_tile_count * TILE_RAYS * sizeof(uint32_t));
			
 
				 }
			
 
				 
			
 
				-void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool) {
			
 
				+void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {
			
 
				 	CameraRayThreadData td;
			
 
				-	td.thread_count = p_thread_work_pool.get_thread_count();
			
 
				+	td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count();
			
 
				 
			
 
				 	td.z_near = p_cam_projection.get_z_near();
			
 
				 	td.z_far = p_cam_projection.get_z_far() * 1.05f;
			
@@ -106,7 +107,8 @@ void RaycastOcclusionCull::RaycastHZBuffer::update_camera_rays(const Transform3D
 
				 
			
 
				 	debug_tex_range = td.z_far;
			
 
				 
			
 
				-	p_thread_work_pool.do_work(td.thread_count, this, &RaycastHZBuffer::_camera_rays_threaded, &td);
			
 
				+	WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RaycastHZBuffer::_camera_rays_threaded, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCullUpdateCamera"));
			
 
				+	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 }
			
 
				 
			
 
				 void RaycastOcclusionCull::RaycastHZBuffer::_camera_rays_threaded(uint32_t p_thread, const CameraRayThreadData *p_data) {
			
@@ -331,10 +333,10 @@ void RaycastOcclusionCull::scenario_remove_instance(RID p_scenario, RID p_instan
 
				 }
			
 
				 
			
 
				 void RaycastOcclusionCull::Scenario::_update_dirty_instance_thread(int p_idx, RID *p_instances) {
			
 
				-	_update_dirty_instance(p_idx, p_instances, nullptr);
			
 
				+	_update_dirty_instance(p_idx, p_instances);
			
 
				 }
			
 
				 
			
 
				-void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_instances, ThreadWorkPool *p_thread_pool) {
			
 
				+void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_instances) {
			
 
				 	OccluderInstance *occ_inst = instances.getptr(p_instances[p_idx]);
			
 
				 
			
 
				 	if (!occ_inst) {
			
@@ -355,14 +357,16 @@ void RaycastOcclusionCull::Scenario::_update_dirty_instance(int p_idx, RID *p_in
 
				 	const Vector3 *read_ptr = occ->vertices.ptr();
			
 
				 	Vector3 *write_ptr = occ_inst->xformed_vertices.ptr();
			
 
				 
			
 
				-	if (p_thread_pool && vertices_size > 1024) {
			
 
				+	if (vertices_size > 1024) {
			
 
				 		TransformThreadData td;
			
 
				 		td.xform = occ_inst->xform;
			
 
				 		td.read = read_ptr;
			
 
				 		td.write = write_ptr;
			
 
				 		td.vertex_count = vertices_size;
			
 
				-		td.thread_count = p_thread_pool->get_thread_count();
			
 
				-		p_thread_pool->do_work(td.thread_count, this, &Scenario::_transform_vertices_thread, &td);
			
 
				+		td.thread_count = WorkerThreadPool::get_singleton()->get_thread_count();
			
 
				+		WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_transform_vertices_thread, &td, td.thread_count, -1, true, SNAME("RaycastOcclusionCull"));
			
 
				+		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				+
			
 
				 	} else {
			
 
				 		_transform_vertices_range(read_ptr, write_ptr, occ_inst->xform, 0, vertices_size);
			
 
				 	}
			
@@ -392,7 +396,7 @@ void RaycastOcclusionCull::Scenario::_commit_scene(void *p_ud) {
 
				 	scenario->commit_done = true;
			
 
				 }
			
 
				 
			
 
				-bool RaycastOcclusionCull::Scenario::update(ThreadWorkPool &p_thread_pool) {
			
 
				+bool RaycastOcclusionCull::Scenario::update() {
			
 
				 	ERR_FAIL_COND_V(singleton == nullptr, false);
			
 
				 
			
 
				 	if (commit_thread == nullptr) {
			
@@ -426,13 +430,15 @@ bool RaycastOcclusionCull::Scenario::update(ThreadWorkPool &p_thread_pool) {
 
				 		instances.erase(removed_instances[i]);
			
 
				 	}
			
 
				 
			
 
				-	if (dirty_instances_array.size() / p_thread_pool.get_thread_count() > 128) {
			
 
				+	if (dirty_instances_array.size() / WorkerThreadPool::get_singleton()->get_thread_count() > 128) {
			
 
				 		// Lots of instances, use per-instance threading
			
 
				-		p_thread_pool.do_work(dirty_instances_array.size(), this, &Scenario::_update_dirty_instance_thread, dirty_instances_array.ptr());
			
 
				+		WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_update_dirty_instance_thread, dirty_instances_array.ptr(), dirty_instances_array.size(), -1, true, SNAME("RaycastOcclusionCullUpdate"));
			
 
				+		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				+
			
 
				 	} else {
			
 
				 		// Few instances, use threading on the vertex transforms
			
 
				 		for (unsigned int i = 0; i < dirty_instances_array.size(); i++) {
			
 
				-			_update_dirty_instance(i, dirty_instances_array.ptr(), &p_thread_pool);
			
 
				+			_update_dirty_instance(i, dirty_instances_array.ptr());
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -484,7 +490,7 @@ void RaycastOcclusionCull::Scenario::_raycast(uint32_t p_idx, const RaycastThrea
 
				 	rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &ctx, &p_raycast_data->rays[p_idx]);
			
 
				 }
			
 
				 
			
 
				-void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count, ThreadWorkPool &p_thread_pool) const {
			
 
				+void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const {
			
 
				 	ERR_FAIL_COND(singleton == nullptr);
			
 
				 	if (raycast_singleton->ebr_device == nullptr) {
			
 
				 		return; // Embree is initialized on demand when there is some scenario with occluders in it.
			
@@ -498,7 +504,8 @@ void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32
 
				 	td.rays = r_rays;
			
 
				 	td.masks = p_valid_masks;
			
 
				 
			
 
				-	p_thread_pool.do_work(p_tile_count, this, &Scenario::_raycast, &td);
			
 
				+	WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &Scenario::_raycast, &td, p_tile_count, -1, true, SNAME("RaycastOcclusionCullRaycast"));
			
 
				+	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 }
			
 
				 
			
 
				 ////////////////////////////////////////////////////////
			
@@ -524,7 +531,7 @@ void RaycastOcclusionCull::buffer_set_size(RID p_buffer, const Vector2i &p_size)
 
				 	buffers[p_buffer].resize(p_size);
			
 
				 }
			
 
				 
			
 
				-void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) {
			
 
				+void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {
			
 
				 	if (!buffers.has(p_buffer)) {
			
 
				 		return;
			
 
				 	}
			
@@ -537,16 +544,16 @@ void RaycastOcclusionCull::buffer_update(RID p_buffer, const Transform3D &p_cam_
 
				 
			
 
				 	Scenario &scenario = scenarios[buffer.scenario_rid];
			
 
				 
			
 
				-	bool removed = scenario.update(p_thread_pool);
			
 
				+	bool removed = scenario.update();
			
 
				 
			
 
				 	if (removed) {
			
 
				 		scenarios.erase(buffer.scenario_rid);
			
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal, p_thread_pool);
			
 
				+	buffer.update_camera_rays(p_cam_transform, p_cam_projection, p_cam_orthogonal);
			
 
				 
			
 
				-	scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count, p_thread_pool);
			
 
				+	scenario.raycast(buffer.camera_rays, buffer.camera_ray_masks.ptr(), buffer.camera_rays_tile_count);
			
 
				 	buffer.sort_rays(-p_cam_transform.basis.get_column(2), p_cam_orthogonal);
			
 
				 	buffer.update_mips();
			
 
				 }
			
--- a/modules/raycast/raycast_occlusion_cull.h
+++ b/modules/raycast/raycast_occlusion_cull.h
@@ -76,7 +76,7 @@ public:
 
				 		virtual void clear() override;
			
 
				 		virtual void resize(const Size2i &p_size) override;
			
 
				 		void sort_rays(const Vector3 &p_camera_dir, bool p_orthogonal);
			
 
				-		void update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_work_pool);
			
 
				+		void update_camera_rays(const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal);
			
 
				 
			
 
				 		~RaycastHZBuffer();
			
 
				 	};
			
@@ -143,14 +143,14 @@ private:
 
				 		LocalVector<RID> removed_instances;
			
 
				 
			
 
				 		void _update_dirty_instance_thread(int p_idx, RID *p_instances);
			
 
				-		void _update_dirty_instance(int p_idx, RID *p_instances, ThreadWorkPool *p_thread_pool);
			
 
				+		void _update_dirty_instance(int p_idx, RID *p_instances);
			
 
				 		void _transform_vertices_thread(uint32_t p_thread, TransformThreadData *p_data);
			
 
				 		void _transform_vertices_range(const Vector3 *p_read, Vector3 *p_write, const Transform3D &p_xform, int p_from, int p_to);
			
 
				 		static void _commit_scene(void *p_ud);
			
 
				-		bool update(ThreadWorkPool &p_thread_pool);
			
 
				+		bool update();
			
 
				 
			
 
				 		void _raycast(uint32_t p_thread, const RaycastThreadData *p_raycast_data) const;
			
 
				-		void raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count, ThreadWorkPool &p_thread_pool) const;
			
 
				+		void raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const;
			
 
				 	};
			
 
				 
			
 
				 	static RaycastOcclusionCull *raycast_singleton;
			
@@ -183,7 +183,8 @@ public:
 
				 	virtual HZBuffer *buffer_get_ptr(RID p_buffer) override;
			
 
				 	virtual void buffer_set_scenario(RID p_buffer, RID p_scenario) override;
			
 
				 	virtual void buffer_set_size(RID p_buffer, const Vector2i &p_size) override;
			
 
				-	virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) override;
			
 
				+	virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) override;
			
 
				+
			
 
				 	virtual RID buffer_get_debug_texture(RID p_buffer) override;
			
 
				 
			
 
				 	virtual void set_build_quality(RS::ViewportOcclusionCullingBuildQuality p_quality) override;
			
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@@ -29,6 +29,7 @@
 
				 /*************************************************************************/
			
 
				 
			
 
				 #include "text_server_adv.h"
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 
			
 
				 #ifdef GDEXTENSION
			
 
				 // Headers for building as GDExtension plug-in.
			
@@ -1039,10 +1040,8 @@ _FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_msdf(
 
				 		td.projection = &projection;
			
 
				 		td.distancePixelConversion = &distancePixelConversion;
			
 
				 
			
 
				-		if (p_font_data->work_pool.get_thread_count() == 0) {
			
 
				-			p_font_data->work_pool.init();
			
 
				-		}
			
 
				-		p_font_data->work_pool.do_work(h, this, &TextServerAdvanced::_generateMTSDF_threaded, &td);
			
 
				+		WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &TextServerAdvanced::_generateMTSDF_threaded, &td, h, -1, true, SNAME("FontServerRasterizeMSDF"));
			
 
				+		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 
			
 
				 		msdfgen::msdfErrorCorrection(image, shape, projection, p_pixel_range, config);
			
 
				 
			
--- a/modules/text_server_adv/text_server_adv.h
+++ b/modules/text_server_adv/text_server_adv.h
@@ -65,11 +65,12 @@
 
				 #include <godot_cpp/classes/image.hpp>
			
 
				 #include <godot_cpp/classes/image_texture.hpp>
			
 
				 #include <godot_cpp/classes/ref.hpp>
			
 
				+#include <godot_cpp/classes/worker_thread_pool.hpp>
			
 
				 
			
 
				 #include <godot_cpp/templates/hash_map.hpp>
			
 
				 #include <godot_cpp/templates/hash_set.hpp>
			
 
				 #include <godot_cpp/templates/rid_owner.hpp>
			
 
				-#include <godot_cpp/templates/thread_work_pool.hpp>
			
 
				+
			
 
				 #include <godot_cpp/templates/vector.hpp>
			
 
				 
			
 
				 using namespace godot;
			
@@ -77,9 +78,9 @@ using namespace godot;
 
				 #else
			
 
				 // Headers for building as built-in module.
			
 
				 
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "core/templates/hash_map.h"
			
 
				 #include "core/templates/rid_owner.h"
			
 
				-#include "core/templates/thread_work_pool.h"
			
 
				 #include "scene/resources/texture.h"
			
 
				 #include "servers/text/text_server_extension.h"
			
 
				 
			
@@ -252,10 +253,8 @@ class TextServerAdvanced : public TextServerExtension {
 
				 		const uint8_t *data_ptr;
			
 
				 		size_t data_size;
			
 
				 		int face_index = 0;
			
 
				-		mutable ThreadWorkPool work_pool;
			
 
				 
			
 
				 		~FontAdvanced() {
			
 
				-			work_pool.finish();
			
 
				 			for (const KeyValue<Vector2i, FontForSizeAdvanced *> &E : cache) {
			
 
				 				memdelete(E.value);
			
 
				 			}
			
--- a/modules/text_server_fb/text_server_fb.cpp
+++ b/modules/text_server_fb/text_server_fb.cpp
@@ -461,10 +461,8 @@ _FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_msdf(
 
				 		td.projection = &projection;
			
 
				 		td.distancePixelConversion = &distancePixelConversion;
			
 
				 
			
 
				-		if (p_font_data->work_pool.get_thread_count() == 0) {
			
 
				-			p_font_data->work_pool.init();
			
 
				-		}
			
 
				-		p_font_data->work_pool.do_work(h, this, &TextServerFallback::_generateMTSDF_threaded, &td);
			
 
				+		WorkerThreadPool::GroupID group_id = WorkerThreadPool::get_singleton()->add_template_group_task(this, &TextServerFallback::_generateMTSDF_threaded, &td, h, -1, true, SNAME("TextServerFBRenderMSDF"));
			
 
				+		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_id);
			
 
				 
			
 
				 		msdfgen::msdfErrorCorrection(image, shape, projection, p_pixel_range, config);
			
 
				 
			
--- a/modules/text_server_fb/text_server_fb.h
+++ b/modules/text_server_fb/text_server_fb.h
@@ -79,9 +79,9 @@ using namespace godot;
 
				 
			
 
				 #include "servers/text/text_server_extension.h"
			
 
				 
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "core/templates/hash_map.h"
			
 
				 #include "core/templates/rid_owner.h"
			
 
				-#include "core/templates/thread_work_pool.h"
			
 
				 #include "scene/resources/texture.h"
			
 
				 
			
 
				 #include "modules/modules_enabled.gen.h" // For freetype, msdfgen.
			
@@ -208,10 +208,7 @@ class TextServerFallback : public TextServerExtension {
 
				 		size_t data_size;
			
 
				 		int face_index = 0;
			
 
				 
			
 
				-		mutable ThreadWorkPool work_pool;
			
 
				-
			
 
				 		~FontFallback() {
			
 
				-			work_pool.finish();
			
 
				 			for (const KeyValue<Vector2i, FontForSizeFallback *> &E : cache) {
			
 
				 				memdelete(E.value);
			
 
				 			}
			
--- a/scene/3d/gpu_particles_collision_3d.cpp
+++ b/scene/3d/gpu_particles_collision_3d.cpp
@@ -30,6 +30,7 @@
 
				 
			
 
				 #include "gpu_particles_collision_3d.h"
			
 
				 
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "mesh_instance_3d.h"
			
 
				 #include "scene/3d/camera_3d.h"
			
 
				 #include "scene/main/viewport.h"
			
@@ -339,15 +340,12 @@ void GPUParticlesCollisionSDF3D::_compute_sdf_z(uint32_t p_z, ComputeSDFParams *
 
				 }
			
 
				 
			
 
				 void GPUParticlesCollisionSDF3D::_compute_sdf(ComputeSDFParams *params) {
			
 
				-	ThreadWorkPool work_pool;
			
 
				-	work_pool.init();
			
 
				-	work_pool.begin_work(params->size.z, this, &GPUParticlesCollisionSDF3D::_compute_sdf_z, params);
			
 
				-	while (!work_pool.is_done_dispatching()) {
			
 
				+	WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GPUParticlesCollisionSDF3D::_compute_sdf_z, params, params->size.z);
			
 
				+	while (!WorkerThreadPool::get_singleton()->is_group_task_completed(group_task)) {
			
 
				 		OS::get_singleton()->delay_usec(10000);
			
 
				-		bake_step_function(work_pool.get_work_index() * 100 / params->size.z, "Baking SDF");
			
 
				+		bake_step_function(WorkerThreadPool::get_singleton()->get_group_processed_element_count(group_task) * 100 / params->size.z, "Baking SDF");
			
 
				 	}
			
 
				-	work_pool.end_work();
			
 
				-	work_pool.finish();
			
 
				+	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 }
			
 
				 
			
 
				 Vector3i GPUParticlesCollisionSDF3D::get_estimated_cell_size() const {
			
--- a/servers/physics_2d/godot_step_2d.cpp
+++ b/servers/physics_2d/godot_step_2d.cpp
@@ -239,7 +239,8 @@ void GodotStep2D::step(GodotSpace2D *p_space, real_t p_delta) {
 
				 	/* SETUP CONSTRAINTS / PROCESS COLLISIONS */
			
 
				 
			
 
				 	uint32_t total_contraint_count = all_constraints.size();
			
 
				-	work_pool.do_work(total_contraint_count, this, &GodotStep2D::_setup_contraint, nullptr);
			
 
				+	WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GodotStep2D::_setup_contraint, nullptr, total_contraint_count, -1, true, SNAME("Physics2DConstraintSetup"));
			
 
				+	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 
			
 
				 	{ //profile
			
 
				 		profile_endtime = OS::get_singleton()->get_ticks_usec();
			
@@ -258,7 +259,8 @@ void GodotStep2D::step(GodotSpace2D *p_space, real_t p_delta) {
 
				 
			
 
				 	// Warning: _solve_island modifies the constraint islands for optimization purpose,
			
 
				 	// their content is not reliable after these calls and shouldn't be used anymore.
			
 
				-	work_pool.do_work(island_count, this, &GodotStep2D::_solve_island, nullptr);
			
 
				+	group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GodotStep2D::_solve_island, nullptr, island_count, -1, true, SNAME("Physics2DConstraintSolveIslands"));
			
 
				+	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 
			
 
				 	{ //profile
			
 
				 		profile_endtime = OS::get_singleton()->get_ticks_usec();
			
@@ -297,10 +299,7 @@ GodotStep2D::GodotStep2D() {
 
				 	body_islands.reserve(BODY_ISLAND_COUNT_RESERVE);
			
 
				 	constraint_islands.reserve(ISLAND_COUNT_RESERVE);
			
 
				 	all_constraints.reserve(CONSTRAINT_COUNT_RESERVE);
			
 
				-
			
 
				-	work_pool.init();
			
 
				 }
			
 
				 
			
 
				 GodotStep2D::~GodotStep2D() {
			
 
				-	work_pool.finish();
			
 
				 }
			
--- a/servers/physics_2d/godot_step_2d.h
+++ b/servers/physics_2d/godot_step_2d.h
@@ -33,8 +33,8 @@
 
				 
			
 
				 #include "godot_space_2d.h"
			
 
				 
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "core/templates/local_vector.h"
			
 
				-#include "core/templates/thread_work_pool.h"
			
 
				 
			
 
				 class GodotStep2D {
			
 
				 	uint64_t _step = 1;
			
@@ -42,8 +42,6 @@ class GodotStep2D {
 
				 	int iterations = 0;
			
 
				 	real_t delta = 0.0;
			
 
				 
			
 
				-	ThreadWorkPool work_pool;
			
 
				-
			
 
				 	LocalVector<LocalVector<GodotBody2D *>> body_islands;
			
 
				 	LocalVector<LocalVector<GodotConstraint2D *>> constraint_islands;
			
 
				 	LocalVector<GodotConstraint2D *> all_constraints;
			
--- a/servers/physics_3d/godot_step_3d.cpp
+++ b/servers/physics_3d/godot_step_3d.cpp
@@ -343,7 +343,8 @@ void GodotStep3D::step(GodotSpace3D *p_space, real_t p_delta) {
 
				 	/* SETUP CONSTRAINTS / PROCESS COLLISIONS */
			
 
				 
			
 
				 	uint32_t total_contraint_count = all_constraints.size();
			
 
				-	work_pool.do_work(total_contraint_count, this, &GodotStep3D::_setup_contraint, nullptr);
			
 
				+	WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GodotStep3D::_setup_contraint, nullptr, total_contraint_count, -1, true, SNAME("Physics3DConstraintSetup"));
			
 
				+	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 
			
 
				 	{ //profile
			
 
				 		profile_endtime = OS::get_singleton()->get_ticks_usec();
			
@@ -362,7 +363,8 @@ void GodotStep3D::step(GodotSpace3D *p_space, real_t p_delta) {
 
				 
			
 
				 	// Warning: _solve_island modifies the constraint islands for optimization purpose,
			
 
				 	// their content is not reliable after these calls and shouldn't be used anymore.
			
 
				-	work_pool.do_work(island_count, this, &GodotStep3D::_solve_island, nullptr);
			
 
				+	group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &GodotStep3D::_solve_island, nullptr, island_count, -1, true, SNAME("Physics3DConstraintSolveIslands"));
			
 
				+	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 
			
 
				 	{ //profile
			
 
				 		profile_endtime = OS::get_singleton()->get_ticks_usec();
			
@@ -409,10 +411,7 @@ GodotStep3D::GodotStep3D() {
 
				 	body_islands.reserve(BODY_ISLAND_COUNT_RESERVE);
			
 
				 	constraint_islands.reserve(ISLAND_COUNT_RESERVE);
			
 
				 	all_constraints.reserve(CONSTRAINT_COUNT_RESERVE);
			
 
				-
			
 
				-	work_pool.init();
			
 
				 }
			
 
				 
			
 
				 GodotStep3D::~GodotStep3D() {
			
 
				-	work_pool.finish();
			
 
				 }
			
--- a/servers/physics_3d/godot_step_3d.h
+++ b/servers/physics_3d/godot_step_3d.h
@@ -33,8 +33,8 @@
 
				 
			
 
				 #include "godot_space_3d.h"
			
 
				 
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "core/templates/local_vector.h"
			
 
				-#include "core/templates/thread_work_pool.h"
			
 
				 
			
 
				 class GodotStep3D {
			
 
				 	uint64_t _step = 1;
			
@@ -42,8 +42,6 @@ class GodotStep3D {
 
				 	int iterations = 0;
			
 
				 	real_t delta = 0.0;
			
 
				 
			
 
				-	ThreadWorkPool work_pool;
			
 
				-
			
 
				 	LocalVector<LocalVector<GodotBody3D *>> body_islands;
			
 
				 	LocalVector<LocalVector<GodotConstraint3D *>> constraint_islands;
			
 
				 	LocalVector<GodotConstraint3D *> all_constraints;
			
--- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
+++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
@@ -775,7 +775,7 @@ void RenderForwardClustered::_render_list(RenderingDevice::DrawListID p_draw_lis
 
				 
			
 
				 void RenderForwardClustered::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) {
			
 
				 	uint32_t render_total = p_params->element_count;
			
 
				-	uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
			
 
				+	uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count();
			
 
				 	uint32_t render_from = p_thread * render_total / total_threads;
			
 
				 	uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads);
			
 
				 	_render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to);
			
@@ -787,9 +787,10 @@ void RenderForwardClustered::_render_list_with_threads(RenderListParameters *p_p
 
				 
			
 
				 	if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time
			
 
				 		//multi threaded
			
 
				-		thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
			
 
				+		thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
			
 
				 		RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures);
			
 
				-		RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardClustered::_render_list_thread_function, p_params);
			
 
				+		WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardClustered::_render_list_thread_function, p_params, thread_draw_lists.size(), -1, true, SNAME("ForwardClusteredRenderList"));
			
 
				+		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 		RD::get_singleton()->draw_list_end(p_params->barrier);
			
 
				 	} else {
			
 
				 		//single threaded
			
--- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp
+++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp
@@ -758,9 +758,12 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 
				 			if ((uint32_t)render_list_params.element_count > render_list_thread_threshold && false) {
			
 
				 				// secondary command buffers need more testing at this time
			
 
				 				//multi threaded
			
 
				-				thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
			
 
				+				thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
			
 
				 				RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
			
 
				-				RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params);
			
 
				+
			
 
				+				WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, &render_list_params, thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderList"));
			
 
				+				WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				+
			
 
				 			} else {
			
 
				 				//single threaded
			
 
				 				RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
			
@@ -822,10 +825,12 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 
				 			if ((uint32_t)render_list_params.element_count > render_list_thread_threshold && false) {
			
 
				 				// secondary command buffers need more testing at this time
			
 
				 				//multi threaded
			
 
				-				thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
			
 
				+				thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
			
 
				 				RD::get_singleton()->draw_list_switch_to_next_pass_split(thread_draw_lists.size(), thread_draw_lists.ptr());
			
 
				 				render_list_params.subpass = RD::get_singleton()->draw_list_get_current_pass();
			
 
				-				RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params);
			
 
				+				WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, &render_list_params, thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderSubpass"));
			
 
				+				WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				+
			
 
				 			} else {
			
 
				 				//single threaded
			
 
				 				RD::DrawListID draw_list = RD::get_singleton()->draw_list_switch_to_next_pass();
			
@@ -859,9 +864,11 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 
				 			if ((uint32_t)render_list_params.element_count > render_list_thread_threshold && false) {
			
 
				 				// secondary command buffers need more testing at this time
			
 
				 				//multi threaded
			
 
				-				thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
			
 
				+				thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
			
 
				 				RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ);
			
 
				-				RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params);
			
 
				+				WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, &render_list_params, thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderSubpass"));
			
 
				+				WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				+
			
 
				 				RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_ALL);
			
 
				 			} else {
			
 
				 				//single threaded
			
@@ -1771,7 +1778,7 @@ void RenderForwardMobile::_render_list(RenderingDevice::DrawListID p_draw_list,
 
				 
			
 
				 void RenderForwardMobile::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) {
			
 
				 	uint32_t render_total = p_params->element_count;
			
 
				-	uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
			
 
				+	uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count();
			
 
				 	uint32_t render_from = p_thread * render_total / total_threads;
			
 
				 	uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads);
			
 
				 	_render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to);
			
@@ -1783,9 +1790,11 @@ void RenderForwardMobile::_render_list_with_threads(RenderListParameters *p_para
 
				 
			
 
				 	if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // secondary command buffers need more testing at this time
			
 
				 		//multi threaded
			
 
				-		thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
			
 
				+		thread_draw_lists.resize(WorkerThreadPool::get_singleton()->get_thread_count());
			
 
				 		RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures);
			
 
				-		RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, p_params);
			
 
				+		WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RenderForwardMobile::_render_list_thread_function, p_params, thread_draw_lists.size(), -1, true, SNAME("ForwardMobileRenderSubpass"));
			
 
				+		WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				+
			
 
				 		RD::get_singleton()->draw_list_end(p_params->barrier);
			
 
				 	} else {
			
 
				 		//single threaded
			
--- a/servers/rendering/renderer_rd/renderer_compositor_rd.h
+++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h
@@ -32,7 +32,6 @@
 
				 #define RENDERER_COMPOSITOR_RD_H
			
 
				 
			
 
				 #include "core/os/os.h"
			
 
				-#include "core/templates/thread_work_pool.h"
			
 
				 #include "servers/rendering/renderer_compositor.h"
			
 
				 #include "servers/rendering/renderer_rd/effects_rd.h"
			
 
				 #include "servers/rendering/renderer_rd/environment/fog.h"
			
--- a/servers/rendering/renderer_rd/shader_rd.cpp
+++ b/servers/rendering/renderer_rd/shader_rd.cpp
@@ -476,7 +476,9 @@ void ShaderRD::_compile_version(Version *p_version) {
 
				 
			
 
				 #if 1
			
 
				 
			
 
				-	RendererThreadPool::singleton->thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version);
			
 
				+	WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &ShaderRD::_compile_variant, p_version, variant_defines.size(), -1, true, SNAME("ShaderCompilation"));
			
 
				+	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				+
			
 
				 #else
			
 
				 	for (int i = 0; i < variant_defines.size(); i++) {
			
 
				 		_compile_variant(i, p_version);
			
--- a/servers/rendering/renderer_scene_cull.cpp
+++ b/servers/rendering/renderer_scene_cull.cpp
@@ -2518,14 +2518,14 @@ void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_
 
				 
			
 
				 	RENDER_TIMESTAMP("Update Occlusion Buffer")
			
 
				 	// For now just cull on the first camera
			
 
				-	RendererSceneOcclusionCull::get_singleton()->buffer_update(p_viewport, camera_data.main_transform, camera_data.main_projection, camera_data.is_orthogonal, RendererThreadPool::singleton->thread_work_pool);
			
 
				+	RendererSceneOcclusionCull::get_singleton()->buffer_update(p_viewport, camera_data.main_transform, camera_data.main_projection, camera_data.is_orthogonal);
			
 
				 
			
 
				 	_render_scene(&camera_data, p_render_buffers, environment, camera->effects, camera->visible_layers, p_scenario, p_viewport, p_shadow_atlas, RID(), -1, p_screen_mesh_lod_threshold, true, r_render_info);
			
 
				 #endif
			
 
				 }
			
 
				 
			
 
				 void RendererSceneCull::_visibility_cull_threaded(uint32_t p_thread, VisibilityCullData *cull_data) {
			
 
				-	uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
			
 
				+	uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count();
			
 
				 	uint32_t bin_from = p_thread * cull_data->cull_count / total_threads;
			
 
				 	uint32_t bin_to = (p_thread + 1 == total_threads) ? cull_data->cull_count : ((p_thread + 1) * cull_data->cull_count / total_threads);
			
 
				 
			
@@ -2622,7 +2622,7 @@ bool RendererSceneCull::_visibility_parent_check(const CullData &p_cull_data, co
 
				 
			
 
				 void RendererSceneCull::_scene_cull_threaded(uint32_t p_thread, CullData *cull_data) {
			
 
				 	uint32_t cull_total = cull_data->scenario->instance_data.size();
			
 
				-	uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
			
 
				+	uint32_t total_threads = WorkerThreadPool::get_singleton()->get_thread_count();
			
 
				 	uint32_t cull_from = p_thread * cull_total / total_threads;
			
 
				 	uint32_t cull_to = (p_thread + 1 == total_threads) ? cull_total : ((p_thread + 1) * cull_total / total_threads);
			
 
				 
			
@@ -2919,7 +2919,8 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c
 
				 			}
			
 
				 
			
 
				 			if (visibility_cull_data.cull_count > thread_cull_threshold) {
			
 
				-				RendererThreadPool::singleton->thread_work_pool.do_work(RendererThreadPool::singleton->thread_work_pool.get_thread_count(), this, &RendererSceneCull::_visibility_cull_threaded, &visibility_cull_data);
			
 
				+				WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RendererSceneCull::_visibility_cull_threaded, &visibility_cull_data, WorkerThreadPool::get_singleton()->get_thread_count(), -1, true, SNAME("VisibilityCullInstances"));
			
 
				+				WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 			} else {
			
 
				 				_visibility_cull(visibility_cull_data, visibility_cull_data.cull_offset, visibility_cull_data.cull_offset + visibility_cull_data.cull_count);
			
 
				 			}
			
@@ -3024,7 +3025,8 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c
 
				 				scene_cull_result_threads[i].clear();
			
 
				 			}
			
 
				 
			
 
				-			RendererThreadPool::singleton->thread_work_pool.do_work(scene_cull_result_threads.size(), this, &RendererSceneCull::_scene_cull_threaded, &cull_data);
			
 
				+			WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_template_group_task(this, &RendererSceneCull::_scene_cull_threaded, &cull_data, scene_cull_result_threads.size(), -1, true, SNAME("RenderCullInstances"));
			
 
				+			WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
			
 
				 
			
 
				 			for (uint32_t i = 0; i < scene_cull_result_threads.size(); i++) {
			
 
				 				scene_cull_result.append_from(scene_cull_result_threads[i]);
			
@@ -4030,14 +4032,14 @@ RendererSceneCull::RendererSceneCull() {
 
				 	}
			
 
				 
			
 
				 	scene_cull_result.init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool);
			
 
				-	scene_cull_result_threads.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
			
 
				+	scene_cull_result_threads.resize(WorkerThreadPool::get_singleton()->get_thread_count());
			
 
				 	for (uint32_t i = 0; i < scene_cull_result_threads.size(); i++) {
			
 
				 		scene_cull_result_threads[i].init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool);
			
 
				 	}
			
 
				 
			
 
				 	indexer_update_iterations = GLOBAL_GET("rendering/limits/spatial_indexer/update_iterations_per_frame");
			
 
				 	thread_cull_threshold = GLOBAL_GET("rendering/limits/spatial_indexer/threaded_cull_minimum_instances");
			
 
				-	thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)RendererThreadPool::singleton->thread_work_pool.get_thread_count()); //make sure there is at least one thread per CPU
			
 
				+	thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)WorkerThreadPool::get_singleton()->get_thread_count()); //make sure there is at least one thread per CPU
			
 
				 
			
 
				 	taa_jitter_array.resize(TAA_JITTER_COUNT);
			
 
				 	for (int i = 0; i < TAA_JITTER_COUNT; i++) {
			
--- a/servers/rendering/renderer_scene_occlusion_cull.h
+++ b/servers/rendering/renderer_scene_occlusion_cull.h
@@ -183,7 +183,8 @@ public:
 
				 	}
			
 
				 	virtual void buffer_set_scenario(RID p_buffer, RID p_scenario) { _print_warning(); }
			
 
				 	virtual void buffer_set_size(RID p_buffer, const Vector2i &p_size) { _print_warning(); }
			
 
				-	virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal, ThreadWorkPool &p_thread_pool) {}
			
 
				+	virtual void buffer_update(RID p_buffer, const Transform3D &p_cam_transform, const Projection &p_cam_projection, bool p_cam_orthogonal) {}
			
 
				+
			
 
				 	virtual RID buffer_get_debug_texture(RID p_buffer) {
			
 
				 		_print_warning();
			
 
				 		return RID();
			
--- a/servers/rendering/renderer_thread_pool.cpp
+++ b/servers/rendering/renderer_thread_pool.cpp
@@ -1,42 +0,0 @@
 
				-/*************************************************************************/
			
 
				-/*  renderer_thread_pool.cpp                                             */
			
 
				-/*************************************************************************/
			
 
				-/*                       This file is part of:                           */
			
 
				-/*                           GODOT ENGINE                                */
			
 
				-/*                      https://godotengine.org                          */
			
 
				-/*************************************************************************/
			
 
				-/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
			
 
				-/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
			
 
				-/*                                                                       */
			
 
				-/* Permission is hereby granted, free of charge, to any person obtaining */
			
 
				-/* a copy of this software and associated documentation files (the       */
			
 
				-/* "Software"), to deal in the Software without restriction, including   */
			
 
				-/* without limitation the rights to use, copy, modify, merge, publish,   */
			
 
				-/* distribute, sublicense, and/or sell copies of the Software, and to    */
			
 
				-/* permit persons to whom the Software is furnished to do so, subject to */
			
 
				-/* the following conditions:                                             */
			
 
				-/*                                                                       */
			
 
				-/* The above copyright notice and this permission notice shall be        */
			
 
				-/* included in all copies or substantial portions of the Software.       */
			
 
				-/*                                                                       */
			
 
				-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
			
 
				-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
			
 
				-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
			
 
				-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
			
 
				-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
			
 
				-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
			
 
				-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
			
 
				-/*************************************************************************/
			
 
				-
			
 
				-#include "renderer_thread_pool.h"
			
 
				-
			
 
				-RendererThreadPool *RendererThreadPool::singleton = nullptr;
			
 
				-
			
 
				-RendererThreadPool::RendererThreadPool() {
			
 
				-	singleton = this;
			
 
				-	thread_work_pool.init();
			
 
				-}
			
 
				-
			
 
				-RendererThreadPool::~RendererThreadPool() {
			
 
				-	thread_work_pool.finish();
			
 
				-}
			
--- a/servers/rendering/renderer_thread_pool.h
+++ b/servers/rendering/renderer_thread_pool.h
@@ -1,45 +0,0 @@
 
				-/*************************************************************************/
			
 
				-/*  renderer_thread_pool.h                                               */
			
 
				-/*************************************************************************/
			
 
				-/*                       This file is part of:                           */
			
 
				-/*                           GODOT ENGINE                                */
			
 
				-/*                      https://godotengine.org                          */
			
 
				-/*************************************************************************/
			
 
				-/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
			
 
				-/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
			
 
				-/*                                                                       */
			
 
				-/* Permission is hereby granted, free of charge, to any person obtaining */
			
 
				-/* a copy of this software and associated documentation files (the       */
			
 
				-/* "Software"), to deal in the Software without restriction, including   */
			
 
				-/* without limitation the rights to use, copy, modify, merge, publish,   */
			
 
				-/* distribute, sublicense, and/or sell copies of the Software, and to    */
			
 
				-/* permit persons to whom the Software is furnished to do so, subject to */
			
 
				-/* the following conditions:                                             */
			
 
				-/*                                                                       */
			
 
				-/* The above copyright notice and this permission notice shall be        */
			
 
				-/* included in all copies or substantial portions of the Software.       */
			
 
				-/*                                                                       */
			
 
				-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
			
 
				-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
			
 
				-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
			
 
				-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
			
 
				-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
			
 
				-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
			
 
				-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
			
 
				-/*************************************************************************/
			
 
				-
			
 
				-#ifndef RENDERER_THREAD_POOL_H
			
 
				-#define RENDERER_THREAD_POOL_H
			
 
				-
			
 
				-#include "core/templates/thread_work_pool.h"
			
 
				-
			
 
				-class RendererThreadPool {
			
 
				-public:
			
 
				-	ThreadWorkPool thread_work_pool;
			
 
				-
			
 
				-	static RendererThreadPool *singleton;
			
 
				-	RendererThreadPool();
			
 
				-	~RendererThreadPool();
			
 
				-};
			
 
				-
			
 
				-#endif // RENDERER_THREAD_POOL_H
			
--- a/servers/rendering/renderer_viewport.cpp
+++ b/servers/rendering/renderer_viewport.cpp
@@ -154,7 +154,7 @@ void RendererViewport::_draw_3d(Viewport *p_viewport) {
 
				 	if (p_viewport->use_occlusion_culling) {
			
 
				 		if (p_viewport->occlusion_buffer_dirty) {
			
 
				 			float aspect = p_viewport->size.aspect();
			
 
				-			int max_size = occlusion_rays_per_thread * RendererThreadPool::singleton->thread_work_pool.get_thread_count();
			
 
				+			int max_size = occlusion_rays_per_thread * WorkerThreadPool::get_singleton()->get_thread_count();
			
 
				 
			
 
				 			int viewport_size = p_viewport->size.width * p_viewport->size.height;
			
 
				 			max_size = CLAMP(max_size, viewport_size / (32 * 32), viewport_size / (2 * 2)); // At least one depth pixel for every 16x16 region. At most one depth pixel for every 2x2 region.
			
--- a/servers/rendering_server.cpp
+++ b/servers/rendering_server.cpp
@@ -2831,7 +2831,6 @@ void RenderingServer::set_render_loop_enabled(bool p_enabled) {
 
				 RenderingServer::RenderingServer() {
			
 
				 	//ERR_FAIL_COND(singleton);
			
 
				 
			
 
				-	thread_pool = memnew(RendererThreadPool);
			
 
				 	singleton = this;
			
 
				 }
			
 
				 
			
@@ -3032,6 +3031,5 @@ void RenderingServer::init() {
 
				 }
			
 
				 
			
 
				 RenderingServer::~RenderingServer() {
			
 
				-	memdelete(thread_pool);
			
 
				 	singleton = nullptr;
			
 
				 }
			
--- a/servers/rendering_server.h
+++ b/servers/rendering_server.h
@@ -35,11 +35,11 @@
 
				 #include "core/math/geometry_3d.h"
			
 
				 #include "core/math/transform_2d.h"
			
 
				 #include "core/object/class_db.h"
			
 
				+#include "core/object/worker_thread_pool.h"
			
 
				 #include "core/templates/rid.h"
			
 
				 #include "core/variant/typed_array.h"
			
 
				 #include "core/variant/variant.h"
			
 
				 #include "servers/display_server.h"
			
 
				-#include "servers/rendering/renderer_thread_pool.h"
			
 
				 #include "servers/rendering/rendering_device.h"
			
 
				 
			
 
				 class RenderingServer : public Object {
			
@@ -52,8 +52,6 @@ class RenderingServer : public Object {
 
				 
			
 
				 	Array _get_array_from_surface(uint32_t p_format, Vector<uint8_t> p_vertex_data, Vector<uint8_t> p_attrib_data, Vector<uint8_t> p_skin_data, int p_vertex_len, Vector<uint8_t> p_index_data, int p_index_len) const;
			
 
				 
			
 
				-	RendererThreadPool *thread_pool = nullptr;
			
 
				-
			
 
				 	const Vector2 SMALL_VEC2 = Vector2(CMP_EPSILON, CMP_EPSILON);
			
 
				 	const Vector3 SMALL_VEC3 = Vector3(CMP_EPSILON, CMP_EPSILON, CMP_EPSILON);