Added a spinlock template as well as a thread work pool class.

Also, optimized shader compilation to happen on threads.
Juan Linietsky, 6 years ago
parent
commit
c613ead5fa
37 changed files with 430 additions and 164 deletions
  1. core/bind/core_bind.cpp (+1 -1)
  2. core/bind/core_bind.h (+1 -1)
  3. core/command_queue_mt.cpp (+2 -2)
  4. core/command_queue_mt.h (+2 -2)
  5. core/io/file_access_network.cpp (+3 -3)
  6. core/io/file_access_network.h (+3 -3)
  7. core/io/ip.cpp (+2 -2)
  8. core/os/semaphore.cpp (+3 -3)
  9. core/os/semaphore.h (+37 -3)
  10. core/os/thread_dummy.cpp (+2 -2)
  11. core/os/thread_dummy.h (+2 -2)
  12. core/rid_owner.h (+59 -8)
  13. core/spin_lock.h (+20 -0)
  14. core/thread_work_pool.cpp (+53 -0)
  15. core/thread_work_pool.h (+78 -0)
  16. drivers/unix/semaphore_posix.cpp (+1 -1)
  17. drivers/unix/semaphore_posix.h (+2 -2)
  18. drivers/vulkan/rendering_device_vulkan.cpp (+2 -2)
  19. drivers/vulkan/rendering_device_vulkan.h (+13 -13)
  20. drivers/windows/semaphore_windows.cpp (+1 -1)
  21. drivers/windows/semaphore_windows.h (+2 -2)
  22. editor/editor_resource_preview.cpp (+1 -1)
  23. editor/editor_resource_preview.h (+1 -1)
  24. modules/glslang/register_types.cpp (+4 -4)
  25. platform/iphone/semaphore_iphone.cpp (+1 -1)
  26. platform/iphone/semaphore_iphone.h (+2 -2)
  27. platform/osx/semaphore_osx.cpp (+1 -1)
  28. platform/osx/semaphore_osx.h (+2 -2)
  29. servers/physics_2d/physics_2d_server_wrap_mt.cpp (+1 -1)
  30. servers/physics_2d/physics_2d_server_wrap_mt.h (+1 -1)
  31. servers/visual/rasterizer_rd/rasterizer_rd.cpp (+6 -1)
  32. servers/visual/rasterizer_rd/rasterizer_rd.h (+4 -0)
  33. servers/visual/rasterizer_rd/rasterizer_storage_rd.h (+3 -2)
  34. servers/visual/rasterizer_rd/shader_rd.cpp (+107 -91)
  35. servers/visual/rasterizer_rd/shader_rd.h (+5 -1)
  36. servers/visual/visual_server_scene.cpp (+1 -1)
  37. servers/visual/visual_server_scene.h (+1 -1)
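
For orientation, here is a condensed sketch of how the new pieces are wired together. It is not part of the diff, only a simplified view of the RasterizerRD and ShaderRD changes shown below:

    // Condensed, illustrative only; see the full diffs below for the real code.
    RasterizerRD::RasterizerRD() {
        thread_work_pool.init(); // spawns one worker thread per processor by default
        // ...
    }

    void ShaderRD::_compile_version(Version *p_version) {
        // Each variant index is handed to a pool worker; _compile_variant writes its
        // result into p_version->variants under variant_set_mutex.
        RasterizerRD::thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version);
        // ...
    }

    void RasterizerRD::finalize() {
        thread_work_pool.finish(); // joins and frees the worker threads
        // ...
    }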

+ 1 - 1
core/bind/core_bind.cpp

@@ -2612,7 +2612,7 @@ void _Semaphore::_bind_methods() {
 
 _Semaphore::_Semaphore() {
 
-	semaphore = Semaphore::create();
+	semaphore = SemaphoreOld::create();
 }
 
 _Semaphore::~_Semaphore() {

+ 1 - 1
core/bind/core_bind.h

@@ -642,7 +642,7 @@ public:
 class _Semaphore : public Reference {
 
 	GDCLASS(_Semaphore, Reference);
-	Semaphore *semaphore;
+	SemaphoreOld *semaphore;
 
 	static void _bind_methods();
 

+ 2 - 2
core/command_queue_mt.cpp

@@ -111,11 +111,11 @@ CommandQueueMT::CommandQueueMT(bool p_sync) {
 
 	for (int i = 0; i < SYNC_SEMAPHORES; i++) {
 
-		sync_sems[i].sem = Semaphore::create();
+		sync_sems[i].sem = SemaphoreOld::create();
 		sync_sems[i].in_use = false;
 	}
 	if (p_sync)
-		sync = Semaphore::create();
+		sync = SemaphoreOld::create();
 	else
 		sync = NULL;
 }

+ 2 - 2
core/command_queue_mt.h

@@ -297,7 +297,7 @@ class CommandQueueMT {
 
 	struct SyncSemaphore {
 
-		Semaphore *sem;
+		SemaphoreOld *sem;
 		bool in_use;
 	};
 
@@ -342,7 +342,7 @@ class CommandQueueMT {
 	uint32_t dealloc_ptr;
 	SyncSemaphore sync_sems[SYNC_SEMAPHORES];
 	Mutex *mutex;
-	Semaphore *sync;
+	SemaphoreOld *sync;
 
 	template <class T>
 	T *allocate() {

+ 3 - 3
core/io/file_access_network.cpp

@@ -231,7 +231,7 @@ FileAccessNetworkClient::FileAccessNetworkClient() {
 	singleton = this;
 	last_id = 0;
 	client.instance();
-	sem = Semaphore::create();
+	sem = SemaphoreOld::create();
 	lockcount = 0;
 }
 
@@ -522,8 +522,8 @@ FileAccessNetwork::FileAccessNetwork() {
 	eof_flag = false;
 	opened = false;
 	pos = 0;
-	sem = Semaphore::create();
-	page_sem = Semaphore::create();
+	sem = SemaphoreOld::create();
+	page_sem = SemaphoreOld::create();
 	buffer_mutex = Mutex::create();
 	FileAccessNetworkClient *nc = FileAccessNetworkClient::singleton;
 	nc->lock_mutex();

+ 3 - 3
core/io/file_access_network.h

@@ -49,7 +49,7 @@ class FileAccessNetworkClient {
 
 	List<BlockRequest> block_requests;
 
-	Semaphore *sem;
+	SemaphoreOld *sem;
 	Thread *thread;
 	bool quit;
 	Mutex *mutex;
@@ -85,8 +85,8 @@ public:
 
 class FileAccessNetwork : public FileAccess {
 
-	Semaphore *sem;
-	Semaphore *page_sem;
+	SemaphoreOld *sem;
+	SemaphoreOld *page_sem;
 	Mutex *buffer_mutex;
 	bool opened;
 	size_t total_size;

+ 2 - 2
core/io/ip.cpp

@@ -71,7 +71,7 @@ struct _IP_ResolverPrivate {
 	}
 
 	Mutex *mutex;
-	Semaphore *sem;
+	SemaphoreOld *sem;
 
 	Thread *thread;
 	//Semaphore* semaphore;
@@ -319,7 +319,7 @@ IP::IP() {
 
 #ifndef NO_THREADS
 
-	resolver->sem = Semaphore::create();
+	resolver->sem = SemaphoreOld::create();
 	if (resolver->sem) {
 		resolver->thread_abort = false;
 

+ 3 - 3
core/os/semaphore.cpp

@@ -32,14 +32,14 @@
 
 #include "core/error_macros.h"
 
-Semaphore *(*Semaphore::create_func)() = 0;
+SemaphoreOld *(*SemaphoreOld::create_func)() = 0;
 
-Semaphore *Semaphore::create() {
+SemaphoreOld *SemaphoreOld::create() {
 
 	ERR_FAIL_COND_V(!create_func, 0);
 
 	return create_func();
 }
 
-Semaphore::~Semaphore() {
+SemaphoreOld::~SemaphoreOld() {
 }

+ 37 - 3
core/os/semaphore.h

@@ -32,19 +32,53 @@
 #define SEMAPHORE_H
 
 #include "core/error_list.h"
+#include "core/typedefs.h"
+
+#include <condition_variable>
+#include <mutex>
 
 class Semaphore {
+private:
+	std::mutex mutex_;
+	std::condition_variable condition_;
+	unsigned long count_ = 0; // Initialized as locked.
+
+public:
+	_ALWAYS_INLINE_ void post() {
+		std::lock_guard<decltype(mutex_)> lock(mutex_);
+		++count_;
+		condition_.notify_one();
+	}
+
+	_ALWAYS_INLINE_ void wait() {
+		std::unique_lock<decltype(mutex_)> lock(mutex_);
+		while (!count_) // Handle spurious wake-ups.
+			condition_.wait(lock);
+		--count_;
+	}
+
+	_ALWAYS_INLINE_ bool try_wait() {
+		std::lock_guard<decltype(mutex_)> lock(mutex_);
+		if (count_) {
+			--count_;
+			return true;
+		}
+		return false;
+	}
+};
+
+class SemaphoreOld {
 protected:
-	static Semaphore *(*create_func)();
+	static SemaphoreOld *(*create_func)();
 
 public:
 	virtual Error wait() = 0; ///< wait until semaphore has positive value, then decrement and pass
 	virtual Error post() = 0; ///< unlock the semaphore, incrementing the    value
 	virtual int get() const = 0; ///< get semaphore value
 
-	static Semaphore *create(); ///< Create a mutex
+	static SemaphoreOld *create(); ///< Create a mutex
 
-	virtual ~Semaphore();
+	virtual ~SemaphoreOld();
 };
 
 #endif
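
Below is a minimal usage sketch for the new C++11 Semaphore declared above (post/wait/try_wait); the producer and consumer functions are hypothetical and not part of the commit:

    // Hypothetical example; only Semaphore and its methods come from the diff above.
    Semaphore items_available;

    void producer() {
        // ... enqueue an item somewhere ...
        items_available.post(); // increment the count and wake one waiter
    }

    void consumer() {
        items_available.wait(); // block until the count is positive, then decrement it
        // ... dequeue and handle the item ...
        if (items_available.try_wait()) {
            // non-blocking path: a second item had already been posted
        }
    }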

+ 2 - 2
core/os/thread_dummy.cpp

@@ -48,12 +48,12 @@ void MutexDummy::make_default() {
 	Mutex::create_func = &MutexDummy::create;
 };
 
-Semaphore *SemaphoreDummy::create() {
+SemaphoreOld *SemaphoreDummy::create() {
 	return memnew(SemaphoreDummy);
 };
 
 void SemaphoreDummy::make_default() {
-	Semaphore::create_func = &SemaphoreDummy::create;
+	SemaphoreOld::create_func = &SemaphoreDummy::create;
 };
 
 RWLock *RWLockDummy::create() {

+ 2 - 2
core/os/thread_dummy.h

@@ -58,9 +58,9 @@ public:
 	static void make_default();
 };
 
-class SemaphoreDummy : public Semaphore {
+class SemaphoreDummy : public SemaphoreOld {
 
-	static Semaphore *create();
+	static SemaphoreOld *create();
 
 public:
 	virtual Error wait() { return OK; };

+ 59 - 8
core/rid_owner.h

@@ -3,6 +3,8 @@
 
 #include "core/print_string.h"
 #include "core/rid.h"
+#include "core/spin_lock.h"
+
 #include <typeinfo>
 
 class RID_AllocBase {
@@ -28,7 +30,7 @@ public:
 	virtual ~RID_AllocBase() {}
 };
 
-template <class T>
+template <class T, bool THREAD_SAFE = false>
 class RID_Alloc : public RID_AllocBase {
 
 	T **chunks;
@@ -41,9 +43,15 @@ class RID_Alloc : public RID_AllocBase {
 
 	const char *description;
 
+	SpinLock spin_lock;
+
 public:
 	RID make_rid(const T &p_value) {
 
+		if (THREAD_SAFE) {
+			spin_lock.lock();
+		}
+
 		if (alloc_count == max_alloc) {
 			//allocate a new chunk
 			uint32_t chunk_count = alloc_count == 0 ? 0 : (max_alloc / elements_in_chunk);
@@ -85,11 +93,19 @@ public:
 		validator_chunks[free_chunk][free_element] = validator;
 		alloc_count++;
 
+		if (THREAD_SAFE) {
+			spin_lock.unlock();
+		}
+
 		return _make_from_id(id);
 	}
 
 	_FORCE_INLINE_ T *getornull(const RID &p_rid) {
 
+		if (THREAD_SAFE) {
+			spin_lock.lock();
+		}
+
 		uint64_t id = p_rid.get_id();
 		uint32_t idx = uint32_t(id & 0xFFFFFFFF);
 		if (unlikely(idx >= max_alloc)) {
@@ -104,14 +120,27 @@ public:
 			return NULL;
 		}
 
-		return &chunks[idx_chunk][idx_element];
+		T *ptr = &chunks[idx_chunk][idx_element];
+
+		if (THREAD_SAFE) {
+			spin_lock.unlock();
+		}
+
+		return ptr;
 	}
 
 	_FORCE_INLINE_ bool owns(const RID &p_rid) {
 
+		if (THREAD_SAFE) {
+			spin_lock.lock();
+		}
+
 		uint64_t id = p_rid.get_id();
 		uint32_t idx = uint32_t(id & 0xFFFFFFFF);
 		if (unlikely(idx >= max_alloc)) {
+			if (THREAD_SAFE) {
+				spin_lock.unlock();
+			}
 			return false;
 		}
 
@@ -119,11 +148,22 @@ public:
 		uint32_t idx_element = idx % elements_in_chunk;
 
 		uint32_t validator = uint32_t(id >> 32);
-		return validator_chunks[idx_chunk][idx_element] == validator;
+
+		bool owned = validator_chunks[idx_chunk][idx_element] == validator;
+
+		if (THREAD_SAFE) {
+			spin_lock.unlock();
+		}
+
+		return owned;
 	}
 
 	_FORCE_INLINE_ void free(const RID &p_rid) {
 
+		if (THREAD_SAFE) {
+			spin_lock.lock();
+		}
+
 		uint64_t id = p_rid.get_id();
 		uint32_t idx = uint32_t(id & 0xFFFFFFFF);
 		ERR_FAIL_COND(idx >= max_alloc);
@@ -139,6 +179,10 @@ public:
 
 		alloc_count--;
 		free_list_chunks[alloc_count / elements_in_chunk][alloc_count % elements_in_chunk] = idx;
+
+		if (THREAD_SAFE) {
+			spin_lock.unlock();
+		}
 	}
 
 	_FORCE_INLINE_ uint32_t get_rid_count() const {
@@ -147,8 +191,15 @@ public:
 
 	_FORCE_INLINE_ T *get_rid_by_index(uint32_t p_index) {
 		ERR_FAIL_INDEX_V(p_index, alloc_count, NULL);
+		if (THREAD_SAFE) {
+			spin_lock.lock();
+		}
 		uint64_t idx = free_list_chunks[p_index / elements_in_chunk][p_index % elements_in_chunk];
-		return &chunks[idx / elements_in_chunk][idx % elements_in_chunk];
+		T *ptr = &chunks[idx / elements_in_chunk][idx % elements_in_chunk];
+		if (THREAD_SAFE) {
+			spin_lock.unlock();
+		}
+		return ptr;
 	}
 
 	void get_owned_list(List<RID> *p_owned) {
@@ -203,9 +254,9 @@ public:
 	}
 };
 
-template <class T>
+template <class T, bool THREAD_SAFE = false>
 class RID_PtrOwner {
-	RID_Alloc<T *> alloc;
+	RID_Alloc<T *, THREAD_SAFE> alloc;
 
 public:
 	_FORCE_INLINE_ RID make_rid(T *p_ptr) {
@@ -239,9 +290,9 @@ public:
 			alloc(p_target_chunk_byte_size) {}
 };
 
-template <class T>
+template <class T, bool THREAD_SAFE = false>
 class RID_Owner {
-	RID_Alloc<T> alloc;
+	RID_Alloc<T, THREAD_SAFE> alloc;
 
 public:
 	_FORCE_INLINE_ RID make_rid(const T &p_ptr) {
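
The second template parameter added above opts an owner into locking; a brief, hedged sketch follows (the Texture type and the getornull call are assumed to match the existing RID_Owner interface):

    // Illustrative only; mirrors the owner declarations changed in rendering_device_vulkan.h below.
    RID_Owner<Texture, true> texture_owner; // THREAD_SAFE = true: make_rid/getornull/free take the SpinLock
    RID_Owner<Texture> local_owner;         // default (THREAD_SAFE = false): no locking overhead

    RID rid = texture_owner.make_rid(Texture()); // safe to call from a worker thread
    Texture *t = texture_owner.getornull(rid);   // lookups are guarded the same way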

+ 20 - 0
core/spin_lock.h

@@ -0,0 +1,20 @@
+#ifndef SPIN_LOCK_H
+#define SPIN_LOCK_H
+
+#include "core/typedefs.h"
+#include <atomic>
+
+class SpinLock {
+	std::atomic_flag locked = ATOMIC_FLAG_INIT;
+
+public:
+	_ALWAYS_INLINE_ void lock() {
+		while (locked.test_and_set(std::memory_order_acquire)) {
+			;
+		}
+	}
+	_ALWAYS_INLINE_ void unlock() {
+		locked.clear(std::memory_order_release);
+	}
+};
+#endif // SPIN_LOCK_H
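
A short usage sketch for the new SpinLock (hypothetical): waiters busy-wait instead of sleeping, so it suits the very short critical sections used in RID_Alloc above:

    // Hypothetical example of the SpinLock added above.
    SpinLock counter_lock;
    uint64_t counter = 0;

    void increment() {
        counter_lock.lock();   // spins on the atomic_flag until acquired
        counter++;             // keep this section short; contended threads burn CPU while waiting
        counter_lock.unlock(); // clears the flag with memory_order_release
    }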

+ 53 - 0
core/thread_work_pool.cpp

@@ -0,0 +1,53 @@
+#include "thread_work_pool.h"
+#include "core/os/os.h"
+
+void ThreadWorkPool::_thread_function(ThreadData *p_thread) {
+
+	while (true) {
+		p_thread->start.wait();
+		if (p_thread->exit.load()) {
+			break;
+		}
+		p_thread->work->work();
+		p_thread->completed.post();
+	}
+}
+
+void ThreadWorkPool::init(int p_thread_count) {
+	ERR_FAIL_COND(threads != nullptr);
+	if (p_thread_count < 0) {
+		p_thread_count = OS::get_singleton()->get_processor_count();
+	}
+
+	thread_count = p_thread_count;
+	threads = memnew_arr(ThreadData, thread_count);
+
+	for (uint32_t i = 0; i < thread_count; i++) {
+		threads[i].exit.store(false);
+		threads[i].thread = memnew(std::thread(ThreadWorkPool::_thread_function, &threads[i]));
+	}
+}
+
+void ThreadWorkPool::finish() {
+
+	if (threads == nullptr) {
+		return;
+	}
+
+	for (uint32_t i = 0; i < thread_count; i++) {
+		threads[i].exit.store(true);
+		threads[i].start.post();
+	}
+	for (uint32_t i = 0; i < thread_count; i++) {
+		threads[i].thread->join();
+		memdelete(threads[i].thread);
+	}
+
+	memdelete_arr(threads);
+	threads = nullptr;
+}
+
+ThreadWorkPool::~ThreadWorkPool() {
+
+	finish();
+}

+ 78 - 0
core/thread_work_pool.h

@@ -0,0 +1,78 @@
+#ifndef THREAD_WORK_POOL_H
+#define THREAD_WORK_POOL_H
+
+#include "core/os/memory.h"
+#include "core/os/semaphore.h"
+#include <atomic>
+#include <thread>
+class ThreadWorkPool {
+
+	std::atomic<uint32_t> index;
+
+	struct BaseWork {
+		std::atomic<uint32_t> *index;
+		uint32_t max_elements;
+		virtual void work() = 0;
+	};
+
+	template <class C, class M, class U>
+	struct Work : public BaseWork {
+		C *instance;
+		M method;
+		U userdata;
+		virtual void work() {
+
+			while (true) {
+				uint32_t work_index = index->fetch_add(1, std::memory_order_relaxed);
+				if (work_index >= max_elements) {
+					break;
+				}
+				(instance->*method)(work_index, userdata);
+			}
+		}
+	};
+
+	struct ThreadData {
+		std::thread *thread;
+		Semaphore start;
+		Semaphore completed;
+		std::atomic<bool> exit;
+		BaseWork *work;
+	};
+
+	ThreadData *threads = nullptr;
+	uint32_t thread_count = 0;
+
+	static void _thread_function(ThreadData *p_thread);
+
+public:
+	template <class C, class M, class U>
+	void do_work(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
+
+		ERR_FAIL_COND(!threads); //never initialized
+
+		index.store(0);
+
+		Work<C, M, U> *w = memnew((Work<C, M, U>));
+		w->instance = p_instance;
+		w->userdata = p_userdata;
+		w->method = p_method;
+		w->index = &index;
+		w->max_elements = p_elements;
+
+		for (uint32_t i = 0; i < thread_count; i++) {
+			threads[i].work = w;
+			threads[i].start.post();
+		}
+		for (uint32_t i = 0; i < thread_count; i++) {
+			threads[i].completed.wait();
+			threads[i].work = nullptr;
+		}
+	}
+
+	void init(int p_thread_count = -1);
+	void finish();
+	~ThreadWorkPool();
+};
+
+#endif // THREAD_POOL_H
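
A hedged sketch of driving the new ThreadWorkPool; everything here except ThreadWorkPool and its init/do_work/finish methods is hypothetical:

    // Hypothetical caller. do_work() hands out indices 0..p_elements-1 to the pool's
    // threads via an atomic counter and blocks until every index has been processed.
    struct ImageFilter {
        void process_row(uint32_t p_row, float p_strength) {
            // ... filter one row of pixels ...
        }
    };

    ThreadWorkPool pool;
    ImageFilter filter;

    void run() {
        pool.init();                                                  // -1 = one thread per processor
        pool.do_work(1080, &filter, &ImageFilter::process_row, 0.5f); // returns once all rows are done
        pool.finish();                                                // joins the workers (also done by the destructor)
    }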

+ 1 - 1
drivers/unix/semaphore_posix.cpp

@@ -62,7 +62,7 @@ int SemaphorePosix::get() const {
 	return val;
 }
 
-Semaphore *SemaphorePosix::create_semaphore_posix() {
+SemaphoreOld *SemaphorePosix::create_semaphore_posix() {
 
 	return memnew(SemaphorePosix);
 }

+ 2 - 2
drivers/unix/semaphore_posix.h

@@ -37,11 +37,11 @@
 
 #include <semaphore.h>
 
-class SemaphorePosix : public Semaphore {
+class SemaphorePosix : public SemaphoreOld {
 
 	mutable sem_t sem;
 
-	static Semaphore *create_semaphore_posix();
+	static SemaphoreOld *create_semaphore_posix();
 
 public:
 	virtual Error wait();

+ 2 - 2
drivers/vulkan/rendering_device_vulkan.cpp

@@ -3448,8 +3448,6 @@ bool RenderingDeviceVulkan::_uniform_add_binding(Vector<Vector<VkDescriptorSetLa
 
 RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages) {
 
-	_THREAD_SAFE_METHOD_
-
 	//descriptor layouts
 	Vector<Vector<VkDescriptorSetLayoutBinding> > set_bindings;
 	Vector<Vector<UniformInfo> > uniform_info;
@@ -3694,6 +3692,8 @@ RID RenderingDeviceVulkan::shader_create(const Vector<ShaderStageData> &p_stages
 
 	//all good, let's create modules
 
+	_THREAD_SAFE_METHOD_
+
 	Shader shader;
 
 	shader.vertex_input_locations = vertex_input_locations;

+ 13 - 13
drivers/vulkan/rendering_device_vulkan.h

@@ -114,7 +114,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		RID owner;
 	};
 
-	RID_Owner<Texture> texture_owner;
+	RID_Owner<Texture, true> texture_owner;
 	uint32_t texture_upload_region_size_px;
 
 	PoolVector<uint8_t> _texture_get_data_from_image(Texture *tex, VkImage p_image, VmaAllocation p_allocation, uint32_t p_layer);
@@ -264,7 +264,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		Size2 size;
 	};
 
-	RID_Owner<Framebuffer> framebuffer_owner;
+	RID_Owner<Framebuffer, true> framebuffer_owner;
 
 	/***********************/
 	/**** VERTEX BUFFER ****/
@@ -279,7 +279,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 	// This mapping is done here internally, and it's not
 	// exposed.
 
-	RID_Owner<Buffer> vertex_buffer_owner;
+	RID_Owner<Buffer, true> vertex_buffer_owner;
 
 	struct VertexDescriptionKey {
 		Vector<VertexDescription> vertex_formats;
@@ -359,7 +359,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		Vector<VkDeviceSize> offsets;
 	};
 
-	RID_Owner<VertexArray> vertex_array_owner;
+	RID_Owner<VertexArray, true> vertex_array_owner;
 
 	struct IndexBuffer : public Buffer {
 		uint32_t max_index; //used for validation
@@ -368,7 +368,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		bool supports_restart_indices;
 	};
 
-	RID_Owner<IndexBuffer> index_buffer_owner;
+	RID_Owner<IndexBuffer, true> index_buffer_owner;
 
 	struct IndexArray {
 		uint32_t max_index; //remember the maximum index here too, for validation
@@ -379,7 +379,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		bool supports_restart_indices;
 	};
 
-	RID_Owner<IndexArray> index_array_owner;
+	RID_Owner<IndexArray, true> index_array_owner;
 
 	/****************/
 	/**** SHADER ****/
@@ -495,7 +495,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 
 	String _shader_uniform_debug(RID p_shader, int p_set = -1);
 
-	RID_Owner<Shader> shader_owner;
+	RID_Owner<Shader, true> shader_owner;
 
 	/******************/
 	/**** UNIFORMS ****/
@@ -559,8 +559,8 @@ class RenderingDeviceVulkan : public RenderingDevice {
 	DescriptorPool *_descriptor_pool_allocate(const DescriptorPoolKey &p_key);
 	void _descriptor_pool_free(const DescriptorPoolKey &p_key, DescriptorPool *p_pool);
 
-	RID_Owner<Buffer> uniform_buffer_owner;
-	RID_Owner<Buffer> storage_buffer_owner;
+	RID_Owner<Buffer, true> uniform_buffer_owner;
+	RID_Owner<Buffer, true> storage_buffer_owner;
 
 	//texture buffer needs a view
 	struct TextureBuffer {
@@ -568,7 +568,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		VkBufferView view;
 	};
 
-	RID_Owner<TextureBuffer> texture_buffer_owner;
+	RID_Owner<TextureBuffer, true> texture_buffer_owner;
 
 	// This structure contains the descriptor set. They _need_ to be allocated
 	// for a shader (and will be erased when this shader is erased), but should
@@ -589,7 +589,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		Vector<RID> attachable_textures; //used for validation
 	};
 
-	RID_Owner<UniformSet> uniform_set_owner;
+	RID_Owner<UniformSet, true> uniform_set_owner;
 
 	/*******************/
 	/**** PIPELINES ****/
@@ -607,7 +607,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 	// was not supplied as intended.
 
 	struct RenderPipeline {
-	//Cached values for validation
+		//Cached values for validation
 #ifdef DEBUG_ENABLED
 		struct Validation {
 			FramebufferFormatID framebuffer_format;
@@ -627,7 +627,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
 		uint32_t push_constant_stages;
 	};
 
-	RID_Owner<RenderPipeline> pipeline_owner;
+	RID_Owner<RenderPipeline, true> pipeline_owner;
 
 	/*******************/
 	/**** DRAW LIST ****/

+ 1 - 1
drivers/windows/semaphore_windows.cpp

@@ -61,7 +61,7 @@ int SemaphoreWindows::get() const {
 	ERR_FAIL_V(-1);
 }
 
-Semaphore *SemaphoreWindows::create_semaphore_windows() {
+SemaphoreOld *SemaphoreWindows::create_semaphore_windows() {
 
 	return memnew(SemaphoreWindows);
 }

+ 2 - 2
drivers/windows/semaphore_windows.h

@@ -37,11 +37,11 @@
 
 #include <windows.h>
 
-class SemaphoreWindows : public Semaphore {
+class SemaphoreWindows : public SemaphoreOld {
 
 	mutable HANDLE semaphore;
 
-	static Semaphore *create_semaphore_windows();
+	static SemaphoreOld *create_semaphore_windows();
 
 public:
 	virtual Error wait();

+ 1 - 1
editor/editor_resource_preview.cpp

@@ -476,7 +476,7 @@ EditorResourcePreview::EditorResourcePreview() {
 	thread = NULL;
 	singleton = this;
 	preview_mutex = Mutex::create();
-	preview_sem = Semaphore::create();
+	preview_sem = SemaphoreOld::create();
 	order = 0;
 	exit = false;
 	exited = false;

+ 1 - 1
editor/editor_resource_preview.h

@@ -71,7 +71,7 @@ class EditorResourcePreview : public Node {
 	List<QueueItem> queue;
 
 	Mutex *preview_mutex;
-	Semaphore *preview_sem;
+	SemaphoreOld *preview_sem;
 	Thread *thread;
 	volatile bool exit;
 	volatile bool exited;

+ 4 - 4
modules/glslang/register_types.cpp

@@ -140,9 +140,6 @@ static PoolVector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_s
 
 	ERR_FAIL_COND_V(p_language==RenderingDevice::SHADER_LANGUAGE_HLSL,ret);
 
-	// initialize in case it's not initialized. This is done once per thread
-	// and it's safe to call multiple times
-	glslang::InitializeProcess();
 	EShLanguage stages[RenderingDevice::SHADER_STAGE_MAX] = {
 		EShLangVertex,
 		EShLangFragment,
@@ -229,6 +226,9 @@ static PoolVector<uint8_t> _compile_shader_glsl(RenderingDevice::ShaderStage p_s
 }
 
 void preregister_glslang_types() {
+	// initialize in case it's not initialized. This is done once per thread
+	// and it's safe to call multiple times
+	glslang::InitializeProcess();
 	RenderingDevice::shader_set_compile_function(_compile_shader_glsl);
 }
 
@@ -236,5 +236,5 @@ void register_glslang_types() {
 }
 void unregister_glslang_types() {
 
-
+	glslang::FinalizeProcess();
 }

+ 1 - 1
platform/iphone/semaphore_iphone.cpp

@@ -91,7 +91,7 @@ int SemaphoreIphone::get() const {
 	return 0;
 }
 
-Semaphore *SemaphoreIphone::create_semaphore_iphone() {
+SemaphoreOld *SemaphoreIphone::create_semaphore_iphone() {
 
 	return memnew(SemaphoreIphone);
 }

+ 2 - 2
platform/iphone/semaphore_iphone.h

@@ -39,11 +39,11 @@ typedef struct cgsem cgsem_t;
 
 #include "core/os/semaphore.h"
 
-class SemaphoreIphone : public Semaphore {
+class SemaphoreIphone : public SemaphoreOld {
 
 	mutable cgsem_t sem;
 
-	static Semaphore *create_semaphore_iphone();
+	static SemaphoreOld *create_semaphore_iphone();
 
 public:
 	virtual Error wait();

+ 1 - 1
platform/osx/semaphore_osx.cpp

@@ -86,7 +86,7 @@ int SemaphoreOSX::get() const {
 	return 0;
 }
 
-Semaphore *SemaphoreOSX::create_semaphore_osx() {
+SemaphoreOld *SemaphoreOSX::create_semaphore_osx() {
 
 	return memnew(SemaphoreOSX);
 }

+ 2 - 2
platform/osx/semaphore_osx.h

@@ -39,11 +39,11 @@ typedef struct cgsem cgsem_t;
 
 #include "core/os/semaphore.h"
 
-class SemaphoreOSX : public Semaphore {
+class SemaphoreOSX : public SemaphoreOld {
 
 	mutable cgsem_t sem;
 
-	static Semaphore *create_semaphore_osx();
+	static SemaphoreOld *create_semaphore_osx();
 
 public:
 	virtual Error wait();

+ 1 - 1
servers/physics_2d/physics_2d_server_wrap_mt.cpp

@@ -107,7 +107,7 @@ void Physics2DServerWrapMT::init() {
 
 	if (create_thread) {
 
-		step_sem = Semaphore::create();
+		step_sem = SemaphoreOld::create();
 		//OS::get_singleton()->release_rendering_thread();
 		if (create_thread) {
 			thread = Thread::create(_thread_callback, this);

+ 1 - 1
servers/physics_2d/physics_2d_server_wrap_mt.h

@@ -58,7 +58,7 @@ class Physics2DServerWrapMT : public Physics2DServer {
 	volatile bool step_thread_up;
 	bool create_thread;
 
-	Semaphore *step_sem;
+	SemaphoreOld *step_sem;
 	int step_pending;
 	void thread_step(real_t p_delta);
 	void thread_flush();

+ 6 - 1
servers/visual/rasterizer_rd/rasterizer_rd.cpp

@@ -76,7 +76,7 @@ void RasterizerRD::initialize() {
 
 		RenderingDevice::ShaderStageData frag;
 		frag.shader_stage = RenderingDevice::SHADER_STAGE_FRAGMENT;
-		frag.spir_v =RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT,
+		frag.spir_v = RenderingDevice::get_singleton()->shader_compile_from_source(RenderingDevice::SHADER_STAGE_FRAGMENT,
 				"#version 450\n"
 				"layout (location = 0) in vec2 uv;\n"
 				"layout (location = 0) out vec4 color;\n"
@@ -120,8 +120,12 @@ void RasterizerRD::initialize() {
 	}
 }
 
+ThreadWorkPool RasterizerRD::thread_work_pool;
+
 void RasterizerRD::finalize() {
 
+	thread_work_pool.finish();
+
 	memdelete(scene);
 	memdelete(canvas);
 	memdelete(storage);
@@ -133,6 +137,7 @@ void RasterizerRD::finalize() {
 }
 
 RasterizerRD::RasterizerRD() {
+	thread_work_pool.init();
 	time = 0;
 	storage = memnew(RasterizerStorageRD);
 	canvas = memnew(RasterizerCanvasRD(storage));

+ 4 - 0
servers/visual/rasterizer_rd/rasterizer_rd.h

@@ -2,10 +2,12 @@
 #define RASTERIZER_RD_H
 
 #include "core/os/os.h"
+#include "core/thread_work_pool.h"
 #include "servers/visual/rasterizer.h"
 #include "servers/visual/rasterizer_rd/rasterizer_canvas_rd.h"
 #include "servers/visual/rasterizer_rd/rasterizer_scene_forward_rd.h"
 #include "servers/visual/rasterizer_rd/rasterizer_storage_rd.h"
+
 class RasterizerRD : public Rasterizer {
 protected:
 	RasterizerCanvasRD *canvas;
@@ -51,6 +53,8 @@ public:
 
 	virtual bool is_low_end() const { return true; }
 
+	static ThreadWorkPool thread_work_pool;
+
 	RasterizerRD();
 	~RasterizerRD() {}
 };

+ 3 - 2
servers/visual/rasterizer_rd/rasterizer_storage_rd.h

@@ -4,8 +4,8 @@
 #include "core/rid_owner.h"
 #include "servers/visual/rasterizer.h"
 #include "servers/visual/rasterizer_rd/effects_rd.h"
-#include "servers/visual/rendering_device.h"
 #include "servers/visual/rasterizer_rd/shader_compiler_rd.h"
+#include "servers/visual/rendering_device.h"
 
 class RasterizerStorageRD : public RasterizerStorage {
 public:
@@ -109,7 +109,8 @@ private:
 		}
 	};
 
-	mutable RID_Owner<Texture> texture_owner;
+	//textures can be created from threads, so this RID_Owner is thread safe
+	mutable RID_Owner<Texture, true> texture_owner;
 
 	Ref<Image> _validate_texture_format(const Ref<Image> &p_image, TextureToRDFormat &r_format);
 

+ 107 - 91
servers/visual/rasterizer_rd/shader_rd.cpp

@@ -30,6 +30,7 @@
 
 #include "shader_rd.h"
 #include "core/string_builder.h"
+#include "rasterizer_rd.h"
 #include "servers/visual/rendering_device.h"
 
 void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_name) {
@@ -160,142 +161,157 @@ void ShaderRD::_clear_version(Version *p_version) {
 		p_version->variants = NULL;
 	}
 }
-void ShaderRD::_compile_version(Version *p_version) {
 
-	_clear_version(p_version);
+void ShaderRD::_compile_variant(uint32_t p_variant, Version *p_version) {
 
-	p_version->valid = false;
-	p_version->dirty = false;
+	Vector<RD::ShaderStageData> stages;
 
-	p_version->variants = memnew_arr(RID, variant_defines.size());
+	String error;
+	String current_source;
+	RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX;
+	bool build_ok = true;
 
-	for (int i = 0; i < variant_defines.size(); i++) {
+	{
+		//vertex stage
 
-		Vector<RD::ShaderStageData> stages;
+		StringBuilder builder;
 
-		String error;
-		String current_source;
-		RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX;
-		bool build_ok=true;
+		builder.append(vertex_codev.get_data()); // version info (if exists)
+		builder.append("\n"); //make sure defines begin at newline
+		builder.append(general_defines.get_data());
+		builder.append(variant_defines[p_variant].get_data());
 
-		{
-			//vertex stage
+		for (int j = 0; j < p_version->custom_defines.size(); j++) {
+			builder.append(p_version->custom_defines[j].get_data());
+		}
 
-			StringBuilder builder;
+		builder.append(vertex_code0.get_data()); //first part of vertex
 
-			builder.append(vertex_codev.get_data()); // version info (if exists)
-			builder.append("\n"); //make sure defines begin at newline
-			builder.append(general_defines.get_data());
-			builder.append(variant_defines[i].get_data());
+		builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
 
-			for (int j = 0; j < p_version->custom_defines.size(); j++) {
-				builder.append(p_version->custom_defines[j].get_data());
-			}
+		builder.append(vertex_code1.get_data()); //second part of vertex
 
-			builder.append(vertex_code0.get_data()); //first part of vertex
+		builder.append(p_version->vertex_globals.get_data()); // vertex globals
 
-			builder.append(p_version->uniforms.get_data()); //uniforms (same for vertex and fragment)
+		builder.append(vertex_code2.get_data()); //third part of vertex
 
-			builder.append(vertex_code1.get_data()); //second part of vertex
+		builder.append(p_version->vertex_code.get_data()); // code
 
-			builder.append(p_version->vertex_globals.get_data()); // vertex globals
+		builder.append(vertex_code3.get_data()); //fourth of vertex
 
-			builder.append(vertex_code2.get_data()); //third part of vertex
+		current_source = builder.as_string();
+		RD::ShaderStageData stage;
+		stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
+		if (stage.spir_v.size() == 0) {
+			build_ok = false;
+		} else {
 
-			builder.append(p_version->vertex_code.get_data()); // code
+			stage.shader_stage = RD::SHADER_STAGE_VERTEX;
+			stages.push_back(stage);
+		}
+	}
 
-			builder.append(vertex_code3.get_data()); //fourth of vertex
+	if (build_ok) {
+		//fragment stage
+		current_stage = RD::SHADER_STAGE_FRAGMENT;
 
-			current_source = builder.as_string();
-			RD::ShaderStageData stage;
-			stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_VERTEX,current_source,RD::SHADER_LANGUAGE_GLSL,&error);
-			if (stage.spir_v.size()==0) {
-				build_ok=false;
-			} else {
+		StringBuilder builder;
 
-				stage.shader_stage = RD::SHADER_STAGE_VERTEX;
-				stages.push_back(stage);
-			}
+		builder.append(fragment_codev.get_data()); // version info (if exists)
+		builder.append("\n"); //make sure defines begin at newline
+
+		builder.append(general_defines.get_data());
+		builder.append(variant_defines[p_variant].get_data());
+		for (int j = 0; j < p_version->custom_defines.size(); j++) {
+			builder.append(p_version->custom_defines[j].get_data());
 		}
 
-		if (build_ok){
-			//fragment stage
-			current_stage =RD::SHADER_STAGE_FRAGMENT;
+		builder.append(fragment_code0.get_data()); //first part of fragment
 
-			StringBuilder builder;
+		builder.append(p_version->uniforms.get_data()); //uniforms (same for fragment and fragment)
 
-			builder.append(fragment_codev.get_data()); // version info (if exists)
-			builder.append("\n"); //make sure defines begin at newline
+		builder.append(fragment_code1.get_data()); //first part of fragment
 
-			builder.append(general_defines.get_data());
-			builder.append(variant_defines[i].get_data());
-			for (int j = 0; j < p_version->custom_defines.size(); j++) {
-				builder.append(p_version->custom_defines[j].get_data());
-			}
+		builder.append(p_version->fragment_globals.get_data()); // fragment globals
 
-			builder.append(fragment_code0.get_data()); //first part of fragment
+		builder.append(fragment_code2.get_data()); //third part of fragment
 
-			builder.append(p_version->uniforms.get_data()); //uniforms (same for fragment and fragment)
+		builder.append(p_version->fragment_light.get_data()); // fragment light
 
-			builder.append(fragment_code1.get_data()); //first part of fragment
+		builder.append(fragment_code3.get_data()); //fourth part of fragment
 
-			builder.append(p_version->fragment_globals.get_data()); // fragment globals
+		builder.append(p_version->fragment_code.get_data()); // fragment code
 
-			builder.append(fragment_code2.get_data()); //third part of fragment
+		builder.append(fragment_code4.get_data()); //fourth part of fragment
 
-			builder.append(p_version->fragment_light.get_data()); // fragment light
+		current_source = builder.as_string();
+		RD::ShaderStageData stage;
+		stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT, current_source, RD::SHADER_LANGUAGE_GLSL, &error);
+		if (stage.spir_v.size() == 0) {
+			build_ok = false;
+		} else {
 
-			builder.append(fragment_code3.get_data()); //fourth part of fragment
+			stage.shader_stage = RD::SHADER_STAGE_FRAGMENT;
+			stages.push_back(stage);
+		}
+	}
 
-			builder.append(p_version->fragment_code.get_data()); // fragment code
+	if (!build_ok) {
+		variant_set_mutex.lock(); //properly print the errors
+		ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(p_variant) + " (" + variant_defines[p_variant].get_data() + ").");
+		ERR_PRINT(error);
 
-			builder.append(fragment_code4.get_data()); //fourth part of fragment
+#ifdef DEBUG_ENABLED
+		ERR_PRINT("code:\n" + current_source.get_with_code_lines());
+#endif
 
-			current_source = builder.as_string();
-			RD::ShaderStageData stage;
-			stage.spir_v = RD::get_singleton()->shader_compile_from_source(RD::SHADER_STAGE_FRAGMENT,current_source,RD::SHADER_LANGUAGE_GLSL,&error);
-			if (stage.spir_v.size()==0) {
-				build_ok=false;
-			} else {
+		variant_set_mutex.unlock();
+		return;
+	}
 
-				stage.shader_stage = RD::SHADER_STAGE_FRAGMENT;
-				stages.push_back(stage);
-			}
+	RID shader = RD::get_singleton()->shader_create(stages);
 
-		}
+	variant_set_mutex.lock();
+	p_version->variants[p_variant] = shader;
+	variant_set_mutex.unlock();
+}
 
+void ShaderRD::_compile_version(Version *p_version) {
 
-		if (!build_ok) {
-			ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment") + " shader, variant #" + itos(i) + " (" + variant_defines[i].get_data() + ").");
-			ERR_PRINT(error);
+	_clear_version(p_version);
 
-#ifdef DEBUG_ENABLED
-			ERR_PRINT("code:\n" + current_source.get_with_code_lines());
-#endif
-			//clear versions if they exist
-			for (int j = 0; j < i; j++) {
-				RD::get_singleton()->free(p_version->variants[j]);
-			}
+	p_version->valid = false;
+	p_version->dirty = false;
 
-			memdelete_arr(p_version->variants);
-			p_version->variants = NULL;
-			return;
-		}
+	p_version->variants = memnew_arr(RID, variant_defines.size());
+#if 1
 
-		RID shader = RD::get_singleton()->shader_create(stages);
+	RasterizerRD::thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version);
+#else
+	for (int i = 0; i < variant_defines.size(); i++) {
 
-		if (shader.is_null()) {
-			//clear versions if they exist
-			for (int j = 0; j < i; j++) {
-				RD::get_singleton()->free(p_version->variants[j]);
-			}
+		_compile_variant(i, p_version);
+	}
+#endif
 
-			memdelete_arr(p_version->variants);
-			p_version->variants = NULL;
-			return;
+	bool all_valid = true;
+	for (int i = 0; i < variant_defines.size(); i++) {
+		if (p_version->variants[i].is_null()) {
+			all_valid = false;
+			break;
 		}
+	}
 
-		p_version->variants[i] = shader;
+	if (!all_valid) {
+		//clear versions if they exist
+		for (int i = 0; i < variant_defines.size(); i++) {
+			if (!p_version->variants[i].is_null()) {
+				RD::get_singleton()->free(p_version->variants[i]);
+			}
+		}
+		memdelete_arr(p_version->variants);
+		p_version->variants = NULL;
+		return;
 	}
 
 	p_version->valid = true;

+ 5 - 1
servers/visual/rasterizer_rd/shader_rd.h

@@ -36,7 +36,7 @@
 #include "core/rid_owner.h"
 #include "core/variant.h"
 #include <stdio.h>
-
+#include <mutex>
 /**
 	@author Juan Linietsky <[email protected]>
 */
@@ -67,6 +67,10 @@ class ShaderRD {
 		bool initialize_needed;
 	};
 
+	std::mutex variant_set_mutex;
+
+	void _compile_variant(uint32_t p_variant, Version *p_version);
+
 	void _clear_version(Version *p_version);
 	void _compile_version(Version *p_version);
 

+ 1 - 1
servers/visual/visual_server_scene.cpp

@@ -3511,7 +3511,7 @@ VisualServerScene *VisualServerScene::singleton = NULL;
 VisualServerScene::VisualServerScene() {
 
 #ifndef NO_THREADS
-	probe_bake_sem = Semaphore::create();
+	probe_bake_sem = SemaphoreOld::create();
 	probe_bake_mutex = Mutex::create();
 	probe_bake_thread = Thread::create(_gi_probe_bake_threads, this);
 	probe_bake_thread_exit = false;

+ 1 - 1
servers/visual/visual_server_scene.h

@@ -517,7 +517,7 @@ public:
 
 	volatile bool probe_bake_thread_exit;
 	Thread *probe_bake_thread;
-	Semaphore *probe_bake_sem;
+	SemaphoreOld *probe_bake_sem;
 	Mutex *probe_bake_mutex;
 	List<Instance *> probe_bake_list;