Browse Source

Threaded optimizations to cull and render

-Reorganize thread work pool for rendering
-Fixes to make secondary command buffers work (disabled because they need more testing)
reduz 4 years ago
parent
commit
77bc3e9ac3

+ 1 - 0
core/templates/thread_work_pool.h

@@ -125,6 +125,7 @@ public:
 		end_work();
 	}
 
+	_FORCE_INLINE_ int get_thread_count() const { return thread_count; } // Read-only accessor for the pool's thread_count member.
 	void init(int p_thread_count = -1);
 	void finish();
 	~ThreadWorkPool();

+ 7 - 7
drivers/vulkan/rendering_device_vulkan.cpp

@@ -5638,7 +5638,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin_for_screen(Di
 
 	vkCmdSetScissor(command_buffer, 0, 1, &scissor);
 
-	return ID_TYPE_DRAW_LIST;
+	return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT;
 }
 
 Error RenderingDeviceVulkan::_draw_list_setup_framebuffer(Framebuffer *p_framebuffer, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, VkFramebuffer *r_framebuffer, VkRenderPass *r_render_pass) {
@@ -5905,7 +5905,7 @@ RenderingDevice::DrawListID RenderingDeviceVulkan::draw_list_begin(RID p_framebu
 	vkCmdSetScissor(command_buffer, 0, 1, &scissor);
 
 	draw_list->viewport = Rect2i(viewport_offset, viewport_size);
-	return ID_TYPE_DRAW_LIST;
+	return int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT;
 }
 
 Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, DrawListID *r_split_ids, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) {
@@ -6002,7 +6002,7 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p
 
 	for (uint32_t i = 0; i < p_splits; i++) {
 		//take a command buffer and initialize it
-		VkCommandBuffer command_buffer = split_draw_list_allocators[p_splits].command_buffers[frame];
+		VkCommandBuffer command_buffer = split_draw_list_allocators[i].command_buffers[frame];
 
 		VkCommandBufferInheritanceInfo inheritance_info;
 		inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
@@ -6060,7 +6060,7 @@ Error RenderingDeviceVulkan::draw_list_begin_split(RID p_framebuffer, uint32_t p
 		scissor.extent.height = viewport_size.height;
 
 		vkCmdSetScissor(command_buffer, 0, 1, &scissor);
-		r_split_ids[i] = (DrawListID(1) << DrawListID(ID_TYPE_SPLIT_DRAW_LIST)) + i;
+		r_split_ids[i] = (int64_t(ID_TYPE_SPLIT_DRAW_LIST) << ID_BASE_SHIFT) + i;
 
 		draw_list[i].viewport = Rect2i(viewport_offset, viewport_size);
 	}
@@ -6075,7 +6075,7 @@ RenderingDeviceVulkan::DrawList *RenderingDeviceVulkan::_get_draw_list_ptr(DrawL
 
 	if (!draw_list) {
 		return nullptr;
-	} else if (p_id == ID_TYPE_DRAW_LIST) {
+	} else if (p_id == (int64_t(ID_TYPE_DRAW_LIST) << ID_BASE_SHIFT)) {
 		if (draw_list_split) {
 			return nullptr;
 		}
@@ -6442,8 +6442,8 @@ void RenderingDeviceVulkan::draw_list_end() {
 		//send all command buffers
 		VkCommandBuffer *command_buffers = (VkCommandBuffer *)alloca(sizeof(VkCommandBuffer) * draw_list_count);
 		for (uint32_t i = 0; i < draw_list_count; i++) {
-			vkEndCommandBuffer(draw_list->command_buffer);
-			command_buffers[i] = draw_list->command_buffer;
+			vkEndCommandBuffer(draw_list[i].command_buffer);
+			command_buffers[i] = draw_list[i].command_buffer;
 		}
 
 		vkCmdExecuteCommands(frames[frame].draw_command_buffer, draw_list_count, command_buffers);

+ 0 - 4
servers/rendering/renderer_rd/renderer_compositor_rd.cpp

@@ -154,12 +154,9 @@ void RendererCompositorRD::initialize() {
 	}
 }
 
-ThreadWorkPool RendererCompositorRD::thread_work_pool;
 uint64_t RendererCompositorRD::frame = 1;
 
 void RendererCompositorRD::finalize() {
-	thread_work_pool.finish();
-
 	memdelete(scene);
 	memdelete(canvas);
 	memdelete(storage);
@@ -174,7 +171,6 @@ RendererCompositorRD *RendererCompositorRD::singleton = nullptr;
 
 RendererCompositorRD::RendererCompositorRD() {
 	singleton = this;
-	thread_work_pool.init();
 	time = 0;
 
 	storage = memnew(RendererStorageRD);

+ 0 - 2
servers/rendering/renderer_rd/renderer_compositor_rd.h

@@ -90,8 +90,6 @@ public:
 
 	virtual bool is_low_end() const { return false; }
 
-	static ThreadWorkPool thread_work_pool;
-
 	static RendererCompositorRD *singleton;
 	RendererCompositorRD();
 	~RendererCompositorRD() {}

+ 83 - 55
servers/rendering/renderer_rd/renderer_scene_render_forward.cpp

@@ -809,13 +809,13 @@ bool RendererSceneRenderForward::free(RID p_rid) {
 /// RENDERING ///
 
 template <RendererSceneRenderForward::PassMode p_pass_mode>
-void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe, const Vector2 &p_uv_offset, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) {
+void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element) {
 	RD::DrawListID draw_list = p_draw_list;
 	RD::FramebufferFormatID framebuffer_format = p_framebuffer_Format;
 
 	//global scope bindings
 	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, render_base_uniform_set, SCENE_UNIFORM_SET);
-	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_render_pass_uniform_set, RENDER_PASS_UNIFORM_SET);
+	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, p_params->render_pass_uniform_set, RENDER_PASS_UNIFORM_SET);
 	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, default_vec4_xform_uniform_set, TRANSFORMS_UNIFORM_SET);
 
 	RID prev_material_uniform_set;
@@ -825,12 +825,12 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList
 	RID prev_pipeline_rd;
 	RID prev_xforms_uniform_set;
 
-	bool shadow_pass = (p_pass_mode == PASS_MODE_SHADOW) || (p_pass_mode == PASS_MODE_SHADOW_DP);
+	bool shadow_pass = (p_params->pass_mode == PASS_MODE_SHADOW) || (p_params->pass_mode == PASS_MODE_SHADOW_DP);
 
-	float old_offset[2];
+	float old_offset[2] = { 0, 0 };
 
-	for (int i = 0; i < p_element_count; i++) {
-		const GeometryInstanceSurfaceDataCache *surf = p_elements[i];
+	for (uint32_t i = p_from_element; i < p_to_element; i++) {
+		const GeometryInstanceSurfaceDataCache *surf = p_params->elements[i];
 
 		RID material_uniform_set;
 		ShaderData *shader;
@@ -851,21 +851,21 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList
 			continue;
 		}
 
-		if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL) {
+		if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) {
 			old_offset[0] = surf->owner->push_constant.lightmap_uv_scale[0];
 			old_offset[1] = surf->owner->push_constant.lightmap_uv_scale[1];
-			surf->owner->push_constant.lightmap_uv_scale[0] = p_uv_offset.x;
-			surf->owner->push_constant.lightmap_uv_scale[1] = p_uv_offset.y;
+			surf->owner->push_constant.lightmap_uv_scale[0] = p_params->uv_offset.x;
+			surf->owner->push_constant.lightmap_uv_scale[1] = p_params->uv_offset.y;
 		}
 
 		//find cull variant
 		ShaderData::CullVariant cull_variant;
 
-		if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL || p_pass_mode == PASS_MODE_SDF || ((p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) && surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS)) {
+		if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL || p_params->pass_mode == PASS_MODE_SDF || ((p_params->pass_mode == PASS_MODE_SHADOW || p_params->pass_mode == PASS_MODE_SHADOW_DP) && surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_DOUBLE_SIDED_SHADOWS)) {
 			cull_variant = ShaderData::CULL_VARIANT_DOUBLE_SIDED;
 		} else {
 			bool mirror = surf->owner->mirror;
-			if (p_reverse_cull) {
+			if (p_params->reverse_cull) {
 				mirror = !mirror;
 			}
 			cull_variant = mirror ? ShaderData::CULL_VARIANT_REVERSED : ShaderData::CULL_VARIANT_NORMAL;
@@ -876,7 +876,7 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList
 
 		ShaderVersion shader_version = SHADER_VERSION_MAX; // Assigned to silence wrong -Wmaybe-initialized.
 
-		switch (p_pass_mode) {
+		switch (p_params->pass_mode) {
 			case PASS_MODE_COLOR:
 			case PASS_MODE_COLOR_TRANSPARENT: {
 				if (surf->sort.uses_lightmap) {
@@ -930,13 +930,13 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList
 			storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, pipeline->get_vertex_input_mask(), vertex_array_rd, vertex_format);
 		}
 
-		if (p_screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(mesh_surface)) {
+		if (p_params->screen_lod_threshold > 0.0 && storage->mesh_surface_has_lod(mesh_surface)) {
 			//lod
-			Vector3 support_min = surf->owner->transformed_aabb.get_support(-p_lod_plane.normal);
-			Vector3 support_max = surf->owner->transformed_aabb.get_support(p_lod_plane.normal);
+			Vector3 support_min = surf->owner->transformed_aabb.get_support(-p_params->lod_plane.normal);
+			Vector3 support_max = surf->owner->transformed_aabb.get_support(p_params->lod_plane.normal);
 
-			float distance_min = p_lod_plane.distance_to(support_min);
-			float distance_max = p_lod_plane.distance_to(support_max);
+			float distance_min = p_params->lod_plane.distance_to(support_min);
+			float distance_max = p_params->lod_plane.distance_to(support_max);
 
 			float distance = 0.0;
 
@@ -949,7 +949,7 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList
 				distance = -distance_max;
 			}
 
-			index_array_rd = storage->mesh_surface_get_index_array_with_lod(mesh_surface, surf->owner->lod_model_scale * surf->owner->lod_bias, distance * p_lod_distance_multiplier, p_screen_lod_threshold);
+			index_array_rd = storage->mesh_surface_get_index_array_with_lod(mesh_surface, surf->owner->lod_model_scale * surf->owner->lod_bias, distance * p_params->lod_distance_multiplier, p_params->screen_lod_threshold);
 
 		} else {
 			//no lod
@@ -968,7 +968,7 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList
 			prev_index_array_rd = index_array_rd;
 		}
 
-		RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_force_wireframe);
+		RID pipeline_rd = pipeline->get_render_pipeline(vertex_format, framebuffer_format, p_params->force_wireframe);
 
 		if (pipeline_rd != prev_pipeline_rd) {
 			// checking with prev shader does not make so much sense, as
@@ -995,49 +995,76 @@ void RendererSceneRenderForward::_render_list_template(RenderingDevice::DrawList
 
 		RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), surf->owner->instance_count);
 
-		if (p_pass_mode == PASS_MODE_DEPTH_MATERIAL) {
+		if (p_params->pass_mode == PASS_MODE_DEPTH_MATERIAL) {
 			surf->owner->push_constant.lightmap_uv_scale[0] = old_offset[0];
 			surf->owner->push_constant.lightmap_uv_scale[1] = old_offset[1];
 		}
 	}
 }
 
-void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe, const Vector2 &p_uv_offset, const Plane &p_lod_plane, float p_lod_distance_multiplier, float p_screen_lod_threshold) {
+void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element) { // Dispatches on p_params->pass_mode and renders elements [p_from_element, p_to_element) into p_draw_list.
 	//use template for faster performance (pass mode comparisons are inlined)
-	switch (p_pass_mode) {
+	// NOTE(review): the _render_list_template hunks in this commit now test p_params->pass_mode at run time instead of the p_pass_mode template argument; confirm the per-mode instantiations below still fold those comparisons.
+	switch (p_params->pass_mode) {
 		case PASS_MODE_COLOR: {
-			_render_list_template<PASS_MODE_COLOR>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_COLOR>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_COLOR_SPECULAR: {
-			_render_list_template<PASS_MODE_COLOR_SPECULAR>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_COLOR_SPECULAR>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_COLOR_TRANSPARENT: {
-			_render_list_template<PASS_MODE_COLOR_TRANSPARENT>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_COLOR_TRANSPARENT>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_SHADOW: {
-			_render_list_template<PASS_MODE_SHADOW>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_SHADOW>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_SHADOW_DP: {
-			_render_list_template<PASS_MODE_SHADOW_DP>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_SHADOW_DP>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_DEPTH: {
-			_render_list_template<PASS_MODE_DEPTH>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_DEPTH>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: {
-			_render_list_template<PASS_MODE_DEPTH_NORMAL_ROUGHNESS>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_DEPTH_NORMAL_ROUGHNESS>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE: {
-			_render_list_template<PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_DEPTH_NORMAL_ROUGHNESS_GIPROBE>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_DEPTH_MATERIAL: {
-			_render_list_template<PASS_MODE_DEPTH_MATERIAL>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_DEPTH_MATERIAL>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 		case PASS_MODE_SDF: {
-			_render_list_template<PASS_MODE_SDF>(p_draw_list, p_framebuffer_Format, p_elements, p_element_count, p_reverse_cull, p_no_gi, p_render_pass_uniform_set, p_force_wireframe, p_uv_offset, p_lod_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+			_render_list_template<PASS_MODE_SDF>(p_draw_list, p_framebuffer_Format, p_params, p_from_element, p_to_element);
 		} break;
 	}
 }
 
+void RendererSceneRenderForward::_render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params) { // Work-pool callback: renders the p_thread-th contiguous slice of p_params->elements.
+	uint32_t render_total = p_params->element_count;
+	uint32_t total_threads = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
+	uint32_t render_from = p_thread * render_total / total_threads; // Slice start; integer division rounds the boundary down.
+	uint32_t render_to = (p_thread + 1 == total_threads) ? render_total : ((p_thread + 1) * render_total / total_threads); // The last thread always ends exactly at render_total.
+	_render_list(thread_draw_lists[p_thread], p_params->framebuffer_format, p_params, render_from, render_to); // Each thread records into its own entry of thread_draw_lists.
+}
+
+void RendererSceneRenderForward::_render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const Vector<RID> &p_storage_textures) { // Begins a draw list on p_framebuffer, renders every element described by p_params, then ends the draw list.
+	RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(p_framebuffer);
+	p_params->framebuffer_format = fb_format; // Stored in p_params so _render_list_thread_function can read it.
+
+	if ((uint32_t)p_params->element_count > render_list_thread_threshold && false) { // "&& false" keeps this branch unreachable: secondary command buffers need more testing at this time
+		// Multi-threaded path: one split draw list per pool thread, each filled by _render_list_thread_function.
+		thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
+		RD::get_singleton()->draw_list_begin_split(p_framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures);
+		RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RendererSceneRenderForward::_render_list_thread_function, p_params);
+		RD::get_singleton()->draw_list_end();
+	} else {
+		// Single-threaded path: record the whole element range into one draw list.
+		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, p_initial_color_action, p_final_color_action, p_initial_depth_action, p_final_depth_action, p_clear_color_values, p_clear_depth, p_clear_stencil, p_region, p_storage_textures);
+		_render_list(draw_list, fb_format, p_params, 0, p_params->element_count);
+		RD::get_singleton()->draw_list_end();
+	}
+}
+
 void RendererSceneRenderForward::_setup_environment(RID p_environment, RID p_render_buffers, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, RID p_reflection_probe, bool p_no_fog, const Size2 &p_screen_pixel_size, RID p_shadow_atlas, bool p_flip_y, const Color &p_default_bg_color, float p_znear, float p_zfar, bool p_opaque_render_buffers, bool p_pancake_shadows) {
 	//CameraMatrix projection = p_cam_projection;
 	//projection.flip_y(); // Vulkan and modern APIs use Y-Down
@@ -1428,7 +1455,7 @@ void RendererSceneRenderForward::_fill_render_list(const PagedArray<GeometryInst
 }
 
 void RendererSceneRenderForward::_setup_giprobes(const PagedArray<RID> &p_giprobes) {
-	scene_state.giprobes_used = MIN(p_giprobes.size(), MAX_GI_PROBES);
+	scene_state.giprobes_used = MIN(p_giprobes.size(), uint32_t(MAX_GI_PROBES));
 	for (uint32_t i = 0; i < scene_state.giprobes_used; i++) {
 		scene_state.giprobe_ids[i] = p_giprobes[i];
 	}
@@ -1681,9 +1708,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
 		RID rp_uniform_set = _setup_render_pass_uniform_set(RID(), RID(), RID(), RID(), PagedArray<RID>(), PagedArray<RID>());
 
 		bool finish_depth = using_ssao || using_sdfgi || using_giprobe;
-		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear);
-		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(depth_framebuffer), render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold);
-		RD::get_singleton()->draw_list_end();
+		RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, depth_pass_mode, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold);
+		_render_list_with_threads(&render_list_params, depth_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, finish_depth ? RD::FINAL_ACTION_READ : RD::FINAL_ACTION_CONTINUE, depth_pass_clear);
 
 		if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) {
 			RENDER_TIMESTAMP("Resolve Depth Pre-Pass");
@@ -1731,13 +1757,13 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
 		}
 
 		RID framebuffer = using_separate_specular ? opaque_specular_framebuffer : opaque_framebuffer;
-		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
-		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(framebuffer), render_list.elements, render_list.element_count, false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold);
-		RD::get_singleton()->draw_list_end();
+		RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, using_separate_specular ? PASS_MODE_COLOR_SPECULAR : PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold);
+
+		_render_list_with_threads(&render_list_params, framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CONTINUE) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
 
 		if (will_continue_color && using_separate_specular) {
 			// close the specular framebuffer, as it's no longer used
-			draw_list = RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE);
+			RD::get_singleton()->draw_list_begin(render_buffer->specular_only_fb, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE);
 			RD::get_singleton()->draw_list_end();
 		}
 	}
@@ -1817,9 +1843,8 @@ void RendererSceneRenderForward::_render_scene(RID p_render_buffer, const Transf
 	render_list.sort_by_reverse_depth_and_priority(true);
 
 	{
-		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ);
-		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(alpha_framebuffer), &render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold);
-		RD::get_singleton()->draw_list_end();
+		RenderListParameters render_list_params(&render_list.elements[render_list.max_elements - render_list.alpha_element_count], render_list.alpha_element_count, false, PASS_MODE_COLOR, render_buffer == nullptr, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), lod_camera_plane, lod_distance_multiplier, p_screen_lod_threshold);
+		_render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ);
 	}
 
 	if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) {
@@ -1854,9 +1879,8 @@ void RendererSceneRenderForward::_render_shadow(RID p_framebuffer, const PagedAr
 
 	{
 		//regular forward for now
-		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ);
-		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, p_use_dp_flip, pass_mode, true, rp_uniform_set, false, Vector2(), p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
-		RD::get_singleton()->draw_list_end();
+		RenderListParameters render_list_params(render_list.elements, render_list.element_count, p_use_dp_flip, pass_mode, true, rp_uniform_set, false, Vector2(), p_camera_plane, p_lod_distance_multiplier, p_screen_lod_threshold);
+		_render_list_with_threads(&render_list_params, p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ);
 	}
 }
 
@@ -1883,9 +1907,8 @@ void RendererSceneRenderForward::_render_particle_collider_heightfield(RID p_fb,
 
 	{
 		//regular forward for now
-		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ);
-		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_fb), render_list.elements, render_list.element_count, false, pass_mode, true, rp_uniform_set);
-		RD::get_singleton()->draw_list_end();
+		RenderListParameters render_list_params(render_list.elements, render_list.element_count, false, pass_mode, true, rp_uniform_set);
+		_render_list_with_threads(&render_list_params, p_fb, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ);
 	}
 }
 
@@ -1911,6 +1934,7 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo
 	render_list.sort_by_key(false);
 
 	{
+		RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set);
 		//regular forward for now
 		Vector<Color> clear;
 		clear.push_back(Color(0, 0, 0, 0));
@@ -1919,7 +1943,7 @@ void RendererSceneRenderForward::_render_material(const Transform &p_cam_transfo
 		clear.push_back(Color(0, 0, 0, 0));
 		clear.push_back(Color(0, 0, 0, 0));
 		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, RD::FINAL_ACTION_READ, clear, 1.0, 0, p_region);
-		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set);
+		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count);
 		RD::get_singleton()->draw_list_end();
 	}
 }
@@ -1946,6 +1970,7 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance *
 	render_list.sort_by_key(false);
 
 	{
+		RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, true);
 		//regular forward for now
 		Vector<Color> clear;
 		clear.push_back(Color(0, 0, 0, 0));
@@ -1973,9 +1998,12 @@ void RendererSceneRenderForward::_render_uv2(const PagedArray<GeometryInstance *
 			Vector2 ofs = uv_offsets[i];
 			ofs.x /= p_region.size.width;
 			ofs.y /= p_region.size.height;
-			_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, true, ofs); //first wireframe, for pseudo conservative
+			render_list_params.uv_offset = ofs;
+			_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); //first wireframe, for pseudo conservative
 		}
-		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); //second regular triangles
+		render_list_params.uv_offset = Vector2();
+		render_list_params.force_wireframe = false; // render_list_params was built with wireframe forced on for the offset passes; the final pass must draw filled triangles, as the removed call did by passing false.
+		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(p_framebuffer), &render_list_params, 0, render_list_params.element_count); //second regular triangles
 
 		RD::get_singleton()->draw_list_end();
 	}
@@ -2054,9 +2081,8 @@ void RendererSceneRenderForward::_render_sdfgi(RID p_render_buffers, const Vecto
 			E = sdfgi_framebuffer_size_cache.insert(fb_size, fb);
 		}
 
-		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, Rect2(), sbs);
-		_render_list(draw_list, RD::get_singleton()->framebuffer_get_format(E->get()), render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false); //second regular triangles
-		RD::get_singleton()->draw_list_end();
+		RenderListParameters render_list_params(render_list.elements, render_list.element_count, true, pass_mode, true, rp_uniform_set, false);
+		_render_list_with_threads(&render_list_params, E->get(), RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, Vector<Color>(), 1.0, 0, Rect2(), sbs);
 	}
 }
 
@@ -3380,6 +3406,8 @@ RendererSceneRenderForward::RendererSceneRenderForward(RendererStorageRD *p_stor
 		sampler.compare_op = RD::COMPARE_OP_LESS;
 		shadow_sampler = RD::get_singleton()->sampler_create(sampler);
 	}
+
+	render_list_thread_threshold = GLOBAL_GET("rendering/forward_renderer/threaded_render_minimum_instances");
 }
 
 RendererSceneRenderForward::~RendererSceneRenderForward() {

+ 36 - 2
servers/rendering/renderer_rd/renderer_scene_render_forward.h

@@ -425,10 +425,44 @@ class RendererSceneRenderForward : public RendererSceneRenderRD {
 
 	struct GeometryInstanceSurfaceDataCache;
 
+	// Parameters describing how a list of elements is rendered. Instances are passed
+	// by pointer to the _render_list* functions instead of a long argument list.
+	struct RenderListParameters {
+		GeometryInstanceSurfaceDataCache **elements = nullptr;
+		int element_count = 0;
+		bool reverse_cull = false;
+		PassMode pass_mode = PASS_MODE_COLOR;
+		bool no_gi = false;
+		RID render_pass_uniform_set;
+		bool force_wireframe = false;
+		Vector2 uv_offset;
+		Plane lod_plane;
+		float lod_distance_multiplier = 0.0;
+		float screen_lod_threshold = 0.0;
+		RD::FramebufferFormatID framebuffer_format = 0; //not a constructor argument, stays 0 until assigned
+
+		// Copies every argument into the field of the same name.
+		RenderListParameters(GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0) :
+				elements(p_elements),
+				element_count(p_element_count),
+				reverse_cull(p_reverse_cull),
+				pass_mode(p_pass_mode),
+				no_gi(p_no_gi),
+				render_pass_uniform_set(p_render_pass_uniform_set),
+				force_wireframe(p_force_wireframe),
+				uv_offset(p_uv_offset),
+				lod_plane(p_lod_plane),
+				lod_distance_multiplier(p_lod_distance_multiplier),
+				screen_lod_threshold(p_screen_lod_threshold) {
+		}
+	};
+
 	template <PassMode p_pass_mode>
-	_FORCE_INLINE_ void _render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0);
+	_FORCE_INLINE_ void _render_list_template(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element);
+
+	void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element);
+
+	LocalVector<RD::DrawListID> thread_draw_lists;
+	void _render_list_thread_function(uint32_t p_thread, RenderListParameters *p_params);
+	void _render_list_with_threads(RenderListParameters *p_params, RID p_framebuffer, RD::InitialAction p_initial_color_action, RD::FinalAction p_final_color_action, RD::InitialAction p_initial_depth_action, RD::FinalAction p_final_depth_action, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth = 1.0, uint32_t p_clear_stencil = 0, const Rect2 &p_region = Rect2(), const Vector<RID> &p_storage_textures = Vector<RID>());
 
-	void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, GeometryInstanceSurfaceDataCache **p_elements, int p_element_count, bool p_reverse_cull, PassMode p_pass_mode, bool p_no_gi, RID p_render_pass_uniform_set, bool p_force_wireframe = false, const Vector2 &p_uv_offset = Vector2(), const Plane &p_lod_plane = Plane(), float p_lod_distance_multiplier = 0.0, float p_screen_lod_threshold = 0.0);
+	uint32_t render_list_thread_threshold = 500;
 
 	void _fill_render_list(const PagedArray<GeometryInstance *> &p_instances, PassMode p_pass_mode, const CameraMatrix &p_cam_projection, const Transform &p_cam_transform, bool p_using_sdfgi = false, bool p_using_opaque_gi = false);
 

+ 1 - 1
servers/rendering/renderer_rd/shader_rd.cpp

@@ -360,7 +360,7 @@ void ShaderRD::_compile_version(Version *p_version) {
 	p_version->variants = memnew_arr(RID, variant_defines.size());
 #if 1
 
-	RendererCompositorRD::thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version);
+	RendererThreadPool::singleton->thread_work_pool.do_work(variant_defines.size(), this, &ShaderRD::_compile_variant, p_version);
 #else
 	for (int i = 0; i < variant_defines.size(); i++) {
 		_compile_variant(i, p_version);

+ 278 - 288
servers/rendering/renderer_scene_cull.cpp

@@ -2224,6 +2224,222 @@ void RendererSceneCull::render_camera(RID p_render_buffers, Ref<XRInterface> &p_
 	_render_scene(p_render_buffers, cam_transform, camera_matrix, false, environment, camera->effects, p_scenario, p_shadow_atlas, RID(), -1, p_screen_lod_threshold);
 };
 
+// Culls the slice of the scenario's instances that belongs to work item p_thread.
+// The instance range is divided proportionally by the work pool's thread count; the
+// last thread always runs up to the final instance so no remainder is dropped.
+// Results go to frustum_cull_result_threads[p_thread].
+// NOTE(review): assumes frustum_cull_result_threads has one entry per pool thread - confirm where it is resized.
+void RendererSceneCull::_frustum_cull_threaded(uint32_t p_thread, FrustumCullData *cull_data) {
+	const uint32_t instance_count = cull_data->scenario->instance_data.size();
+	const uint32_t thread_count = RendererThreadPool::singleton->thread_work_pool.get_thread_count();
+
+	const uint32_t range_begin = p_thread * instance_count / thread_count;
+	uint32_t range_end = instance_count; //last thread takes everything that is left
+	if (p_thread + 1 != thread_count) {
+		range_end = (p_thread + 1) * instance_count / thread_count;
+	}
+
+	_frustum_cull(*cull_data, frustum_cull_result_threads[p_thread], range_begin, range_end);
+}
+
+// Culls the instances with index in [p_from, p_to) of cull_data.scenario and appends
+// whatever passes to cull_result. Each instance AABB is tested against:
+// - the camera frustum (cull_data.cull->frustum), sorting the instance by base type,
+// - every directional shadow cascade frustum,
+// - every SDFGI region AABB.
+// Changes to the reflection probe / GI probe update lists and particle process
+// requests are made while holding cull_data.cull->lock; dirty flags on the instance
+// data are cleared as the corresponding pairings are pushed to scene_render.
+void RendererSceneCull::_frustum_cull(FrustumCullData &cull_data, FrustumCullResult &cull_result, uint64_t p_from, uint64_t p_to) {
+	uint64_t frame_number = RSG::rasterizer->get_frame_number();
+	float lightmap_probe_update_speed = RSG::storage->lightmap_get_probe_capture_update_speed() * RSG::rasterizer->get_frame_delta_time();
+
+	uint32_t sdfgi_last_light_index = 0xFFFFFFFF;
+	uint32_t sdfgi_last_light_cascade = 0xFFFFFFFF;
+
+	RID instance_pair_buffer[MAX_INSTANCE_PAIRS];
+
+	for (uint64_t i = p_from; i < p_to; i++) {
+		bool mesh_visible = false;
+
+		if (cull_data.scenario->instance_aabbs[i].in_frustum(cull_data.cull->frustum)) {
+			InstanceData &idata = cull_data.scenario->instance_data[i];
+			uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK;
+
+			if ((cull_data.visible_layers & idata.layer_mask) == 0) {
+				//failure
+			} else if (base_type == RS::INSTANCE_LIGHT) {
+				cull_result.lights.push_back(idata.instance);
+				cull_result.light_instances.push_back(RID::from_uint64(idata.instance_data_rid));
+				if (cull_data.shadow_atlas.is_valid() && RSG::storage->light_has_shadow(idata.base_rid)) {
+					scene_render->light_instance_mark_visible(RID::from_uint64(idata.instance_data_rid)); //mark it visible for shadow allocation later
+				}
+
+			} else if (base_type == RS::INSTANCE_REFLECTION_PROBE) {
+				if (cull_data.render_reflection_probe != idata.instance) {
+					//avoid entering The Matrix
+
+					if ((idata.flags & InstanceData::FLAG_REFLECTION_PROBE_DIRTY) || scene_render->reflection_probe_instance_needs_redraw(RID::from_uint64(idata.instance_data_rid))) {
+						InstanceReflectionProbeData *reflection_probe = static_cast<InstanceReflectionProbeData *>(idata.instance->base_data);
+						cull_data.cull->lock.lock();
+						if (!reflection_probe->update_list.in_list()) {
+							reflection_probe->render_step = 0;
+							reflection_probe_render_list.add_last(&reflection_probe->update_list);
+						}
+						cull_data.cull->lock.unlock();
+
+						idata.flags &= ~uint32_t(InstanceData::FLAG_REFLECTION_PROBE_DIRTY);
+					}
+
+					if (scene_render->reflection_probe_instance_has_reflection(RID::from_uint64(idata.instance_data_rid))) {
+						cull_result.reflections.push_back(RID::from_uint64(idata.instance_data_rid));
+					}
+				}
+			} else if (base_type == RS::INSTANCE_DECAL) {
+				cull_result.decals.push_back(RID::from_uint64(idata.instance_data_rid));
+
+			} else if (base_type == RS::INSTANCE_GI_PROBE) {
+				InstanceGIProbeData *gi_probe = static_cast<InstanceGIProbeData *>(idata.instance->base_data);
+				cull_data.cull->lock.lock();
+				if (!gi_probe->update_element.in_list()) {
+					gi_probe_update_list.add(&gi_probe->update_element);
+				}
+				cull_data.cull->lock.unlock();
+				cull_result.gi_probes.push_back(RID::from_uint64(idata.instance_data_rid));
+
+			} else if (base_type == RS::INSTANCE_LIGHTMAP) {
+				//lightmaps have their own result list, they must not be mixed with the GI probes
+				cull_result.lightmaps.push_back(RID::from_uint64(idata.instance_data_rid));
+			} else if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && !(idata.flags & InstanceData::FLAG_CAST_SHADOWS_ONLY)) {
+				bool keep = true;
+
+				if (idata.flags & InstanceData::FLAG_REDRAW_IF_VISIBLE) {
+					RenderingServerDefault::redraw_request();
+				}
+
+				if (base_type == RS::INSTANCE_MESH) {
+					mesh_visible = true;
+				} else if (base_type == RS::INSTANCE_PARTICLES) {
+					//particles visible? process them
+					if (RSG::storage->particles_is_inactive(idata.base_rid)) {
+						//but if nothing is going on, don't do it.
+						keep = false;
+					} else {
+						cull_data.cull->lock.lock();
+						RSG::storage->particles_request_process(idata.base_rid);
+						cull_data.cull->lock.unlock();
+						RSG::storage->particles_set_view_axis(idata.base_rid, -cull_data.cam_transform.basis.get_axis(2).normalized());
+						//particles visible? request redraw
+						RenderingServerDefault::redraw_request();
+					}
+				}
+
+				if (geometry_instance_pair_mask & (1 << RS::INSTANCE_LIGHT) && (idata.flags & InstanceData::FLAG_GEOM_LIGHTING_DIRTY)) {
+					InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
+					uint32_t idx = 0;
+
+					for (Set<Instance *>::Element *E = geom->lights.front(); E; E = E->next()) {
+						InstanceLightData *light = static_cast<InstanceLightData *>(E->get()->base_data);
+						instance_pair_buffer[idx++] = light->instance;
+						if (idx == MAX_INSTANCE_PAIRS) {
+							break;
+						}
+					}
+
+					scene_render->geometry_instance_pair_light_instances(geom->geometry_instance, instance_pair_buffer, idx);
+					idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_LIGHTING_DIRTY);
+				}
+
+				if (geometry_instance_pair_mask & (1 << RS::INSTANCE_REFLECTION_PROBE) && (idata.flags & InstanceData::FLAG_GEOM_REFLECTION_DIRTY)) {
+					InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
+					uint32_t idx = 0;
+
+					for (Set<Instance *>::Element *E = geom->reflection_probes.front(); E; E = E->next()) {
+						InstanceReflectionProbeData *reflection_probe = static_cast<InstanceReflectionProbeData *>(E->get()->base_data);
+
+						instance_pair_buffer[idx++] = reflection_probe->instance;
+						if (idx == MAX_INSTANCE_PAIRS) {
+							break;
+						}
+					}
+
+					scene_render->geometry_instance_pair_reflection_probe_instances(geom->geometry_instance, instance_pair_buffer, idx);
+					idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_REFLECTION_DIRTY);
+				}
+
+				if (geometry_instance_pair_mask & (1 << RS::INSTANCE_DECAL) && (idata.flags & InstanceData::FLAG_GEOM_DECAL_DIRTY)) {
+					//InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
+					//todo for GLES3
+					idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_DECAL_DIRTY);
+					/*for (Set<Instance *>::Element *E = geom->dec.front(); E; E = E->next()) {
+					InstanceReflectionProbeData *reflection_probe = static_cast<InstanceReflectionProbeData *>(E->get()->base_data);
+
+					instance_pair_buffer[idx++] = reflection_probe->instance;
+					if (idx==MAX_INSTANCE_PAIRS) {
+						break;
+					}
+				}*/
+					//scene_render->geometry_instance_pair_decal_instances(geom->geometry_instance, light_instances, idx);
+				}
+
+				if (idata.flags & InstanceData::FLAG_GEOM_GI_PROBE_DIRTY) {
+					InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
+					uint32_t idx = 0;
+					for (Set<Instance *>::Element *E = geom->gi_probes.front(); E; E = E->next()) {
+						InstanceGIProbeData *gi_probe = static_cast<InstanceGIProbeData *>(E->get()->base_data);
+
+						instance_pair_buffer[idx++] = gi_probe->probe_instance;
+						if (idx == MAX_INSTANCE_PAIRS) {
+							break;
+						}
+					}
+
+					scene_render->geometry_instance_pair_gi_probe_instances(geom->geometry_instance, instance_pair_buffer, idx);
+					idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_GI_PROBE_DIRTY);
+				}
+
+				if ((idata.flags & InstanceData::FLAG_LIGHTMAP_CAPTURE) && idata.instance->last_frame_pass != frame_number && !idata.instance->lightmap_target_sh.is_empty() && !idata.instance->lightmap_sh.is_empty()) {
+					InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
+					Color *sh = idata.instance->lightmap_sh.ptrw();
+					const Color *target_sh = idata.instance->lightmap_target_sh.ptr();
+					for (uint32_t j = 0; j < 9; j++) {
+						sh[j] = sh[j].lerp(target_sh[j], MIN(1.0, lightmap_probe_update_speed));
+					}
+					scene_render->geometry_instance_set_lightmap_capture(geom->geometry_instance, sh);
+					idata.instance->last_frame_pass = frame_number;
+				}
+
+				if (keep) {
+					cull_result.geometry_instances.push_back(idata.instance_geometry);
+				}
+			}
+		}
+
+		for (uint32_t j = 0; j < cull_data.cull->shadow_count; j++) {
+			for (uint32_t k = 0; k < cull_data.cull->shadows[j].cascade_count; k++) {
+				if (cull_data.scenario->instance_aabbs[i].in_frustum(cull_data.cull->shadows[j].cascades[k].frustum)) {
+					InstanceData &idata = cull_data.scenario->instance_data[i];
+					uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK;
+
+					if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && idata.flags & InstanceData::FLAG_CAST_SHADOWS) {
+						cull_result.directional_shadows[j].cascade_geometry_instances[k].push_back(idata.instance_geometry);
+						mesh_visible = true;
+					}
+				}
+			}
+		}
+
+		for (uint32_t j = 0; j < cull_data.cull->sdfgi.region_count; j++) {
+			if (cull_data.scenario->instance_aabbs[i].in_aabb(cull_data.cull->sdfgi.region_aabb[j])) {
+				InstanceData &idata = cull_data.scenario->instance_data[i];
+				uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK;
+
+				if (base_type == RS::INSTANCE_LIGHT) {
+					InstanceLightData *instance_light = (InstanceLightData *)idata.instance->base_data;
+					if (instance_light->bake_mode == RS::LIGHT_BAKE_STATIC && cull_data.cull->sdfgi.region_cascade[j] <= instance_light->max_sdfgi_cascade) {
+						//only add the light once per cascade, several regions can share the same cascade
+						if (sdfgi_last_light_index != i || sdfgi_last_light_cascade != cull_data.cull->sdfgi.region_cascade[j]) {
+							sdfgi_last_light_index = i;
+							sdfgi_last_light_cascade = cull_data.cull->sdfgi.region_cascade[j];
+							cull_result.sdfgi_cascade_lights[sdfgi_last_light_cascade].push_back(instance_light->instance);
+						}
+					}
+				} else if ((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) {
+					if (idata.flags & InstanceData::FLAG_USES_BAKED_LIGHT) {
+						cull_result.sdfgi_region_geometry_instances[j].push_back(idata.instance_geometry);
+						mesh_visible = true;
+					}
+				}
+			}
+		}
+
+		if (mesh_visible && cull_data.scenario->instance_data[i].flags & InstanceData::FLAG_USES_MESH_INSTANCE) {
+			cull_result.mesh_instances.push_back(cull_data.scenario->instance_data[i].instance->mesh_instance);
+		}
+	}
+}
+
 void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows) {
 	// Note, in stereo rendering:
 	// - p_cam_transform will be a transform in the middle of our two eyes
@@ -2249,9 +2465,6 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 
 	Plane near_plane(p_cam_transform.origin, -p_cam_transform.basis.get_axis(2).normalized());
 
-	uint64_t frame_number = RSG::rasterizer->get_frame_number();
-	float lightmap_probe_update_speed = RSG::storage->lightmap_get_probe_capture_update_speed() * RSG::rasterizer->get_frame_delta_time();
-
 	/* STEP 2 - CULL */
 
 	cull.frustum = Frustum(planes);
@@ -2259,13 +2472,6 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 	Vector<RID> directional_lights;
 	// directional lights
 	{
-		//reset shadows
-		for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) {
-			for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) {
-				cull.shadows[i].cascades[j].cull_result.clear();
-			}
-		}
-
 		cull.shadow_count = 0;
 
 		Vector<Instance *> lights_with_shadow;
@@ -2302,18 +2508,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 	{ //sdfgi
 		cull.sdfgi.region_count = 0;
 
-		for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
-			cull.sdfgi.region_cull_result[i].clear();
-		}
-
-		for (int i = 0; i < SDFGI_MAX_CASCADES; i++) {
-			cull.sdfgi.cascade_lights[i].clear();
-		}
-
 		if (p_render_buffers.is_valid()) {
-			for (int i = 0; i < SDFGI_MAX_CASCADES; i++) {
-				cull.sdfgi.cascade_lights[i].clear();
-			}
 			cull.sdfgi.cascade_light_count = 0;
 
 			uint32_t prev_cascade = 0xFFFFFFFF;
@@ -2335,226 +2530,53 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 		}
 	}
 
-	{
-		//pre-clear results
-		geometry_instances_to_render.clear();
-		light_cull_result.clear();
-		lightmap_cull_result.clear();
-		reflection_probe_instance_cull_result.clear();
-		light_instance_cull_result.clear();
-		gi_probe_instance_cull_result.clear();
-		lightmap_cull_result.clear();
-		decal_instance_cull_result.clear();
-		mesh_instance_cull_result.clear();
-	}
+	frustum_cull_result.clear();
 
 	{
-		uint64_t cull_count = scenario->instance_data.size();
-		uint32_t sdfgi_last_light_index = 0xFFFFFFFF;
-		uint32_t sdfgi_last_light_cascade = 0xFFFFFFFF;
-
-		RID instance_pair_buffer[MAX_INSTANCE_PAIRS];
-
-		for (uint64_t i = 0; i < cull_count; i++) {
-			bool mesh_visible = false;
-
-			if (scenario->instance_aabbs[i].in_frustum(cull.frustum)) {
-				InstanceData &idata = scenario->instance_data[i];
-				uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK;
-
-				if ((p_visible_layers & idata.layer_mask) == 0) {
-					//failure
-				} else if (base_type == RS::INSTANCE_LIGHT) {
-					light_cull_result.push_back(idata.instance);
-					light_instance_cull_result.push_back(RID::from_uint64(idata.instance_data_rid));
-					if (p_shadow_atlas.is_valid() && RSG::storage->light_has_shadow(idata.base_rid)) {
-						scene_render->light_instance_mark_visible(RID::from_uint64(idata.instance_data_rid)); //mark it visible for shadow allocation later
-					}
-
-				} else if (base_type == RS::INSTANCE_REFLECTION_PROBE) {
-					if (render_reflection_probe != idata.instance) {
-						//avoid entering The Matrix
-
-						if ((idata.flags & InstanceData::FLAG_REFLECTION_PROBE_DIRTY) || scene_render->reflection_probe_instance_needs_redraw(RID::from_uint64(idata.instance_data_rid))) {
-							InstanceReflectionProbeData *reflection_probe = static_cast<InstanceReflectionProbeData *>(idata.instance->base_data);
-							cull.lock.lock();
-							if (!reflection_probe->update_list.in_list()) {
-								reflection_probe->render_step = 0;
-								reflection_probe_render_list.add_last(&reflection_probe->update_list);
-							}
-							cull.lock.unlock();
-
-							idata.flags &= ~uint32_t(InstanceData::FLAG_REFLECTION_PROBE_DIRTY);
-						}
-
-						if (scene_render->reflection_probe_instance_has_reflection(RID::from_uint64(idata.instance_data_rid))) {
-							reflection_probe_instance_cull_result.push_back(RID::from_uint64(idata.instance_data_rid));
-						}
-					}
-				} else if (base_type == RS::INSTANCE_DECAL) {
-					decal_instance_cull_result.push_back(RID::from_uint64(idata.instance_data_rid));
-
-				} else if (base_type == RS::INSTANCE_GI_PROBE) {
-					InstanceGIProbeData *gi_probe = static_cast<InstanceGIProbeData *>(idata.instance->base_data);
-					cull.lock.lock();
-					if (!gi_probe->update_element.in_list()) {
-						gi_probe_update_list.add(&gi_probe->update_element);
-					}
-					cull.lock.unlock();
-					gi_probe_instance_cull_result.push_back(RID::from_uint64(idata.instance_data_rid));
-
-				} else if (base_type == RS::INSTANCE_LIGHTMAP) {
-					lightmap_cull_result.push_back(RID::from_uint64(idata.instance_data_rid));
-				} else if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && !(idata.flags & InstanceData::FLAG_CAST_SHADOWS_ONLY)) {
-					bool keep = true;
-
-					if (idata.flags & InstanceData::FLAG_REDRAW_IF_VISIBLE) {
-						RenderingServerDefault::redraw_request();
-					}
-
-					if (base_type == RS::INSTANCE_MESH) {
-						mesh_visible = true;
-					} else if (base_type == RS::INSTANCE_PARTICLES) {
-						//particles visible? process them
-						if (RSG::storage->particles_is_inactive(idata.base_rid)) {
-							//but if nothing is going on, don't do it.
-							keep = false;
-						} else {
-							cull.lock.lock();
-							RSG::storage->particles_request_process(idata.base_rid);
-							cull.lock.unlock();
-							RSG::storage->particles_set_view_axis(idata.base_rid, -p_cam_transform.basis.get_axis(2).normalized());
-							//particles visible? request redraw
-							RenderingServerDefault::redraw_request();
-						}
-					}
-
-					if (geometry_instance_pair_mask & (1 << RS::INSTANCE_LIGHT) && (idata.flags & InstanceData::FLAG_GEOM_LIGHTING_DIRTY)) {
-						InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
-						uint32_t idx = 0;
-
-						for (Set<Instance *>::Element *E = geom->lights.front(); E; E = E->next()) {
-							InstanceLightData *light = static_cast<InstanceLightData *>(E->get()->base_data);
-							instance_pair_buffer[idx++] = light->instance;
-							if (idx == MAX_INSTANCE_PAIRS) {
-								break;
-							}
-						}
-
-						scene_render->geometry_instance_pair_light_instances(geom->geometry_instance, instance_pair_buffer, idx);
-						idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_LIGHTING_DIRTY);
-					}
-
-					if (geometry_instance_pair_mask & (1 << RS::INSTANCE_REFLECTION_PROBE) && (idata.flags & InstanceData::FLAG_GEOM_REFLECTION_DIRTY)) {
-						InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
-						uint32_t idx = 0;
-
-						for (Set<Instance *>::Element *E = geom->reflection_probes.front(); E; E = E->next()) {
-							InstanceReflectionProbeData *reflection_probe = static_cast<InstanceReflectionProbeData *>(E->get()->base_data);
-
-							instance_pair_buffer[idx++] = reflection_probe->instance;
-							if (idx == MAX_INSTANCE_PAIRS) {
-								break;
-							}
-						}
-
-						scene_render->geometry_instance_pair_reflection_probe_instances(geom->geometry_instance, instance_pair_buffer, idx);
-						idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_REFLECTION_DIRTY);
-					}
-
-					if (geometry_instance_pair_mask & (1 << RS::INSTANCE_DECAL) && (idata.flags & InstanceData::FLAG_GEOM_DECAL_DIRTY)) {
-						//InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
-						//todo for GLES3
-						idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_DECAL_DIRTY);
-						/*for (Set<Instance *>::Element *E = geom->dec.front(); E; E = E->next()) {
-							InstanceReflectionProbeData *reflection_probe = static_cast<InstanceReflectionProbeData *>(E->get()->base_data);
-
-							instance_pair_buffer[idx++] = reflection_probe->instance;
-							if (idx==MAX_INSTANCE_PAIRS) {
-								break;
-							}
-						}*/
-						//scene_render->geometry_instance_pair_decal_instances(geom->geometry_instance, light_instances, idx);
-					}
-
-					if (idata.flags & InstanceData::FLAG_GEOM_GI_PROBE_DIRTY) {
-						InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
-						uint32_t idx = 0;
-						for (Set<Instance *>::Element *E = geom->gi_probes.front(); E; E = E->next()) {
-							InstanceGIProbeData *gi_probe = static_cast<InstanceGIProbeData *>(E->get()->base_data);
-
-							instance_pair_buffer[idx++] = gi_probe->probe_instance;
-							if (idx == MAX_INSTANCE_PAIRS) {
-								break;
-							}
-						}
-
-						scene_render->geometry_instance_pair_gi_probe_instances(geom->geometry_instance, instance_pair_buffer, idx);
-						idata.flags &= ~uint32_t(InstanceData::FLAG_GEOM_GI_PROBE_DIRTY);
-					}
-
-					if ((idata.flags & InstanceData::FLAG_LIGHTMAP_CAPTURE) && idata.instance->last_frame_pass != frame_number && !idata.instance->lightmap_target_sh.is_empty() && !idata.instance->lightmap_sh.is_empty()) {
-						InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(idata.instance->base_data);
-						Color *sh = idata.instance->lightmap_sh.ptrw();
-						const Color *target_sh = idata.instance->lightmap_target_sh.ptr();
-						for (uint32_t j = 0; j < 9; j++) {
-							sh[j] = sh[j].lerp(target_sh[j], MIN(1.0, lightmap_probe_update_speed));
-						}
-						scene_render->geometry_instance_set_lightmap_capture(geom->geometry_instance, sh);
-						idata.instance->last_frame_pass = frame_number;
-					}
-
-					if (keep) {
-						geometry_instances_to_render.push_back(idata.instance_geometry);
-					}
-				}
+		uint64_t cull_from = 0;
+		uint64_t cull_to = scenario->instance_data.size();
+
+		FrustumCullData cull_data;
+
+		//prepare for eventual thread usage
+		cull_data.cull = &cull;
+		cull_data.scenario = scenario;
+		cull_data.shadow_atlas = p_shadow_atlas;
+		cull_data.cam_transform = p_cam_transform;
+		cull_data.visible_layers = p_visible_layers;
+		cull_data.render_reflection_probe = render_reflection_probe;
+//#define DEBUG_CULL_TIME
+#ifdef DEBUG_CULL_TIME
+		uint64_t time_from = OS::get_singleton()->get_ticks_usec();
+#endif
+		if (cull_to > thread_cull_threshold) {
+			//multiple threads
+			for (uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) {
+				frustum_cull_result_threads[i].clear();
 			}
 
-			for (uint32_t j = 0; j < cull.shadow_count; j++) {
-				for (uint32_t k = 0; k < cull.shadows[j].cascade_count; k++) {
-					if (scenario->instance_aabbs[i].in_frustum(cull.shadows[j].cascades[k].frustum)) {
-						InstanceData &idata = scenario->instance_data[i];
-						uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK;
+			RendererThreadPool::singleton->thread_work_pool.do_work(frustum_cull_result_threads.size(), this, &RendererSceneCull::_frustum_cull_threaded, &cull_data);
 
-						if (((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) && idata.flags & InstanceData::FLAG_CAST_SHADOWS) {
-							cull.shadows[j].cascades[k].cull_result.push_back(idata.instance_geometry);
-							mesh_visible = true;
-						}
-					}
-				}
+			for (uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) {
+				frustum_cull_result.append_from(frustum_cull_result_threads[i]);
 			}
 
-			for (uint32_t j = 0; j < cull.sdfgi.region_count; j++) {
-				if (scenario->instance_aabbs[i].in_aabb(cull.sdfgi.region_aabb[j])) {
-					InstanceData &idata = scenario->instance_data[i];
-					uint32_t base_type = idata.flags & InstanceData::FLAG_BASE_TYPE_MASK;
-
-					if (base_type == RS::INSTANCE_LIGHT) {
-						InstanceLightData *instance_light = (InstanceLightData *)idata.instance->base_data;
-						if (instance_light->bake_mode == RS::LIGHT_BAKE_STATIC && cull.sdfgi.region_cascade[j] <= instance_light->max_sdfgi_cascade) {
-							if (sdfgi_last_light_index != i || sdfgi_last_light_cascade != cull.sdfgi.region_cascade[j]) {
-								sdfgi_last_light_index = i;
-								sdfgi_last_light_cascade = cull.sdfgi.region_cascade[j];
-								cull.sdfgi.cascade_lights[sdfgi_last_light_cascade].push_back(instance_light->instance);
-							}
-						}
-					} else if ((1 << base_type) & RS::INSTANCE_GEOMETRY_MASK) {
-						if (idata.flags & InstanceData::FLAG_USES_BAKED_LIGHT) {
-							cull.sdfgi.region_cull_result[j].push_back(idata.instance_geometry);
-							mesh_visible = true;
-						}
-					}
-				}
-			}
-
-			if (mesh_visible && scenario->instance_data[i].flags & InstanceData::FLAG_USES_MESH_INSTANCE) {
-				mesh_instance_cull_result.push_back(scenario->instance_data[i].instance->mesh_instance);
-			}
+		} else {
+			//single threaded
+			_frustum_cull(cull_data, frustum_cull_result, cull_from, cull_to);
 		}
 
-		if (mesh_instance_cull_result.size()) {
-			for (uint64_t i = 0; i < mesh_instance_cull_result.size(); i++) {
-				RSG::storage->mesh_instance_check_for_update(mesh_instance_cull_result[i]);
+#ifdef DEBUG_CULL_TIME
+		static float time_avg = 0;
+		static uint32_t time_count = 0;
+		time_avg += double(OS::get_singleton()->get_ticks_usec() - time_from) / 1000.0;
+		time_count++;
+		print_line("time taken: " + rtos(time_avg / time_count));
+#endif
+
+		if (frustum_cull_result.mesh_instances.size()) {
+			for (uint64_t i = 0; i < frustum_cull_result.mesh_instances.size(); i++) {
+				RSG::storage->mesh_instance_check_for_update(frustum_cull_result.mesh_instances[i]);
 			}
 			RSG::storage->update_mesh_instances();
 		}
@@ -2567,7 +2589,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 			const Cull::Shadow::Cascade &c = cull.shadows[i].cascades[j];
 			//			print_line("shadow " + itos(i) + " cascade " + itos(j) + " elements: " + itos(c.cull_result.size()));
 			scene_render->light_instance_set_shadow_transform(cull.shadows[i].light_instance, c.projection, c.transform, c.zfar, c.split, j, c.shadow_texel_size, c.bias_scale, c.range_begin, c.uv_scale);
-			scene_render->render_shadow(cull.shadows[i].light_instance, p_shadow_atlas, j, c.cull_result, near_plane, p_cam_projection.get_lod_multiplier(), p_screen_lod_threshold);
+			scene_render->render_shadow(cull.shadows[i].light_instance, p_shadow_atlas, j, frustum_cull_result.directional_shadows[i].cascade_geometry_instances[j], near_plane, p_cam_projection.get_lod_multiplier(), p_screen_lod_threshold);
 		}
 	}
 
@@ -2577,19 +2599,19 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 		if (cull.sdfgi.region_count > 0) {
 			//update regions
 			for (uint32_t i = 0; i < cull.sdfgi.region_count; i++) {
-				scene_render->render_sdfgi(p_render_buffers, i, cull.sdfgi.region_cull_result[i]);
+				scene_render->render_sdfgi(p_render_buffers, i, frustum_cull_result.sdfgi_region_geometry_instances[i]);
 			}
 			//check if static lights were culled
 			bool static_lights_culled = false;
 			for (uint32_t i = 0; i < cull.sdfgi.cascade_light_count; i++) {
-				if (cull.sdfgi.cascade_lights[i].size()) {
+				if (frustum_cull_result.sdfgi_cascade_lights[i].size()) {
 					static_lights_culled = true;
 					break;
 				}
 			}
 
 			if (static_lights_culled) {
-				scene_render->render_sdfgi_static_lights(p_render_buffers, cull.sdfgi.cascade_light_count, cull.sdfgi.cascade_light_index, cull.sdfgi.cascade_lights);
+				scene_render->render_sdfgi_static_lights(p_render_buffers, cull.sdfgi.cascade_light_count, cull.sdfgi.cascade_light_index, frustum_cull_result.sdfgi_cascade_lights);
 			}
 		}
 
@@ -2618,8 +2640,8 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 
 		//SortArray<Instance*,_InstanceLightsort> sorter;
 		//sorter.sort(light_cull_result,light_cull_count);
-		for (uint32_t i = 0; i < (uint32_t)light_cull_result.size(); i++) {
-			Instance *ins = light_cull_result[i];
+		for (uint32_t i = 0; i < (uint32_t)frustum_cull_result.lights.size(); i++) {
+			Instance *ins = frustum_cull_result.lights[i];
 
 			if (!p_shadow_atlas.is_valid() || !RSG::storage->light_has_shadow(ins->base)) {
 				continue;
@@ -2715,7 +2737,7 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 
 	//append the directional lights to the lights culled
 	for (int i = 0; i < directional_lights.size(); i++) {
-		light_instance_cull_result.push_back(directional_lights[i]);
+		frustum_cull_result.light_instances.push_back(directional_lights[i]);
 	}
 }
 
@@ -2752,7 +2774,7 @@ void RendererSceneCull::_render_scene(RID p_render_buffers, const Transform p_ca
 	/* PROCESS GEOMETRY AND DRAW SCENE */
 
 	RENDER_TIMESTAMP("Render Scene ");
-	scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, geometry_instances_to_render, light_instance_cull_result, reflection_probe_instance_cull_result, gi_probe_instance_cull_result, decal_instance_cull_result, lightmap_cull_result, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold);
+	scene_render->render_scene(p_render_buffers, p_cam_transform, p_cam_projection, p_cam_orthogonal, frustum_cull_result.geometry_instances, frustum_cull_result.light_instances, frustum_cull_result.reflections, frustum_cull_result.gi_probes, frustum_cull_result.decals, frustum_cull_result.lightmaps, p_environment, camera_effects, p_shadow_atlas, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_lod_threshold);
 }
 
 void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas) {
@@ -3042,7 +3064,7 @@ void RendererSceneCull::render_probes() {
 			update_lights = true;
 		}
 
-		geometry_instances_to_render.clear();
+		frustum_cull_result.geometry_instances.clear();
 
 		RID instance_pair_buffer[MAX_INSTANCE_PAIRS];
 
@@ -3069,10 +3091,10 @@ void RendererSceneCull::render_probes() {
 				ins->scenario->instance_data[ins->array_index].flags &= ~uint32_t(InstanceData::FLAG_GEOM_GI_PROBE_DIRTY);
 			}
 
-			geometry_instances_to_render.push_back(geom->geometry_instance);
+			frustum_cull_result.geometry_instances.push_back(geom->geometry_instance);
 		}
 
-		scene_render->gi_probe_update(probe->probe_instance, update_lights, probe->light_instances, geometry_instances_to_render);
+		scene_render->gi_probe_update(probe->probe_instance, update_lights, probe->light_instances, frustum_cull_result.geometry_instances);
 
 		gi_probe_update_list.remove(gi_probe);
 
@@ -3087,7 +3109,7 @@ void RendererSceneCull::render_particle_colliders() {
 		if (hfpc->scenario && hfpc->base_type == RS::INSTANCE_PARTICLES_COLLISION && RSG::storage->particles_collision_is_heightfield(hfpc->base)) {
 			//update heightfield
 			instance_cull_result.clear();
-			geometry_instances_to_render.clear();
+			frustum_cull_result.geometry_instances.clear();
 
 			struct CullAABB {
 				PagedArray<Instance *> *result;
@@ -3109,10 +3131,10 @@ void RendererSceneCull::render_particle_colliders() {
 					continue;
 				}
 				InstanceGeometryData *geom = static_cast<InstanceGeometryData *>(instance->base_data);
-				geometry_instances_to_render.push_back(geom->geometry_instance);
+				frustum_cull_result.geometry_instances.push_back(geom->geometry_instance);
 			}
 
-			scene_render->render_particle_collider_heightfield(hfpc->base, hfpc->transform, geometry_instances_to_render);
+			scene_render->render_particle_collider_heightfield(hfpc->base, hfpc->transform, frustum_cull_result.geometry_instances);
 		}
 		heightfield_particle_colliders_update_list.erase(heightfield_particle_colliders_update_list.front());
 	}
@@ -3457,62 +3479,30 @@ RendererSceneCull::RendererSceneCull() {
 	singleton = this;
 
 	instance_cull_result.set_page_pool(&instance_cull_page_pool);
-	mesh_instance_cull_result.set_page_pool(&rid_cull_page_pool);
 	instance_shadow_cull_result.set_page_pool(&instance_cull_page_pool);
-	light_cull_result.set_page_pool(&instance_cull_page_pool);
 
-	geometry_instances_to_render.set_page_pool(&geometry_instance_cull_page_pool);
 	geometry_instances_to_shadow_render.set_page_pool(&geometry_instance_cull_page_pool);
 
-	lightmap_cull_result.set_page_pool(&rid_cull_page_pool);
-	reflection_probe_instance_cull_result.set_page_pool(&rid_cull_page_pool);
-	light_instance_cull_result.set_page_pool(&rid_cull_page_pool);
-	gi_probe_instance_cull_result.set_page_pool(&rid_cull_page_pool);
-	decal_instance_cull_result.set_page_pool(&rid_cull_page_pool);
-
-	for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) {
-		for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) {
-			cull.shadows[i].cascades[j].cull_result.set_page_pool(&geometry_instance_cull_page_pool);
-		}
-	}
-
-	for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
-		cull.sdfgi.region_cull_result[i].set_page_pool(&geometry_instance_cull_page_pool);
-	}
-
-	for (int i = 0; i < SDFGI_MAX_CASCADES; i++) {
-		cull.sdfgi.cascade_lights[i].set_page_pool(&rid_cull_page_pool);
+	frustum_cull_result.init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool);
+	frustum_cull_result_threads.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
+	for (uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) {
+		frustum_cull_result_threads[i].init(&rid_cull_page_pool, &geometry_instance_cull_page_pool, &instance_cull_page_pool);
 	}
 
 	indexer_update_iterations = GLOBAL_GET("rendering/spatial_indexer/update_iterations_per_frame");
+	thread_cull_threshold = GLOBAL_GET("rendering/spatial_indexer/threaded_cull_minimum_instances");
+	thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)RendererThreadPool::singleton->thread_work_pool.get_thread_count()); //make sure there is at least one thread per CPU
 }
 
 RendererSceneCull::~RendererSceneCull() {
 	instance_cull_result.reset();
-	mesh_instance_cull_result.reset();
 	instance_shadow_cull_result.reset();
-	light_cull_result.reset();
 
-	geometry_instances_to_render.reset();
 	geometry_instances_to_shadow_render.reset();
-	lightmap_cull_result.reset();
-
-	reflection_probe_instance_cull_result.reset();
-	light_instance_cull_result.reset();
-	gi_probe_instance_cull_result.reset();
-	decal_instance_cull_result.reset();
-
-	for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) {
-		for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) {
-			cull.shadows[i].cascades[j].cull_result.reset();
-		}
-	}
-
-	for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
-		cull.sdfgi.region_cull_result[i].reset();
-	}
 
-	for (int i = 0; i < SDFGI_MAX_CASCADES; i++) {
-		cull.sdfgi.cascade_lights[i].reset();
+	frustum_cull_result.reset(); // resets every array of the main result; replaces the individual per-array reset() calls removed above
+	for (uint32_t i = 0; i < frustum_cull_result_threads.size(); i++) { // one entry per worker thread, as sized in the constructor
+		frustum_cull_result_threads[i].reset(); // NOTE(review): presumably must happen before the page pools are destroyed — confirm against PagedArray
 	}
+	frustum_cull_result_threads.clear(); // drop the per-thread entries once they have been reset
 }

+ 131 - 22
servers/rendering/renderer_scene_cull.h

@@ -352,7 +352,7 @@ public:
 		bool receive_shadows : 8;
 		bool visible : 8;
 		bool baked_light : 2; //this flag is only to know if it actually did use baked light
-		bool dynamic_gi : 2; //this flag is only to know if it actually did use baked light
+		bool dynamic_gi : 2; //same above for dynamic objects
 		bool redraw_if_visible : 4;
 
 		Instance *lightmap;
@@ -688,15 +688,6 @@ public:
 		}
 	};
 
-	struct CullResult {
-		PagedArray<Instance *> *result;
-		_FORCE_INLINE_ bool operator()(void *p_data) {
-			Instance *p_instance = (Instance *)p_data;
-			result->push_back(p_instance);
-			return false;
-		}
-	};
-
 	Set<Instance *> heightfield_particle_colliders_update_list;
 
 	PagedArrayPool<Instance *> instance_cull_page_pool;
@@ -704,17 +695,127 @@ public:
 	PagedArrayPool<RID> rid_cull_page_pool;
 
 	PagedArray<Instance *> instance_cull_result;
-	PagedArray<RID> mesh_instance_cull_result;
-	PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances_to_render;
 	PagedArray<Instance *> instance_shadow_cull_result;
 	PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances_to_shadow_render;
-	PagedArray<Instance *> light_cull_result;
-	PagedArray<RID> lightmap_cull_result;
-	PagedArray<RID> reflection_probe_instance_cull_result;
-	PagedArray<RID> light_instance_cull_result;
 
-	PagedArray<RID> gi_probe_instance_cull_result;
-	PagedArray<RID> decal_instance_cull_result;
+	// Output of a frustum cull pass, grouped by what the renderer consumes.
+	// One instance (frustum_cull_result) holds the final result that is handed to
+	// scene_render->render_scene(); frustum_cull_result_threads holds one instance
+	// per worker thread, merged into the main one through append_from().
+	// Every array must be given a page pool through init() before use.
+	struct FrustumCullResult {
+		PagedArray<RendererSceneRender::GeometryInstance *> geometry_instances;
+		PagedArray<Instance *> lights;
+		PagedArray<RID> light_instances;
+		PagedArray<RID> lightmaps;
+		PagedArray<RID> reflections;
+		PagedArray<RID> decals;
+		PagedArray<RID> gi_probes;
+		PagedArray<RID> mesh_instances;
+
+		// Geometry per directional light, per shadow cascade.
+		struct DirectionalShadow {
+			PagedArray<RendererSceneRender::GeometryInstance *> cascade_geometry_instances[RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES];
+		} directional_shadows[RendererSceneRender::MAX_DIRECTIONAL_LIGHTS];
+
+		// Geometry per SDFGI region and lights per SDFGI cascade.
+		PagedArray<RendererSceneRender::GeometryInstance *> sdfgi_region_geometry_instances[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE];
+		PagedArray<RID> sdfgi_cascade_lights[SDFGI_MAX_CASCADES];
+
+		// Calls clear() on every array owned by this result.
+		void clear() {
+			geometry_instances.clear();
+			lights.clear();
+			light_instances.clear();
+			lightmaps.clear();
+			reflections.clear();
+			decals.clear();
+			gi_probes.clear();
+			mesh_instances.clear();
+			for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) {
+				for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) {
+					directional_shadows[i].cascade_geometry_instances[j].clear();
+				}
+			}
+
+			for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
+				sdfgi_region_geometry_instances[i].clear();
+			}
+
+			for (int i = 0; i < SDFGI_MAX_CASCADES; i++) {
+				sdfgi_cascade_lights[i].clear();
+			}
+		}
+
+		// Calls reset() on every array owned by this result.
+		// NOTE(review): presumably releases the pages back to their pools (unlike
+		// clear()) — confirm against PagedArray.
+		void reset() {
+			geometry_instances.reset();
+			lights.reset();
+			light_instances.reset();
+			lightmaps.reset();
+			reflections.reset();
+			decals.reset();
+			gi_probes.reset();
+			mesh_instances.reset();
+			for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) {
+				for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) {
+					directional_shadows[i].cascade_geometry_instances[j].reset();
+				}
+			}
+
+			for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
+				sdfgi_region_geometry_instances[i].reset();
+			}
+
+			for (int i = 0; i < SDFGI_MAX_CASCADES; i++) {
+				sdfgi_cascade_lights[i].reset();
+			}
+		}
+
+		// Merges every array of p_cull_result into the matching array of this result
+		// using merge_unordered().
+		// NOTE(review): element order is presumably not preserved and p_cull_result
+		// may be modified by the merge (it is taken by non-const reference) — confirm
+		// against PagedArray::merge_unordered.
+		void append_from(FrustumCullResult &p_cull_result) {
+			geometry_instances.merge_unordered(p_cull_result.geometry_instances);
+			lights.merge_unordered(p_cull_result.lights);
+			light_instances.merge_unordered(p_cull_result.light_instances);
+			lightmaps.merge_unordered(p_cull_result.lightmaps);
+			reflections.merge_unordered(p_cull_result.reflections);
+			decals.merge_unordered(p_cull_result.decals);
+			gi_probes.merge_unordered(p_cull_result.gi_probes);
+			mesh_instances.merge_unordered(p_cull_result.mesh_instances);
+
+			for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) {
+				for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) {
+					directional_shadows[i].cascade_geometry_instances[j].merge_unordered(p_cull_result.directional_shadows[i].cascade_geometry_instances[j]);
+				}
+			}
+
+			for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
+				sdfgi_region_geometry_instances[i].merge_unordered(p_cull_result.sdfgi_region_geometry_instances[i]);
+			}
+
+			for (int i = 0; i < SDFGI_MAX_CASCADES; i++) {
+				sdfgi_cascade_lights[i].merge_unordered(p_cull_result.sdfgi_cascade_lights[i]);
+			}
+		}
+
+		// Assigns a page pool to every array, matching each array's element type:
+		// p_geometry_instance_pool for GeometryInstance* arrays, p_instance_pool for
+		// Instance* arrays and p_rid_pool for RID arrays.
+		// Arrays are handled in declaration order so a missing one is easy to spot.
+		void init(PagedArrayPool<RID> *p_rid_pool, PagedArrayPool<RendererSceneRender::GeometryInstance *> *p_geometry_instance_pool, PagedArrayPool<Instance *> *p_instance_pool) {
+			geometry_instances.set_page_pool(p_geometry_instance_pool);
+			lights.set_page_pool(p_instance_pool);
+			light_instances.set_page_pool(p_rid_pool);
+			lightmaps.set_page_pool(p_rid_pool);
+			reflections.set_page_pool(p_rid_pool);
+			decals.set_page_pool(p_rid_pool);
+			// gi_probes was previously left without a pool even though it is cleared,
+			// merged and passed to render_scene() like the other RID arrays.
+			gi_probes.set_page_pool(p_rid_pool);
+			mesh_instances.set_page_pool(p_rid_pool);
+			for (int i = 0; i < RendererSceneRender::MAX_DIRECTIONAL_LIGHTS; i++) {
+				for (int j = 0; j < RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES; j++) {
+					directional_shadows[i].cascade_geometry_instances[j].set_page_pool(p_geometry_instance_pool);
+				}
+			}
+
+			for (int i = 0; i < SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE; i++) {
+				sdfgi_region_geometry_instances[i].set_page_pool(p_geometry_instance_pool);
+			}
+
+			for (int i = 0; i < SDFGI_MAX_CASCADES; i++) {
+				sdfgi_cascade_lights[i].set_page_pool(p_rid_pool);
+			}
+		}
+	};
+
+	FrustumCullResult frustum_cull_result;
+	LocalVector<FrustumCullResult> frustum_cull_result_threads;
+
+	uint32_t thread_cull_threshold = 200;
 
 	RID_PtrOwner<Instance> instance_owner;
 
@@ -786,8 +887,6 @@ public:
 				real_t range_begin;
 				Vector2 uv_scale;
 
-				PagedArray<RendererSceneRender::GeometryInstance *> cull_result;
-
 			} cascades[RendererSceneRender::MAX_DIRECTIONAL_LIGHT_CASCADES]; //max 4 cascades
 			uint32_t cascade_count;
 
@@ -797,12 +896,10 @@ public:
 
 		struct SDFGI {
 			//have arrays here because SDFGI functions expects this, plus regions can have areas
-			PagedArray<RendererSceneRender::GeometryInstance *> region_cull_result[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE];
 			AABB region_aabb[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; //max 3 regions per cascade
 			uint32_t region_cascade[SDFGI_MAX_CASCADES * SDFGI_MAX_REGIONS_PER_CASCADE]; //max 3 regions per cascade
 			uint32_t region_count = 0;
 
-			PagedArray<RID> cascade_lights[SDFGI_MAX_CASCADES];
 			uint32_t cascade_light_index[SDFGI_MAX_CASCADES];
 			uint32_t cascade_light_count = 0;
 
@@ -813,6 +910,18 @@ public:
 		Frustum frustum;
 	} cull;
 
+	struct FrustumCullData { // input bundle taken by _frustum_cull() and _frustum_cull_threaded(), declared right below
+		Cull *cull; // NOTE(review): presumably points at the `cull` member declared above — confirm at the call site
+		Scenario *scenario; // no default initializer: indeterminate until the caller assigns it
+		RID shadow_atlas; // NOTE(review): name mirrors p_shadow_atlas of _prepare_scene(); presumably copied from it — confirm
+		Transform cam_transform; // NOTE(review): name mirrors p_cam_transform of _prepare_scene() — confirm
+		uint32_t visible_layers; // NOTE(review): name mirrors p_visible_layers of _prepare_scene(); no default initializer
+		Instance *render_reflection_probe; // no default initializer; NOTE(review): presumably null when not rendering a reflection probe — confirm
+	};
+
+	void _frustum_cull_threaded(uint32_t p_thread, FrustumCullData *cull_data);
+	void _frustum_cull(FrustumCullData &cull_data, FrustumCullResult &cull_result, uint64_t p_from, uint64_t p_to);
+
 	bool _render_reflection_probe_step(Instance *p_instance, int p_step);
 	void _prepare_scene(const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, bool p_cam_vaspect, RID p_render_buffers, RID p_environment, uint32_t p_visible_layers, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, float p_screen_lod_threshold, bool p_using_shadows = true);
 	void _render_scene(RID p_render_buffers, const Transform p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, RID p_environment, RID p_force_camera_effects, RID p_scenario, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_lod_threshold);

+ 1 - 1
servers/rendering/renderer_storage.h

@@ -113,7 +113,7 @@ public:
 		~DependencyTracker() { clear(); }
 
 	private:
-		friend class Dependency;
+		friend struct Dependency;
 		uint32_t instance_version = 0;
 		Set<Dependency *> dependencies;
 	};

+ 42 - 0
servers/rendering/renderer_thread_pool.cpp

@@ -0,0 +1,42 @@
+/*************************************************************************/
+/*  renderer_thread_pool.cpp                                             */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "renderer_thread_pool.h"
+
+RendererThreadPool *RendererThreadPool::singleton = nullptr;
+
+// Publishes this instance through the static `singleton` pointer, then
+// initializes the work pool with ThreadWorkPool::init()'s default arguments.
+RendererThreadPool::RendererThreadPool() {
+	RendererThreadPool::singleton = this;
+	this->thread_work_pool.init();
+}
+
+// Finishes the work pool and unpublishes this instance.
+RendererThreadPool::~RendererThreadPool() {
+	thread_work_pool.finish();
+
+	// Do not leave the global pointer dangling once this object is gone.
+	// Only clear it if it still refers to this instance.
+	if (singleton == this) {
+		singleton = nullptr;
+	}
+}

+ 45 - 0
servers/rendering/renderer_thread_pool.h

@@ -0,0 +1,45 @@
+/*************************************************************************/
+/*  renderer_thread_pool.h                                               */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef RENDERERTHREADPOOL_H
+#define RENDERERTHREADPOOL_H
+
+#include "core/templates/thread_work_pool.h"
+
+// Wraps the ThreadWorkPool used by the rendering code and exposes it through
+// a globally reachable instance pointer.
+class RendererThreadPool {
+public:
+	// Points at the constructed instance; assigned in the constructor.
+	static RendererThreadPool *singleton;
+
+	// Worker pool: init() is called in the constructor, finish() in the destructor.
+	ThreadWorkPool thread_work_pool;
+
+	RendererThreadPool();
+	~RendererThreadPool();
+};
+
+#endif // RENDERERTHREADPOOL_H

+ 7 - 0
servers/rendering_server.cpp

@@ -2253,6 +2253,8 @@ void RenderingServer::set_render_loop_enabled(bool p_enabled) {
 
 RenderingServer::RenderingServer() {
 	//ERR_FAIL_COND(singleton);
+
+	thread_pool = memnew(RendererThreadPool);
 	singleton = this;
 
 	GLOBAL_DEF_RST("rendering/vram_compression/import_bptc", false);
@@ -2383,8 +2385,13 @@ RenderingServer::RenderingServer() {
 
 	GLOBAL_DEF("rendering/spatial_indexer/update_iterations_per_frame", 10);
 	ProjectSettings::get_singleton()->set_custom_property_info("rendering/spatial_indexer/update_iterations_per_frame", PropertyInfo(Variant::INT, "rendering/spatial_indexer/update_iterations_per_frame", PROPERTY_HINT_RANGE, "0,1024,1"));
+	GLOBAL_DEF("rendering/spatial_indexer/threaded_cull_minimum_instances", 1000);
+	ProjectSettings::get_singleton()->set_custom_property_info("rendering/spatial_indexer/threaded_cull_minimum_instances", PropertyInfo(Variant::INT, "rendering/spatial_indexer/threaded_cull_minimum_instances", PROPERTY_HINT_RANGE, "32,65536,1"));
+	GLOBAL_DEF("rendering/forward_renderer/threaded_render_minimum_instances", 500);
+	ProjectSettings::get_singleton()->set_custom_property_info("rendering/forward_renderer/threaded_render_minimum_instances", PropertyInfo(Variant::INT, "rendering/forward_renderer/threaded_render_minimum_instances", PROPERTY_HINT_RANGE, "32,65536,1"));
 }
 
 RenderingServer::~RenderingServer() {
+	memdelete(thread_pool); // frees the RendererThreadPool allocated with memnew() in the constructor
 	singleton = nullptr;
 }

+ 3 - 0
servers/rendering_server.h

@@ -39,6 +39,7 @@
 #include "core/variant/typed_array.h"
 #include "core/variant/variant.h"
 #include "servers/display_server.h"
+#include "servers/rendering/renderer_thread_pool.h"
 #include "servers/rendering/rendering_device.h"
 #include "servers/rendering/shader_language.h"
 
@@ -52,6 +53,8 @@ class RenderingServer : public Object {
 
 	Array _get_array_from_surface(uint32_t p_format, Vector<uint8_t> p_vertex_data, Vector<uint8_t> p_attrib_data, Vector<uint8_t> p_skin_data, int p_vertex_len, Vector<uint8_t> p_index_data, int p_index_len) const;
 
+	RendererThreadPool *thread_pool = nullptr;
+
 protected:
 	RID _make_test_cube();
 	void _free_internal_rids();