Kaynağa Gözat

Optimize PointLight2D shadow rendering by reducing draw calls and RD state changes.

This dramatically reduces the CPU time spent on rendering shadows for PointLight2Ds.
clayjohn 10 ay önce
ebeveyn
işleme
7c61252dd7

+ 1 - 1
drivers/gles3/rasterizer_canvas_gles3.cpp

@@ -1636,7 +1636,7 @@ void RasterizerCanvasGLES3::light_set_use_shadow(RID p_rid, bool p_enable) {
 	cl->shadow.enabled = p_enable;
 	cl->shadow.enabled = p_enable;
 }
 }
 
 
-void RasterizerCanvasGLES3::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) {
+void RasterizerCanvasGLES3::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) {
 	GLES3::Config *config = GLES3::Config::get_singleton();
 	GLES3::Config *config = GLES3::Config::get_singleton();
 
 
 	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
 	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);

+ 1 - 1
drivers/gles3/rasterizer_canvas_gles3.h

@@ -343,7 +343,7 @@ public:
 	RID light_create() override;
 	RID light_create() override;
 	void light_set_texture(RID p_rid, RID p_texture) override;
 	void light_set_texture(RID p_rid, RID p_texture) override;
 	void light_set_use_shadow(RID p_rid, bool p_enable) override;
 	void light_set_use_shadow(RID p_rid, bool p_enable) override;
-	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) override;
+	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) override;
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override;
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override;
 
 
 	void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override;
 	void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override;

+ 1 - 1
servers/rendering/dummy/rasterizer_canvas_dummy.h

@@ -43,7 +43,7 @@ public:
 	RID light_create() override { return RID(); }
 	RID light_create() override { return RID(); }
 	void light_set_texture(RID p_rid, RID p_texture) override {}
 	void light_set_texture(RID p_rid, RID p_texture) override {}
 	void light_set_use_shadow(RID p_rid, bool p_enable) override {}
 	void light_set_use_shadow(RID p_rid, bool p_enable) override {}
-	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) override {}
+	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) override {}
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override {}
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override {}
 
 
 	void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override {}
 	void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override {}

+ 1 - 1
servers/rendering/renderer_canvas_render.h

@@ -531,7 +531,7 @@ public:
 	virtual RID light_create() = 0;
 	virtual RID light_create() = 0;
 	virtual void light_set_texture(RID p_rid, RID p_texture) = 0;
 	virtual void light_set_texture(RID p_rid, RID p_texture) = 0;
 	virtual void light_set_use_shadow(RID p_rid, bool p_enable) = 0;
 	virtual void light_set_use_shadow(RID p_rid, bool p_enable) = 0;
-	virtual void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) = 0;
+	virtual void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) = 0;
 	virtual void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) = 0;
 	virtual void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) = 0;
 
 
 	virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) = 0;
 	virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) = 0;

+ 109 - 55
servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp

@@ -985,7 +985,34 @@ void RendererCanvasRenderRD::_update_shadow_atlas() {
 	}
 	}
 }
 }
 
 
-void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) {
+// Makes sure the shared occluder-transform storage buffer exists and can hold at
+// least `p_size` bytes, (re)creating the buffer and the uniform set that exposes it
+// to the positional shadow shader at set 0, binding 0 when it has to grow.
+//
+// `p_size` is the number of bytes the caller is about to upload.
+void RendererCanvasRenderRD::_update_occluder_buffer(uint32_t p_size) {
+	// Never allocate less than one mat2x4 (8 floats), so a call made with no
+	// occluders (`p_size == 0`) does not request a zero-sized storage buffer.
+	const uint32_t min_size = 8 * sizeof(float);
+	const uint32_t required_size = p_size > min_size ? p_size : min_size;
+
+	const bool has_buffer = state.shadow_occluder_buffer.is_valid();
+
+	// `state.shadow_occluder_buffer_size` is declared without an initializer, so it
+	// is only meaningful once a buffer has actually been created by this function.
+	if (has_buffer && required_size <= state.shadow_occluder_buffer_size) {
+		return;
+	}
+
+	if (has_buffer) {
+		// NOTE(review): the uniform set built on the old buffer is not freed explicitly,
+		// as before; confirm RD releases dependent uniform sets along with the buffer.
+		RD::get_singleton()->free(state.shadow_occluder_buffer);
+	}
+
+	// Grow in power-of-two steps so small increases do not reallocate every time.
+	state.shadow_occluder_buffer_size = next_power_of_2(required_size);
+	state.shadow_occluder_buffer = RD::get_singleton()->storage_buffer_create(state.shadow_occluder_buffer_size);
+
+	Vector<RD::Uniform> uniforms;
+
+	{
+		RD::Uniform u;
+		u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+		u.binding = 0;
+		u.append_id(state.shadow_occluder_buffer);
+		uniforms.push_back(u);
+	}
+	state.shadow_ocluder_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_POSITIONAL_SHADOW), 0);
+}
+
+void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) {
 	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
 	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
 	ERR_FAIL_COND(!cl->shadow.enabled);
 	ERR_FAIL_COND(!cl->shadow.enabled);
 
 
@@ -996,75 +1023,97 @@ void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index,
 	Vector<Color> cc;
 	Vector<Color> cc;
 	cc.push_back(Color(p_far, p_far, p_far, 1.0));
 	cc.push_back(Color(p_far, p_far, p_far, 1.0));
 
 
-	Projection projection;
-	{
-		real_t fov = 90;
-		real_t nearp = p_near;
-		real_t farp = p_far;
-		real_t aspect = 1.0;
+	// First, do a culling pass and record what occluders need to be drawn for this light.
+	static thread_local LocalVector<OccluderPolygon *> occluders;
+	static thread_local LocalVector<uint32_t> occluder_indices;
+	occluders.clear();
+	occluder_indices.clear();
+
+	uint32_t occluder_count = 0;
+
+	LightOccluderInstance *instance = p_occluders;
+	while (instance) {
+		OccluderPolygon *co = occluder_polygon_owner.get_or_null(instance->occluder);
+
+		if (!co || co->index_array.is_null()) {
+			instance = instance->next;
+			continue;
+		}
+
+		occluder_count++;
 
 
-		real_t ymax = nearp * Math::tan(Math::deg_to_rad(fov * 0.5));
-		real_t ymin = -ymax;
-		real_t xmin = ymin * aspect;
-		real_t xmax = ymax * aspect;
+		if (!(p_light_mask & instance->light_mask) || !p_light_rect.intersects(instance->aabb_cache)) {
+			instance = instance->next;
+			continue;
+		}
+
+		occluders.push_back(co);
+		occluder_indices.push_back(occluder_count - 1);
 
 
-		projection.set_frustum(xmin, xmax, ymin, ymax, nearp, farp);
+		instance = instance->next;
 	}
 	}
 
 
-	// Precomputed:
-	// Vector3 cam_target = Basis::from_euler(Vector3(0, 0, Math_TAU * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0));
-	// projection = projection * Projection(Transform3D().looking_at(cam_targets[i], Vector3(0, 0, -1)).affine_inverse());
-	const Projection projections[4] = {
-		projection * Projection(Vector4(0, 0, -1, 0), Vector4(1, 0, 0, 0), Vector4(0, -1, 0, 0), Vector4(0, 0, 0, 1)),
+	// Then, upload all the occluder transforms to a shared buffer.
+	// We only do this for the first light so we can avoid uploading the same
+	// Transforms over and over again.
+	if (p_shadow_index == 0) {
+		static thread_local LocalVector<float> transforms;
+		transforms.clear();
+		transforms.resize(occluder_count * 8);
 
 
-		projection * Projection(Vector4(-1, 0, 0, 0), Vector4(0, 0, -1, 0), Vector4(0, -1, 0, 0), Vector4(0, 0, 0, 1)),
+		instance = p_occluders;
+		uint32_t index = 0;
+		while (instance) {
+			_update_transform_2d_to_mat2x4(instance->xform_cache, &transforms[index * 8]);
+			index++;
+			instance = instance->next;
+		}
 
 
-		projection * Projection(Vector4(0, 0, 1, 0), Vector4(-1, 0, 0, 0), Vector4(0, -1, 0, 0), Vector4(0, 0, 0, 1)),
+		_update_occluder_buffer(occluder_count * 8 * sizeof(float));
+		RD::get_singleton()->buffer_update(state.shadow_occluder_buffer, 0, transforms.size() * sizeof(float), transforms.ptr());
+	}
 
 
-		projection * Projection(Vector4(1, 0, 0, 0), Vector4(0, 0, 1, 0), Vector4(0, -1, 0, 0), Vector4(0, 0, 0, 1))
+	Rect2i rect(0, p_shadow_index * 2, state.shadow_texture_size, 2);
+	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::DRAW_CLEAR_ALL, cc, 1.0f, 0, rect);
 
 
-	};
+	RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[SHADOW_RENDER_MODE_POSITIONAL_SHADOW]);
+	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, state.shadow_ocluder_uniform_set, 0);
 
 
 	for (int i = 0; i < 4; i++) {
 	for (int i = 0; i < 4; i++) {
-		Rect2i rect((state.shadow_texture_size / 4) * i, p_shadow_index * 2, (state.shadow_texture_size / 4), 2);
-		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::DRAW_CLEAR_ALL, cc, 1.0f, 0, rect);
+		Rect2i sub_rect((state.shadow_texture_size / 4) * i, p_shadow_index * 2, (state.shadow_texture_size / 4), 2);
+		RD::get_singleton()->draw_list_set_viewport(draw_list, sub_rect);
 
 
-		ShadowRenderPushConstant push_constant;
-		for (int y = 0; y < 4; y++) {
-			for (int x = 0; x < 4; x++) {
-				push_constant.projection[y * 4 + x] = projections[i].columns[y][x];
-			}
-		}
 		static const Vector2 directions[4] = { Vector2(1, 0), Vector2(0, 1), Vector2(-1, 0), Vector2(0, -1) };
 		static const Vector2 directions[4] = { Vector2(1, 0), Vector2(0, 1), Vector2(-1, 0), Vector2(0, -1) };
+		static const Vector4 rotations[4] = { Vector4(0, -1, 1, 0), Vector4(-1, 0, 0, -1), Vector4(0, 1, -1, 0), Vector4(1, 0, 0, 1) };
+
+		PositionalShadowRenderPushConstant push_constant;
+		_update_transform_2d_to_mat2x4(p_light_xform, push_constant.modelview);
 		push_constant.direction[0] = directions[i].x;
 		push_constant.direction[0] = directions[i].x;
 		push_constant.direction[1] = directions[i].y;
 		push_constant.direction[1] = directions[i].y;
+		push_constant.rotation[0] = rotations[i].x;
+		push_constant.rotation[1] = rotations[i].y;
+		push_constant.rotation[2] = rotations[i].z;
+		push_constant.rotation[3] = rotations[i].w;
 		push_constant.z_far = p_far;
 		push_constant.z_far = p_far;
-		push_constant.pad = 0;
+		push_constant.z_near = p_near;
 
 
-		LightOccluderInstance *instance = p_occluders;
-
-		while (instance) {
-			OccluderPolygon *co = occluder_polygon_owner.get_or_null(instance->occluder);
-
-			if (!co || co->index_array.is_null() || !(p_light_mask & instance->light_mask)) {
-				instance = instance->next;
-				continue;
-			}
+		for (uint32_t j = 0; j < occluders.size(); j++) {
+			OccluderPolygon *co = occluders[j];
 
 
-			_update_transform_2d_to_mat2x4(p_light_xform * instance->xform_cache, push_constant.modelview);
+			push_constant.pad = occluder_indices[j];
+			push_constant.cull_mode = uint32_t(co->cull_mode);
 
 
-			RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[co->cull_mode]);
+			// The slowest part about this whole function is that we have to draw the occluders one by one, 4 times.
+			// We can optimize this so that all occluders draw at once if we store vertices and indices in a giant
+			// SSBO and just save an index into that SSBO for each occluder.
 			RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array);
 			RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array);
 			RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array);
 			RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array);
-			RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant));
+			RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(PositionalShadowRenderPushConstant));
 
 
 			RD::get_singleton()->draw_list_draw(draw_list, true);
 			RD::get_singleton()->draw_list_draw(draw_list, true);
-
-			instance = instance->next;
 		}
 		}
-
-		RD::get_singleton()->draw_list_end();
 	}
 	}
+	RD::get_singleton()->draw_list_end();
 }
 }
 
 
 void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) {
 void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) {
@@ -1099,6 +1148,7 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh
 
 
 	Rect2i rect(0, p_shadow_index * 2, state.shadow_texture_size, 2);
 	Rect2i rect(0, p_shadow_index * 2, state.shadow_texture_size, 2);
 	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::DRAW_CLEAR_ALL, cc, 1.0f, 0, rect);
 	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::DRAW_CLEAR_ALL, cc, 1.0f, 0, rect);
+	RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[SHADOW_RENDER_MODE_DIRECTIONAL_SHADOW]);
 
 
 	Projection projection;
 	Projection projection;
 	projection.set_orthogonal(-half_size, half_size, -0.5, 0.5, 0.0, distance);
 	projection.set_orthogonal(-half_size, half_size, -0.5, 0.5, 0.0, distance);
@@ -1114,7 +1164,6 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh
 	push_constant.direction[0] = 0.0;
 	push_constant.direction[0] = 0.0;
 	push_constant.direction[1] = 1.0;
 	push_constant.direction[1] = 1.0;
 	push_constant.z_far = distance;
 	push_constant.z_far = distance;
-	push_constant.pad = 0;
 
 
 	LightOccluderInstance *instance = p_occluders;
 	LightOccluderInstance *instance = p_occluders;
 
 
@@ -1127,8 +1176,8 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh
 		}
 		}
 
 
 		_update_transform_2d_to_mat2x4(to_light_xform * instance->xform_cache, push_constant.modelview);
 		_update_transform_2d_to_mat2x4(to_light_xform * instance->xform_cache, push_constant.modelview);
+		push_constant.cull_mode = uint32_t(co->cull_mode);
 
 
-		RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[co->cull_mode]);
 		RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array);
 		RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array);
 		RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array);
 		RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array);
 		RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant));
 		RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant));
@@ -1182,7 +1231,7 @@ void RendererCanvasRenderRD::render_sdf(RID p_render_target, LightOccluderInstan
 	push_constant.direction[0] = 0.0;
 	push_constant.direction[0] = 0.0;
 	push_constant.direction[1] = 0.0;
 	push_constant.direction[1] = 0.0;
 	push_constant.z_far = 0;
 	push_constant.z_far = 0;
-	push_constant.pad = 0;
+	push_constant.cull_mode = 0;
 
 
 	LightOccluderInstance *instance = p_occluders;
 	LightOccluderInstance *instance = p_occluders;
 
 
@@ -1791,8 +1840,9 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 
 
 	{ //shadow rendering
 	{ //shadow rendering
 		Vector<String> versions;
 		Vector<String> versions;
-		versions.push_back("\n#define MODE_SHADOW\n"); //shadow
-		versions.push_back("\n#define MODE_SDF\n"); //sdf
+		versions.push_back("\n#define MODE_SHADOW\n"); // Shadow.
+		versions.push_back("\n#define MODE_SHADOW\n#define POSITIONAL_SHADOW\n"); // Positional shadow.
+		versions.push_back("\n#define MODE_SDF\n"); // SDF.
 		shadow_render.shader.initialize(versions);
 		shadow_render.shader.initialize(versions);
 
 
 		{
 		{
@@ -1843,14 +1893,13 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 
 
 		shadow_render.shader_version = shadow_render.shader.version_create();
 		shadow_render.shader_version = shadow_render.shader.version_create();
 
 
-		for (int i = 0; i < 3; i++) {
+		for (int i = 0; i < 2; i++) {
 			RD::PipelineRasterizationState rs;
 			RD::PipelineRasterizationState rs;
-			rs.cull_mode = i == 0 ? RD::POLYGON_CULL_DISABLED : (i == 1 ? RD::POLYGON_CULL_FRONT : RD::POLYGON_CULL_BACK);
 			RD::PipelineDepthStencilState ds;
 			RD::PipelineDepthStencilState ds;
 			ds.enable_depth_write = true;
 			ds.enable_depth_write = true;
 			ds.enable_depth_test = true;
 			ds.enable_depth_test = true;
 			ds.depth_compare_operator = RD::COMPARE_OP_LESS;
 			ds.depth_compare_operator = RD::COMPARE_OP_LESS;
-			shadow_render.render_pipelines[i] = RD::get_singleton()->render_pipeline_create(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SHADOW), shadow_render.framebuffer_format, shadow_render.vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0);
+			shadow_render.render_pipelines[i] = RD::get_singleton()->render_pipeline_create(shadow_render.shader.version_get_shader(shadow_render.shader_version, ShadowRenderMode(i)), shadow_render.framebuffer_format, shadow_render.vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0);
 		}
 		}
 
 
 		for (int i = 0; i < 2; i++) {
 		for (int i = 0; i < 2; i++) {
@@ -1858,7 +1907,8 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 		}
 		}
 
 
 		// Unload shader modules to save memory.
 		// Unload shader modules to save memory.
-		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SHADOW));
+		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_DIRECTIONAL_SHADOW));
+		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_POSITIONAL_SHADOW));
 		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SDF));
 		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SDF));
 	}
 	}
 
 
@@ -3251,6 +3301,10 @@ RendererCanvasRenderRD::~RendererCanvasRenderRD() {
 	}
 	}
 	RD::get_singleton()->free(state.shadow_texture);
 	RD::get_singleton()->free(state.shadow_texture);
 
 
+	if (state.shadow_occluder_buffer.is_valid()) {
+		RD::get_singleton()->free(state.shadow_occluder_buffer);
+	}
+
 	memdelete_arr(state.instance_data_array);
 	memdelete_arr(state.instance_data_array);
 	for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
 	for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
 		for (uint32_t j = 0; j < state.canvas_instance_data_buffers[i].instance_buffers.size(); j++) {
 		for (uint32_t j = 0; j < state.canvas_instance_data_buffers[i].instance_buffers.size(); j++) {

+ 21 - 4
servers/rendering/renderer_rd/renderer_canvas_render_rd.h

@@ -268,12 +268,23 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 
 
 	RID_Owner<CanvasLight> canvas_light_owner;
 	RID_Owner<CanvasLight> canvas_light_owner;
 
 
+	struct PositionalShadowRenderPushConstant { // Push constant for the POSITIONAL_SHADOW shader variant; field order mirrors its `Constants` block.
+		float modelview[8]; // Light transform as a mat2x4 (filled from `p_light_xform` in light_update_shadow()).
+		float rotation[4]; // Per-quadrant rotation terms the vertex shader folds into its projection.
+		float direction[2]; // Axis of the quadrant being drawn; the vertex shader dots it with the vertex position to get depth.
+		float z_far; // Far plane (`p_far`); the fragment shader also divides the stored depth by it.
+		uint32_t pad; // Not padding despite the name: index of this occluder's transform in the shared occluder buffer.
+		float z_near; // Near plane (`p_near`).
+		uint32_t cull_mode; // Occluder cull mode; the fragment shader discards by gl_FrontFacing based on it.
+		float pad2[2]; // Unused; corresponds to `pad3`/`pad4` in the shader block.
+	};
+
 	struct ShadowRenderPushConstant {
 	struct ShadowRenderPushConstant {
 		float projection[16];
 		float projection[16];
 		float modelview[8];
 		float modelview[8];
 		float direction[2];
 		float direction[2];
 		float z_far;
 		float z_far;
-		float pad;
+		uint32_t cull_mode;
 	};
 	};
 
 
 	struct OccluderPolygon {
 	struct OccluderPolygon {
@@ -313,7 +324,8 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	RID_Owner<OccluderPolygon> occluder_polygon_owner;
 	RID_Owner<OccluderPolygon> occluder_polygon_owner;
 
 
 	enum ShadowRenderMode {
 	enum ShadowRenderMode {
-		SHADOW_RENDER_MODE_SHADOW,
+		SHADOW_RENDER_MODE_DIRECTIONAL_SHADOW,
+		SHADOW_RENDER_MODE_POSITIONAL_SHADOW,
 		SHADOW_RENDER_MODE_SDF,
 		SHADOW_RENDER_MODE_SDF,
 	};
 	};
 
 
@@ -325,7 +337,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	struct {
 	struct {
 		CanvasOcclusionShaderRD shader;
 		CanvasOcclusionShaderRD shader;
 		RID shader_version;
 		RID shader_version;
-		RID render_pipelines[3];
+		RID render_pipelines[2];
 		RID sdf_render_pipelines[2];
 		RID sdf_render_pipelines[2];
 		RD::VertexFormatID vertex_format;
 		RD::VertexFormatID vertex_format;
 		RD::VertexFormatID sdf_vertex_format;
 		RD::VertexFormatID sdf_vertex_format;
@@ -564,6 +576,10 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 		RID shadow_fb;
 		RID shadow_fb;
 		int shadow_texture_size = 2048;
 		int shadow_texture_size = 2048;
 
 
+		RID shadow_occluder_buffer;
+		uint32_t shadow_occluder_buffer_size;
+		RID shadow_ocluder_uniform_set;
+
 		RID default_transforms_uniform_set;
 		RID default_transforms_uniform_set;
 
 
 		uint32_t max_lights_per_render;
 		uint32_t max_lights_per_render;
@@ -617,6 +633,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	_FORCE_INLINE_ void _update_transform_to_mat4(const Transform3D &p_transform, float *p_mat4);
 	_FORCE_INLINE_ void _update_transform_to_mat4(const Transform3D &p_transform, float *p_mat4);
 
 
 	void _update_shadow_atlas();
 	void _update_shadow_atlas();
+	void _update_occluder_buffer(uint32_t p_size);
 
 
 public:
 public:
 	PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()) override;
 	PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()) override;
@@ -625,7 +642,7 @@ public:
 	RID light_create() override;
 	RID light_create() override;
 	void light_set_texture(RID p_rid, RID p_texture) override;
 	void light_set_texture(RID p_rid, RID p_texture) override;
 	void light_set_use_shadow(RID p_rid, bool p_enable) override;
 	void light_set_use_shadow(RID p_rid, bool p_enable) override;
-	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) override;
+	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) override;
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override;
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override;
 
 
 	virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override;
 	virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override;

+ 75 - 4
servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl

@@ -6,26 +6,69 @@
 
 
 layout(location = 0) in highp vec3 vertex;
 layout(location = 0) in highp vec3 vertex;
 
 
+#ifdef POSITIONAL_SHADOW
+layout(push_constant, std430) uniform Constants {
+	mat2x4 modelview; // Light transform; main() combines it with the per-occluder transform.
+	vec4 rotation; // Rotation terms inserted into the projection built in main().
+	vec2 direction; // Dotted with the transformed vertex position to produce `depth`.
+	float z_far; // Far plane used for the projection's depth terms.
+	uint pad; // Index into occluder_transforms.transforms for the occluder being drawn.
+	float z_near; // Near plane used for the projection's depth terms.
+	uint cull_mode; // Read by the fragment stage to discard fragments by facing.
+	float pad3;
+	float pad4;
+}
+constants;
+
+layout(set = 0, binding = 0, std430) restrict readonly buffer OccluderTransforms {
+	mat2x4 transforms[];
+}
+occluder_transforms;
+
+#else
+
 layout(push_constant, std430) uniform Constants {
 layout(push_constant, std430) uniform Constants {
 	mat4 projection;
 	mat4 projection;
 	mat2x4 modelview;
 	mat2x4 modelview;
 	vec2 direction;
 	vec2 direction;
 	float z_far;
 	float z_far;
-	float pad;
+	uint cull_mode;
 }
 }
 constants;
 constants;
 
 
+#endif
+
 #ifdef MODE_SHADOW
 #ifdef MODE_SHADOW
 layout(location = 0) out highp float depth;
 layout(location = 0) out highp float depth;
 #endif
 #endif
 
 
 void main() {
 void main() {
-	highp vec4 vtx = vec4(vertex, 1.0) * mat4(constants.modelview[0], constants.modelview[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+#ifdef POSITIONAL_SHADOW
+	float c = -(constants.z_far + constants.z_near) / (constants.z_far - constants.z_near);
+	float d = -2.0 * constants.z_far * constants.z_near / (constants.z_far - constants.z_near);
+
+	mat4 projection = mat4(vec4(1.0, 0.0, 0.0, 0.0),
+			vec4(0.0, 1.0, 0.0, 0.0),
+			vec4(0.0, 0.0, c, -1.0),
+			vec4(0.0, 0.0, d, 0.0));
+
+	// Precomputed:
+	// Vector3 cam_target = Basis::from_euler(Vector3(0, 0, Math_TAU * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0));
+	// projection = projection * Projection(Transform3D().looking_at(cam_targets[i], Vector3(0, 0, -1)).affine_inverse());
+	projection *= mat4(vec4(constants.rotation.x, 0.0, constants.rotation.y, 0.0), vec4(constants.rotation.z, 0.0, constants.rotation.w, 0.0), vec4(0.0, -1.0, 0.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+	mat4 modelview = mat4(occluder_transforms.transforms[constants.pad]) * mat4(constants.modelview);
+#else
+	mat4 projection = constants.projection;
+	mat4 modelview = mat4(constants.modelview[0], constants.modelview[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+#endif
+
+	highp vec4 vtx = vec4(vertex, 1.0) * modelview;
 
 
 #ifdef MODE_SHADOW
 #ifdef MODE_SHADOW
 	depth = dot(constants.direction, vtx.xy);
 	depth = dot(constants.direction, vtx.xy);
 #endif
 #endif
-	gl_Position = constants.projection * vtx;
+
+	gl_Position = projection * vtx;
 }
 }
 
 
 #[fragment]
 #[fragment]
@@ -34,15 +77,33 @@ void main() {
 
 
 #VERSION_DEFINES
 #VERSION_DEFINES
 
 
+#ifdef POSITIONAL_SHADOW
+layout(push_constant, std430) uniform Constants { // Same member layout as the vertex stage's POSITIONAL_SHADOW block.
+	mat2x4 modelview;
+	vec4 rotation;
+	vec2 direction;
+	float z_far; // Divides `depth` to produce the stored distance.
+	uint pad;
+	float z_near;
+	uint cull_mode; // Compared against the POLYGON_CULL_* defines to discard by gl_FrontFacing.
+	float pad3;
+	float pad4;
+}
+constants;
+
+#else
+
 layout(push_constant, std430) uniform Constants {
 layout(push_constant, std430) uniform Constants {
 	mat4 projection;
 	mat4 projection;
 	mat2x4 modelview;
 	mat2x4 modelview;
 	vec2 direction;
 	vec2 direction;
 	float z_far;
 	float z_far;
-	float pad;
+	uint cull_mode;
 }
 }
 constants;
 constants;
 
 
+#endif
+
 #ifdef MODE_SHADOW
 #ifdef MODE_SHADOW
 layout(location = 0) in highp float depth;
 layout(location = 0) in highp float depth;
 layout(location = 0) out highp float distance_buf;
 layout(location = 0) out highp float distance_buf;
@@ -50,8 +111,18 @@ layout(location = 0) out highp float distance_buf;
 layout(location = 0) out highp float sdf_buf;
 layout(location = 0) out highp float sdf_buf;
 #endif
 #endif
 
 
+#define POLYGON_CULL_DISABLED 0
+#define POLYGON_CULL_FRONT 1
+#define POLYGON_CULL_BACK 2
+
 void main() {
 void main() {
 #ifdef MODE_SHADOW
 #ifdef MODE_SHADOW
+	bool front_facing = gl_FrontFacing;
+	if (constants.cull_mode == POLYGON_CULL_BACK && !front_facing) {
+		discard;
+	} else if (constants.cull_mode == POLYGON_CULL_FRONT && front_facing) {
+		discard;
+	}
 	distance_buf = depth / constants.z_far;
 	distance_buf = depth / constants.z_far;
 #else
 #else
 	sdf_buf = 1.0;
 	sdf_buf = 1.0;

+ 1 - 1
servers/rendering/renderer_viewport.cpp

@@ -498,7 +498,7 @@ void RendererViewport::_draw_viewport(Viewport *p_viewport) {
 			while (light) {
 			while (light) {
 				RENDER_TIMESTAMP("Render PointLight2D Shadow");
 				RENDER_TIMESTAMP("Render PointLight2D Shadow");
 
 
-				RSG::canvas_render->light_update_shadow(light->light_internal, shadow_count++, light->xform_cache.affine_inverse(), light->item_shadow_mask, light->radius_cache / 1000.0, light->radius_cache * 1.1, occluders);
+				RSG::canvas_render->light_update_shadow(light->light_internal, shadow_count++, light->xform_cache.affine_inverse(), light->item_shadow_mask, light->radius_cache / 1000.0, light->radius_cache * 1.1, occluders, light->rect_cache);
 				light = light->shadows_next_ptr;
 				light = light->shadows_next_ptr;
 			}
 			}