Browse Source

Optimize PointLight2D shadow rendering by reducing draw calls and RD state changes.

This dramatically reduces the CPU time spent on rendering shadows for PointLight2Ds.
clayjohn 8 months ago
parent
commit
7c61252dd7

+ 1 - 1
drivers/gles3/rasterizer_canvas_gles3.cpp

@@ -1636,7 +1636,7 @@ void RasterizerCanvasGLES3::light_set_use_shadow(RID p_rid, bool p_enable) {
 	cl->shadow.enabled = p_enable;
 	cl->shadow.enabled = p_enable;
 }
 }
 
 
-void RasterizerCanvasGLES3::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) {
+void RasterizerCanvasGLES3::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) {
 	GLES3::Config *config = GLES3::Config::get_singleton();
 	GLES3::Config *config = GLES3::Config::get_singleton();
 
 
 	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
 	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);

+ 1 - 1
drivers/gles3/rasterizer_canvas_gles3.h

@@ -343,7 +343,7 @@ public:
 	RID light_create() override;
 	RID light_create() override;
 	void light_set_texture(RID p_rid, RID p_texture) override;
 	void light_set_texture(RID p_rid, RID p_texture) override;
 	void light_set_use_shadow(RID p_rid, bool p_enable) override;
 	void light_set_use_shadow(RID p_rid, bool p_enable) override;
-	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) override;
+	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) override;
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override;
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override;
 
 
 	void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override;
 	void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override;

+ 1 - 1
servers/rendering/dummy/rasterizer_canvas_dummy.h

@@ -43,7 +43,7 @@ public:
 	RID light_create() override { return RID(); }
 	RID light_create() override { return RID(); }
 	void light_set_texture(RID p_rid, RID p_texture) override {}
 	void light_set_texture(RID p_rid, RID p_texture) override {}
 	void light_set_use_shadow(RID p_rid, bool p_enable) override {}
 	void light_set_use_shadow(RID p_rid, bool p_enable) override {}
-	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) override {}
+	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) override {}
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override {}
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override {}
 
 
 	void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override {}
 	void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override {}

+ 1 - 1
servers/rendering/renderer_canvas_render.h

@@ -531,7 +531,7 @@ public:
 	virtual RID light_create() = 0;
 	virtual RID light_create() = 0;
 	virtual void light_set_texture(RID p_rid, RID p_texture) = 0;
 	virtual void light_set_texture(RID p_rid, RID p_texture) = 0;
 	virtual void light_set_use_shadow(RID p_rid, bool p_enable) = 0;
 	virtual void light_set_use_shadow(RID p_rid, bool p_enable) = 0;
-	virtual void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) = 0;
+	virtual void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) = 0;
 	virtual void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) = 0;
 	virtual void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) = 0;
 
 
 	virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) = 0;
 	virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) = 0;

+ 109 - 55
servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp

@@ -985,7 +985,34 @@ void RendererCanvasRenderRD::_update_shadow_atlas() {
 	}
 	}
 }
 }
 
 
-void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) {
+void RendererCanvasRenderRD::_update_occluder_buffer(uint32_t p_size) { // Makes sure state.shadow_occluder_buffer exists and holds at least p_size bytes, (re)creating the buffer and its uniform set when required.
+	bool needs_update = state.shadow_occluder_buffer.is_null(); // No buffer has been created yet.
+
+	if (p_size > state.shadow_occluder_buffer_size) { // NOTE(review): shadow_occluder_buffer_size is declared without an initializer in the state struct - confirm it is set before this first comparison.
+		needs_update = true;
+		state.shadow_occluder_buffer_size = next_power_of_2(p_size); // Store the rounded-up size rather than the exact request.
+		if (state.shadow_occluder_buffer.is_valid()) {
+			RD::get_singleton()->free(state.shadow_occluder_buffer); // Release the too-small buffer before it is replaced below. NOTE(review): the old uniform set is not freed explicitly here - presumably it is released with the buffer; confirm.
+		}
+	}
+
+	if (needs_update) {
+		state.shadow_occluder_buffer = RD::get_singleton()->storage_buffer_create(state.shadow_occluder_buffer_size);
+
+		Vector<RD::Uniform> uniforms;
+
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			u.binding = 0; // Same binding as the OccluderTransforms buffer in the POSITIONAL_SHADOW shader variant.
+			u.append_id(state.shadow_occluder_buffer);
+			uniforms.push_back(u);
+		}
+		state.shadow_ocluder_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_POSITIONAL_SHADOW), 0); // Rebuilt whenever the buffer is, since the set references the buffer RID.
+	}
+}
+
+void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) {
 	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
 	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
 	ERR_FAIL_COND(!cl->shadow.enabled);
 	ERR_FAIL_COND(!cl->shadow.enabled);
 
 
@@ -996,75 +1023,97 @@ void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index,
 	Vector<Color> cc;
 	Vector<Color> cc;
 	cc.push_back(Color(p_far, p_far, p_far, 1.0));
 	cc.push_back(Color(p_far, p_far, p_far, 1.0));
 
 
-	Projection projection;
-	{
-		real_t fov = 90;
-		real_t nearp = p_near;
-		real_t farp = p_far;
-		real_t aspect = 1.0;
+	// First, do a culling pass and record what occluders need to be drawn for this light.
+	static thread_local LocalVector<OccluderPolygon *> occluders;
+	static thread_local LocalVector<uint32_t> occluder_indices;
+	occluders.clear();
+	occluder_indices.clear();
+
+	uint32_t occluder_count = 0;
+
+	LightOccluderInstance *instance = p_occluders;
+	while (instance) {
+		OccluderPolygon *co = occluder_polygon_owner.get_or_null(instance->occluder);
+
+		if (!co || co->index_array.is_null()) {
+			instance = instance->next;
+			continue;
+		}
+
+		occluder_count++;
 
 
-		real_t ymax = nearp * Math::tan(Math::deg_to_rad(fov * 0.5));
-		real_t ymin = -ymax;
-		real_t xmin = ymin * aspect;
-		real_t xmax = ymax * aspect;
+		if (!(p_light_mask & instance->light_mask) || !p_light_rect.intersects(instance->aabb_cache)) {
+			instance = instance->next;
+			continue;
+		}
+
+		occluders.push_back(co);
+		occluder_indices.push_back(occluder_count - 1);
 
 
-		projection.set_frustum(xmin, xmax, ymin, ymax, nearp, farp);
+		instance = instance->next;
 	}
 	}
 
 
-	// Precomputed:
-	// Vector3 cam_target = Basis::from_euler(Vector3(0, 0, Math_TAU * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0));
-	// projection = projection * Projection(Transform3D().looking_at(cam_targets[i], Vector3(0, 0, -1)).affine_inverse());
-	const Projection projections[4] = {
-		projection * Projection(Vector4(0, 0, -1, 0), Vector4(1, 0, 0, 0), Vector4(0, -1, 0, 0), Vector4(0, 0, 0, 1)),
+	// Then, upload all the occluder transforms to a shared buffer.
+	// We only do this for the first light so we can avoid uploading the same
+	// transforms over and over again.
+	if (p_shadow_index == 0) {
+		static thread_local LocalVector<float> transforms;
+		transforms.clear();
+		transforms.resize(occluder_count * 8);
 
 
-		projection * Projection(Vector4(-1, 0, 0, 0), Vector4(0, 0, -1, 0), Vector4(0, -1, 0, 0), Vector4(0, 0, 0, 1)),
+		instance = p_occluders;
+		uint32_t index = 0;
+		while (instance) {
+			_update_transform_2d_to_mat2x4(instance->xform_cache, &transforms[index * 8]);
+			index++;
+			instance = instance->next;
+		}
 
 
-		projection * Projection(Vector4(0, 0, 1, 0), Vector4(-1, 0, 0, 0), Vector4(0, -1, 0, 0), Vector4(0, 0, 0, 1)),
+		_update_occluder_buffer(occluder_count * 8 * sizeof(float));
+		RD::get_singleton()->buffer_update(state.shadow_occluder_buffer, 0, transforms.size() * sizeof(float), transforms.ptr());
+	}
 
 
-		projection * Projection(Vector4(1, 0, 0, 0), Vector4(0, 0, 1, 0), Vector4(0, -1, 0, 0), Vector4(0, 0, 0, 1))
+	Rect2i rect(0, p_shadow_index * 2, state.shadow_texture_size, 2);
+	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::DRAW_CLEAR_ALL, cc, 1.0f, 0, rect);
 
 
-	};
+	RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[SHADOW_RENDER_MODE_POSITIONAL_SHADOW]);
+	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, state.shadow_ocluder_uniform_set, 0);
 
 
 	for (int i = 0; i < 4; i++) {
 	for (int i = 0; i < 4; i++) {
-		Rect2i rect((state.shadow_texture_size / 4) * i, p_shadow_index * 2, (state.shadow_texture_size / 4), 2);
-		RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::DRAW_CLEAR_ALL, cc, 1.0f, 0, rect);
+		Rect2i sub_rect((state.shadow_texture_size / 4) * i, p_shadow_index * 2, (state.shadow_texture_size / 4), 2);
+		RD::get_singleton()->draw_list_set_viewport(draw_list, sub_rect);
 
 
-		ShadowRenderPushConstant push_constant;
-		for (int y = 0; y < 4; y++) {
-			for (int x = 0; x < 4; x++) {
-				push_constant.projection[y * 4 + x] = projections[i].columns[y][x];
-			}
-		}
 		static const Vector2 directions[4] = { Vector2(1, 0), Vector2(0, 1), Vector2(-1, 0), Vector2(0, -1) };
 		static const Vector2 directions[4] = { Vector2(1, 0), Vector2(0, 1), Vector2(-1, 0), Vector2(0, -1) };
+		static const Vector4 rotations[4] = { Vector4(0, -1, 1, 0), Vector4(-1, 0, 0, -1), Vector4(0, 1, -1, 0), Vector4(1, 0, 0, 1) };
+
+		PositionalShadowRenderPushConstant push_constant;
+		_update_transform_2d_to_mat2x4(p_light_xform, push_constant.modelview);
 		push_constant.direction[0] = directions[i].x;
 		push_constant.direction[0] = directions[i].x;
 		push_constant.direction[1] = directions[i].y;
 		push_constant.direction[1] = directions[i].y;
+		push_constant.rotation[0] = rotations[i].x;
+		push_constant.rotation[1] = rotations[i].y;
+		push_constant.rotation[2] = rotations[i].z;
+		push_constant.rotation[3] = rotations[i].w;
 		push_constant.z_far = p_far;
 		push_constant.z_far = p_far;
-		push_constant.pad = 0;
+		push_constant.z_near = p_near;
 
 
-		LightOccluderInstance *instance = p_occluders;
-
-		while (instance) {
-			OccluderPolygon *co = occluder_polygon_owner.get_or_null(instance->occluder);
-
-			if (!co || co->index_array.is_null() || !(p_light_mask & instance->light_mask)) {
-				instance = instance->next;
-				continue;
-			}
+		for (uint32_t j = 0; j < occluders.size(); j++) {
+			OccluderPolygon *co = occluders[j];
 
 
-			_update_transform_2d_to_mat2x4(p_light_xform * instance->xform_cache, push_constant.modelview);
+			push_constant.pad = occluder_indices[j];
+			push_constant.cull_mode = uint32_t(co->cull_mode);
 
 
-			RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[co->cull_mode]);
+			// The slowest part about this whole function is that we have to draw the occluders one by one, 4 times.
+			// We can optimize this so that all occluders draw at once if we store vertices and indices in a giant
+			// SSBO and just save an index into that SSBO for each occluder.
 			RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array);
 			RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array);
 			RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array);
 			RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array);
-			RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant));
+			RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(PositionalShadowRenderPushConstant));
 
 
 			RD::get_singleton()->draw_list_draw(draw_list, true);
 			RD::get_singleton()->draw_list_draw(draw_list, true);
-
-			instance = instance->next;
 		}
 		}
-
-		RD::get_singleton()->draw_list_end();
 	}
 	}
+	RD::get_singleton()->draw_list_end();
 }
 }
 
 
 void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) {
 void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) {
@@ -1099,6 +1148,7 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh
 
 
 	Rect2i rect(0, p_shadow_index * 2, state.shadow_texture_size, 2);
 	Rect2i rect(0, p_shadow_index * 2, state.shadow_texture_size, 2);
 	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::DRAW_CLEAR_ALL, cc, 1.0f, 0, rect);
 	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(state.shadow_fb, RD::DRAW_CLEAR_ALL, cc, 1.0f, 0, rect);
+	RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[SHADOW_RENDER_MODE_DIRECTIONAL_SHADOW]);
 
 
 	Projection projection;
 	Projection projection;
 	projection.set_orthogonal(-half_size, half_size, -0.5, 0.5, 0.0, distance);
 	projection.set_orthogonal(-half_size, half_size, -0.5, 0.5, 0.0, distance);
@@ -1114,7 +1164,6 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh
 	push_constant.direction[0] = 0.0;
 	push_constant.direction[0] = 0.0;
 	push_constant.direction[1] = 1.0;
 	push_constant.direction[1] = 1.0;
 	push_constant.z_far = distance;
 	push_constant.z_far = distance;
-	push_constant.pad = 0;
 
 
 	LightOccluderInstance *instance = p_occluders;
 	LightOccluderInstance *instance = p_occluders;
 
 
@@ -1127,8 +1176,8 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh
 		}
 		}
 
 
 		_update_transform_2d_to_mat2x4(to_light_xform * instance->xform_cache, push_constant.modelview);
 		_update_transform_2d_to_mat2x4(to_light_xform * instance->xform_cache, push_constant.modelview);
+		push_constant.cull_mode = uint32_t(co->cull_mode);
 
 
-		RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, shadow_render.render_pipelines[co->cull_mode]);
 		RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array);
 		RD::get_singleton()->draw_list_bind_vertex_array(draw_list, co->vertex_array);
 		RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array);
 		RD::get_singleton()->draw_list_bind_index_array(draw_list, co->index_array);
 		RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant));
 		RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(ShadowRenderPushConstant));
@@ -1182,7 +1231,7 @@ void RendererCanvasRenderRD::render_sdf(RID p_render_target, LightOccluderInstan
 	push_constant.direction[0] = 0.0;
 	push_constant.direction[0] = 0.0;
 	push_constant.direction[1] = 0.0;
 	push_constant.direction[1] = 0.0;
 	push_constant.z_far = 0;
 	push_constant.z_far = 0;
-	push_constant.pad = 0;
+	push_constant.cull_mode = 0;
 
 
 	LightOccluderInstance *instance = p_occluders;
 	LightOccluderInstance *instance = p_occluders;
 
 
@@ -1791,8 +1840,9 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 
 
 	{ //shadow rendering
 	{ //shadow rendering
 		Vector<String> versions;
 		Vector<String> versions;
-		versions.push_back("\n#define MODE_SHADOW\n"); //shadow
-		versions.push_back("\n#define MODE_SDF\n"); //sdf
+		versions.push_back("\n#define MODE_SHADOW\n"); // Shadow.
+		versions.push_back("\n#define MODE_SHADOW\n#define POSITIONAL_SHADOW\n"); // Positional shadow.
+		versions.push_back("\n#define MODE_SDF\n"); // SDF.
 		shadow_render.shader.initialize(versions);
 		shadow_render.shader.initialize(versions);
 
 
 		{
 		{
@@ -1843,14 +1893,13 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 
 
 		shadow_render.shader_version = shadow_render.shader.version_create();
 		shadow_render.shader_version = shadow_render.shader.version_create();
 
 
-		for (int i = 0; i < 3; i++) {
+		for (int i = 0; i < 2; i++) {
 			RD::PipelineRasterizationState rs;
 			RD::PipelineRasterizationState rs;
-			rs.cull_mode = i == 0 ? RD::POLYGON_CULL_DISABLED : (i == 1 ? RD::POLYGON_CULL_FRONT : RD::POLYGON_CULL_BACK);
 			RD::PipelineDepthStencilState ds;
 			RD::PipelineDepthStencilState ds;
 			ds.enable_depth_write = true;
 			ds.enable_depth_write = true;
 			ds.enable_depth_test = true;
 			ds.enable_depth_test = true;
 			ds.depth_compare_operator = RD::COMPARE_OP_LESS;
 			ds.depth_compare_operator = RD::COMPARE_OP_LESS;
-			shadow_render.render_pipelines[i] = RD::get_singleton()->render_pipeline_create(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SHADOW), shadow_render.framebuffer_format, shadow_render.vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0);
+			shadow_render.render_pipelines[i] = RD::get_singleton()->render_pipeline_create(shadow_render.shader.version_get_shader(shadow_render.shader_version, ShadowRenderMode(i)), shadow_render.framebuffer_format, shadow_render.vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0);
 		}
 		}
 
 
 		for (int i = 0; i < 2; i++) {
 		for (int i = 0; i < 2; i++) {
@@ -1858,7 +1907,8 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 		}
 		}
 
 
 		// Unload shader modules to save memory.
 		// Unload shader modules to save memory.
-		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SHADOW));
+		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_DIRECTIONAL_SHADOW));
+		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_POSITIONAL_SHADOW));
 		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SDF));
 		RD::get_singleton()->shader_destroy_modules(shadow_render.shader.version_get_shader(shadow_render.shader_version, SHADOW_RENDER_MODE_SDF));
 	}
 	}
 
 
@@ -3251,6 +3301,10 @@ RendererCanvasRenderRD::~RendererCanvasRenderRD() {
 	}
 	}
 	RD::get_singleton()->free(state.shadow_texture);
 	RD::get_singleton()->free(state.shadow_texture);
 
 
+	if (state.shadow_occluder_buffer.is_valid()) {
+		RD::get_singleton()->free(state.shadow_occluder_buffer);
+	}
+
 	memdelete_arr(state.instance_data_array);
 	memdelete_arr(state.instance_data_array);
 	for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
 	for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
 		for (uint32_t j = 0; j < state.canvas_instance_data_buffers[i].instance_buffers.size(); j++) {
 		for (uint32_t j = 0; j < state.canvas_instance_data_buffers[i].instance_buffers.size(); j++) {

+ 21 - 4
servers/rendering/renderer_rd/renderer_canvas_render_rd.h

@@ -268,12 +268,23 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 
 
 	RID_Owner<CanvasLight> canvas_light_owner;
 	RID_Owner<CanvasLight> canvas_light_owner;
 
 
+	struct PositionalShadowRenderPushConstant { // CPU-side push constant for the POSITIONAL_SHADOW shader variant; fields appear in the same order as the shader's Constants block.
+		float modelview[8]; // Light transform as a mat2x4; light_update_shadow fills it from p_light_xform.
+		float rotation[4]; // Per-quadrant rotation terms; the vertex shader folds them into its projection matrix.
+		float direction[2]; // Axis the vertex shader dots with the transformed vertex position to produce depth.
+		float z_far; // Far distance; the fragment shader divides depth by it.
+		uint32_t pad; // Despite the name, carries the occluder's index into the shared occluder transform buffer.
+		float z_near; // Near distance used by the vertex shader's projection terms.
+		uint32_t cull_mode; // Compared against the POLYGON_CULL_* values in the fragment shader to discard fragments.
+		float pad2[2]; // Padding; corresponds to pad3/pad4 in the shader block.
+	};
+
 	struct ShadowRenderPushConstant {
 	struct ShadowRenderPushConstant {
 		float projection[16];
 		float projection[16];
 		float modelview[8];
 		float modelview[8];
 		float direction[2];
 		float direction[2];
 		float z_far;
 		float z_far;
-		float pad;
+		uint32_t cull_mode;
 	};
 	};
 
 
 	struct OccluderPolygon {
 	struct OccluderPolygon {
@@ -313,7 +324,8 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	RID_Owner<OccluderPolygon> occluder_polygon_owner;
 	RID_Owner<OccluderPolygon> occluder_polygon_owner;
 
 
 	enum ShadowRenderMode {
 	enum ShadowRenderMode {
-		SHADOW_RENDER_MODE_SHADOW,
+		SHADOW_RENDER_MODE_DIRECTIONAL_SHADOW,
+		SHADOW_RENDER_MODE_POSITIONAL_SHADOW,
 		SHADOW_RENDER_MODE_SDF,
 		SHADOW_RENDER_MODE_SDF,
 	};
 	};
 
 
@@ -325,7 +337,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	struct {
 	struct {
 		CanvasOcclusionShaderRD shader;
 		CanvasOcclusionShaderRD shader;
 		RID shader_version;
 		RID shader_version;
-		RID render_pipelines[3];
+		RID render_pipelines[2];
 		RID sdf_render_pipelines[2];
 		RID sdf_render_pipelines[2];
 		RD::VertexFormatID vertex_format;
 		RD::VertexFormatID vertex_format;
 		RD::VertexFormatID sdf_vertex_format;
 		RD::VertexFormatID sdf_vertex_format;
@@ -564,6 +576,10 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 		RID shadow_fb;
 		RID shadow_fb;
 		int shadow_texture_size = 2048;
 		int shadow_texture_size = 2048;
 
 
+		RID shadow_occluder_buffer;
+		uint32_t shadow_occluder_buffer_size;
+		RID shadow_ocluder_uniform_set;
+
 		RID default_transforms_uniform_set;
 		RID default_transforms_uniform_set;
 
 
 		uint32_t max_lights_per_render;
 		uint32_t max_lights_per_render;
@@ -617,6 +633,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	_FORCE_INLINE_ void _update_transform_to_mat4(const Transform3D &p_transform, float *p_mat4);
 	_FORCE_INLINE_ void _update_transform_to_mat4(const Transform3D &p_transform, float *p_mat4);
 
 
 	void _update_shadow_atlas();
 	void _update_shadow_atlas();
+	void _update_occluder_buffer(uint32_t p_size);
 
 
 public:
 public:
 	PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()) override;
 	PolygonID request_polygon(const Vector<int> &p_indices, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), const Vector<int> &p_bones = Vector<int>(), const Vector<float> &p_weights = Vector<float>()) override;
@@ -625,7 +642,7 @@ public:
 	RID light_create() override;
 	RID light_create() override;
 	void light_set_texture(RID p_rid, RID p_texture) override;
 	void light_set_texture(RID p_rid, RID p_texture) override;
 	void light_set_use_shadow(RID p_rid, bool p_enable) override;
 	void light_set_use_shadow(RID p_rid, bool p_enable) override;
-	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders) override;
+	void light_update_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_near, float p_far, LightOccluderInstance *p_occluders, const Rect2 &p_light_rect) override;
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override;
 	void light_update_directional_shadow(RID p_rid, int p_shadow_index, const Transform2D &p_light_xform, int p_light_mask, float p_cull_distance, const Rect2 &p_clip_rect, LightOccluderInstance *p_occluders) override;
 
 
 	virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override;
 	virtual void render_sdf(RID p_render_target, LightOccluderInstance *p_occluders) override;

+ 75 - 4
servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl

@@ -6,26 +6,69 @@
 
 
 layout(location = 0) in highp vec3 vertex;
 layout(location = 0) in highp vec3 vertex;
 
 
+#ifdef POSITIONAL_SHADOW
+layout(push_constant, std430) uniform Constants {
+	mat2x4 modelview;
+	vec4 rotation;
+	vec2 direction;
+	float z_far;
+	uint pad;
+	float z_near;
+	uint cull_mode;
+	float pad3;
+	float pad4;
+}
+constants;
+
+layout(set = 0, binding = 0, std430) restrict readonly buffer OccluderTransforms {
+	mat2x4 transforms[];
+}
+occluder_transforms;
+
+#else
+
 layout(push_constant, std430) uniform Constants {
 layout(push_constant, std430) uniform Constants {
 	mat4 projection;
 	mat4 projection;
 	mat2x4 modelview;
 	mat2x4 modelview;
 	vec2 direction;
 	vec2 direction;
 	float z_far;
 	float z_far;
-	float pad;
+	uint cull_mode;
 }
 }
 constants;
 constants;
 
 
+#endif
+
 #ifdef MODE_SHADOW
 #ifdef MODE_SHADOW
 layout(location = 0) out highp float depth;
 layout(location = 0) out highp float depth;
 #endif
 #endif
 
 
 void main() {
 void main() {
-	highp vec4 vtx = vec4(vertex, 1.0) * mat4(constants.modelview[0], constants.modelview[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+#ifdef POSITIONAL_SHADOW
+	float c = -(constants.z_far + constants.z_near) / (constants.z_far - constants.z_near);
+	float d = -2.0 * constants.z_far * constants.z_near / (constants.z_far - constants.z_near);
+
+	mat4 projection = mat4(vec4(1.0, 0.0, 0.0, 0.0),
+			vec4(0.0, 1.0, 0.0, 0.0),
+			vec4(0.0, 0.0, c, -1.0),
+			vec4(0.0, 0.0, d, 0.0));
+
+	// Precomputed:
+	// Vector3 cam_target = Basis::from_euler(Vector3(0, 0, Math_TAU * ((i + 3) / 4.0))).xform(Vector3(0, 1, 0));
+	// projection = projection * Projection(Transform3D().looking_at(cam_targets[i], Vector3(0, 0, -1)).affine_inverse());
+	projection *= mat4(vec4(constants.rotation.x, 0.0, constants.rotation.y, 0.0), vec4(constants.rotation.z, 0.0, constants.rotation.w, 0.0), vec4(0.0, -1.0, 0.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+	mat4 modelview = mat4(occluder_transforms.transforms[constants.pad]) * mat4(constants.modelview);
+#else
+	mat4 projection = constants.projection;
+	mat4 modelview = mat4(constants.modelview[0], constants.modelview[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
+#endif
+
+	highp vec4 vtx = vec4(vertex, 1.0) * modelview;
 
 
 #ifdef MODE_SHADOW
 #ifdef MODE_SHADOW
 	depth = dot(constants.direction, vtx.xy);
 	depth = dot(constants.direction, vtx.xy);
 #endif
 #endif
-	gl_Position = constants.projection * vtx;
+
+	gl_Position = projection * vtx;
 }
 }
 
 
 #[fragment]
 #[fragment]
@@ -34,15 +77,33 @@ void main() {
 
 
 #VERSION_DEFINES
 #VERSION_DEFINES
 
 
+#ifdef POSITIONAL_SHADOW
+layout(push_constant, std430) uniform Constants {
+	mat2x4 modelview;
+	vec4 rotation;
+	vec2 direction;
+	float z_far;
+	uint pad;
+	float z_near;
+	uint cull_mode;
+	float pad3;
+	float pad4;
+}
+constants;
+
+#else
+
 layout(push_constant, std430) uniform Constants {
 layout(push_constant, std430) uniform Constants {
 	mat4 projection;
 	mat4 projection;
 	mat2x4 modelview;
 	mat2x4 modelview;
 	vec2 direction;
 	vec2 direction;
 	float z_far;
 	float z_far;
-	float pad;
+	uint cull_mode;
 }
 }
 constants;
 constants;
 
 
+#endif
+
 #ifdef MODE_SHADOW
 #ifdef MODE_SHADOW
 layout(location = 0) in highp float depth;
 layout(location = 0) in highp float depth;
 layout(location = 0) out highp float distance_buf;
 layout(location = 0) out highp float distance_buf;
@@ -50,8 +111,18 @@ layout(location = 0) out highp float distance_buf;
 layout(location = 0) out highp float sdf_buf;
 layout(location = 0) out highp float sdf_buf;
 #endif
 #endif
 
 
+#define POLYGON_CULL_DISABLED 0
+#define POLYGON_CULL_FRONT 1
+#define POLYGON_CULL_BACK 2
+
 void main() {
 void main() {
 #ifdef MODE_SHADOW
 #ifdef MODE_SHADOW
+	bool front_facing = gl_FrontFacing;
+	if (constants.cull_mode == POLYGON_CULL_BACK && !front_facing) {
+		discard;
+	} else if (constants.cull_mode == POLYGON_CULL_FRONT && front_facing) {
+		discard;
+	}
 	distance_buf = depth / constants.z_far;
 	distance_buf = depth / constants.z_far;
 #else
 #else
 	sdf_buf = 1.0;
 	sdf_buf = 1.0;

+ 1 - 1
servers/rendering/renderer_viewport.cpp

@@ -498,7 +498,7 @@ void RendererViewport::_draw_viewport(Viewport *p_viewport) {
 			while (light) {
 			while (light) {
 				RENDER_TIMESTAMP("Render PointLight2D Shadow");
 				RENDER_TIMESTAMP("Render PointLight2D Shadow");
 
 
-				RSG::canvas_render->light_update_shadow(light->light_internal, shadow_count++, light->xform_cache.affine_inverse(), light->item_shadow_mask, light->radius_cache / 1000.0, light->radius_cache * 1.1, occluders);
+				RSG::canvas_render->light_update_shadow(light->light_internal, shadow_count++, light->xform_cache.affine_inverse(), light->item_shadow_mask, light->radius_cache / 1000.0, light->radius_cache * 1.1, occluders, light->rect_cache);
 				light = light->shadows_next_ptr;
 				light = light->shadows_next_ptr;
 			}
 			}