Browse Source

Merge pull request #98652 from stuartcarnie/2d_texture_state_fixes

2D: Fix various issues and minor performance optimisations
Thaddeus Crews 9 months ago
parent
commit
a8931f1aa9

+ 46 - 4
core/templates/lru.h

@@ -35,9 +35,21 @@
 #include "hash_map.h"
 #include "list.h"
 
-template <typename TKey, typename TData, typename Hasher = HashMapHasherDefault, typename Comparator = HashMapComparatorDefault<TKey>>
+#if defined(__GNUC__) && !defined(__clang__) // GCC only (Clang is excluded explicitly): helpers that push the diagnostic state and ignore -Waddress, then pop it again.
+#define ADDRESS_DIAGNOSTIC_WARNING_DISABLE \
+	_Pragma("GCC diagnostic push");        \
+	_Pragma("GCC diagnostic ignored \"-Waddress\"");
+
+#define ADDRESS_DIAGNOSTIC_POP \
+	_Pragma("GCC diagnostic pop");
+#else // Any other compiler: both helpers expand to nothing.
+#define ADDRESS_DIAGNOSTIC_WARNING_DISABLE
+#define ADDRESS_DIAGNOSTIC_POP
+#endif // NOTE(review): presumably needed because GCC flags the `BeforeEvict != nullptr` checks below with -Waddress - confirm.
+
+template <typename TKey, typename TData, typename Hasher = HashMapHasherDefault, typename Comparator = HashMapComparatorDefault<TKey>, void (*BeforeEvict)(TKey &, TData &) = nullptr>
 class LRUCache {
-private:
+public:
 	struct Pair {
 		TKey key;
 		TData data;
@@ -51,16 +63,22 @@ private:
 
 	typedef typename List<Pair>::Element *Element;
 
+private:
 	List<Pair> _list;
 	HashMap<TKey, Element, Hasher, Comparator> _map;
 	size_t capacity;
 
 public:
-	const TData *insert(const TKey &p_key, const TData &p_value) {
+	const Pair *insert(const TKey &p_key, const TData &p_value) {
 		Element *e = _map.getptr(p_key);
 		Element n = _list.push_front(Pair(p_key, p_value));
 
 		if (e) {
+			ADDRESS_DIAGNOSTIC_WARNING_DISABLE;
+			if constexpr (BeforeEvict != nullptr) {
+				BeforeEvict((*e)->get().key, (*e)->get().data);
+			}
+			ADDRESS_DIAGNOSTIC_POP;
 			_list.erase(*e);
 			_map.erase(p_key);
 		}
@@ -68,11 +86,16 @@ public:
 
 		while (_map.size() > capacity) {
 			Element d = _list.back();
+			ADDRESS_DIAGNOSTIC_WARNING_DISABLE
+			if constexpr (BeforeEvict != nullptr) {
+				BeforeEvict(d->get().key, d->get().data);
+			}
+			ADDRESS_DIAGNOSTIC_POP
 			_map.erase(d->get().key);
 			_list.pop_back();
 		}
 
-		return &n->get().data;
+		return &n->get();
 	}
 
 	void clear() {
@@ -84,6 +107,17 @@ public:
 		return _map.getptr(p_key);
 	}
 
+	bool erase(const TKey &p_key) { // Removes the entry stored under p_key, if any. Returns true when an entry was removed, false when the key was not cached.
+		Element *e = _map.getptr(p_key);
+		if (!e) {
+			return false; // Nothing is cached under this key.
+		}
+		_list.move_to_front(*e); // Bring the node to the head so pop_front() below drops exactly this entry.
+		_map.erase(p_key); // The map entry is removed while the list node (and the key it stores) still exists. NOTE(review): presumably so p_key may alias that stored key - confirm.
+		_list.pop_front(); // Destroys the node. BeforeEvict is not invoked on this path.
+		return true;
+	}
+
 	const TData &get(const TKey &p_key) {
 		Element *e = _map.getptr(p_key);
 		CRASH_COND(!e);
@@ -109,6 +143,11 @@ public:
 			capacity = p_capacity;
 			while (_map.size() > capacity) {
 				Element d = _list.back();
+				ADDRESS_DIAGNOSTIC_WARNING_DISABLE;
+				if constexpr (BeforeEvict != nullptr) {
+					BeforeEvict(d->get().key, d->get().data);
+				}
+				ADDRESS_DIAGNOSTIC_POP;
 				_map.erase(d->get().key);
 				_list.pop_back();
 			}
@@ -124,4 +163,7 @@ public:
 	}
 };
 
+#undef ADDRESS_DIAGNOSTIC_WARNING_DISABLE
+#undef ADDRESS_DIAGNOSTIC_POP
+
 #endif // LRU_H

+ 4 - 0
doc/classes/ProjectSettings.xml

@@ -2367,6 +2367,10 @@
 		<member name="rendering/2d/batching/item_buffer_size" type="int" setter="" getter="" default="16384">
 			Maximum number of canvas item commands that can be batched into a single draw call.
 		</member>
+		<member name="rendering/2d/batching/uniform_set_cache_size" type="int" setter="" getter="" default="256">
+			Maximum number of uniform sets that will be cached by the 2D renderer when batching draw calls.
+			[b]Note:[/b] A project that uses a large number of unique sprite textures per frame may benefit from increasing this value.
+		</member>
 		<member name="rendering/2d/sdf/oversize" type="int" setter="" getter="" default="1">
 			Controls how much of the original viewport size should be covered by the 2D signed distance field. This SDF can be sampled in [CanvasItem] shaders and is used for [GPUParticles2D] collision. Higher values allow portions of occluders located outside the viewport to still be taken into account in the generated signed distance field, at the cost of performance. If you notice particles falling through [LightOccluder2D]s as the occluders leave the viewport, increase this setting.
 			The percentage specified is added on each axis and on both sides. For example, with the default setting of 120%, the signed distance field will cover 20% of the viewport's size outside the viewport on each side (top, right, bottom, left).

+ 258 - 117
servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp

@@ -911,7 +911,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p
 		RenderingServerDefault::redraw_request();
 	}
 
-	state.current_data_buffer_index = (state.current_data_buffer_index + 1) % state.canvas_instance_data_buffers.size();
+	state.current_data_buffer_index = (state.current_data_buffer_index + 1) % BATCH_DATA_BUFFER_COUNT;
 	state.current_instance_buffer_index = 0;
 }
 
@@ -1647,6 +1647,9 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 		default_samplers.default_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED;
 	}
 
+	// preallocate 5 slots for uniform set 3
+	state.batch_texture_uniforms.resize(5);
+
 	{ //shader variants
 
 		String global_defines;
@@ -1973,16 +1976,19 @@ void fragment() {
 		material_storage->material_set_shader(default_clip_children_material, default_clip_children_shader);
 	}
 
+	{
+		uint32_t cache_size = uint32_t(GLOBAL_GET("rendering/2d/batching/uniform_set_cache_size"));
+		rid_set_to_uniform_set.set_capacity(cache_size);
+	}
+
 	{
 		state.max_instances_per_buffer = uint32_t(GLOBAL_GET("rendering/2d/batching/item_buffer_size"));
 		state.max_instance_buffer_size = state.max_instances_per_buffer * sizeof(InstanceData);
-		state.canvas_instance_data_buffers.resize(3);
 		state.canvas_instance_batches.reserve(200);
 
-		for (int i = 0; i < 3; i++) {
-			DataBuffer db;
+		for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
+			DataBuffer &db = state.canvas_instance_data_buffers[i];
 			db.instance_buffers.push_back(RD::get_singleton()->storage_buffer_create(state.max_instance_buffer_size));
-			state.canvas_instance_data_buffers[i] = db;
 		}
 		state.instance_data_array = memnew_arr(InstanceData, state.max_instances_per_buffer);
 	}
@@ -2159,7 +2165,7 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
 
 	RD::FramebufferFormatID fb_format = RD::get_singleton()->framebuffer_get_format(framebuffer);
 
-	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, clear ? RD::INITIAL_ACTION_CLEAR : RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, clear_colors);
+	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, clear ? RD::INITIAL_ACTION_CLEAR : RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_STORE, RD::INITIAL_ACTION_LOAD, RD::FINAL_ACTION_DISCARD, clear_colors, 1, 0, Rect2(), RDD::BreadcrumbMarker::UI_PASS);
 
 	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, fb_uniform_set, BASE_UNIFORM_SET);
 	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, state.default_transforms_uniform_set, TRANSFORMS_UNIFORM_SET);
@@ -2203,14 +2209,47 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
 
 	RD::get_singleton()->draw_list_end();
 
+	texture_info_map.clear();
 	state.current_batch_index = 0;
 	state.canvas_instance_batches.clear();
 	state.last_instance_index += instance_index;
 }
 
+RendererCanvasRenderRD::InstanceData *RendererCanvasRenderRD::new_instance_data(float *p_world, uint32_t *p_lights, uint32_t p_base_flags, uint32_t p_index, TextureInfo *p_info) { // Resets slot p_index of state.instance_data_array, seeds it from the arguments and p_info, and returns a pointer to that slot.
+	InstanceData *instance_data = &state.instance_data_array[p_index];
+	// Zero out most fields; callers fill in the per-command values afterwards.
+	for (int i = 0; i < 4; i++) {
+		instance_data->modulation[i] = 0.0;
+		instance_data->ninepatch_margins[i] = 0.0;
+		instance_data->src_rect[i] = 0.0;
+		instance_data->dst_rect[i] = 0.0;
+	}
+
+	instance_data->pad[0] = 0.0;
+	instance_data->pad[1] = 0.0;
+
+	instance_data->lights[0] = p_lights[0]; // Exactly four entries are read from p_lights.
+	instance_data->lights[1] = p_lights[1];
+	instance_data->lights[2] = p_lights[2];
+	instance_data->lights[3] = p_lights[3];
+
+	for (int i = 0; i < 6; i++) { // Exactly six floats are read from p_world.
+		instance_data->world[i] = p_world[i];
+	}
+
+	instance_data->flags = p_base_flags | p_info->flags; // Reset on each command for safety, keep canvas texture binding config.
+
+	instance_data->color_texture_pixel_size[0] = p_info->texpixel_size.width; // Pixel size comes from the texture info passed in, not from the current batch.
+	instance_data->color_texture_pixel_size[1] = p_info->texpixel_size.height;
+
+	instance_data->pad1 = 0;
+
+	return instance_data;
+}
+
 void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch) {
-	RenderingServer::CanvasItemTextureFilter texture_filter = p_item->texture_filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ? default_filter : p_item->texture_filter;
-	RenderingServer::CanvasItemTextureRepeat texture_repeat = p_item->texture_repeat == RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT ? default_repeat : p_item->texture_repeat;
+	const RenderingServer::CanvasItemTextureFilter texture_filter = p_item->texture_filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ? default_filter : p_item->texture_filter;
+	const RenderingServer::CanvasItemTextureRepeat texture_repeat = p_item->texture_repeat == RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT ? default_repeat : p_item->texture_repeat;
 
 	Transform2D base_transform = p_base_transform;
 
@@ -2241,7 +2280,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 
 				light_count++;
 
-				if (light_count == state.max_lights_per_item - 1) {
+				if (light_count == MAX_LIGHTS_PER_ITEM - 1) {
 					break;
 				}
 			}
@@ -2258,38 +2297,6 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 		r_current_batch->use_lighting = use_lighting;
 	}
 
-	// new_instance_data should be called after the current_batch is set.
-	auto new_instance_data = [&]() -> InstanceData * {
-		InstanceData *instance_data = &state.instance_data_array[r_index];
-		// Zero out most fields.
-		for (int i = 0; i < 4; i++) {
-			instance_data->modulation[i] = 0.0;
-			instance_data->ninepatch_margins[i] = 0.0;
-			instance_data->src_rect[i] = 0.0;
-			instance_data->dst_rect[i] = 0.0;
-		}
-
-		instance_data->pad[0] = 0.0;
-		instance_data->pad[1] = 0.0;
-
-		instance_data->lights[0] = lights[0];
-		instance_data->lights[1] = lights[1];
-		instance_data->lights[2] = lights[2];
-		instance_data->lights[3] = lights[3];
-
-		for (int i = 0; i < 6; i++) {
-			instance_data->world[i] = world[i];
-		}
-
-		instance_data->flags = base_flags | r_current_batch->tex_info.flags; // Reset on each command for safety, keep canvas texture binding config.
-
-		instance_data->color_texture_pixel_size[0] = r_current_batch->tex_info.texpixel_size.width;
-		instance_data->color_texture_pixel_size[1] = r_current_batch->tex_info.texpixel_size.height;
-		instance_data->specular_shininess = r_current_batch->tex_info.specular_shininess;
-
-		return instance_data;
-	};
-
 	const Item::Command *c = p_item->commands;
 	while (c) {
 		if (skipping && c->type != Item::Command::TYPE_ANIMATION_SLICE) {
@@ -2311,17 +2318,9 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 					r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES;
 				}
 
+				RenderingServer::CanvasItemTextureRepeat rect_repeat = texture_repeat;
 				if (bool(rect->flags & CANVAS_RECT_TILE)) {
-					texture_repeat = RenderingServer::CanvasItemTextureRepeat::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED;
-				}
-
-				bool has_msdf = bool(rect->flags & CANVAS_RECT_MSDF);
-				TextureState tex_state(rect->texture, texture_filter, texture_repeat, has_msdf, use_linear_colors);
-
-				if (tex_state != r_current_batch->tex_info.state) {
-					r_current_batch = _new_batch(r_batch_broken);
-					r_current_batch->tex_info.state = tex_state;
-					_prepare_batch_texture_info(r_current_batch, rect->texture);
+					rect_repeat = RenderingServer::CanvasItemTextureRepeat::CANVAS_ITEM_TEXTURE_REPEAT_ENABLED;
 				}
 
 				Color modulated = rect->modulate * base_color;
@@ -2340,12 +2339,25 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 					r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES;
 				}
 
-				InstanceData *instance_data = new_instance_data();
+				bool has_msdf = bool(rect->flags & CANVAS_RECT_MSDF);
+				TextureState tex_state(rect->texture, texture_filter, rect_repeat, has_msdf, use_linear_colors);
+				TextureInfo *tex_info = texture_info_map.getptr(tex_state);
+				if (!tex_info) {
+					tex_info = &texture_info_map.insert(tex_state, TextureInfo())->value;
+					_prepare_batch_texture_info(rect->texture, tex_state, tex_info);
+				}
+
+				if (r_current_batch->tex_info != tex_info) {
+					r_current_batch = _new_batch(r_batch_broken);
+					r_current_batch->tex_info = tex_info;
+				}
+
+				InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
 				Rect2 src_rect;
 				Rect2 dst_rect;
 
 				if (rect->texture.is_valid()) {
-					src_rect = (rect->flags & CANVAS_RECT_REGION) ? Rect2(rect->source.position * r_current_batch->tex_info.texpixel_size, rect->source.size * r_current_batch->tex_info.texpixel_size) : Rect2(0, 0, 1, 1);
+					src_rect = (rect->flags & CANVAS_RECT_REGION) ? Rect2(rect->source.position * tex_info->texpixel_size, rect->source.size * tex_info->texpixel_size) : Rect2(0, 0, 1, 1);
 					dst_rect = Rect2(rect->rect.position, rect->rect.size);
 
 					if (dst_rect.size.width < 0) {
@@ -2431,13 +2443,18 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 				}
 
 				TextureState tex_state(np->texture, texture_filter, texture_repeat, false, use_linear_colors);
-				if (tex_state != r_current_batch->tex_info.state) {
+				TextureInfo *tex_info = texture_info_map.getptr(tex_state);
+				if (!tex_info) {
+					tex_info = &texture_info_map.insert(tex_state, TextureInfo())->value;
+					_prepare_batch_texture_info(np->texture, tex_state, tex_info);
+				}
+
+				if (r_current_batch->tex_info != tex_info) {
 					r_current_batch = _new_batch(r_batch_broken);
-					r_current_batch->tex_info.state = tex_state;
-					_prepare_batch_texture_info(r_current_batch, np->texture);
+					r_current_batch->tex_info = tex_info;
 				}
 
-				InstanceData *instance_data = new_instance_data();
+				InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
 
 				Rect2 src_rect;
 				Rect2 dst_rect(np->rect.position.x, np->rect.position.y, np->rect.size.x, np->rect.size.y);
@@ -2446,7 +2463,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 					src_rect = Rect2(0, 0, 1, 1);
 				} else {
 					if (np->source != Rect2()) {
-						src_rect = Rect2(np->source.position.x * r_current_batch->tex_info.texpixel_size.width, np->source.position.y * r_current_batch->tex_info.texpixel_size.height, np->source.size.x * r_current_batch->tex_info.texpixel_size.width, np->source.size.y * r_current_batch->tex_info.texpixel_size.height);
+						src_rect = Rect2(np->source.position.x * tex_info->texpixel_size.width, np->source.position.y * tex_info->texpixel_size.height, np->source.size.x * tex_info->texpixel_size.width, np->source.size.y * tex_info->texpixel_size.height);
 						instance_data->color_texture_pixel_size[0] = 1.0 / np->source.size.width;
 						instance_data->color_texture_pixel_size[1] = 1.0 / np->source.size.height;
 					} else {
@@ -2500,10 +2517,15 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 				r_current_batch->command = c;
 
 				TextureState tex_state(polygon->texture, texture_filter, texture_repeat, false, use_linear_colors);
-				if (tex_state != r_current_batch->tex_info.state) {
+				TextureInfo *tex_info = texture_info_map.getptr(tex_state);
+				if (!tex_info) {
+					tex_info = &texture_info_map.insert(tex_state, TextureInfo())->value;
+					_prepare_batch_texture_info(polygon->texture, tex_state, tex_info);
+				}
+
+				if (r_current_batch->tex_info != tex_info) {
 					r_current_batch = _new_batch(r_batch_broken);
-					r_current_batch->tex_info.state = tex_state;
-					_prepare_batch_texture_info(r_current_batch, polygon->texture);
+					r_current_batch->tex_info = tex_info;
 				}
 
 				// pipeline variant
@@ -2513,7 +2535,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 					r_current_batch->render_primitive = _primitive_type_to_render_primitive(polygon->primitive);
 				}
 
-				InstanceData *instance_data = new_instance_data();
+				InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
 
 				Color color = base_color;
 				if (use_linear_colors) {
@@ -2557,17 +2579,22 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 						default:
 							// Unknown point count.
 							break;
-					};
-
-					TextureState tex_state(primitive->texture, texture_filter, texture_repeat, false, use_linear_colors);
-					if (tex_state != r_current_batch->tex_info.state) {
-						r_current_batch = _new_batch(r_batch_broken);
-						r_current_batch->tex_info.state = tex_state;
-						_prepare_batch_texture_info(r_current_batch, primitive->texture);
 					}
 				}
 
-				InstanceData *instance_data = new_instance_data();
+				TextureState tex_state(primitive->texture, texture_filter, texture_repeat, false, use_linear_colors);
+				TextureInfo *tex_info = texture_info_map.getptr(tex_state);
+				if (!tex_info) {
+					tex_info = &texture_info_map.insert(tex_state, TextureInfo())->value;
+					_prepare_batch_texture_info(primitive->texture, tex_state, tex_info);
+				}
+
+				if (r_current_batch->tex_info != tex_info) {
+					r_current_batch = _new_batch(r_batch_broken);
+					r_current_batch->tex_info = tex_info;
+				}
+
+				InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
 
 				for (uint32_t j = 0; j < MIN(3u, primitive->point_count); j++) {
 					instance_data->points[j * 2 + 0] = primitive->points[j].x;
@@ -2585,7 +2612,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 				_add_to_batch(r_index, r_batch_broken, r_current_batch);
 
 				if (primitive->point_count == 4) {
-					instance_data = new_instance_data();
+					instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
 
 					for (uint32_t j = 0; j < 3; j++) {
 						int offset = j == 0 ? 0 : 1;
@@ -2594,7 +2621,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 						instance_data->points[j * 2 + 1] = primitive->points[j + offset].y;
 						instance_data->uvs[j * 2 + 0] = primitive->uvs[j + offset].x;
 						instance_data->uvs[j * 2 + 1] = primitive->uvs[j + offset].y;
-						Color col = primitive->colors[j] * base_color;
+						Color col = primitive->colors[j + offset] * base_color;
 						if (use_linear_colors) {
 							col = col.srgb_to_linear();
 						}
@@ -2621,9 +2648,13 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 				if (c->type == Item::Command::TYPE_MESH) {
 					const Item::CommandMesh *m = static_cast<const Item::CommandMesh *>(c);
 					TextureState tex_state(m->texture, texture_filter, texture_repeat, false, use_linear_colors);
-					r_current_batch->tex_info.state = tex_state;
-					_prepare_batch_texture_info(r_current_batch, m->texture);
-					instance_data = new_instance_data();
+					TextureInfo *tex_info = texture_info_map.getptr(tex_state);
+					if (!tex_info) {
+						tex_info = &texture_info_map.insert(tex_state, TextureInfo())->value;
+						_prepare_batch_texture_info(m->texture, tex_state, tex_info);
+					}
+					r_current_batch->tex_info = tex_info;
+					instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
 
 					r_current_batch->mesh_instance_count = 1;
 					_update_transform_2d_to_mat2x3(base_transform * draw_transform * m->transform, instance_data->world);
@@ -2644,9 +2675,13 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 					}
 
 					TextureState tex_state(mm->texture, texture_filter, texture_repeat, false, use_linear_colors);
-					r_current_batch->tex_info.state = tex_state;
-					_prepare_batch_texture_info(r_current_batch, mm->texture);
-					instance_data = new_instance_data();
+					TextureInfo *tex_info = texture_info_map.getptr(tex_state);
+					if (!tex_info) {
+						tex_info = &texture_info_map.insert(tex_state, TextureInfo())->value;
+						_prepare_batch_texture_info(mm->texture, tex_state, tex_info);
+					}
+					r_current_batch->tex_info = tex_info;
+					instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
 
 					instance_data->flags |= 1; // multimesh, trails disabled
 
@@ -2662,10 +2697,13 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 
 					const Item::CommandParticles *pt = static_cast<const Item::CommandParticles *>(c);
 					TextureState tex_state(pt->texture, texture_filter, texture_repeat, false, use_linear_colors);
-					r_current_batch->tex_info.state = tex_state;
-					_prepare_batch_texture_info(r_current_batch, pt->texture);
-
-					instance_data = new_instance_data();
+					TextureInfo *tex_info = texture_info_map.getptr(tex_state);
+					if (!tex_info) {
+						tex_info = &texture_info_map.insert(tex_state, TextureInfo())->value;
+						_prepare_batch_texture_info(pt->texture, tex_state, tex_info);
+					}
+					r_current_batch->tex_info = tex_info;
+					instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
 
 					uint32_t divisor = 1;
 					r_current_batch->mesh_instance_count = particles_storage->particles_get_amount(pt->particles, divisor);
@@ -2747,36 +2785,137 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
 		r_batch_broken = false;
 	}
 
+#ifdef DEBUG_ENABLED
+	if (debug_redraw && p_item->debug_redraw_time > 0.0) {
+		Color dc = debug_redraw_color;
+		dc.a *= p_item->debug_redraw_time / debug_redraw_time;
+
+		// 1: If commands are different, start a new batch.
+		if (r_current_batch->command_type != Item::Command::TYPE_RECT) {
+			r_current_batch = _new_batch(r_batch_broken);
+			r_current_batch->command_type = Item::Command::TYPE_RECT;
+			// it is ok to be null for a TYPE_RECT
+			r_current_batch->command = nullptr;
+			// default variant
+			r_current_batch->shader_variant = SHADER_VARIANT_QUAD;
+			r_current_batch->render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES;
+		}
+
+		// 2: If the current batch has lighting, start a new batch.
+		if (r_current_batch->use_lighting) {
+			r_current_batch = _new_batch(r_batch_broken);
+			r_current_batch->use_lighting = false;
+		}
+
+		// 3: If the current batch has blend, start a new batch.
+		if (r_current_batch->has_blend) {
+			r_current_batch = _new_batch(r_batch_broken);
+			r_current_batch->has_blend = false;
+		}
+
+		TextureState tex_state(default_canvas_texture, texture_filter, texture_repeat, false, use_linear_colors);
+		TextureInfo *tex_info = texture_info_map.getptr(tex_state);
+		if (!tex_info) {
+			tex_info = &texture_info_map.insert(tex_state, TextureInfo())->value;
+			_prepare_batch_texture_info(default_canvas_texture, tex_state, tex_info);
+		}
+
+		if (r_current_batch->tex_info != tex_info) {
+			r_current_batch = _new_batch(r_batch_broken);
+			r_current_batch->tex_info = tex_info;
+		}
+
+		InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, tex_info);
+
+		Rect2 src_rect;
+		Rect2 dst_rect;
+
+		dst_rect = Rect2(Vector2(), p_item->rect.size);
+		if (dst_rect.size.width < 0) {
+			dst_rect.position.x += dst_rect.size.width;
+			dst_rect.size.width *= -1;
+		}
+		if (dst_rect.size.height < 0) {
+			dst_rect.position.y += dst_rect.size.height;
+			dst_rect.size.height *= -1;
+		}
+
+		src_rect = Rect2(0, 0, 1, 1);
+
+		instance_data->modulation[0] = dc.r;
+		instance_data->modulation[1] = dc.g;
+		instance_data->modulation[2] = dc.b;
+		instance_data->modulation[3] = dc.a;
+
+		instance_data->src_rect[0] = src_rect.position.x;
+		instance_data->src_rect[1] = src_rect.position.y;
+		instance_data->src_rect[2] = src_rect.size.width;
+		instance_data->src_rect[3] = src_rect.size.height;
+
+		instance_data->dst_rect[0] = dst_rect.position.x;
+		instance_data->dst_rect[1] = dst_rect.position.y;
+		instance_data->dst_rect[2] = dst_rect.size.width;
+		instance_data->dst_rect[3] = dst_rect.size.height;
+
+		_add_to_batch(r_index, r_batch_broken, r_current_batch);
+
+		p_item->debug_redraw_time -= RSG::rasterizer->get_frame_delta_time();
+
+		RenderingServerDefault::redraw_request();
+
+		r_batch_broken = false;
+	}
+#endif
+
 	if (r_current_clip && reclip) {
 		// will make it re-enable clipping if needed afterwards
 		r_current_clip = nullptr;
 	}
 }
 
-void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info) {
-	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
-	ERR_FAIL_NULL(uniform_set_cache);
+void RendererCanvasRenderRD::_before_evict(RendererCanvasRenderRD::RIDSetKey &p_key, RID &p_rid) { // Frees the uniform set p_rid; p_key is not used. NOTE(review): presumably the BeforeEvict hook of the uniform set LRU cache - confirm in the header.
+	RD::get_singleton()->uniform_set_set_invalidation_callback(p_rid, nullptr, nullptr); // Clear the callback before freeing. NOTE(review): presumably so the free below cannot re-enter the cache through the invalidation callback - confirm.
+	RD::get_singleton()->free(p_rid);
+}
 
-	ERR_FAIL_NULL(p_batch->command);
+void RendererCanvasRenderRD::_uniform_set_invalidation_callback(void *p_userdata) { // Removes the entry identified by p_userdata from rid_set_to_uniform_set on the renderer singleton.
+	const RIDSetKey *key = static_cast<RIDSetKey *>(p_userdata); // p_userdata is interpreted as a RIDSetKey pointer; _render_batch registers the address of the cached entry's key.
+	static_cast<RendererCanvasRenderRD *>(singleton)->rid_set_to_uniform_set.erase(*key); // Uses LRUCache::erase, which does not call BeforeEvict, so the RID is not freed here.
+}
 
+void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info) {
 	{
-		RD::Uniform u_diffuse(RD::UNIFORM_TYPE_TEXTURE, 0, p_batch->tex_info.diffuse);
-		RD::Uniform u_normal(RD::UNIFORM_TYPE_TEXTURE, 1, p_batch->tex_info.normal);
-		RD::Uniform u_specular(RD::UNIFORM_TYPE_TEXTURE, 2, p_batch->tex_info.specular);
-		RD::Uniform u_sampler(RD::UNIFORM_TYPE_SAMPLER, 3, p_batch->tex_info.sampler);
-		RD::Uniform u_instance_data(RD::UNIFORM_TYPE_STORAGE_BUFFER, 4, state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[p_batch->instance_buffer_index]);
-
-		RID uniform_set = uniform_set_cache->get_cache(shader.default_version_rd_shader, BATCH_UNIFORM_SET, u_diffuse, u_normal, u_specular, u_sampler, u_instance_data);
+		RIDSetKey key(
+				p_batch->tex_info->state,
+				state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[p_batch->instance_buffer_index]);
+
+		const RID *uniform_set = rid_set_to_uniform_set.getptr(key);
+		if (uniform_set == nullptr) {
+			state.batch_texture_uniforms.write[0] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 0, p_batch->tex_info->diffuse);
+			state.batch_texture_uniforms.write[1] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 1, p_batch->tex_info->normal);
+			state.batch_texture_uniforms.write[2] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 2, p_batch->tex_info->specular);
+			state.batch_texture_uniforms.write[3] = RD::Uniform(RD::UNIFORM_TYPE_SAMPLER, 3, p_batch->tex_info->sampler);
+			state.batch_texture_uniforms.write[4] = RD::Uniform(RD::UNIFORM_TYPE_STORAGE_BUFFER, 4, state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[p_batch->instance_buffer_index]);
+
+			RID rid = RD::get_singleton()->uniform_set_create(state.batch_texture_uniforms, shader.default_version_rd_shader, BATCH_UNIFORM_SET);
+			ERR_FAIL_COND_MSG(rid.is_null(), "Failed to create uniform set for batch.");
+
+			const RIDCache::Pair *iter = rid_set_to_uniform_set.insert(key, rid);
+			uniform_set = &iter->data;
+			RD::get_singleton()->uniform_set_set_invalidation_callback(rid, RendererCanvasRenderRD::_uniform_set_invalidation_callback, (void *)&iter->key);
+		}
 
-		if (state.current_batch_uniform_set != uniform_set) {
-			state.current_batch_uniform_set = uniform_set;
-			RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, uniform_set, BATCH_UNIFORM_SET);
+		if (state.current_batch_uniform_set != *uniform_set) {
+			state.current_batch_uniform_set = *uniform_set;
+			RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, *uniform_set, BATCH_UNIFORM_SET);
 		}
 	}
+	PushConstant push_constant;
+	push_constant.base_instance_index = p_batch->start;
+	push_constant.specular_shininess = p_batch->tex_info->specular_shininess;
 
 	RID pipeline;
 	PipelineKey pipeline_key;
-	PushConstant push_constant;
 	pipeline_key.framebuffer_format_id = p_framebuffer_format;
 	pipeline_key.variant = p_batch->shader_variant;
 	pipeline_key.render_primitive = p_batch->render_primitive;
@@ -2792,7 +2931,6 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
 				RD::get_singleton()->draw_list_set_blend_constants(p_draw_list, p_batch->modulate);
 			}
 
-			push_constant.base_instance_index = p_batch->start;
 			RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant));
 			RD::get_singleton()->draw_list_bind_index_array(p_draw_list, shader.quad_index_array);
 			RD::get_singleton()->draw_list_draw(p_draw_list, true, p_batch->instance_count);
@@ -2805,6 +2943,8 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
 		} break;
 
 		case Item::Command::TYPE_POLYGON: {
+			ERR_FAIL_NULL(p_batch->command);
+
 			const Item::CommandPolygon *polygon = static_cast<const Item::CommandPolygon *>(p_batch->command);
 
 			PolygonBuffers *pb = polygon_buffers.polygons.getptr(polygon->polygon.polygon_id);
@@ -2814,7 +2954,6 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
 			pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant);
 			RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline);
 
-			push_constant.base_instance_index = p_batch->start;
 			RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant));
 			RD::get_singleton()->draw_list_bind_vertex_array(p_draw_list, pb->vertex_array);
 			if (pb->indices.is_valid()) {
@@ -2830,12 +2969,13 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
 		} break;
 
 		case Item::Command::TYPE_PRIMITIVE: {
+			ERR_FAIL_NULL(p_batch->command);
+
 			const Item::CommandPrimitive *primitive = static_cast<const Item::CommandPrimitive *>(p_batch->command);
 
 			pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant);
 			RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline);
 
-			push_constant.base_instance_index = p_batch->start;
 			RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant));
 			RD::get_singleton()->draw_list_bind_index_array(p_draw_list, primitive_arrays.index_array[MIN(3u, primitive->point_count) - 1]);
 			uint32_t instance_count = p_batch->instance_count;
@@ -2852,6 +2992,8 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
 		case Item::Command::TYPE_MESH:
 		case Item::Command::TYPE_MULTIMESH:
 		case Item::Command::TYPE_PARTICLES: {
+			ERR_FAIL_NULL(p_batch->command);
+
 			RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton();
 			RendererRD::ParticlesStorage *particles_storage = RendererRD::ParticlesStorage::get_singleton();
 
@@ -2912,7 +3054,6 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
 				pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant, mesh_instance, surface, j, &vertex_array);
 				RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline);
 
-				push_constant.base_instance_index = p_batch->start;
 				RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant));
 
 				RID index_array = mesh_storage->mesh_surface_get_index_array(surface, 0);
@@ -2994,7 +3135,7 @@ void RendererCanvasRenderRD::_allocate_instance_buffer() {
 	state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers.push_back(buf);
 }
 
-void RendererCanvasRenderRD::_prepare_batch_texture_info(Batch *p_current_batch, RID p_texture) const {
+void RendererCanvasRenderRD::_prepare_batch_texture_info(RID p_texture, TextureState &p_state, TextureInfo *p_info) { // Looks up the canvas texture info for p_texture (default_canvas_texture when null) using the sampling bits of p_state, and writes the result into *p_info.
 	if (p_texture.is_null()) {
 		p_texture = default_canvas_texture;
 	}
@@ -3002,38 +3143,38 @@ void RendererCanvasRenderRD::_prepare_batch_texture_info(Batch *p_current_batch,
 	RendererRD::TextureStorage::CanvasTextureInfo info =
 			RendererRD::TextureStorage::get_singleton()->canvas_texture_get_info(
 					p_texture,
-					p_current_batch->tex_info.state.texture_filter(),
-					p_current_batch->tex_info.state.texture_repeat(),
-					p_current_batch->tex_info.state.linear_colors(),
-					p_current_batch->tex_info.state.texture_is_data());
-
+					p_state.texture_filter(),
+					p_state.texture_repeat(),
+					p_state.linear_colors(),
+					p_state.texture_is_data());
 	// something odd happened
 	if (info.is_null()) {
+		_prepare_batch_texture_info(default_canvas_texture, p_state, p_info); // retry with the default canvas texture; NOTE(review): if that lookup is null as well, this recurses again with the same arguments
 		return;
 	}
 
-	p_current_batch->tex_info.diffuse = info.diffuse;
-	p_current_batch->tex_info.normal = info.normal;
-	p_current_batch->tex_info.specular = info.specular;
-	p_current_batch->tex_info.sampler = info.sampler;
+	p_info->state = p_state; // record the state this info was resolved for
+	p_info->diffuse = info.diffuse; // copy the resolved RIDs out of the storage-side info
+	p_info->normal = info.normal;
+	p_info->specular = info.specular;
+	p_info->sampler = info.sampler;
 
 	// cache values to be copied to instance data
 	if (info.specular_color.a < 0.999) {
-		p_current_batch->tex_info.flags |= FLAGS_DEFAULT_SPECULAR_MAP_USED;
+		p_info->flags |= FLAGS_DEFAULT_SPECULAR_MAP_USED; // OR-ed in: bits already set in p_info->flags are kept, never cleared here
 	}
 
 	if (info.use_normal) {
-		p_current_batch->tex_info.flags |= FLAGS_DEFAULT_NORMAL_MAP_USED;
+		p_info->flags |= FLAGS_DEFAULT_NORMAL_MAP_USED; // same accumulation as above
 	}
 
 	uint8_t a = uint8_t(CLAMP(info.specular_color.a * 255.0, 0.0, 255.0));
 	uint8_t b = uint8_t(CLAMP(info.specular_color.b * 255.0, 0.0, 255.0));
 	uint8_t g = uint8_t(CLAMP(info.specular_color.g * 255.0, 0.0, 255.0));
 	uint8_t r = uint8_t(CLAMP(info.specular_color.r * 255.0, 0.0, 255.0));
-	p_current_batch->tex_info.specular_shininess = uint32_t(a) << 24 | uint32_t(b) << 16 | uint32_t(g) << 8 | uint32_t(r);
+	p_info->specular_shininess = uint32_t(a) << 24 | uint32_t(b) << 16 | uint32_t(g) << 8 | uint32_t(r); // packed 8 bits per channel: a in the top byte, then b, g, and r in the low byte
 
-	p_current_batch->tex_info.texpixel_size = Vector2(1.0 / float(info.size.width), 1.0 / float(info.size.height));
+	p_info->texpixel_size = Vector2(1.0 / float(info.size.width), 1.0 / float(info.size.height)); // reciprocal of the texture size; NOTE(review): no guard against a zero width or height here
 }
 
 RendererCanvasRenderRD::~RendererCanvasRenderRD() {
@@ -3075,7 +3216,7 @@ RendererCanvasRenderRD::~RendererCanvasRenderRD() {
 	RD::get_singleton()->free(state.shadow_texture);
 
 	memdelete_arr(state.instance_data_array);
-	for (uint32_t i = 0; i < state.canvas_instance_data_buffers.size(); i++) {
+	for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
 		for (uint32_t j = 0; j < state.canvas_instance_data_buffers[i].instance_buffers.size(); j++) {
 			RD::get_singleton()->free(state.canvas_instance_data_buffers[i].instance_buffers[j]);
 		}

+ 65 - 17
servers/rendering/renderer_rd/renderer_canvas_render_rd.h

@@ -31,6 +31,7 @@
 #ifndef RENDERER_CANVAS_RENDER_RD_H
 #define RENDERER_CANVAS_RENDER_RD_H
 
+#include "core/templates/lru.h"
 #include "servers/rendering/renderer_canvas_render.h"
 #include "servers/rendering/renderer_compositor.h"
 #include "servers/rendering/renderer_rd/pipeline_cache_rd.h"
@@ -50,6 +51,8 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	};
 
 	const int SAMPLERS_BINDING_FIRST_INDEX = 10;
+	// The size of the ring buffer to store GPU buffers. Triple-buffering the max expected frames in flight.
+	static const uint32_t BATCH_DATA_BUFFER_COUNT = 3;
 
 	enum ShaderVariant {
 		SHADER_VARIANT_QUAD,
@@ -339,7 +342,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	struct InstanceData {
 		float world[6];
 		uint32_t flags;
-		uint32_t specular_shininess;
+		uint32_t pad1;
 		union {
 			//rect
 			struct {
@@ -366,8 +369,8 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	struct PushConstant {
 		uint32_t base_instance_index;
 		ShaderSpecialization shader_specialization;
-		uint32_t pad2;
-		uint32_t pad3;
+		uint32_t specular_shininess; // third member, like `specular_shininess` in the GLSL `Params` push-constant block, where canvas.glsl expands it with unpackUnorm4x8
+		uint32_t pad; // unused; with the new field it replaces the former pad2/pad3 pair, so the member count is unchanged
 	};
 
 	// TextureState is used to determine when a new batch is required due to a change of texture state.
@@ -398,50 +401,91 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 					(((uint32_t)p_use_linear_colors & LINEAR_COLORS_MASK) << LINEAR_COLORS_SHIFT);
 		}
 
-		_FORCE_INLINE_ RS::CanvasItemTextureFilter texture_filter() const {
+		_ALWAYS_INLINE_ RS::CanvasItemTextureFilter texture_filter() const {
 			return (RS::CanvasItemTextureFilter)((other >> FILTER_SHIFT) & FILTER_MASK);
 		}
 
-		_FORCE_INLINE_ RS::CanvasItemTextureRepeat texture_repeat() const {
+		_ALWAYS_INLINE_ RS::CanvasItemTextureRepeat texture_repeat() const {
 			return (RS::CanvasItemTextureRepeat)((other >> REPEAT_SHIFT) & REPEAT_MASK);
 		}
 
-		_FORCE_INLINE_ bool linear_colors() const {
+		_ALWAYS_INLINE_ bool linear_colors() const {
 			return (other >> LINEAR_COLORS_SHIFT) & LINEAR_COLORS_MASK;
 		}
 
-		_FORCE_INLINE_ bool texture_is_data() const {
+		_ALWAYS_INLINE_ bool texture_is_data() const {
 			return (other >> TEXTURE_IS_DATA_SHIFT) & TEXTURE_IS_DATA_MASK;
 		}
 
-		bool operator==(const TextureState &p_val) const {
+		_ALWAYS_INLINE_ bool operator==(const TextureState &p_val) const {
 			return (texture == p_val.texture) && (other == p_val.other);
 		}
 
-		bool operator!=(const TextureState &p_val) const {
+		_ALWAYS_INLINE_ bool operator!=(const TextureState &p_val) const {
 			return (texture != p_val.texture) || (other != p_val.other);
 		}
+
+		_ALWAYS_INLINE_ bool is_valid() const { return texture.is_valid(); }
+		_ALWAYS_INLINE_ bool is_null() const { return texture.is_null(); }
+
+		uint32_t hash() const {
+			uint32_t hash = hash_murmur3_one_64(texture.get_id());
+			return hash_murmur3_one_32(other, hash);
+		}
 	};
 
 	struct TextureInfo {
 		TextureState state;
-		uint32_t specular_shininess = 0;
-		uint32_t flags = 0;
-		Vector2 texpixel_size;
-
 		RID diffuse;
 		RID normal;
 		RID specular;
 		RID sampler;
+		Vector2 texpixel_size; // 1 / texture size, as written by _prepare_batch_texture_info
+		uint32_t specular_shininess = 0; // specular colour packed 8 bits per channel, alpha in the top byte and red in the low byte
+		uint32_t flags = 0; // receives FLAGS_DEFAULT_SPECULAR_MAP_USED / FLAGS_DEFAULT_NORMAL_MAP_USED in _prepare_batch_texture_info
+	};
+
+	/// A key used to uniquely identify a distinct BATCH_UNIFORM_SET: a TextureState paired with an instance data RID; both must match for two keys to compare equal.
+	struct RIDSetKey {
+		TextureState state; // texture half of the key
+		RID instance_data; // second half of the key; presumably the instance buffer the set is built with (TODO confirm against the call site)
+
+		RIDSetKey() {
+		}
+
+		RIDSetKey(TextureState p_state, RID p_instance_data) :
+				state(p_state),
+				instance_data(p_instance_data) {
+		}
+
+		_ALWAYS_INLINE_ bool operator==(const RIDSetKey &p_val) const { // member-wise equality
+			return state == p_val.state && instance_data == p_val.instance_data;
+		}
+
+		_ALWAYS_INLINE_ bool operator!=(const RIDSetKey &p_val) const { // defined as the negation of operator==
+			return !(*this == p_val);
+		}
+
+		_ALWAYS_INLINE_ uint32_t hash() const { // combines both members; the RIDCache typedef pairs this key with HashableHasher<RIDSetKey>
+			uint32_t h = state.hash(); // start from the texture state hash
+			h = hash_murmur3_one_64(instance_data.get_id(), h); // mix in the id of the instance data RID, seeded with the running hash
+			return hash_fmix32(h); // final mix applied once, after both members are folded in
+		}
 	};
 
+	static void _before_evict(RendererCanvasRenderRD::RIDSetKey &p_key, RID &p_rid); // handed to LRUCache as its BeforeEvict template argument in the RIDCache typedef below
+	static void _uniform_set_invalidation_callback(void *p_userdata);
+
+	typedef LRUCache<RIDSetKey, RID, HashableHasher<RIDSetKey>, HashMapComparatorDefault<RIDSetKey>, _before_evict> RIDCache; // LRU cache from RIDSetKey to RID; LRUCache calls _before_evict on an entry before erasing it (on replacement or when over capacity)
+	RIDCache rid_set_to_uniform_set; // presumably holds uniform set RIDs, per the member name (TODO confirm)
+
 	struct Batch {
 		// Position in the UBO measured in bytes
 		uint32_t start = 0;
 		uint32_t instance_count = 0;
 		uint32_t instance_buffer_index = 0;
 
-		TextureInfo tex_info;
+		TextureInfo *tex_info;
 
 		Color modulate = Color(1.0, 1.0, 1.0, 1.0);
 
@@ -466,6 +510,9 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 		bool has_blend = false;
 	};
 
+	HashMap<TextureState, TextureInfo, HashableHasher<TextureState>> texture_info_map;
+
+	// per-frame buffers: each DataBuffer is one slot of state.canvas_instance_data_buffers and holds the list of instance buffer RIDs for that slot
 	struct DataBuffer {
 		LocalVector<RID> instance_buffers;
 	};
@@ -492,7 +539,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 			uint32_t pad2;
 		};
 
-		LocalVector<DataBuffer> canvas_instance_data_buffers;
+		DataBuffer canvas_instance_data_buffers[BATCH_DATA_BUFFER_COUNT];
 		LocalVector<Batch> canvas_instance_batches;
 		uint32_t current_data_buffer_index = 0;
 		uint32_t current_instance_buffer_index = 0;
@@ -503,6 +550,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 		uint32_t max_instances_per_buffer = 16384;
 		uint32_t max_instance_buffer_size = 16384 * sizeof(InstanceData);
 
+		Vector<RD::Uniform> batch_texture_uniforms;
 		RID current_batch_uniform_set;
 
 		LightUniform *light_uniforms = nullptr;
@@ -518,7 +566,6 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 		RID default_transforms_uniform_set;
 
 		uint32_t max_lights_per_render;
-		uint32_t max_lights_per_item;
 
 		double time;
 
@@ -558,7 +605,8 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
 	void _render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr);
 	void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch);
 	void _render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info = nullptr);
-	void _prepare_batch_texture_info(Batch *p_current_batch, RID p_texture) const;
+	void _prepare_batch_texture_info(RID p_texture, TextureState &p_state, TextureInfo *p_info);
+	InstanceData *new_instance_data(float *p_world, uint32_t *p_lights, uint32_t p_base_flags, uint32_t p_index, TextureInfo *p_info);
 	[[nodiscard]] Batch *_new_batch(bool &r_batch_broken);
 	void _add_to_batch(uint32_t &r_index, bool &r_batch_broken, Batch *&r_current_batch);
 	void _allocate_instance_buffer();

+ 2 - 6
servers/rendering/renderer_rd/shaders/canvas.glsl

@@ -28,7 +28,7 @@ layout(location = 11) in vec4 weight_attrib;
 
 layout(location = 4) out flat uint instance_index_interp;
 
-#endif // USE_ATTRIBUTES
+#endif // !USE_ATTRIBUTES
 
 layout(location = 0) out vec2 uv_interp;
 layout(location = 1) out vec4 color_interp;
@@ -322,11 +322,7 @@ vec4 light_compute(
 #ifdef USE_NINEPATCH
 
 float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, float margin_begin, float margin_end, int np_repeat, inout int draw_center) {
-#ifdef USE_ATTRIBUTES
-	const InstanceData draw_data = instances.data[params.base_instance_index];
-#else
 	const InstanceData draw_data = instances.data[instance_index];
-#endif // USE_ATTRIBUTES
 
 	float tex_size = 1.0 / tex_pixel_size;
 
@@ -567,7 +563,7 @@ void main() {
 
 	if (specular_shininess_used || (using_light && normal_used && bool(draw_data.flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) {
 		specular_shininess = texture(sampler2D(specular_texture, texture_sampler), uv);
-		specular_shininess *= unpackUnorm4x8(draw_data.specular_shininess);
+		specular_shininess *= unpackUnorm4x8(params.specular_shininess);
 		specular_shininess_used = true;
 	} else {
 		specular_shininess = vec4(1.0);

+ 4 - 4
servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl

@@ -37,7 +37,7 @@ struct InstanceData {
 	vec2 world_y;
 	vec2 world_ofs;
 	uint flags;
-	uint specular_shininess;
+	uint pad2;
 #ifdef USE_PRIMITIVE
 	vec2 points[3];
 	vec2 uvs[3];
@@ -57,8 +57,8 @@ struct InstanceData {
 layout(push_constant, std430) uniform Params {
 	uint base_instance_index; // base index to instance data
 	uint sc_packed_0;
-	uint pad2;
-	uint pad3;
+	uint specular_shininess; // packed colour; canvas.glsl expands it with unpackUnorm4x8 (previously read from InstanceData)
+	uint pad; // unused; with the new field it replaces the former pad2/pad3 pair
 }
 params;
 
@@ -68,7 +68,7 @@ params;
 
 // Pull the constants from the draw call's push constants.
 uint sc_packed_0() {
-	return draw_call.sc_packed_0;
+	return params.sc_packed_0; // `params` is the instance name of the Params push-constant block declared in this file
 }
 
 #else

+ 0 - 1
servers/rendering/renderer_rd/storage_rd/texture_storage.h

@@ -122,7 +122,6 @@ private:
 		Size2i size_cache = Size2i(1, 1);
 		bool use_normal_cache = false;
 		bool use_specular_cache = false;
-		bool cleared_cache = true;
 
 		void clear_cache();
 		~CanvasTexture();

+ 1 - 0
servers/rendering_server.cpp

@@ -3569,6 +3569,7 @@ void RenderingServer::init() {
 
 	GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/2d/shadow_atlas/size", PROPERTY_HINT_RANGE, "128,16384"), 2048);
 	GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/2d/batching/item_buffer_size", PROPERTY_HINT_RANGE, "128,1048576,1"), 16384);
+	GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/2d/batching/uniform_set_cache_size", PROPERTY_HINT_RANGE, "256,1048576,1"), 256);
 
 	// Number of commands that can be drawn per frame.
 	GLOBAL_DEF_RST(PropertyInfo(Variant::INT, "rendering/gl_compatibility/item_buffer_size", PROPERTY_HINT_RANGE, "128,1048576,1"), 16384);