Browse Source

Merge pull request #68960 from lawnjelly/multirect

Batching - Add MultiRect command
Rémi Verschelde 2 years ago
parent
commit
4c5a934408

+ 136 - 0
core/fixed_array.h

@@ -0,0 +1,136 @@
+/**************************************************************************/
+/*  fixed_array.h                                                         */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef FIXED_ARRAY_H
+#define FIXED_ARRAY_H
+
+#include "core/local_vector.h"
+
+// High performance fixed capacity array with inline storage, single threaded.
+// Especially useful if you need to create an array on the stack, to
+// prevent dynamic allocations (especially in bottleneck code).
+//
+// Template parameters:
+// - T: element type.
+// - CAPACITY: maximum number of elements, fixed at compile time.
+// - force_trivial: when true, elements are never constructed or destructed by
+//   the container and may be copied as raw bytes, whatever the traits of T say.
+// - ALIGN: each element slot occupies sizeof(T) rounded up to a multiple of ALIGN.
+//
+// NOTE: The class declares no destructor and no copy operations. Elements of a
+// non trivially destructible T that are still stored when the array goes out of
+// scope are therefore not destroyed (call clear() first), and copying the array
+// copies the raw storage bytes.
+
+template <class T, uint32_t CAPACITY = 8, bool force_trivial = false, uint32_t ALIGN = 1>
+class FixedArray {
+	static_assert(ALIGN > 0, "ALIGN must be at least 1.");
+	// Size in bytes of one element slot.
+	static const uint32_t UNIT_SIZE = ((sizeof(T) + ALIGN - 1) / ALIGN * ALIGN);
+	// Whether the container has to run constructors / destructors itself.
+	static const bool CONSTRUCT = !std::is_trivially_constructible<T>::value && !force_trivial;
+	static const bool DESTRUCT = !std::is_trivially_destructible<T>::value && !force_trivial;
+	// Whether elements may be duplicated with a plain memcpy.
+	static const bool RAW_COPY = std::is_trivially_copyable<T>::value || force_trivial;
+
+	uint32_t _size = 0;
+	// The bytes are reinterpreted as T, so the buffer must start on a boundary
+	// that is valid for T (a plain uint8_t array only guarantees 1 byte alignment).
+	alignas(T) uint8_t _data[CAPACITY * UNIT_SIZE];
+
+	// Unchecked access to the slot at p_index.
+	const T &get(uint32_t p_index) const {
+		return *reinterpret_cast<const T *>(&_data[p_index * UNIT_SIZE]);
+	}
+	T &get(uint32_t p_index) {
+		return *reinterpret_cast<T *>(&_data[p_index * UNIT_SIZE]);
+	}
+
+public:
+	uint32_t size() const { return _size; }
+	bool is_empty() const { return _size == 0; }
+	bool is_full() const { return _size == CAPACITY; }
+	uint32_t capacity() const { return CAPACITY; }
+
+	// Appends a new slot and returns a pointer to it, or nullptr if the array is full.
+	// When p_construct is false the slot is returned as raw storage, and the caller
+	// is responsible for constructing the element if T requires construction.
+	T *request(bool p_construct = true) {
+		if (is_full()) {
+			return nullptr;
+		}
+
+		T *ele = &get(_size++);
+		if (CONSTRUCT && p_construct) {
+			memnew_placement(ele, T);
+		}
+		return ele;
+	}
+
+	// Appends a copy of p_val. Fails with an error (and stores nothing) if the array is full.
+	void push_back(const T &p_val) {
+		T *mem = request(false);
+		ERR_FAIL_NULL(mem);
+
+		if (CONSTRUCT) {
+			// The slot is raw storage at this point, so the element has to be copy
+			// constructed in place. Assigning to it would run operator= on an object
+			// that was never constructed.
+			memnew_placement(mem, T(p_val));
+		} else {
+			*mem = p_val;
+		}
+	}
+
+	// Removes all elements (destroying them if required).
+	void clear() {
+		resize(0);
+	}
+
+	// Removes the element at p_index by overwriting it with the last element.
+	// The order of the remaining elements is NOT preserved.
+	void remove_unordered(uint32_t p_index) {
+		ERR_FAIL_UNSIGNED_INDEX(p_index, _size);
+
+		_size--;
+		if (_size > p_index) {
+			get(p_index) = get(_size);
+		}
+
+		// The former last slot is no longer in use.
+		if (DESTRUCT) {
+			get(_size).~T();
+		}
+	}
+
+	// Changes the number of elements, destroying the surplus elements when shrinking
+	// and default constructing the new ones when growing (where T requires it).
+	// Fails with an error if p_size exceeds CAPACITY.
+	void resize(uint32_t p_size) {
+		ERR_FAIL_COND(p_size > CAPACITY);
+
+		if (DESTRUCT) {
+			for (uint32_t i = p_size; i < _size; i++) {
+				get(i).~T();
+			}
+		}
+
+		if (CONSTRUCT) {
+			for (uint32_t i = _size; i < p_size; i++) {
+				memnew_placement(&get(i), T);
+			}
+		}
+
+		_size = p_size;
+	}
+
+	const T &operator[](uint32_t p_index) const {
+		DEV_ASSERT(p_index < size());
+		return get(p_index);
+	}
+	T &operator[](uint32_t p_index) {
+		DEV_ASSERT(p_index < size());
+		return get(p_index);
+	}
+
+	// Converts to a Vector holding copies of the elements.
+	operator Vector<T>() const {
+		Vector<T> ret;
+		if (is_empty()) {
+			return ret;
+		}
+
+		ret.resize(size());
+		T *dest = ret.ptrw();
+
+		// A single memcpy is only valid when the slots are tightly packed AND the
+		// elements can be duplicated bitwise. Otherwise copy element by element,
+		// so that copy assignment runs for non trivial types.
+		if (RAW_COPY && (UNIT_SIZE == sizeof(T))) {
+			memcpy(dest, _data, sizeof(T) * _size);
+		} else {
+			for (uint32_t n = 0; n < _size; n++) {
+				dest[n] = get(n);
+			}
+		}
+		return ret;
+	}
+};
+
+#endif // FIXED_ARRAY_H

+ 4 - 0
doc/classes/ProjectSettings.xml

@@ -1477,6 +1477,10 @@
 		<member name="rendering/batching/options/use_batching_in_editor" type="bool" setter="" getter="" default="true">
 			Switches on 2D batching within the editor.
 		</member>
+		<member name="rendering/batching/options/use_multirect" type="bool" setter="" getter="" default="true">
+			Allows use of the [code]MultiRect[/code] command in the Rasterizer.
+			This can provide some acceleration for large groups of rects, especially text and tilemaps.
+		</member>
 		<member name="rendering/batching/parameters/batch_buffer_size" type="int" setter="" getter="" default="16384">
 			Size of buffer reserved for batched vertices. Larger size enables larger batches, but there are diminishing returns for the memory used. This should only have a minor effect on performance.
 		</member>

+ 476 - 24
drivers/gles_common/rasterizer_canvas_batcher.h

@@ -37,6 +37,7 @@
 #include "rasterizer_asserts.h"
 #include "rasterizer_storage_common.h"
 #include "servers/visual/rasterizer.h"
+#include "servers/visual/visual_server_canvas_helper.h"
 
 // We are using the curiously recurring template pattern
 // https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern
@@ -296,7 +297,6 @@ public:
 			settings_use_batching = false;
 			settings_max_join_item_commands = 0;
 			settings_colored_vertex_format_threshold = 0.0f;
-			settings_batch_buffer_num_verts = 0;
 			scissor_threshold_area = 0.0f;
 			joined_item_batch_flags = 0;
 			diagnose_frame = false;
@@ -432,7 +432,6 @@ public:
 		bool settings_diagnose_frame; // print out batches to help optimize / regression test
 		int settings_max_join_item_commands;
 		float settings_colored_vertex_format_threshold;
-		int settings_batch_buffer_num_verts;
 		bool settings_scissor_lights;
 		float settings_scissor_threshold; // 0.0 to 1.0
 		int settings_item_reordering_lookahead;
@@ -609,6 +608,8 @@ private:
 	bool _prefill_polygon(RasterizerCanvas::Item::CommandPolygon *p_poly, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, RasterizerCanvas::Item *p_item, bool multiply_final_modulate);
 	template <bool SEND_LIGHT_ANGLES>
 	bool _prefill_rect(RasterizerCanvas::Item::CommandRect *rect, FillState &r_fill_state, int &r_command_start, int command_num, int command_count, RasterizerCanvas::Item::Command *const *commands, RasterizerCanvas::Item *p_item, bool multiply_final_modulate);
+	template <bool SEND_LIGHT_ANGLES>
+	bool _prefill_multirect(RasterizerCanvas::Item::CommandMultiRect *mrect, FillState &r_fill_state, int &r_command_start, int command_num, bool multiply_final_modulate);
 
 	// dealing with textures
 	int _batch_find_or_create_tex(const RID &p_texture, const RID &p_normal, bool p_tile, int p_previous_match);
@@ -1078,7 +1079,7 @@ PREAMBLE(void)::batch_initialize() {
 	// The sweet spot on my desktop for cache is actually smaller than the max, and this
 	// is the default. This saves memory too so we will use it for now, needs testing to see whether this varies according
 	// to device / platform.
-	bdata.settings_batch_buffer_num_verts = GLOBAL_GET("rendering/batching/parameters/batch_buffer_size");
+	int batch_buffer_num_verts_requested = GLOBAL_GET("rendering/batching/parameters/batch_buffer_size");
 
 	// override the use_batching setting in the editor
 	// (note that if the editor can't start, you can't change the use_batching project setting!)
@@ -1113,7 +1114,6 @@ PREAMBLE(void)::batch_initialize() {
 	// frame diagnosis. print out the batches every nth frame
 	bdata.settings_diagnose_frame = false;
 	if (!Engine::get_singleton()->is_editor_hint() && bdata.settings_use_batching) {
-		//	{
 		bdata.settings_diagnose_frame = GLOBAL_GET("rendering/batching/debug/diagnose_frame");
 	}
 
@@ -1123,10 +1123,15 @@ PREAMBLE(void)::batch_initialize() {
 	// Note this determines the memory use by the vertex buffer vector. max quads (65536/4)-1
 	// but can be reduced to save memory if really required (will result in more batches though)
 	const int max_possible_quads = (65536 / 4) - 1;
-	const int min_possible_quads = 8; // some reasonable small value
+
+	// We must have enough quads to fit in a MultiRect
+	const int min_possible_quads = MAX(8, MultiRect::MAX_RECTS); // some reasonable small value
 
 	// value from project settings
-	int max_quads = bdata.settings_batch_buffer_num_verts / 4;
+	int max_quads = batch_buffer_num_verts_requested / 4;
+
+	bool use_multirect = GLOBAL_GET("rendering/batching/options/use_multirect");
+	VisualServerCanvasHelper::_multirect_enabled = (bdata.settings_use_batching && use_multirect);
 
 	// sanity checks
 	max_quads = CLAMP(max_quads, min_possible_quads, max_possible_quads);
@@ -1136,22 +1141,6 @@ PREAMBLE(void)::batch_initialize() {
 	bdata.settings_light_max_join_items = CLAMP(bdata.settings_light_max_join_items, 0, 65535);
 	bdata.settings_item_reordering_lookahead = CLAMP(bdata.settings_item_reordering_lookahead, 0, 65535);
 
-	// For debug purposes, output a string with the batching options.
-	if (bdata.settings_use_batching) {
-		String batching_options_string = "OpenGL ES 2D Batching: ON\n";
-		batching_options_string += "Batching Options:\n";
-		batching_options_string += "\tmax_join_item_commands " + itos(bdata.settings_max_join_item_commands) + "\n";
-		batching_options_string += "\tcolored_vertex_format_threshold " + String(Variant(bdata.settings_colored_vertex_format_threshold)) + "\n";
-		batching_options_string += "\tbatch_buffer_size " + itos(bdata.settings_batch_buffer_num_verts) + "\n";
-		batching_options_string += "\tlight_scissor_area_threshold " + String(Variant(bdata.settings_scissor_threshold)) + "\n";
-		batching_options_string += "\titem_reordering_lookahead " + itos(bdata.settings_item_reordering_lookahead) + "\n";
-		batching_options_string += "\tlight_max_join_items " + itos(bdata.settings_light_max_join_items) + "\n";
-		batching_options_string += "\tsingle_rect_fallback " + String(Variant(bdata.settings_use_single_rect_fallback)) + "\n";
-		batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)) + "\n";
-		batching_options_string += "\tdiagnose_frame " + String(Variant(bdata.settings_diagnose_frame));
-		print_verbose(batching_options_string);
-	}
-
 	// special case, for colored vertex format threshold.
 	// as the comparison is >=, we want to be able to totally turn on or off
 	// conversion to colored vertex format at the extremes, so we will force
@@ -1181,6 +1170,22 @@ PREAMBLE(void)::batch_initialize() {
 	bdata.vertex_buffer_size_bytes = max_verts * sizeof_batch_vert;
 	bdata.index_buffer_size_bytes = bdata.index_buffer_size_units * 2; // 16 bit inds
 
+	// For debug purposes, output a string with the batching options.
+	if (bdata.settings_use_batching) {
+		String batching_options_string = "OpenGL ES 2D Batching: ON\n";
+		batching_options_string += "Batching Options:\n";
+		batching_options_string += "\tmax_join_item_commands " + itos(bdata.settings_max_join_item_commands) + "\n";
+		batching_options_string += "\tcolored_vertex_format_threshold " + String(Variant(bdata.settings_colored_vertex_format_threshold)) + "\n";
+		batching_options_string += "\tbatch_buffer_effective_size " + itos(bdata.vertex_buffer_size_units) + "\n";
+		batching_options_string += "\tlight_scissor_area_threshold " + String(Variant(bdata.settings_scissor_threshold)) + "\n";
+		batching_options_string += "\titem_reordering_lookahead " + itos(bdata.settings_item_reordering_lookahead) + "\n";
+		batching_options_string += "\tlight_max_join_items " + itos(bdata.settings_light_max_join_items) + "\n";
+		batching_options_string += "\tsingle_rect_fallback " + String(Variant(bdata.settings_use_single_rect_fallback)) + "\n";
+		batching_options_string += "\tdebug_flash " + String(Variant(bdata.settings_flash_batching)) + "\n";
+		batching_options_string += "\tdiagnose_frame " + String(Variant(bdata.settings_diagnose_frame));
+		print_verbose(batching_options_string);
+	}
+
 	// create equal number of normal and (max) unit sized verts (as the normal may need to be translated to a larger FVF)
 	bdata.vertices.create(max_verts); // 512k
 	bdata.unit_vertices.create(max_verts, sizeof(BatchVertexLarge));
@@ -2247,6 +2252,425 @@ bool C_PREAMBLE::_prefill_rect(RasterizerCanvas::Item::CommandRect *rect, FillSt
 	return false;
 }
 
+T_PREAMBLE // Writes every rect of a MultiRect command into the batch vertex data as quads. Returns true (with r_command_start set to command_num) when the caller must stop prefilling at this command, false otherwise.
+template <bool SEND_LIGHT_ANGLES> // when true, a light angle is also written for each vert
+bool C_PREAMBLE::_prefill_multirect(RasterizerCanvas::Item::CommandMultiRect *mrect, FillState &r_fill_state, int &r_command_start, int command_num, bool multiply_final_modulate) {
+	bool change_batch = false; // set when the rects cannot be appended to the current batch
+
+	// a new batch is needed if the current batch is not a rect batch
+	if (r_fill_state.curr_batch->type != RasterizerStorageCommon::BT_RECT) {
+		// don't allow joining to a different sequence type
+		if (r_fill_state.sequence_batch_type_flags & (~RasterizerStorageCommon::BTF_RECT)) {
+			// bail out, recording this command as the place we got to
+			r_command_start = command_num;
+			return true;
+		}
+		r_fill_state.sequence_batch_type_flags |= RasterizerStorageCommon::BTF_RECT;
+
+		change_batch = true;
+	}
+
+	// try to create vertices BEFORE creating a batch,
+	// because if the vertex buffer is full, we need to finish this
+	// function, draw what we have so far, and then start a new set of batches
+
+	// request the vertices for ALL the rects at a time (4 per rect), this is more efficient
+	uint32_t total_verts = 4 * mrect->rects.size();
+
+	BatchVertex *bvs = bdata.vertices.request(total_verts);
+	if (!bvs) {
+		// run out of space in the vertex buffer .. finish this function and draw what we have so far
+		// record where we got to (this command has NOT been written)
+		r_command_start = command_num;
+
+		// Check for an error condition - if we have been creating MultiRects that require more than
+		// the maximum number of verts in the buffer, this could cause an infinite loop.
+		ERR_FAIL_COND_V(total_verts > bdata.vertex_buffer_size_units, false);
+		return true;
+	}
+
+	// are we using large FVF?
+	const bool use_large_verts = bdata.use_large_verts;
+	const bool use_modulate = bdata.use_modulate;
+
+	Color col = mrect->modulate;
+
+	// use_modulate and use_large_verts should have been checked in the calling prefill_item function.
+	// we don't want to apply the modulate on the CPU if it is stored in the vertex format, it will
+	// be applied in the shader
+	if (multiply_final_modulate) {
+		col *= r_fill_state.final_modulate;
+	}
+
+	// instead of doing all the texture preparation for EVERY rect,
+	// we build a list of texture combinations and do this once off.
+	// This means we have a potentially rather slow step to identify which texture combo
+	// using the RIDs.
+	int old_batch_tex_id = r_fill_state.batch_tex_id;
+	r_fill_state.batch_tex_id = _batch_find_or_create_tex(mrect->texture, mrect->normal_map, mrect->flags & RasterizerCanvas::CANVAS_RECT_TILE, old_batch_tex_id);
+
+	//r_fill_state.use_light_angles = send_light_angles;
+	if (SEND_LIGHT_ANGLES) {
+		bdata.use_light_angles = true;
+	}
+
+	// a change of batch texture also requires a new batch
+	if (old_batch_tex_id != r_fill_state.batch_tex_id) {
+		change_batch = true;
+	}
+
+	// we need to treat color change separately because we need to count these
+	// to decide whether to switch on the fly to colored vertices.
+	if (!change_batch && !r_fill_state.curr_batch->color.equals(col)) {
+		change_batch = true;
+		bdata.total_color_changes++;
+	}
+
+	uint32_t num_rects = mrect->rects.size();
+
+	if (change_batch) {
+		// put the tex pixel size in a local (less verbose and can be a register)
+		const BatchTex &batchtex = bdata.batch_textures[r_fill_state.batch_tex_id];
+		batchtex.tex_pixel_size.to(r_fill_state.texpixel_size);
+
+		if (bdata.settings_uv_contract) {
+			r_fill_state.contract_uvs = (batchtex.flags & VS::TEXTURE_FLAG_FILTER) == 0;
+		}
+
+		// need to preserve texpixel_size between items
+		//r_fill_state.texpixel_size = r_fill_state.texpixel_size;
+
+		// open new batch (this should never fail, it dynamically grows)
+		r_fill_state.curr_batch = _batch_request_new(false);
+
+		r_fill_state.curr_batch->type = RasterizerStorageCommon::BT_RECT;
+		r_fill_state.curr_batch->color.set(col);
+		r_fill_state.curr_batch->batch_texture_id = r_fill_state.batch_tex_id;
+		r_fill_state.curr_batch->first_command = command_num;
+		r_fill_state.curr_batch->num_commands = num_rects; // counted per rect, not per MultiRect command
+		//r_fill_state.curr_batch->first_quad = bdata.total_quads;
+		r_fill_state.curr_batch->first_vert = bdata.total_verts;
+	} else {
+		// we could alternatively do the count when closing a batch .. perhaps more efficient
+		r_fill_state.curr_batch->num_commands += num_rects;
+	}
+
+	// test for simplified pipeline (region rects with no transpose or flips)
+	const uint8_t disallow_flags = RasterizerCanvas::CANVAS_RECT_TRANSPOSE | RasterizerCanvas::CANVAS_RECT_FLIP_H | RasterizerCanvas::CANVAS_RECT_FLIP_V;
+	if ((mrect->flags & RasterizerCanvas::CANVAS_RECT_REGION) && ((mrect->flags & disallow_flags) == 0)) {
+		// simplified pipeline
+		for (uint32_t n = 0; n < num_rects; n++) {
+			const Rect2 &rect = mrect->rects[n];
+			const Rect2 &source = mrect->sources[n];
+
+			// fill the quad geometry
+			Vector2 mins = rect.position;
+
+			// just aliases
+			BatchVertex *bA = &bvs[0];
+			BatchVertex *bB = &bvs[1];
+			BatchVertex *bC = &bvs[2];
+			BatchVertex *bD = &bvs[3];
+
+			// possibility of applying flips here for normal mapping .. but they don't seem to be used
+#ifdef TOOLS_ENABLED
+			if (rect.size.x < 0) {
+				ERR_PRINT_ONCE("MultiRect with negative size detected. Ensure rects are non-negative.");
+			}
+			if (rect.size.y < 0) {
+				ERR_PRINT_ONCE("MultiRect with negative size detected. Ensure rects are non-negative.");
+			}
+#endif
+
+			if (r_fill_state.transform_mode == TM_TRANSLATE) {
+				if (!use_large_verts) {
+					_software_transform_vertex(mins, r_fill_state.transform_combined);
+				}
+			}
+			Vector2 maxs = mins + rect.size;
+
+			bA->pos.x = mins.x;
+			bA->pos.y = mins.y;
+
+			bB->pos.x = maxs.x;
+			bB->pos.y = mins.y;
+
+			bC->pos.x = maxs.x;
+			bC->pos.y = maxs.y;
+
+			bD->pos.x = mins.x;
+			bD->pos.y = maxs.y;
+
+			if (r_fill_state.transform_mode == TM_ALL) {
+				if (!use_large_verts) {
+					_software_transform_vertex(bA->pos, r_fill_state.transform_combined);
+					_software_transform_vertex(bB->pos, r_fill_state.transform_combined);
+					_software_transform_vertex(bC->pos, r_fill_state.transform_combined);
+					_software_transform_vertex(bD->pos, r_fill_state.transform_combined);
+				}
+			}
+
+			// uvs
+			Vector2 src_min;
+			Vector2 src_max;
+			src_min = source.position;
+			src_max = src_min + source.size;
+
+			src_min *= r_fill_state.texpixel_size;
+			src_max *= r_fill_state.texpixel_size;
+
+			const float uv_epsilon = bdata.settings_uv_contract_amount;
+
+			// nudge offset for the maximum to prevent precision error on GPU reading into line outside the source rect
+			// this is very difficult to get right.
+			if (r_fill_state.contract_uvs) {
+				src_min.x += uv_epsilon;
+				src_min.y += uv_epsilon;
+				src_max.x -= uv_epsilon;
+				src_max.y -= uv_epsilon;
+			}
+
+			// corner uvs, in the same corner order as the positions written above (A, B, C, D)
+			Vector2 uvs[4] = {
+				src_min,
+				Vector2(src_max.x, src_min.y),
+				src_max,
+				Vector2(src_min.x, src_max.y),
+			};
+
+			bA->uv.set(uvs[0]);
+			bB->uv.set(uvs[1]);
+			bC->uv.set(uvs[2]);
+			bD->uv.set(uvs[3]);
+
+			bvs += 4; // move the destination verts on by 4 each rect
+		} // for n through rects
+
+	} else {
+		// full pipeline
+		for (uint32_t n = 0; n < num_rects; n++) {
+			const Rect2 &rect = mrect->rects[n];
+			const Rect2 &source = mrect->sources[n];
+
+			// fill the quad geometry
+			Vector2 mins = rect.position;
+
+			if (r_fill_state.transform_mode == TM_TRANSLATE) {
+				if (!use_large_verts) {
+					_software_transform_vertex(mins, r_fill_state.transform_combined);
+				}
+			}
+
+			Vector2 maxs = mins + rect.size;
+
+			// just aliases
+			BatchVertex *bA = &bvs[0];
+			BatchVertex *bB = &bvs[1];
+			BatchVertex *bC = &bvs[2];
+			BatchVertex *bD = &bvs[3];
+
+			bA->pos.x = mins.x;
+			bA->pos.y = mins.y;
+
+			bB->pos.x = maxs.x;
+			bB->pos.y = mins.y;
+
+			bC->pos.x = maxs.x;
+			bC->pos.y = maxs.y;
+
+			bD->pos.x = mins.x;
+			bD->pos.y = maxs.y;
+
+			// possibility of applying flips here for normal mapping .. but they don't seem to be used
+#ifdef TOOLS_ENABLED
+			if (rect.size.x < 0) {
+				//SWAP(bA->pos, bB->pos);
+				//SWAP(bC->pos, bD->pos);
+				ERR_PRINT_ONCE("MultiRect with negative size detected. Ensure rects are non-negative.");
+			}
+			if (rect.size.y < 0) {
+				//SWAP(bA->pos, bD->pos);
+				//SWAP(bB->pos, bC->pos);
+				ERR_PRINT_ONCE("MultiRect with negative size detected. Ensure rects are non-negative.");
+			}
+#endif
+
+			if (r_fill_state.transform_mode == TM_ALL) {
+				if (!use_large_verts) {
+					_software_transform_vertex(bA->pos, r_fill_state.transform_combined);
+					_software_transform_vertex(bB->pos, r_fill_state.transform_combined);
+					_software_transform_vertex(bC->pos, r_fill_state.transform_combined);
+					_software_transform_vertex(bD->pos, r_fill_state.transform_combined);
+				}
+			}
+
+			// uvs
+			Vector2 src_min;
+			Vector2 src_max;
+			if (mrect->flags & RasterizerCanvas::CANVAS_RECT_REGION) {
+				src_min = source.position;
+				src_max = src_min + source.size;
+
+				src_min *= r_fill_state.texpixel_size;
+				src_max *= r_fill_state.texpixel_size;
+
+				const float uv_epsilon = bdata.settings_uv_contract_amount;
+
+				// nudge offset for the maximum to prevent precision error on GPU reading into line outside the source rect
+				// this is very difficult to get right.
+				if (r_fill_state.contract_uvs) {
+					src_min.x += uv_epsilon;
+					src_min.y += uv_epsilon;
+					src_max.x -= uv_epsilon;
+					src_max.y -= uv_epsilon;
+				}
+			} else {
+				src_min = Vector2(0, 0);
+				src_max = Vector2(1, 1);
+			}
+
+			// corner uvs, in the same corner order as the positions written above (A, B, C, D)
+			Vector2 uvs[4] = {
+				src_min,
+				Vector2(src_max.x, src_min.y),
+				src_max,
+				Vector2(src_min.x, src_max.y),
+			};
+
+			if (mrect->flags & RasterizerCanvas::CANVAS_RECT_TRANSPOSE) {
+				SWAP(uvs[1], uvs[3]);
+			}
+
+			if (mrect->flags & RasterizerCanvas::CANVAS_RECT_FLIP_H) {
+				SWAP(uvs[0], uvs[1]);
+				SWAP(uvs[2], uvs[3]);
+			}
+			if (mrect->flags & RasterizerCanvas::CANVAS_RECT_FLIP_V) {
+				SWAP(uvs[0], uvs[3]);
+				SWAP(uvs[1], uvs[2]);
+			}
+
+			bA->uv.set(uvs[0]);
+			bB->uv.set(uvs[1]);
+			bC->uv.set(uvs[2]);
+			bD->uv.set(uvs[3]);
+
+			bvs += 4; // move the destination verts on by 4 each rect
+		} // for n through rects
+	} // full pipeline
+
+	// modulate
+	if (use_modulate) {
+		// store the final modulate separately from the rect modulate (same value for every vert)
+		BatchColor *pBC = bdata.vertex_modulates.request(total_verts);
+		RAST_DEBUG_ASSERT(pBC);
+		pBC[0].set(r_fill_state.final_modulate);
+		for (uint32_t n = 1; n < total_verts; n++) {
+			pBC[n] = pBC[0];
+		}
+	}
+
+	// they will all have the same vertex transforms
+	if (use_large_verts) {
+		// store the transform separately
+		BatchTransform *pBT = bdata.vertex_transforms.request(total_verts);
+		RAST_DEBUG_ASSERT(pBT);
+		BatchTransform *pBT_first = pBT;
+
+		const Transform2D &tr = r_fill_state.transform_combined;
+
+		pBT[0].translate.set(tr.elements[2]);
+
+		pBT[0].basis[0].set(tr.elements[0][0], tr.elements[0][1]);
+		pBT[0].basis[1].set(tr.elements[1][0], tr.elements[1][1]);
+
+		for (uint32_t n = 1; n < num_rects * 4; n++) { // num_rects * 4 is the same value as total_verts
+			pBT++;
+			*pBT = *pBT_first;
+		}
+	}
+
+	if (SEND_LIGHT_ANGLES) {
+		// the light angle is the SAME FOR ALL verts of the multirect
+		// for encoding in light angle
+		bool flip_h = false;
+		bool flip_v = false;
+
+		if (mrect->flags & RasterizerCanvas::CANVAS_RECT_FLIP_H) {
+			flip_h = !flip_h;
+			flip_v = !flip_v;
+		}
+		if (mrect->flags & RasterizerCanvas::CANVAS_RECT_FLIP_V) {
+			flip_v = !flip_v;
+		}
+
+		// we can either keep the light angles in sync with the verts when writing,
+		// or sync them up during translation. We are syncing in translation.
+		// N.B. There may be batches that don't require light_angles between batches that do.
+		float *angles = bdata.light_angles.request(total_verts);
+		RAST_DEBUG_ASSERT(angles);
+
+		float angle = 0.0f;
+		const float TWO_PI = Math_PI * 2;
+
+		if (r_fill_state.transform_mode != TM_NONE) {
+			const Transform2D &tr = r_fill_state.transform_combined;
+
+			// apply to an x axis
+			// the x axis and y axis can be taken directly from the transform (no need to xform identity vectors)
+			Vector2 x_axis(tr.elements[0][0], tr.elements[0][1]);
+
+			// have to do a y axis to check for scaling flips
+			// this is hassle and extra slowness. We could only allow flips via the flags.
+			Vector2 y_axis(tr.elements[1][0], tr.elements[1][1]);
+
+			// has the x / y axis flipped due to scaling?
+			float cross = x_axis.cross(y_axis);
+			if (cross < 0.0f) {
+				flip_v = !flip_v;
+			}
+
+			// passing an angle is smaller than a vector, it can be reconstructed in the shader
+			angle = x_axis.angle();
+
+			// we don't want negative angles, as negative is used to encode flips.
+			// This moves range from -PI to PI to 0 to TWO_PI
+			if (angle < 0.0f) {
+				angle += TWO_PI;
+			}
+
+		} // if transform needed
+
+		// if horizontal flip, angle is shifted by 180 degrees
+		if (flip_h) {
+			angle += Math_PI;
+
+			// mod to get back to 0 to TWO_PI range
+			angle = fmodf(angle, TWO_PI);
+		}
+
+		// add 1 (to take care of zero floating point error with sign)
+		angle += 1.0f;
+
+		// flip if necessary to indicate a vertical flip in the shader
+		if (flip_v) {
+			angle *= -1.0f;
+		}
+
+		// light angle must be sent for each vert, instead of as a single uniform as in the uniform draw method
+		// this has the benefit of enabling batching with light angles.
+		for (uint32_t n = 0; n < total_verts; n++) {
+			angles[n] = angle;
+		}
+	}
+
+	// increment quad and vert counts
+	bdata.total_quads += num_rects;
+	bdata.total_verts += total_verts;
+
+	return false;
+}
+
 // This function may be called MULTIPLE TIMES for each item, so needs to record how far it has got
 PREAMBLE(bool)::prefill_joined_item(FillState &r_fill_state, int &r_command_start, RasterizerCanvas::Item *p_item, RasterizerCanvas::Item *p_current_clip, bool &r_reclip, typename T_STORAGE::Material *p_material) {
 	// we will prefill batches and vertices ready for sending in one go to the vertex buffer
@@ -2338,6 +2762,29 @@ PREAMBLE(bool)::prefill_joined_item(FillState &r_fill_state, int &r_command_star
 					return true;
 				}
 
+			} break;
+			case RasterizerCanvas::Item::Command::TYPE_MULTIRECT: {
+				RasterizerCanvas::Item::CommandMultiRect *mrect = static_cast<RasterizerCanvas::Item::CommandMultiRect *>(command);
+
+				// MultiRects with no rects should ideally not be created
+				ERR_CONTINUE(!mrect->rects.size());
+
+				bool send_light_angles = mrect->normal_map != RID();
+
+				bool buffer_full = false;
+
+				// the template params must be explicit for compilation,
+				// this forces building the multiple versions of the function.
+				if (send_light_angles) {
+					buffer_full = _prefill_multirect<true>(mrect, r_fill_state, r_command_start, command_num, multiply_final_modulate);
+				} else {
+					buffer_full = _prefill_multirect<false>(mrect, r_fill_state, r_command_start, command_num, multiply_final_modulate);
+				}
+
+				if (buffer_full) {
+					return true;
+				}
+
 			} break;
 			case RasterizerCanvas::Item::Command::TYPE_NINEPATCH: {
 				RasterizerCanvas::Item::CommandNinePatch *np = static_cast<RasterizerCanvas::Item::CommandNinePatch *>(command);
@@ -2767,7 +3214,7 @@ PREAMBLE(bool)::_sort_items_match(const BSortItem &p_a, const BSortItem &p_b) co
 	//		return false;
 
 	const RasterizerCanvas::Item::Command &cb = *b->commands[0];
-	if (cb.type != RasterizerCanvas::Item::Command::TYPE_RECT) {
+	if ((cb.type != RasterizerCanvas::Item::Command::TYPE_RECT) && (cb.type != RasterizerCanvas::Item::Command::TYPE_MULTIRECT)) {
 		return false;
 	}
 
@@ -2830,7 +3277,7 @@ PREAMBLE(bool)::sort_items_from(int p_start) {
 		return false;
 	}
 	const RasterizerCanvas::Item::Command &command_start = *start.item->commands[0];
-	if (command_start.type != RasterizerCanvas::Item::Command::TYPE_RECT) {
+	if ((command_start.type != RasterizerCanvas::Item::Command::TYPE_RECT) && (command_start.type != RasterizerCanvas::Item::Command::TYPE_MULTIRECT)) {
 		return false;
 	}
 
@@ -3215,6 +3662,11 @@ PREAMBLE(bool)::_detect_item_batch_break(RenderItemState &r_ris, RasterizerCanva
 						return true;
 					}
 				} break;
+				case RasterizerCanvas::Item::Command::TYPE_MULTIRECT: {
+					if (_disallow_item_join_if_batch_types_too_different(r_ris, RasterizerStorageCommon::BTF_RECT)) {
+						return true;
+					}
+				} break;
 				case RasterizerCanvas::Item::Command::TYPE_NINEPATCH: {
 					// do not handle tiled ninepatches, these can't be batched and need to use legacy method
 					RasterizerCanvas::Item::CommandNinePatch *np = static_cast<RasterizerCanvas::Item::CommandNinePatch *>(command);

+ 12 - 1
scene/2d/tile_map.cpp

@@ -37,6 +37,7 @@
 #include "scene/2d/area_2d.h"
 #include "servers/navigation_2d_server.h"
 #include "servers/physics_2d_server.h"
+#include "servers/visual/visual_server_canvas_helper.h"
 
 void TileMap::Quadrant::clear_navpoly() {
 	for (Map<PosKey, Quadrant::NavPoly>::Element *E = navpoly_ids.front(); E; E = E->next()) {
@@ -406,6 +407,8 @@ void TileMap::update_dirty_quadrants() {
 		RID prev_canvas_item;
 		RID prev_debug_canvas_item;
 
+		bool multirect_started = false;
+
 		for (int i = 0; i < q.cells.size(); i++) {
 			Map<PosKey, Cell>::Element *E = tile_map.find(q.cells[i]);
 			Cell &c = E->get();
@@ -568,7 +571,11 @@ void TileMap::update_dirty_quadrants() {
 			if (r == Rect2()) {
 				tex->draw_rect(canvas_item, rect, false, modulate, c.transpose, normal_map);
 			} else {
-				tex->draw_rect_region(canvas_item, rect, r, modulate, c.transpose, normal_map, clip_uv);
+				if (!multirect_started) {
+					multirect_started = true;
+					VisualServerCanvasHelper::tilemap_begin();
+				}
+				VisualServerCanvasHelper::tilemap_add_rect(canvas_item, rect, tex->get_rid(), r, modulate, c.transpose, normal_map.is_valid() ? normal_map->get_rid() : RID(), clip_uv);
 			}
 
 			Vector<TileSet::ShapeData> shapes = tile_set->tile_get_shapes(c.id);
@@ -719,6 +726,10 @@ void TileMap::update_dirty_quadrants() {
 			}
 		}
 
+		if (multirect_started) {
+			VisualServerCanvasHelper::tilemap_end();
+		}
+
 		dirty_quadrant_list.remove(dirty_quadrant_list.first());
 		quadrant_order_dirty = true;
 	}

+ 6 - 4
scene/gui/rich_text_label.cpp

@@ -404,6 +404,8 @@ int RichTextLabel::_process_line(ItemFrame *p_frame, const Vector2 &p_ofs, int &
 				bool just_breaked_in_middle = false;
 				rchar = 0;
 				FontDrawer drawer(font, Color(1, 1, 1));
+				MultiRect &multirect = drawer.get_multirect();
+
 				while (*c) {
 					int end = 0;
 					float w = 0.0f;
@@ -613,12 +615,12 @@ int RichTextLabel::_process_line(ItemFrame *p_frame, const Vector2 &p_ofs, int &
 
 									const Point2 base_pos = p_ofs + Point2(align_ofs + pofs + spacing_char, y + lh - line_descent);
 									if (shadow_color.a > 0) {
-										font->draw_char(ci, base_pos + shadow_ofs + fx_offset, fx_char, c[i + 1], shadow_color);
+										font->draw_char_ex(ci, base_pos + shadow_ofs + fx_offset, fx_char, c[i + 1], shadow_color, false, &multirect);
 
 										if (p_shadow_as_outline) {
-											font->draw_char(ci, base_pos + Vector2(-shadow_ofs.x, shadow_ofs.y) + fx_offset, fx_char, c[i + 1], shadow_color);
-											font->draw_char(ci, base_pos + Vector2(shadow_ofs.x, -shadow_ofs.y) + fx_offset, fx_char, c[i + 1], shadow_color);
-											font->draw_char(ci, base_pos + Vector2(-shadow_ofs.x, -shadow_ofs.y) + fx_offset, fx_char, c[i + 1], shadow_color);
+											font->draw_char_ex(ci, base_pos + Vector2(-shadow_ofs.x, shadow_ofs.y) + fx_offset, fx_char, c[i + 1], shadow_color, false, &multirect);
+											font->draw_char_ex(ci, base_pos + Vector2(shadow_ofs.x, -shadow_ofs.y) + fx_offset, fx_char, c[i + 1], shadow_color, false, &multirect);
+											font->draw_char_ex(ci, base_pos + Vector2(-shadow_ofs.x, -shadow_ofs.y) + fx_offset, fx_char, c[i + 1], shadow_color, false, &multirect);
 										}
 									}
 

+ 11 - 6
scene/resources/dynamic_font.cpp

@@ -35,6 +35,7 @@
 
 #include "core/os/file_access.h"
 #include "core/os/os.h"
+#include "servers/visual/visual_server_canvas_helper.h"
 
 #include FT_STROKER_H
 
@@ -498,7 +499,7 @@ Rect2 DynamicFontAtSize::get_char_tx_uv_rect(CharType p_char, CharType p_next, c
 	return Rect2();
 }
 
-float DynamicFontAtSize::draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks, bool p_advance_only, bool p_outline) const {
+float DynamicFontAtSize::draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks, bool p_advance_only, bool p_outline, MultiRect *p_multirect) const {
 	if (!valid) {
 		return 0;
 	}
@@ -561,7 +562,11 @@ float DynamicFontAtSize::draw_char(RID p_canvas_item, const Point2 &p_pos, CharT
 				modulate.r = modulate.g = modulate.b = 1.0;
 			}
 			RID texture = font->textures[ch->texture_idx].texture->get_rid();
-			VisualServer::get_singleton()->canvas_item_add_texture_rect_region(p_canvas_item, Rect2(cpos, ch->rect.size), texture, ch->rect_uv, modulate, false, RID(), false);
+			if (p_multirect) {
+				p_multirect->add_rect(p_canvas_item, Rect2(cpos, ch->rect.size), texture, ch->rect_uv, modulate, false, RID(), false);
+			} else {
+				VisualServer::get_singleton()->canvas_item_add_texture_rect_region(p_canvas_item, Rect2(cpos, ch->rect.size), texture, ch->rect_uv, modulate, false, RID(), false);
+			}
 		}
 
 		advance = ch->advance;
@@ -1191,7 +1196,7 @@ Rect2 DynamicFont::get_char_tx_uv_rect(CharType p_char, CharType p_next, bool p_
 	}
 }
 
-float DynamicFont::draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, bool p_outline) const {
+float DynamicFont::draw_char_ex(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, bool p_outline, MultiRect *p_multirect) const {
 	if (!data_at_size.is_valid()) {
 		return 0;
 	}
@@ -1203,11 +1208,11 @@ float DynamicFont::draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_
 
 	if (p_outline) {
 		if (outline_data_at_size.is_valid() && outline_cache_id.outline_size > 0) {
-			outline_data_at_size->draw_char(p_canvas_item, p_pos, p_char, p_next, p_modulate * outline_color, fallback_outline_data_at_size, false, true); // Draw glyph outline.
+			outline_data_at_size->draw_char(p_canvas_item, p_pos, p_char, p_next, p_modulate * outline_color, fallback_outline_data_at_size, false, true, p_multirect); // Draw glyph outline.
 		}
-		return data_at_size->draw_char(p_canvas_item, p_pos, p_char, p_next, p_modulate, fallback_data_at_size, true, false) + spacing; // Return advance of the base glyph.
+		return data_at_size->draw_char(p_canvas_item, p_pos, p_char, p_next, p_modulate, fallback_data_at_size, true, false, p_multirect) + spacing; // Return advance of the base glyph.
 	} else {
-		return data_at_size->draw_char(p_canvas_item, p_pos, p_char, p_next, p_modulate, fallback_data_at_size, false, false) + spacing; // Draw base glyph and return advance.
+		return data_at_size->draw_char(p_canvas_item, p_pos, p_char, p_next, p_modulate, fallback_data_at_size, false, false, p_multirect) + spacing; // Draw base glyph and return advance.
 	}
 }
 

+ 2 - 2
scene/resources/dynamic_font.h

@@ -259,7 +259,7 @@ public:
 	Size2 get_char_size(CharType p_char, CharType p_next, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks) const;
 	String get_available_chars() const;
 
-	float draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks, bool p_advance_only = false, bool p_outline = false) const;
+	float draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks, bool p_advance_only = false, bool p_outline = false, MultiRect *p_multirect = nullptr) const;
 
 	RID get_char_texture(CharType p_char, CharType p_next, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks) const;
 	Size2 get_char_texture_size(CharType p_char, CharType p_next, const Vector<Ref<DynamicFontAtSize>> &p_fallbacks) const;
@@ -361,7 +361,7 @@ public:
 
 	virtual bool has_outline() const;
 
-	virtual float draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next = 0, const Color &p_modulate = Color(1, 1, 1), bool p_outline = false) const;
+	virtual float draw_char_ex(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next = 0, const Color &p_modulate = Color(1, 1, 1), bool p_outline = false, MultiRect *p_multirect = nullptr) const;
 
 	RID get_char_texture(CharType p_char, CharType p_next, bool p_outline) const;
 	Size2 get_char_texture_size(CharType p_char, CharType p_next, bool p_outline) const;

+ 27 - 4
scene/resources/font.cpp

@@ -64,6 +64,9 @@ void Font::draw(RID p_canvas_item, const Point2 &p_pos, const String &p_text, co
 
 	int chars_drawn = 0;
 	bool with_outline = has_outline();
+
+	MultiRect multirect;
+
 	for (int i = 0; i < p_text.length(); i++) {
 		int width = get_char_size(p_text[i]).width;
 
@@ -71,14 +74,14 @@ void Font::draw(RID p_canvas_item, const Point2 &p_pos, const String &p_text, co
 			break; //clip
 		}
 
-		ofs.x += draw_char(p_canvas_item, p_pos + ofs, p_text[i], p_text[i + 1], with_outline ? p_outline_modulate : p_modulate, with_outline);
+		ofs.x += draw_char_ex(p_canvas_item, p_pos + ofs, p_text[i], p_text[i + 1], with_outline ? p_outline_modulate : p_modulate, with_outline, &multirect);
 		++chars_drawn;
 	}
 
 	if (has_outline()) {
 		ofs = Vector2(0, 0);
 		for (int i = 0; i < chars_drawn; i++) {
-			ofs.x += draw_char(p_canvas_item, p_pos + ofs, p_text[i], p_text[i + 1], p_modulate, false);
+			ofs.x += draw_char_ex(p_canvas_item, p_pos + ofs, p_text[i], p_text[i + 1], p_modulate, false, &multirect);
 		}
 	}
 }
@@ -535,6 +538,21 @@ Size2 Font::total_size_of_lines(Vector<String> p_lines) {
 	return size;
 }
 
+// Binds the drawer to p_font, stores the outline color, caches whether the
+// font reports an outline, and begins the multirect that glyph rects are
+// gathered into.
+FontDrawer::FontDrawer(const Ref<Font> &p_font, const Color &p_outline_color) :
+		font(p_font),
+		outline_color(p_outline_color),
+		has_outline(p_font->has_outline()) {
+	multirect.begin();
+}
+
+// Draws every glyph queued in pending_draws (with the outline flag off) into
+// the multirect, then ends the multirect.
+FontDrawer::~FontDrawer() {
+	const int pending_count = pending_draws.size();
+	for (int idx = 0; idx < pending_count; idx++) {
+		const PendingDraw &pending = pending_draws[idx];
+		font->draw_char_ex(pending.canvas_item, pending.pos, pending.chr, pending.next, pending.modulate, false, &multirect);
+	}
+	multirect.end();
+}
+
 void BitmapFont::set_fallback(const Ref<BitmapFont> &p_fallback) {
 	for (Ref<BitmapFont> fallback_child = p_fallback; fallback_child != nullptr; fallback_child = fallback_child->get_fallback()) {
 		ERR_FAIL_COND_MSG(fallback_child == this, "Can't set as fallback one of its parents to prevent crashes due to recursive loop.");
@@ -681,7 +699,7 @@ Rect2 BitmapFont::get_char_tx_uv_rect(CharType p_char, CharType p_next, bool p_o
 	}
 }
 
-float BitmapFont::draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, bool p_outline) const {
+float BitmapFont::draw_char_ex(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next, const Color &p_modulate, bool p_outline, MultiRect *p_multirect) const {
 	int32_t ch = p_char;
 	if (((p_char & 0xfffffc00) == 0xd800) && (p_next & 0xfffffc00) == 0xdc00) { // decode surrogate pair.
 		ch = (p_char << 10UL) + p_next - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
@@ -705,7 +723,12 @@ float BitmapFont::draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_c
 		cpos.x += c->h_align;
 		cpos.y -= ascent;
 		cpos.y += c->v_align;
-		VisualServer::get_singleton()->canvas_item_add_texture_rect_region(p_canvas_item, Rect2(cpos, c->rect.size), textures[c->texture_idx]->get_rid(), c->rect, p_modulate, false, RID(), false);
+
+		if (p_multirect) {
+			p_multirect->add_rect(p_canvas_item, Rect2(cpos, c->rect.size), textures[c->texture_idx]->get_rid(), c->rect, p_modulate, false, RID(), false);
+		} else {
+			VisualServer::get_singleton()->canvas_item_add_texture_rect_region(p_canvas_item, Rect2(cpos, c->rect.size), textures[c->texture_idx]->get_rid(), c->rect, p_modulate, false, RID(), false);
+		}
 	}
 
 	return get_char_size(p_char, p_next).width;

+ 9 - 14
scene/resources/font.h

@@ -34,6 +34,7 @@
 #include "core/map.h"
 #include "core/resource.h"
 #include "scene/resources/texture.h"
+#include "servers/visual/visual_server_canvas_helper.h"
 
 class Font : public Resource {
 	GDCLASS(Font, Resource);
@@ -65,7 +66,8 @@ public:
 	void draw_halign(RID p_canvas_item, const Point2 &p_pos, HAlign p_align, float p_width, const String &p_text, const Color &p_modulate = Color(1, 1, 1), const Color &p_outline_modulate = Color(1, 1, 1)) const;
 
 	virtual bool has_outline() const { return false; }
-	virtual float draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next = 0, const Color &p_modulate = Color(1, 1, 1), bool p_outline = false) const = 0;
+	float draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next = 0, const Color &p_modulate = Color(1, 1, 1), bool p_outline = false) const { return draw_char_ex(p_canvas_item, p_pos, p_char, p_next, p_modulate, p_outline); }
+	virtual float draw_char_ex(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next = 0, const Color &p_modulate = Color(1, 1, 1), bool p_outline = false, MultiRect *p_multirect = nullptr) const = 0;
 
 	virtual RID get_char_texture(CharType p_char, CharType p_next, bool p_outline) const = 0;
 	virtual Size2 get_char_texture_size(CharType p_char, CharType p_next, bool p_outline) const = 0;
@@ -85,6 +87,7 @@ class FontDrawer {
 	const Ref<Font> &font;
 	Color outline_color;
 	bool has_outline;
+	MultiRect multirect;
 
 	struct PendingDraw {
 		RID canvas_item;
@@ -97,26 +100,18 @@ class FontDrawer {
 	Vector<PendingDraw> pending_draws;
 
 public:
-	FontDrawer(const Ref<Font> &p_font, const Color &p_outline_color) :
-			font(p_font),
-			outline_color(p_outline_color) {
-		has_outline = p_font->has_outline();
-	}
+	FontDrawer(const Ref<Font> &p_font, const Color &p_outline_color);
 
 	float draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next = 0, const Color &p_modulate = Color(1, 1, 1)) {
 		if (has_outline) {
 			PendingDraw draw = { p_canvas_item, p_pos, p_char, p_next, p_modulate };
 			pending_draws.push_back(draw);
 		}
-		return font->draw_char(p_canvas_item, p_pos, p_char, p_next, has_outline ? outline_color : p_modulate, has_outline);
+		return font->draw_char_ex(p_canvas_item, p_pos, p_char, p_next, has_outline ? outline_color : p_modulate, has_outline, &multirect);
 	}
+	MultiRect &get_multirect() { return multirect; }
 
-	~FontDrawer() {
-		for (int i = 0; i < pending_draws.size(); ++i) {
-			const PendingDraw &draw = pending_draws[i];
-			font->draw_char(draw.canvas_item, draw.pos, draw.chr, draw.next, draw.modulate, false);
-		}
-	}
+	~FontDrawer();
 };
 
 class BitmapFont : public Font {
@@ -208,7 +203,7 @@ public:
 	void set_distance_field_hint(bool p_distance_field);
 	bool is_distance_field_hint() const;
 
-	float draw_char(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next = 0, const Color &p_modulate = Color(1, 1, 1), bool p_outline = false) const;
+	float draw_char_ex(RID p_canvas_item, const Point2 &p_pos, CharType p_char, CharType p_next = 0, const Color &p_modulate = Color(1, 1, 1), bool p_outline = false, MultiRect *p_multirect = nullptr) const;
 
 	RID get_char_texture(CharType p_char, CharType p_next, bool p_outline) const;
 	Size2 get_char_texture_size(CharType p_char, CharType p_next, bool p_outline) const;

+ 25 - 0
servers/visual/rasterizer.h

@@ -808,6 +808,7 @@ public:
 				TYPE_CIRCLE,
 				TYPE_TRANSFORM,
 				TYPE_CLIP_IGNORE,
+				TYPE_MULTIRECT,
 			};
 
 			Type type;
@@ -849,6 +850,20 @@ public:
 			}
 		};
 
+		// Canvas command holding several textured rects that share one texture,
+		// normal map, modulate color and flag set.
+		struct CommandMultiRect : public Command {
+			RID texture;
+			RID normal_map;
+			Color modulate;
+			// Destination rects and their texture source regions.
+			Vector<Rect2> rects;
+			Vector<Rect2> sources;
+			uint8_t flags;
+
+			CommandMultiRect() :
+					flags(0) {
+				type = TYPE_MULTIRECT;
+			}
+		};
+
 		struct CommandNinePatch : public Command {
 			Rect2 rect;
 			Rect2 source;
@@ -1055,6 +1070,16 @@ public:
 						r = crect->rect;
 
 					} break;
+					case Item::Command::TYPE_MULTIRECT: {
+						const Item::CommandMultiRect *mrect = static_cast<const Item::CommandMultiRect *>(c);
+						int num_rects = mrect->rects.size();
+						if (num_rects) {
+							r = mrect->rects[0];
+							for (int n = 1; n < num_rects; n++) {
+								r = mrect->rects[n].merge(r);
+							}
+						}
+					} break;
 					case Item::Command::TYPE_NINEPATCH: {
 						const Item::CommandNinePatch *style = static_cast<const Item::CommandNinePatch *>(c);
 						r = style->rect;

+ 26 - 0
servers/visual/visual_server_canvas.cpp

@@ -670,6 +670,32 @@ void VisualServerCanvas::canvas_item_add_texture_rect(RID p_item, const Rect2 &p
 	canvas_item->commands.push_back(rect);
 }
 
+// Appends one multirect command to the canvas item p_item. All rects share
+// p_texture, p_normal_map, p_modulate and p_canvas_rect_flags. Fails if the
+// item is unknown, if p_rects and p_src_rects differ in size, or if they are
+// empty.
+void VisualServerCanvas::canvas_item_add_texture_multirect_region(RID p_item, const Vector<Rect2> &p_rects, RID p_texture, const Vector<Rect2> &p_src_rects, const Color &p_modulate, uint32_t p_canvas_rect_flags, RID p_normal_map) {
+	Item *canvas_item = canvas_item_owner.getornull(p_item);
+	ERR_FAIL_COND(!canvas_item);
+	ERR_FAIL_COND(p_rects.size() != p_src_rects.size());
+	ERR_FAIL_COND(!p_rects.size());
+
+	Item::CommandMultiRect *rect = memnew(Item::CommandMultiRect);
+	ERR_FAIL_COND(!rect);
+
+	// Geometry: destination rects and their source regions.
+	rect->rects = p_rects;
+	rect->sources = p_src_rects;
+
+	// Callers must pre-apply flips and transposes to the rects and pass the
+	// matching flags in p_canvas_rect_flags; every rect in a single MultiRect
+	// shares the same flags. This keeps the renderer as simple as possible and
+	// pushes the complexity to the one-off creation code. The region flag is
+	// always added here.
+	rect->flags = p_canvas_rect_flags | RasterizerCanvas::CANVAS_RECT_REGION;
+
+	// Data common to all rects of the command.
+	rect->texture = p_texture;
+	rect->normal_map = p_normal_map;
+	rect->modulate = p_modulate;
+
+	canvas_item->commands.push_back(rect);
+	canvas_item->rect_dirty = true;
+}
+
 void VisualServerCanvas::canvas_item_add_texture_rect_region(RID p_item, const Rect2 &p_rect, RID p_texture, const Rect2 &p_src_rect, const Color &p_modulate, bool p_transpose, RID p_normal_map, bool p_clip_uv) {
 	Item *canvas_item = canvas_item_owner.getornull(p_item);
 	ERR_FAIL_COND(!canvas_item);

+ 1 - 0
servers/visual/visual_server_canvas.h

@@ -194,6 +194,7 @@ public:
 	void canvas_item_add_circle(RID p_item, const Point2 &p_pos, float p_radius, const Color &p_color);
 	void canvas_item_add_texture_rect(RID p_item, const Rect2 &p_rect, RID p_texture, bool p_tile = false, const Color &p_modulate = Color(1, 1, 1), bool p_transpose = false, RID p_normal_map = RID());
 	void canvas_item_add_texture_rect_region(RID p_item, const Rect2 &p_rect, RID p_texture, const Rect2 &p_src_rect, const Color &p_modulate = Color(1, 1, 1), bool p_transpose = false, RID p_normal_map = RID(), bool p_clip_uv = false);
+	void canvas_item_add_texture_multirect_region(RID p_item, const Vector<Rect2> &p_rects, RID p_texture, const Vector<Rect2> &p_src_rects, const Color &p_modulate = Color(1, 1, 1), uint32_t p_canvas_rect_flags = 0, RID p_normal_map = RID());
 	void canvas_item_add_nine_patch(RID p_item, const Rect2 &p_rect, const Rect2 &p_source, RID p_texture, const Vector2 &p_topleft, const Vector2 &p_bottomright, VS::NinePatchAxisMode p_x_axis_mode = VS::NINE_PATCH_STRETCH, VS::NinePatchAxisMode p_y_axis_mode = VS::NINE_PATCH_STRETCH, bool p_draw_center = true, const Color &p_modulate = Color(1, 1, 1), RID p_normal_map = RID());
 	void canvas_item_add_primitive(RID p_item, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs, RID p_texture, float p_width = 1.0, RID p_normal_map = RID());
 	void canvas_item_add_polygon(RID p_item, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), RID p_texture = RID(), RID p_normal_map = RID(), bool p_antialiased = false);

+ 280 - 0
servers/visual/visual_server_canvas_helper.cpp

@@ -0,0 +1,280 @@
+/**************************************************************************/
+/*  visual_server_canvas_helper.cpp                                       */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "visual_server_canvas_helper.h"
+
+#include "servers/visual/rasterizer.h"
+#include "servers/visual_server.h"
+
+LocalVector<MultiRect> VisualServerCanvasHelper::_tilemap_multirects;
+Mutex VisualServerCanvasHelper::_tilemap_mutex;
+
+bool VisualServerCanvasHelper::_multirect_enabled = true;
+
+// A freshly constructed MultiRect is put straight into the "begun" condition
+// of the simple API (see begin()).
+MultiRect::MultiRect() {
+	begin();
+}
+// Ends the multirect on destruction, so any rects still held are submitted
+// by end() before the object goes away.
+MultiRect::~MultiRect() {
+	end();
+}
+
+// Simple API: resets to an empty multirect with no flags and no externally
+// supplied state. The dev check expects earlier contents to be gone already.
+void MultiRect::begin() {
+	DEV_CHECK_ONCE(!rects.size());
+
+	state_set = false;
+	state.flags = 0;
+
+	sources.clear();
+	rects.clear();
+}
+
+// Simple API: queues one textured rect.
+//
+// Negative sizes in p_rect / p_src_rect are converted into flip flags and the
+// stored sizes made positive; p_transpose swaps the stored rect size. If the
+// resulting state (item, texture, modulate, normal map, flags) differs from
+// the rects gathered so far, or MAX_RECTS has been reached, the gathered rects
+// are flushed with end() before this one is stored.
+void MultiRect::add_rect(RID p_canvas_item, const Rect2 &p_rect, RID p_texture, const Rect2 &p_src_rect, const Color &p_modulate, bool p_transpose, RID p_normal_map, bool p_clip_uv) {
+	Rect2 rect = p_rect;
+	Rect2 source = p_src_rect;
+
+	// To make the rendering code as efficient as possible, a single MultiRect
+	// command should have identical flips, transpose etc. The flip detection is
+	// shared with the tilemap path so the two cannot drift apart.
+	uint32_t flags = flags_from_rects(rect, source);
+
+	if (p_transpose) {
+		flags |= RasterizerCanvas::CANVAS_RECT_TRANSPOSE;
+		SWAP(rect.size.x, rect.size.y);
+	}
+
+	if (p_clip_uv) {
+		flags |= RasterizerCanvas::CANVAS_RECT_CLIP_UV;
+	}
+
+	VisualServerCanvasHelper::State s;
+	s.item = p_canvas_item;
+	s.texture = p_texture;
+	s.modulate = p_modulate;
+	s.normal_map = p_normal_map;
+	s.flags = flags;
+
+	// Flush what has been gathered if this rect cannot share the same command
+	// (different state) or the storage is full.
+	if (!is_empty() && ((state != s) || (rects.size() >= MAX_RECTS))) {
+		end();
+	}
+
+	// On the first rect, or right after a flush, this rect defines the common state.
+	if (is_empty()) {
+		state = s;
+	}
+
+	rects.push_back(rect);
+	sources.push_back(source);
+}
+
+// Efficient API: resets to an empty multirect and adopts p_state as the
+// common state for every rect added afterwards.
+void MultiRect::begin(const VisualServerCanvasHelper::State &p_state) {
+	DEV_CHECK_ONCE(!rects.size());
+
+	state_set = true;
+	state = p_state;
+
+	sources.clear();
+	rects.clear();
+}
+
+// Converts negative sizes into flip flags. Both rects are modified in place so
+// that their sizes end up non-negative. A negative destination size sets the
+// flip for that axis and a negative source size toggles it, so a rect and
+// source that are both negative on an axis cancel out.
+// Returns the combination of CANVAS_RECT_FLIP_H / CANVAS_RECT_FLIP_V.
+uint32_t MultiRect::flags_from_rects(Rect2 &r_rect, Rect2 &r_source) {
+	uint32_t flip_flags = 0;
+
+	// Horizontal axis.
+	if (r_rect.size.x < 0) {
+		r_rect.size.x = -r_rect.size.x;
+		flip_flags |= RasterizerCanvas::CANVAS_RECT_FLIP_H;
+	}
+	if (r_source.size.x < 0) {
+		r_source.size.x = -r_source.size.x;
+		flip_flags ^= RasterizerCanvas::CANVAS_RECT_FLIP_H;
+	}
+
+	// Vertical axis.
+	if (r_rect.size.y < 0) {
+		r_rect.size.y = -r_rect.size.y;
+		flip_flags |= RasterizerCanvas::CANVAS_RECT_FLIP_V;
+	}
+	if (r_source.size.y < 0) {
+		r_source.size.y = -r_source.size.y;
+		flip_flags ^= RasterizerCanvas::CANVAS_RECT_FLIP_V;
+	}
+
+	return flip_flags;
+}
+
+// Stores a rect / source pair exactly as given (no flip or state handling).
+// Returns false, storing nothing, when the multirect is already full.
+bool MultiRect::add_pre_flipped(const Rect2 &p_rect, const Rect2 &p_src_rect) {
+	const bool has_room = !rects.is_full();
+	if (has_room) {
+		*rects.request() = p_rect;
+		*sources.request() = p_src_rect;
+	}
+	return has_room;
+}
+
+// Efficient API: adds a rect to the current multirect.
+// Returns false, storing nothing, if the multirect is full or if the rect's
+// flips differ from the flips of the established state; the caller then has to
+// start a new multirect. The first rect added without an established state
+// contributes its flips to the state.
+// NOTE: p_commit_on_flip_change is not read by this implementation.
+bool MultiRect::add(const Rect2 &p_rect, const Rect2 &p_src_rect, bool p_commit_on_flip_change) {
+	if (rects.is_full()) {
+		return false;
+	}
+
+	Rect2 rect = p_rect;
+	Rect2 source = p_src_rect;
+	const uint32_t flip_flags = flags_from_rects(rect, source);
+
+	if (!state_set) {
+		// No state yet, this rect decides the flips.
+		state.flags |= flip_flags;
+		state_set = true;
+	} else {
+		// A change of flips cannot continue the same multirect.
+		const uint32_t flip_mask = RasterizerCanvas::CANVAS_RECT_FLIP_H | RasterizerCanvas::CANVAS_RECT_FLIP_V;
+		if ((state.flags & flip_mask) != flip_flags) {
+			return false;
+		}
+	}
+
+	*rects.request() = rect;
+	*sources.request() = source;
+	return true;
+}
+
+// Submits the gathered rects (if any) to the VisualServer and empties the
+// multirect. With multirect enabled they go out as a single multirect command;
+// otherwise each rect is sent through the regular textured-rect call.
+void MultiRect::end() {
+	if (!is_empty()) {
+		if (VisualServerCanvasHelper::_multirect_enabled) {
+			VisualServer::get_singleton()->canvas_item_add_texture_multirect_region(state.item, rects, state.texture, sources, state.modulate, state.flags, state.normal_map);
+
+		} else {
+			// Legacy path.
+			// The stored rects have flips folded into state.flags (sizes made
+			// positive) and, when transposed, their sizes swapped. The regular
+			// rect call expects the raw form (signed sizes, unswapped) together
+			// with a transpose bool, so restore that form first. Otherwise the
+			// flips would be lost and the transpose swap applied twice.
+			const bool transpose = (state.flags & RasterizerCanvas::CANVAS_RECT_TRANSPOSE) != 0;
+			const bool clip_uv = (state.flags & RasterizerCanvas::CANVAS_RECT_CLIP_UV) != 0;
+			const bool flip_h = (state.flags & RasterizerCanvas::CANVAS_RECT_FLIP_H) != 0;
+			const bool flip_v = (state.flags & RasterizerCanvas::CANVAS_RECT_FLIP_V) != 0;
+
+			for (uint32_t n = 0; n < rects.size(); n++) {
+				Rect2 rect = rects[n];
+
+				// Undo in reverse order of application: swap back, then re-sign.
+				if (transpose) {
+					SWAP(rect.size.x, rect.size.y);
+				}
+				if (flip_h) {
+					rect.size.x = -rect.size.x;
+				}
+				if (flip_v) {
+					rect.size.y = -rect.size.y;
+				}
+
+				VisualServer::get_singleton()->canvas_item_add_texture_rect_region(state.item, rect, state.texture, sources[n], state.modulate, transpose, state.normal_map, clip_uv);
+			}
+		}
+
+		rects.clear();
+		sources.clear();
+	}
+	state_set = false;
+}
+
+// Starts a tilemap batch. When multirect is enabled this takes the tilemap
+// mutex, which is released again in tilemap_end().
+void VisualServerCanvasHelper::tilemap_begin() {
+	if (!_multirect_enabled) {
+		return;
+	}
+	_tilemap_mutex.lock();
+}
+
+// Queues one tile rect for the tilemap batch in progress.
+// With multirect disabled the arguments are forwarded unchanged to the regular
+// textured-rect call. Otherwise the rect joins a pending multirect with the
+// same state, or a new multirect is created for it. Nothing is submitted here;
+// that happens in tilemap_end().
+void VisualServerCanvasHelper::tilemap_add_rect(RID p_canvas_item, const Rect2 &p_rect, RID p_texture, const Rect2 &p_src_rect, const Color &p_modulate, bool p_transpose, RID p_normal_map, bool p_clip_uv) {
+	if (!_multirect_enabled) {
+		VisualServer::get_singleton()->canvas_item_add_texture_rect_region(p_canvas_item, p_rect, p_texture, p_src_rect, p_modulate, p_transpose, p_normal_map, p_clip_uv);
+		return;
+	}
+
+	Rect2 rect = p_rect;
+	Rect2 source = p_src_rect;
+
+	// To make the rendering code as efficient as possible, a single MultiRect
+	// command should have identical flips, transpose etc. These flags become
+	// part of the state used below to pick a compatible multirect.
+	uint32_t flags = MultiRect::flags_from_rects(rect, source);
+
+	if (p_clip_uv) {
+		flags |= RasterizerCanvas::CANVAS_RECT_CLIP_UV;
+	}
+
+	if (p_transpose) {
+		flags |= RasterizerCanvas::CANVAS_RECT_TRANSPOSE;
+		SWAP(rect.size.x, rect.size.y);
+	}
+
+	State wanted;
+	wanted.item = p_canvas_item;
+	wanted.texture = p_texture;
+	wanted.modulate = p_modulate;
+	wanted.normal_map = p_normal_map;
+	wanted.flags = flags;
+
+	// Walk the pending multirects from newest to oldest, looking for one to join.
+	for (uint32_t remaining = _tilemap_multirects.size(); remaining > 0; remaining--) {
+		MultiRect &candidate = _tilemap_multirects[remaining - 1];
+
+		// Same state: try to append. This fails only if the multirect is full.
+		if ((candidate.state == wanted) && candidate.add_pre_flipped(rect, source)) {
+			return;
+		}
+
+		// Do not search past a multirect that the new rect overlaps.
+		if (candidate.overlaps(rect)) {
+			break;
+		}
+	}
+
+	// Nothing suitable was found: append a new multirect with this state.
+	const uint32_t new_index = _tilemap_multirects.size();
+	_tilemap_multirects.resize(new_index + 1);
+	MultiRect &fresh = _tilemap_multirects[new_index];
+	fresh.begin(wanted);
+	fresh.add_pre_flipped(rect, source);
+}
+
+// Finishes a tilemap batch. When multirect is enabled, every pending multirect
+// is ended (submitting its rects), the list is emptied and the mutex taken in
+// tilemap_begin() is released. Does nothing when multirect is disabled.
+void VisualServerCanvasHelper::tilemap_end() {
+	if (_multirect_enabled) {
+		const uint32_t pending_count = _tilemap_multirects.size();
+		for (uint32_t i = 0; i < pending_count; i++) {
+			_tilemap_multirects[i].end();
+		}
+
+		_tilemap_multirects.clear();
+		_tilemap_mutex.unlock();
+	}
+}

+ 114 - 0
servers/visual/visual_server_canvas_helper.h

@@ -0,0 +1,114 @@
+/**************************************************************************/
+/*  visual_server_canvas_helper.h                                         */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef VISUAL_SERVER_CANVAS_HELPER_H
+#define VISUAL_SERVER_CANVAS_HELPER_H
+
+#include "core/color.h"
+#include "core/fixed_array.h"
+#include "core/local_vector.h"
+#include "core/math/rect2.h"
+#include "core/rid.h"
+
+class MultiRect;
+
+// Static helper that gathers tilemap rects into MultiRects between
+// tilemap_begin() and tilemap_end().
+class VisualServerCanvasHelper {
+public:
+	// The data every rect of one multirect has in common.
+	struct State {
+		RID item;
+		RID texture;
+		Color modulate;
+		RID normal_map;
+		uint32_t flags = 0;
+
+		// Two states are equal when all five members are equal.
+		bool operator==(const State &p_state) const {
+			const bool same_resources = (item == p_state.item) &&
+					(texture == p_state.texture) &&
+					(normal_map == p_state.normal_map);
+			const bool same_appearance = (modulate == p_state.modulate) &&
+					(flags == p_state.flags);
+			return same_resources && same_appearance;
+		}
+		bool operator!=(const State &p_state) const { return !operator==(p_state); }
+	};
+
+private:
+	// There is a single mutex for tilemaps, locked in tilemap_begin() and
+	// unlocked in tilemap_end(), so only one quadrant can be adding rects
+	// at a time.
+	static LocalVector<MultiRect> _tilemap_multirects;
+	static Mutex _tilemap_mutex;
+
+public:
+	static void tilemap_begin();
+	static void tilemap_add_rect(RID p_canvas_item, const Rect2 &p_rect, RID p_texture, const Rect2 &p_src_rect, const Color &p_modulate = Color(1, 1, 1), bool p_transpose = false, RID p_normal_map = RID(), bool p_clip_uv = false);
+	static void tilemap_end();
+
+	// When false, rects are sent through the regular per-rect call instead
+	// of multirect commands.
+	static bool _multirect_enabled;
+};
+
+// Gathers up to MAX_RECTS rect / source pairs sharing one State and submits
+// them together in end().
+class MultiRect {
+	friend class VisualServerCanvasHelper;
+
+public:
+	enum { MAX_RECTS = 2048 };
+
+private:
+	// Common data for all gathered rects, and whether it has been established
+	// (by begin(state) or by the first add()).
+	VisualServerCanvasHelper::State state;
+	bool state_set = false;
+	// Destination rects and their source regions; the two are filled together.
+	FixedArray<Rect2, MAX_RECTS, true> rects;
+	FixedArray<Rect2, MAX_RECTS, true> sources;
+
+	static uint32_t flags_from_rects(Rect2 &r_rect, Rect2 &r_source);
+	// True if p_rect intersects any rect gathered so far.
+	bool overlaps(const Rect2 &p_rect) const {
+		const uint32_t count = rects.size();
+		uint32_t i = 0;
+		while (i < count && !rects[i].intersects(p_rect)) {
+			i++;
+		}
+		return i < count;
+	}
+	bool add_pre_flipped(const Rect2 &p_rect, const Rect2 &p_src_rect);
+
+public:
+	// Simple API
+	void begin();
+	void add_rect(RID p_canvas_item, const Rect2 &p_rect, RID p_texture, const Rect2 &p_src_rect, const Color &p_modulate = Color(1, 1, 1), bool p_transpose = false, RID p_normal_map = RID(), bool p_clip_uv = false);
+
+	// Efficient API
+	void begin(const VisualServerCanvasHelper::State &p_state);
+	bool add(const Rect2 &p_rect, const Rect2 &p_src_rect, bool p_commit_on_flip_change = true);
+	bool is_empty() const { return rects.is_empty(); }
+	bool is_full() const { return rects.is_full(); }
+	void end();
+
+	MultiRect();
+	~MultiRect();
+};
+
+#endif // VISUAL_SERVER_CANVAS_HELPER_H

+ 1 - 0
servers/visual/visual_server_raster.h

@@ -704,6 +704,7 @@ public:
 	BIND4(canvas_item_add_circle, RID, const Point2 &, float, const Color &)
 	BIND7(canvas_item_add_texture_rect, RID, const Rect2 &, RID, bool, const Color &, bool, RID)
 	BIND8(canvas_item_add_texture_rect_region, RID, const Rect2 &, RID, const Rect2 &, const Color &, bool, RID, bool)
+	BIND7(canvas_item_add_texture_multirect_region, RID, const Vector<Rect2> &, RID, const Vector<Rect2> &, const Color &, uint32_t, RID)
 	BIND11(canvas_item_add_nine_patch, RID, const Rect2 &, const Rect2 &, RID, const Vector2 &, const Vector2 &, NinePatchAxisMode, NinePatchAxisMode, bool, const Color &, RID)
 	BIND7(canvas_item_add_primitive, RID, const Vector<Point2> &, const Vector<Color> &, const Vector<Point2> &, RID, float, RID)
 	BIND7(canvas_item_add_polygon, RID, const Vector<Point2> &, const Vector<Color> &, const Vector<Point2> &, RID, RID, bool)

+ 1 - 0
servers/visual/visual_server_wrap_mt.h

@@ -605,6 +605,7 @@ public:
 	FUNC4(canvas_item_add_circle, RID, const Point2 &, float, const Color &)
 	FUNC7(canvas_item_add_texture_rect, RID, const Rect2 &, RID, bool, const Color &, bool, RID)
 	FUNC8(canvas_item_add_texture_rect_region, RID, const Rect2 &, RID, const Rect2 &, const Color &, bool, RID, bool)
+	FUNC7(canvas_item_add_texture_multirect_region, RID, const Vector<Rect2> &, RID, const Vector<Rect2> &, const Color &, uint32_t, RID)
 	FUNC11(canvas_item_add_nine_patch, RID, const Rect2 &, const Rect2 &, RID, const Vector2 &, const Vector2 &, NinePatchAxisMode, NinePatchAxisMode, bool, const Color &, RID)
 	FUNC7(canvas_item_add_primitive, RID, const Vector<Point2> &, const Vector<Color> &, const Vector<Point2> &, RID, float, RID)
 	FUNC7(canvas_item_add_polygon, RID, const Vector<Point2> &, const Vector<Color> &, const Vector<Point2> &, RID, RID, bool)

+ 2 - 1
servers/visual_server.cpp

@@ -2704,6 +2704,7 @@ VisualServer::VisualServer() {
 	GLOBAL_DEF("rendering/batching/options/use_batching", true);
 	GLOBAL_DEF_RST("rendering/batching/options/use_batching_in_editor", true);
 	GLOBAL_DEF("rendering/batching/options/single_rect_fallback", false);
+	GLOBAL_DEF("rendering/batching/options/use_multirect", true);
 	GLOBAL_DEF("rendering/batching/parameters/max_join_item_commands", 16);
 	GLOBAL_DEF("rendering/batching/parameters/colored_vertex_format_threshold", 0.25f);
 	GLOBAL_DEF("rendering/batching/lights/scissor_area_threshold", 1.0f);
@@ -2720,7 +2721,7 @@ VisualServer::VisualServer() {
 
 	ProjectSettings::get_singleton()->set_custom_property_info("rendering/batching/parameters/max_join_item_commands", PropertyInfo(Variant::INT, "rendering/batching/parameters/max_join_item_commands", PROPERTY_HINT_RANGE, "0,65535"));
 	ProjectSettings::get_singleton()->set_custom_property_info("rendering/batching/parameters/colored_vertex_format_threshold", PropertyInfo(Variant::REAL, "rendering/batching/parameters/colored_vertex_format_threshold", PROPERTY_HINT_RANGE, "0.0,1.0,0.01"));
-	ProjectSettings::get_singleton()->set_custom_property_info("rendering/batching/parameters/batch_buffer_size", PropertyInfo(Variant::INT, "rendering/batching/parameters/batch_buffer_size", PROPERTY_HINT_RANGE, "1024,65535,1024"));
+	ProjectSettings::get_singleton()->set_custom_property_info("rendering/batching/parameters/batch_buffer_size", PropertyInfo(Variant::INT, "rendering/batching/parameters/batch_buffer_size", PROPERTY_HINT_RANGE, "8192,65536,1024"));
 	ProjectSettings::get_singleton()->set_custom_property_info("rendering/batching/lights/scissor_area_threshold", PropertyInfo(Variant::REAL, "rendering/batching/lights/scissor_area_threshold", PROPERTY_HINT_RANGE, "0.0,1.0"));
 	ProjectSettings::get_singleton()->set_custom_property_info("rendering/batching/lights/max_join_items", PropertyInfo(Variant::INT, "rendering/batching/lights/max_join_items", PROPERTY_HINT_RANGE, "0,512"));
 	ProjectSettings::get_singleton()->set_custom_property_info("rendering/batching/parameters/item_reordering_lookahead", PropertyInfo(Variant::INT, "rendering/batching/parameters/item_reordering_lookahead", PROPERTY_HINT_RANGE, "0,256"));

+ 1 - 0
servers/visual_server.h

@@ -1037,6 +1037,7 @@ public:
 	virtual void canvas_item_add_circle(RID p_item, const Point2 &p_pos, float p_radius, const Color &p_color) = 0;
 	virtual void canvas_item_add_texture_rect(RID p_item, const Rect2 &p_rect, RID p_texture, bool p_tile = false, const Color &p_modulate = Color(1, 1, 1), bool p_transpose = false, RID p_normal_map = RID()) = 0;
 	virtual void canvas_item_add_texture_rect_region(RID p_item, const Rect2 &p_rect, RID p_texture, const Rect2 &p_src_rect, const Color &p_modulate = Color(1, 1, 1), bool p_transpose = false, RID p_normal_map = RID(), bool p_clip_uv = false) = 0;
+	virtual void canvas_item_add_texture_multirect_region(RID p_item, const Vector<Rect2> &p_rects, RID p_texture, const Vector<Rect2> &p_src_rects, const Color &p_modulate = Color(1, 1, 1), uint32_t p_canvas_rect_flags = 0, RID p_normal_map = RID()) = 0; // Takes vectors of rects and a flags word where canvas_item_add_texture_rect_region takes single rects and bools. NOTE(review): presumably p_rects[i] pairs with p_src_rects[i] - confirm in the implementation.
 	virtual void canvas_item_add_nine_patch(RID p_item, const Rect2 &p_rect, const Rect2 &p_source, RID p_texture, const Vector2 &p_topleft, const Vector2 &p_bottomright, NinePatchAxisMode p_x_axis_mode = NINE_PATCH_STRETCH, NinePatchAxisMode p_y_axis_mode = NINE_PATCH_STRETCH, bool p_draw_center = true, const Color &p_modulate = Color(1, 1, 1), RID p_normal_map = RID()) = 0;
 	virtual void canvas_item_add_primitive(RID p_item, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs, RID p_texture, float p_width = 1.0, RID p_normal_map = RID()) = 0;
 	virtual void canvas_item_add_polygon(RID p_item, const Vector<Point2> &p_points, const Vector<Color> &p_colors, const Vector<Point2> &p_uvs = Vector<Point2>(), RID p_texture = RID(), RID p_normal_map = RID(), bool p_antialiased = false) = 0;