Browse Source

Merge pull request #65418 from JFonS/taa_multimesh

Add motion vectors support for MultiMeshInstance
Clay John 3 years ago
parent
commit
f709596631

+ 13 - 3
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp

@@ -477,6 +477,10 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p
 			prev_material_uniform_set = material_uniform_set;
 		}
 
+		if (surf->owner->base_flags & INSTANCE_DATA_FLAG_MULTIMESH) {
+			mesh_storage->_multimesh_get_motion_vectors_offsets(surf->owner->data->base, push_constant.multimesh_motion_vectors_current_offset, push_constant.multimesh_motion_vectors_previous_offset);
+		}
+
 		RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SceneState::PushConstant));
 
 		uint32_t instance_count = surf->owner->instance_count > 1 ? surf->owner->instance_count : element_info.repeat;
@@ -924,7 +928,7 @@ _FORCE_INLINE_ static uint32_t _indices_to_primitives(RS::PrimitiveType p_primit
 	static const uint32_t subtractor[RS::PRIMITIVE_MAX] = { 0, 0, 1, 0, 1 };
 	return (p_indices - subtractor[p_primitive]) / divisor[p_primitive];
 }
-void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_using_sdfgi, bool p_using_opaque_gi, bool p_append) {
+void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, uint32_t p_color_pass_flags = 0, bool p_using_sdfgi, bool p_using_opaque_gi, bool p_append) {
 	RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton();
 
 	if (p_render_list == RENDER_LIST_OPAQUE) {
@@ -1149,6 +1153,12 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con
 					scene_state.used_depth_texture = true;
 				}
 
+				if (p_color_pass_flags & COLOR_PASS_FLAG_MOTION_VECTORS && flags & INSTANCE_DATA_FLAG_MULTIMESH) {
+					if (RendererRD::MeshStorage::get_singleton()->_multimesh_enable_motion_vectors(inst->data->base)) {
+						inst->transforms_uniform_set = mesh_storage->multimesh_get_3d_uniform_set(inst->data->base, scene_shader.default_shader_rd, TRANSFORMS_UNIFORM_SET);
+					}
+				}
+
 			} else if (p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_SHADOW_DP) {
 				if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW) {
 					rl->add_element(surf);
@@ -1322,7 +1332,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
 	_update_render_base_uniform_set(); //may have changed due to the above (light buffer enlarged, as an example)
 
-	_fill_render_list(RENDER_LIST_OPAQUE, p_render_data, PASS_MODE_COLOR, using_sdfgi, using_sdfgi || using_voxelgi);
+	_fill_render_list(RENDER_LIST_OPAQUE, p_render_data, PASS_MODE_COLOR, color_pass_flags, using_sdfgi, using_sdfgi || using_voxelgi);
 	render_list[RENDER_LIST_OPAQUE].sort_by_key();
 	render_list[RENDER_LIST_ALPHA].sort_by_reverse_depth_and_priority();
 	_fill_instance_data(RENDER_LIST_OPAQUE, p_render_data->render_info ? p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE] : (int *)nullptr);
@@ -1723,7 +1733,7 @@ void RenderForwardClustered::_render_shadow_append(RID p_framebuffer, const Page
 	PassMode pass_mode = p_use_dp ? PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW;
 
 	uint32_t render_list_from = render_list[RENDER_LIST_SECONDARY].elements.size();
-	_fill_render_list(RENDER_LIST_SECONDARY, &render_data, pass_mode, false, false, true);
+	_fill_render_list(RENDER_LIST_SECONDARY, &render_data, pass_mode, 0, false, false, true);
 	uint32_t render_list_size = render_list[RENDER_LIST_SECONDARY].elements.size() - render_list_from;
 	render_list[RENDER_LIST_SECONDARY].sort_by_key_range(render_list_from, render_list_size);
 	_fill_instance_data(RENDER_LIST_SECONDARY, p_render_info ? p_render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_SHADOW] : (int *)nullptr, render_list_from, render_list_size, false);

+ 3 - 2
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h

@@ -337,7 +337,8 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 		struct PushConstant { // per-draw push constant; field order matches the DrawCall block in scene_forward_clustered_inc.glsl
 			uint32_t base_index; //
 			uint32_t uv_offset; //packed
-			uint32_t pad[2];
+			uint32_t multimesh_motion_vectors_current_offset; // instance offset the vertex shader adds to gl_InstanceIndex when reading this frame's multimesh transforms
+			uint32_t multimesh_motion_vectors_previous_offset; // same kind of offset, used for the previous-frame vertex_shader() call under MOTION_VECTORS
 		};
 
 		struct InstanceData {
@@ -429,7 +430,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
 	void _update_instance_data_buffer(RenderListType p_render_list);
 	void _fill_instance_data(RenderListType p_render_list, int *p_render_info = nullptr, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true);
-	void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, bool p_append = false);
+	void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, uint32_t p_color_pass_flags, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, bool p_append = false);
 
 	HashMap<Size2i, RID> sdfgi_framebuffer_size_cache;
 

+ 5 - 5
servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl

@@ -129,7 +129,7 @@ invariant gl_Position;
 
 #GLOBALS
 
-void vertex_shader(in uint instance_index, in bool is_multimesh, in SceneData scene_data, in mat4 model_matrix, out vec4 screen_pos) {
+void vertex_shader(in uint instance_index, in bool is_multimesh, in uint multimesh_offset, in SceneData scene_data, in mat4 model_matrix, out vec4 screen_pos) {
 	vec4 instance_custom = vec4(0.0);
 #if defined(COLOR_USED)
 	color_interp = color_attrib;
@@ -208,7 +208,7 @@ void vertex_shader(in uint instance_index, in bool is_multimesh, in SceneData sc
 			}
 		}
 
-		uint offset = stride * gl_InstanceIndex;
+		uint offset = stride * (gl_InstanceIndex + multimesh_offset);
 
 		if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) {
 			matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
@@ -397,13 +397,13 @@ void main() {
 	mat4 model_matrix = instances.data[instance_index].transform;
 #if defined(MOTION_VECTORS)
 	global_time = scene_data_block.prev_data.time;
-	vertex_shader(instance_index, is_multimesh, scene_data_block.prev_data, instances.data[instance_index].prev_transform, prev_screen_position);
+	vertex_shader(instance_index, is_multimesh, draw_call.multimesh_motion_vectors_previous_offset, scene_data_block.prev_data, instances.data[instance_index].prev_transform, prev_screen_position);
 	global_time = scene_data_block.data.time;
-	vertex_shader(instance_index, is_multimesh, scene_data_block.data, model_matrix, screen_position);
+	vertex_shader(instance_index, is_multimesh, draw_call.multimesh_motion_vectors_current_offset, scene_data_block.data, model_matrix, screen_position);
 #else
 	global_time = scene_data_block.data.time;
 	vec4 screen_position;
-	vertex_shader(instance_index, is_multimesh, scene_data_block.data, model_matrix, screen_position);
+	vertex_shader(instance_index, is_multimesh, draw_call.multimesh_motion_vectors_current_offset, scene_data_block.data, model_matrix, screen_position);
 #endif
 }
 

+ 2 - 2
servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl

@@ -29,8 +29,8 @@
 layout(push_constant, std430) uniform DrawCall {
 	uint instance_index;
 	uint uv_offset;
-	uint pad0;
-	uint pad1;
+	uint multimesh_motion_vectors_current_offset; // added to gl_InstanceIndex when fetching the current-frame multimesh transform
+	uint multimesh_motion_vectors_previous_offset; // passed to vertex_shader() for the previous-frame pass when MOTION_VECTORS is defined
 }
 draw_call;
 

+ 156 - 53
servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp

@@ -29,6 +29,7 @@
 /*************************************************************************/
 
 #include "mesh_storage.h"
+#include "../../rendering_server_globals.h"
 
 using namespace RendererRD;
 
@@ -1211,7 +1212,13 @@ void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS::
 	if (multimesh->data_cache_dirty_regions) {
 		memdelete_arr(multimesh->data_cache_dirty_regions);
 		multimesh->data_cache_dirty_regions = nullptr;
-		multimesh->data_cache_used_dirty_regions = 0;
+		multimesh->data_cache_dirty_region_count = 0;
+	}
+
+	if (multimesh->previous_data_cache_dirty_regions) {
+		memdelete_arr(multimesh->previous_data_cache_dirty_regions);
+		multimesh->previous_data_cache_dirty_regions = nullptr;
+		multimesh->previous_data_cache_dirty_region_count = 0;
 	}
 
 	multimesh->instances = p_instances;
@@ -1228,14 +1235,67 @@ void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS::
 	multimesh->aabb = AABB();
 	multimesh->aabb_dirty = false;
 	multimesh->visible_instances = MIN(multimesh->visible_instances, multimesh->instances);
+	multimesh->motion_vectors_current_offset = 0;
+	multimesh->motion_vectors_previous_offset = 0;
+	multimesh->motion_vectors_last_change = -1;
 
 	if (multimesh->instances) {
-		multimesh->buffer = RD::get_singleton()->storage_buffer_create(multimesh->instances * multimesh->stride_cache * 4);
+		uint32_t buffer_size = multimesh->instances * multimesh->stride_cache * sizeof(float);
+		if (multimesh->motion_vectors_enabled) {
+			buffer_size *= 2;
+		}
+		multimesh->buffer = RD::get_singleton()->storage_buffer_create(buffer_size);
 	}
 
 	multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_MULTIMESH);
 }
 
+bool MeshStorage::_multimesh_enable_motion_vectors(RID p_multimesh) { // Turns on motion-vector storage for a multimesh; returns true only when the GPU buffer was recreated here.
+	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
+	ERR_FAIL_COND_V(!multimesh, false);
+
+	if (multimesh->motion_vectors_enabled) {
+		return false; // Already enabled, nothing to change.
+	}
+
+	multimesh->motion_vectors_enabled = true;
+
+	multimesh->motion_vectors_current_offset = 0; // Current and previous both start at offset 0, i.e. they read the same copy.
+	multimesh->motion_vectors_previous_offset = 0;
+	multimesh->motion_vectors_last_change = -1;
+
+	if (!multimesh->data_cache.is_empty()) {
+		multimesh->data_cache.append_array(multimesh->data_cache); // Appends the CPU cache to itself so it holds two copies of the instance data.
+	}
+
+	if (multimesh->buffer_set) {
+		RD::get_singleton()->barrier();
+		Vector<uint8_t> buffer_data = RD::get_singleton()->buffer_get_data(multimesh->buffer); // Fetch the contents of the existing buffer before it is freed.
+		if (!multimesh->data_cache.is_empty()) {
+			memcpy(buffer_data.ptrw(), multimesh->data_cache.ptr(), buffer_data.size()); // When a CPU cache exists, its leading bytes replace the fetched data.
+		}
+
+		RD::get_singleton()->free(multimesh->buffer);
+		uint32_t buffer_size = multimesh->instances * multimesh->stride_cache * sizeof(float) * 2; // Room for two copies of every instance.
+		multimesh->buffer = RD::get_singleton()->storage_buffer_create(buffer_size);
+		RD::get_singleton()->buffer_update(multimesh->buffer, 0, buffer_data.size(), buffer_data.ptr(), RD::BARRIER_MASK_NO_BARRIER); // First copy.
+		RD::get_singleton()->buffer_update(multimesh->buffer, buffer_data.size(), buffer_data.size(), buffer_data.ptr()); // Second copy, written right after the first with the same data.
+		multimesh->uniform_set_3d = RID(); // Cleared by dependency
+		return true; // Buffer RID changed: the caller in _fill_render_list re-fetches its transforms uniform set on true.
+	}
+	return false; // Buffer was not recreated here. NOTE(review): motion_vectors_enabled is now true while any existing buffer keeps its previous size - confirm later updates at a non-zero offset stay in bounds.
+}
+
+void MeshStorage::_multimesh_get_motion_vectors_offsets(RID p_multimesh, uint32_t &r_current_offset, uint32_t &r_prev_offset) { // Writes the multimesh's current and previous instance offsets into the two output references.
+	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
+	ERR_FAIL_COND(!multimesh);
+	r_current_offset = multimesh->motion_vectors_current_offset;
+	if (RSG::rasterizer->get_frame_number() - multimesh->motion_vectors_last_change >= 2) { // Last recorded change is at least two frames old. NOTE(review): last_change is a uint64_t initialised to -1, so this relies on unsigned wrap-around - confirm the type get_frame_number() returns.
+		multimesh->motion_vectors_previous_offset = multimesh->motion_vectors_current_offset; // Side effect: previous is collapsed onto current, so both outputs are equal.
+	}
+	r_prev_offset = multimesh->motion_vectors_previous_offset;
+}
+
 int MeshStorage::multimesh_get_instance_count(RID p_multimesh) const {
 	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
 	ERR_FAIL_COND_V(!multimesh, 0);
@@ -1261,7 +1321,7 @@ void MeshStorage::multimesh_set_mesh(RID p_multimesh, RID p_mesh) {
 		//need to re-create AABB unfortunately, calling this has a penalty
 		if (multimesh->buffer_set) {
 			Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer);
-			const uint8_t *r = buffer.ptr();
+			const uint8_t *r = buffer.ptr() + multimesh->motion_vectors_current_offset * multimesh->stride_cache * sizeof(float);
 			const float *data = reinterpret_cast<const float *>(r);
 			_multimesh_re_create_aabb(multimesh, data, multimesh->instances);
 		}
@@ -1276,10 +1336,14 @@ void MeshStorage::_multimesh_make_local(MultiMesh *multimesh) const {
 	if (multimesh->data_cache.size() > 0) {
 		return; //already local
 	}
-	ERR_FAIL_COND(multimesh->data_cache.size() > 0);
+
 	// this means that the user wants to load/save individual elements,
 	// for this, the data must reside on CPU, so just copy it there.
-	multimesh->data_cache.resize(multimesh->instances * multimesh->stride_cache);
+	uint32_t buffer_size = multimesh->instances * multimesh->stride_cache;
+	if (multimesh->motion_vectors_enabled) {
+		buffer_size *= 2;
+	}
+	multimesh->data_cache.resize(buffer_size);
 	{
 		float *w = multimesh->data_cache.ptrw();
 
@@ -1290,15 +1354,48 @@ void MeshStorage::_multimesh_make_local(MultiMesh *multimesh) const {
 				memcpy(w, r, buffer.size());
 			}
 		} else {
-			memset(w, 0, (size_t)multimesh->instances * multimesh->stride_cache * sizeof(float));
+			memset(w, 0, buffer_size * sizeof(float));
 		}
 	}
 	uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1;
 	multimesh->data_cache_dirty_regions = memnew_arr(bool, data_cache_dirty_region_count);
-	for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) {
-		multimesh->data_cache_dirty_regions[i] = false;
+	memset(multimesh->data_cache_dirty_regions, 0, data_cache_dirty_region_count * sizeof(bool));
+	multimesh->data_cache_dirty_region_count = 0;
+
+	multimesh->previous_data_cache_dirty_regions = memnew_arr(bool, data_cache_dirty_region_count);
+	memset(multimesh->previous_data_cache_dirty_regions, 0, data_cache_dirty_region_count * sizeof(bool));
+	multimesh->previous_data_cache_dirty_region_count = 0;
+}
+
+void MeshStorage::_multimesh_update_motion_vectors_data_cache(MultiMesh *multimesh) { // Called by the per-instance setters after _multimesh_make_local(); swaps which half of the data cache is "current" at most once per frame.
+	ERR_FAIL_COND(multimesh->data_cache.is_empty());
+
+	if (!multimesh->motion_vectors_enabled) {
+		return; // Without motion vectors the offsets are not touched by this function.
+	}
+
+	uint32_t frame = RSG::rasterizer->get_frame_number();
+	if (multimesh->motion_vectors_last_change != frame) { // Only the first call in a given frame performs the swap.
+		multimesh->motion_vectors_previous_offset = multimesh->motion_vectors_current_offset;
+		multimesh->motion_vectors_current_offset = multimesh->instances - multimesh->motion_vectors_current_offset; // Toggles between 0 and `instances` (offsets are reset to 0 elsewhere in this file).
+		multimesh->motion_vectors_last_change = frame;
+
+		if (multimesh->previous_data_cache_dirty_region_count > 0) { // Regions flagged in previous_data_cache_dirty_regions are copied into the new current half before new writes land.
+			uint8_t *data = (uint8_t *)multimesh->data_cache.ptrw();
+			uint32_t current_ofs = multimesh->motion_vectors_current_offset * multimesh->stride_cache * sizeof(float); // Byte offset of the new current half.
+			uint32_t previous_ofs = multimesh->motion_vectors_previous_offset * multimesh->stride_cache * sizeof(float); // Byte offset of the half that was current until now.
+			uint32_t visible_instances = multimesh->visible_instances >= 0 ? multimesh->visible_instances : multimesh->instances; // Negative visible_instances means all instances.
+			uint32_t visible_region_count = visible_instances == 0 ? 0 : (visible_instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1; // Ceiling division; only regions holding visible instances are scanned.
+			uint32_t region_size = multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * sizeof(float); // Bytes per dirty region.
+			uint32_t size = multimesh->stride_cache * (uint32_t)multimesh->instances * (uint32_t)sizeof(float); // Bytes in one half (a single copy of all instances).
+			for (uint32_t i = 0; i < visible_region_count; i++) {
+				if (multimesh->previous_data_cache_dirty_regions[i]) {
+					uint32_t offset = i * region_size;
+					memcpy(data + current_ofs + offset, data + previous_ofs + offset, MIN(region_size, size - offset)); // MIN clamps the last region so the copy stops at the end of one half.
+				}
+			}
+		}
 	}
-	multimesh->data_cache_used_dirty_regions = 0;
 }
 
 void MeshStorage::_multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb) {
@@ -1309,7 +1406,7 @@ void MeshStorage::_multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool
 #endif
 	if (!multimesh->data_cache_dirty_regions[region_index]) {
 		multimesh->data_cache_dirty_regions[region_index] = true;
-		multimesh->data_cache_used_dirty_regions++;
+		multimesh->data_cache_dirty_region_count++;
 	}
 
 	if (p_aabb) {
@@ -1330,7 +1427,7 @@ void MeshStorage::_multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, b
 		for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) {
 			if (!multimesh->data_cache_dirty_regions[i]) {
 				multimesh->data_cache_dirty_regions[i] = true;
-				multimesh->data_cache_used_dirty_regions++;
+				multimesh->data_cache_dirty_region_count++;
 			}
 		}
 	}
@@ -1395,11 +1492,12 @@ void MeshStorage::multimesh_instance_set_transform(RID p_multimesh, int p_index,
 	ERR_FAIL_COND(multimesh->xform_format != RS::MULTIMESH_TRANSFORM_3D);
 
 	_multimesh_make_local(multimesh);
+	_multimesh_update_motion_vectors_data_cache(multimesh);
 
 	{
 		float *w = multimesh->data_cache.ptrw();
 
-		float *dataptr = w + p_index * multimesh->stride_cache;
+		float *dataptr = w + (multimesh->motion_vectors_current_offset + p_index) * multimesh->stride_cache;
 
 		dataptr[0] = p_transform.basis.rows[0][0];
 		dataptr[1] = p_transform.basis.rows[0][1];
@@ -1425,11 +1523,12 @@ void MeshStorage::multimesh_instance_set_transform_2d(RID p_multimesh, int p_ind
 	ERR_FAIL_COND(multimesh->xform_format != RS::MULTIMESH_TRANSFORM_2D);
 
 	_multimesh_make_local(multimesh);
+	_multimesh_update_motion_vectors_data_cache(multimesh);
 
 	{
 		float *w = multimesh->data_cache.ptrw();
 
-		float *dataptr = w + p_index * multimesh->stride_cache;
+		float *dataptr = w + (multimesh->motion_vectors_current_offset + p_index) * multimesh->stride_cache;
 
 		dataptr[0] = p_transform.columns[0][0];
 		dataptr[1] = p_transform.columns[1][0];
@@ -1451,11 +1550,12 @@ void MeshStorage::multimesh_instance_set_color(RID p_multimesh, int p_index, con
 	ERR_FAIL_COND(!multimesh->uses_colors);
 
 	_multimesh_make_local(multimesh);
+	_multimesh_update_motion_vectors_data_cache(multimesh);
 
 	{
 		float *w = multimesh->data_cache.ptrw();
 
-		float *dataptr = w + p_index * multimesh->stride_cache + multimesh->color_offset_cache;
+		float *dataptr = w + (multimesh->motion_vectors_current_offset + p_index) * multimesh->stride_cache + multimesh->color_offset_cache;
 
 		dataptr[0] = p_color.r;
 		dataptr[1] = p_color.g;
@@ -1473,11 +1573,12 @@ void MeshStorage::multimesh_instance_set_custom_data(RID p_multimesh, int p_inde
 	ERR_FAIL_COND(!multimesh->uses_custom_data);
 
 	_multimesh_make_local(multimesh);
+	_multimesh_update_motion_vectors_data_cache(multimesh);
 
 	{
 		float *w = multimesh->data_cache.ptrw();
 
-		float *dataptr = w + p_index * multimesh->stride_cache + multimesh->custom_data_offset_cache;
+		float *dataptr = w + (multimesh->motion_vectors_current_offset + p_index) * multimesh->stride_cache + multimesh->custom_data_offset_cache;
 
 		dataptr[0] = p_color.r;
 		dataptr[1] = p_color.g;
@@ -1514,7 +1615,7 @@ Transform3D MeshStorage::multimesh_instance_get_transform(RID p_multimesh, int p
 	{
 		const float *r = multimesh->data_cache.ptr();
 
-		const float *dataptr = r + p_index * multimesh->stride_cache;
+		const float *dataptr = r + (multimesh->motion_vectors_current_offset + p_index) * multimesh->stride_cache;
 
 		t.basis.rows[0][0] = dataptr[0];
 		t.basis.rows[0][1] = dataptr[1];
@@ -1545,7 +1646,7 @@ Transform2D MeshStorage::multimesh_instance_get_transform_2d(RID p_multimesh, in
 	{
 		const float *r = multimesh->data_cache.ptr();
 
-		const float *dataptr = r + p_index * multimesh->stride_cache;
+		const float *dataptr = r + (multimesh->motion_vectors_current_offset + p_index) * multimesh->stride_cache;
 
 		t.columns[0][0] = dataptr[0];
 		t.columns[1][0] = dataptr[1];
@@ -1570,7 +1671,7 @@ Color MeshStorage::multimesh_instance_get_color(RID p_multimesh, int p_index) co
 	{
 		const float *r = multimesh->data_cache.ptr();
 
-		const float *dataptr = r + p_index * multimesh->stride_cache + multimesh->color_offset_cache;
+		const float *dataptr = r + (multimesh->motion_vectors_current_offset + p_index) * multimesh->stride_cache + multimesh->color_offset_cache;
 
 		c.r = dataptr[0];
 		c.g = dataptr[1];
@@ -1593,7 +1694,7 @@ Color MeshStorage::multimesh_instance_get_custom_data(RID p_multimesh, int p_ind
 	{
 		const float *r = multimesh->data_cache.ptr();
 
-		const float *dataptr = r + p_index * multimesh->stride_cache + multimesh->custom_data_offset_cache;
+		const float *dataptr = r + (multimesh->motion_vectors_current_offset + p_index) * multimesh->stride_cache + multimesh->custom_data_offset_cache;
 
 		c.r = dataptr[0];
 		c.g = dataptr[1];
@@ -1609,25 +1710,26 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_b
 	ERR_FAIL_COND(!multimesh);
 	ERR_FAIL_COND(p_buffer.size() != (multimesh->instances * (int)multimesh->stride_cache));
 
+	if (multimesh->motion_vectors_enabled) {
+		uint32_t frame = RSG::rasterizer->get_frame_number();
+
+		if (multimesh->motion_vectors_last_change != frame) {
+			multimesh->motion_vectors_previous_offset = multimesh->motion_vectors_current_offset;
+			multimesh->motion_vectors_current_offset = multimesh->instances - multimesh->motion_vectors_current_offset;
+			multimesh->motion_vectors_last_change = frame;
+		}
+	}
+
 	{
 		const float *r = p_buffer.ptr();
-		RD::get_singleton()->buffer_update(multimesh->buffer, 0, p_buffer.size() * sizeof(float), r);
+		RD::get_singleton()->buffer_update(multimesh->buffer, multimesh->motion_vectors_current_offset * multimesh->stride_cache * sizeof(float), p_buffer.size() * sizeof(float), r);
 		multimesh->buffer_set = true;
 	}
 
 	if (multimesh->data_cache.size()) {
-		//if we have a data cache, just update it
-		multimesh->data_cache = p_buffer;
-		{
-			//clear dirty since nothing will be dirty anymore
-			uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1;
-			for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) {
-				multimesh->data_cache_dirty_regions[i] = false;
-			}
-			multimesh->data_cache_used_dirty_regions = 0;
-		}
-
-		_multimesh_mark_all_dirty(multimesh, false, true); //update AABB
+		float *cache_data = multimesh->data_cache.ptrw();
+		memcpy(cache_data + (multimesh->motion_vectors_current_offset * multimesh->stride_cache), p_buffer.ptr(), p_buffer.size() * sizeof(float));
+		_multimesh_mark_all_dirty(multimesh, true, true); //update AABB
 	} else if (multimesh->mesh.is_valid()) {
 		//if we have a mesh set, we need to re-generate the AABB from the new data
 		const float *data = p_buffer.ptr();
@@ -1642,20 +1744,19 @@ Vector<float> MeshStorage::multimesh_get_buffer(RID p_multimesh) const {
 	ERR_FAIL_COND_V(!multimesh, Vector<float>());
 	if (multimesh->buffer.is_null()) {
 		return Vector<float>();
-	} else if (multimesh->data_cache.size()) {
-		return multimesh->data_cache;
 	} else {
-		//get from memory
-
-		Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer);
 		Vector<float> ret;
 		ret.resize(multimesh->instances * multimesh->stride_cache);
-		{
-			float *w = ret.ptrw();
-			const uint8_t *r = buffer.ptr();
-			memcpy(w, r, buffer.size());
-		}
+		float *w = ret.ptrw();
 
+		if (multimesh->data_cache.size()) {
+			const uint8_t *r = (uint8_t *)multimesh->data_cache.ptr() + multimesh->motion_vectors_current_offset * multimesh->stride_cache * sizeof(float);
+			memcpy(w, r, ret.size() * sizeof(float));
+		} else {
+			Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer);
+			const uint8_t *r = buffer.ptr() + multimesh->motion_vectors_current_offset * multimesh->stride_cache * sizeof(float);
+			memcpy(w, r, ret.size() * sizeof(float));
+		}
 		return ret;
 	}
 }
@@ -1698,36 +1799,38 @@ void MeshStorage::_update_dirty_multimeshes() {
 		MultiMesh *multimesh = multimesh_dirty_list;
 
 		if (multimesh->data_cache.size()) { //may have been cleared, so only process if it exists
-			const float *data = multimesh->data_cache.ptr();
 
 			uint32_t visible_instances = multimesh->visible_instances >= 0 ? multimesh->visible_instances : multimesh->instances;
+			uint32_t buffer_offset = multimesh->motion_vectors_current_offset * multimesh->stride_cache;
+			const float *data = multimesh->data_cache.ptr() + buffer_offset;
 
-			if (multimesh->data_cache_used_dirty_regions) {
+			uint32_t total_dirty_regions = multimesh->data_cache_dirty_region_count + multimesh->previous_data_cache_dirty_region_count;
+			if (total_dirty_regions != 0) {
 				uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1;
 				uint32_t visible_region_count = visible_instances == 0 ? 0 : (visible_instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1;
 
 				uint32_t region_size = multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * sizeof(float);
-
-				if (multimesh->data_cache_used_dirty_regions > 32 || multimesh->data_cache_used_dirty_regions > visible_region_count / 2) {
+				if (total_dirty_regions > 32 || total_dirty_regions > visible_region_count / 2) {
 					//if there too many dirty regions, or represent the majority of regions, just copy all, else transfer cost piles up too much
-					RD::get_singleton()->buffer_update(multimesh->buffer, 0, MIN(visible_region_count * region_size, multimesh->instances * (uint32_t)multimesh->stride_cache * (uint32_t)sizeof(float)), data);
+					RD::get_singleton()->buffer_update(multimesh->buffer, buffer_offset * sizeof(float), MIN(visible_region_count * region_size, multimesh->instances * (uint32_t)multimesh->stride_cache * (uint32_t)sizeof(float)), data);
 				} else {
 					//not that many regions? update them all
 					for (uint32_t i = 0; i < visible_region_count; i++) {
-						if (multimesh->data_cache_dirty_regions[i]) {
+						if (multimesh->data_cache_dirty_regions[i] || multimesh->previous_data_cache_dirty_regions[i]) {
 							uint32_t offset = i * region_size;
 							uint32_t size = multimesh->stride_cache * (uint32_t)multimesh->instances * (uint32_t)sizeof(float);
 							uint32_t region_start_index = multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * i;
-							RD::get_singleton()->buffer_update(multimesh->buffer, offset, MIN(region_size, size - offset), &data[region_start_index]);
+							RD::get_singleton()->buffer_update(multimesh->buffer, buffer_offset * sizeof(float) + offset, MIN(region_size, size - offset), &data[region_start_index], RD::BARRIER_MASK_NO_BARRIER);
 						}
 					}
+					RD::get_singleton()->barrier(RD::BARRIER_MASK_NO_BARRIER, RD::BARRIER_MASK_ALL);
 				}
 
-				for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) {
-					multimesh->data_cache_dirty_regions[i] = false;
-				}
+				memcpy(multimesh->previous_data_cache_dirty_regions, multimesh->data_cache_dirty_regions, data_cache_dirty_region_count * sizeof(bool));
+				memset(multimesh->data_cache_dirty_regions, 0, data_cache_dirty_region_count * sizeof(bool));
 
-				multimesh->data_cache_used_dirty_regions = 0;
+				multimesh->previous_data_cache_dirty_region_count = multimesh->data_cache_dirty_region_count;
+				multimesh->data_cache_dirty_region_count = 0;
 			}
 
 			if (multimesh->aabb_dirty) {

+ 10 - 1
servers/rendering/renderer_rd/storage_rd/mesh_storage.h

@@ -205,13 +205,19 @@ private:
 		AABB aabb;
 		bool aabb_dirty = false;
 		bool buffer_set = false;
+		bool motion_vectors_enabled = false;
+		uint32_t motion_vectors_current_offset = 0;
+		uint32_t motion_vectors_previous_offset = 0;
+		uint64_t motion_vectors_last_change = -1;
 		uint32_t stride_cache = 0;
 		uint32_t color_offset_cache = 0;
 		uint32_t custom_data_offset_cache = 0;
 
 		Vector<float> data_cache; //used if individual setting is used
 		bool *data_cache_dirty_regions = nullptr;
-		uint32_t data_cache_used_dirty_regions = 0;
+		uint32_t data_cache_dirty_region_count = 0;
+		bool *previous_data_cache_dirty_regions = nullptr;
+		uint32_t previous_data_cache_dirty_region_count = 0;
 
 		RID buffer; //storage buffer
 		RID uniform_set_3d;
@@ -228,6 +234,7 @@ private:
 	MultiMesh *multimesh_dirty_list = nullptr;
 
 	_FORCE_INLINE_ void _multimesh_make_local(MultiMesh *multimesh) const;
+	_FORCE_INLINE_ void _multimesh_update_motion_vectors_data_cache(MultiMesh *multimesh);
 	_FORCE_INLINE_ void _multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb);
 	_FORCE_INLINE_ void _multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, bool p_aabb);
 	_FORCE_INLINE_ void _multimesh_re_create_aabb(MultiMesh *multimesh, const float *p_data, int p_instances);
@@ -579,6 +586,8 @@ public:
 	virtual AABB multimesh_get_aabb(RID p_multimesh) const override;
 
 	void _update_dirty_multimeshes();
+	bool _multimesh_enable_motion_vectors(RID p_multimesh);
+	void _multimesh_get_motion_vectors_offsets(RID p_multimesh, uint32_t &r_current_offset, uint32_t &r_prev_offset);
 
 	_FORCE_INLINE_ RS::MultimeshTransformFormat multimesh_get_transform_format(RID p_multimesh) const {
 		MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);