Преглед на файлове

Reimplement skeletons and blend shapes

Uses compute shaders, which run only once, on demand, and all in parallel.
reduz преди 4 години
родител
ревизия
bf77016c8a

+ 16 - 3
drivers/vulkan/rendering_device_vulkan.cpp

@@ -1362,6 +1362,7 @@ Error RenderingDeviceVulkan::_buffer_allocate(Buffer *p_buffer, uint32_t p_size,
 	p_buffer->buffer_info.buffer = p_buffer->buffer;
 	p_buffer->buffer_info.offset = 0;
 	p_buffer->buffer_info.range = p_size;
+	p_buffer->usage = p_usage;
 
 	return OK;
 }
@@ -3485,7 +3486,7 @@ RID RenderingDeviceVulkan::sampler_create(const SamplerState &p_state) {
 /**** VERTEX ARRAY ****/
 /**********************/
 
-RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data) {
+RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data, bool p_use_as_storage) {
 	_THREAD_SAFE_METHOD_
 
 	ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID());
@@ -3494,8 +3495,12 @@ RID RenderingDeviceVulkan::vertex_buffer_create(uint32_t p_size_bytes, const Vec
 	ERR_FAIL_COND_V_MSG(compute_list != nullptr && p_data.size(), RID(),
 			"Creating buffers with data is forbidden during creation of a draw list");
 
+	uint32_t usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+	if (p_use_as_storage) {
+		usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+	}
 	Buffer buffer;
-	_buffer_allocate(&buffer, p_size_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VMA_MEMORY_USAGE_GPU_ONLY);
+	_buffer_allocate(&buffer, p_size_bytes, usage, VMA_MEMORY_USAGE_GPU_ONLY);
 	if (p_data.size()) {
 		uint64_t data_size = p_data.size();
 		const uint8_t *r = p_data.ptr();
@@ -4911,7 +4916,15 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
 				ERR_FAIL_COND_V_MSG(uniform.ids.size() != 1, RID(),
 						"Storage buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.ids.size()) + " provided).");
 
-				Buffer *buffer = storage_buffer_owner.getornull(uniform.ids[0]);
+				Buffer *buffer = nullptr;
+
+				if (storage_buffer_owner.owns(uniform.ids[0])) {
+					buffer = storage_buffer_owner.getornull(uniform.ids[0]);
+				} else if (vertex_buffer_owner.owns(uniform.ids[0])) {
+					buffer = vertex_buffer_owner.getornull(uniform.ids[0]);
+
+					ERR_FAIL_COND_V_MSG(!(buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), RID(), "Vertex buffer supplied (binding: " + itos(uniform.binding) + ") was not created with storage flag.");
+				}
 				ERR_FAIL_COND_V_MSG(!buffer, RID(), "Storage buffer supplied (binding: " + itos(uniform.binding) + ") is invalid.");
 
 				//if 0, then its sized on link time

+ 1 - 1
drivers/vulkan/rendering_device_vulkan.h

@@ -951,7 +951,7 @@ public:
 	/**** VERTEX ARRAY ****/
 	/**********************/
 
-	virtual RID vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>());
+	virtual RID vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>(), bool p_use_as_storage = false);
 
 	// Internally reference counted, this ID is warranted to be unique for the same description, but needs to be freed as many times as it was allocated
 	virtual VertexFormatID vertex_format_create(const Vector<VertexAttribute> &p_vertex_formats);

+ 6 - 2
editor/editor_file_system.cpp

@@ -1586,7 +1586,7 @@ Error EditorFileSystem::_reimport_group(const String &p_group_file, const Vector
 		f->store_line("importer=\"" + importer->get_importer_name() + "\"");
 		int version = importer->get_format_version();
 		if (version > 0) {
-			f->store_line("importer_version=" + itos(importer->get_format_version()));
+			f->store_line("importer_version=" + itos(version));
 		}
 		if (importer->get_resource_type() != "") {
 			f->store_line("type=\"" + importer->get_resource_type() + "\"");
@@ -1725,7 +1725,7 @@ void EditorFileSystem::_reimport_file(const String &p_file) {
 		importer = ResourceFormatImporter::get_singleton()->get_importer_by_extension(p_file.get_extension());
 		load_default = true;
 		if (importer.is_null()) {
-			ERR_PRINT("BUG: File queued for import, but can't be imported!");
+			ERR_PRINT("BUG: File queued for import, but can't be imported, importer for type '" + importer_name + "' not found.");
 			ERR_FAIL();
 		}
 	}
@@ -1772,6 +1772,10 @@ void EditorFileSystem::_reimport_file(const String &p_file) {
 	f->store_line("[remap]");
 	f->store_line("");
 	f->store_line("importer=\"" + importer->get_importer_name() + "\"");
+	int version = importer->get_format_version();
+	if (version > 0) {
+		f->store_line("importer_version=" + itos(version));
+	}
 	if (importer->get_resource_type() != "") {
 		f->store_line("type=\"" + importer->get_resource_type() + "\"");
 	}

+ 41 - 50
scene/resources/mesh.cpp

@@ -737,21 +737,6 @@ static Vector<uint8_t> _fix_array_compatibility(const Vector<uint8_t> &p_src, ui
 bool ArrayMesh::_set(const StringName &p_name, const Variant &p_value) {
 	String sname = p_name;
 
-	if (p_name == "blend_shape/names") {
-		Vector<String> sk = p_value;
-		int sz = sk.size();
-		const String *r = sk.ptr();
-		for (int i = 0; i < sz; i++) {
-			add_blend_shape(r[i]);
-		}
-		return true;
-	}
-
-	if (p_name == "blend_shape/mode") {
-		set_blend_shape_mode(BlendShapeMode(int(p_value)));
-		return true;
-	}
-
 	if (sname.begins_with("surface_")) {
 		int sl = sname.find("/");
 		if (sl == -1) {
@@ -875,6 +860,28 @@ bool ArrayMesh::_set(const StringName &p_name, const Variant &p_value) {
 	return false;
 }
 
+// Replaces the stored blend shape names with p_names. When the server-side
+// mesh already exists, the new count is forwarded to the rendering server.
+// Fails (ERR_FAIL_COND) once any surface has been added.
+void ArrayMesh::_set_blend_shape_names(const PackedStringArray &p_names) {
+	ERR_FAIL_COND(surfaces.size() > 0);
+
+	const int name_count = p_names.size();
+	blend_shapes.resize(name_count);
+	for (int idx = 0; idx < name_count; idx++) {
+		blend_shapes.write[idx] = p_names[idx];
+	}
+
+	if (!mesh.is_valid()) {
+		return;
+	}
+	RS::get_singleton()->mesh_set_blend_shape_count(mesh, blend_shapes.size());
+}
+
+// Returns the blend shape names copied into a new PackedStringArray, in order.
+PackedStringArray ArrayMesh::_get_blend_shape_names() const {
+	const int shape_count = blend_shapes.size();
+
+	PackedStringArray names;
+	names.resize(shape_count);
+	for (int idx = 0; idx < shape_count; idx++) {
+		names.write[idx] = blend_shapes[idx];
+	}
+	return names;
+}
+
 Array ArrayMesh::_get_surfaces() const {
 	if (mesh.is_null()) {
 		return Array();
@@ -920,7 +927,6 @@ Array ArrayMesh::_get_surfaces() const {
 
 		if (surface.blend_shape_data.size()) {
 			data["blend_shapes"] = surface.blend_shape_data;
-			data["blend_shapes_count"] = surface.blend_shape_count;
 		}
 
 		if (surfaces[i].material.is_valid()) {
@@ -945,6 +951,7 @@ void ArrayMesh::_create_if_empty() const {
 	if (!mesh.is_valid()) {
 		mesh = RS::get_singleton()->mesh_create();
 		RS::get_singleton()->mesh_set_blend_shape_mode(mesh, (RS::BlendShapeMode)blend_shape_mode);
+		RS::get_singleton()->mesh_set_blend_shape_count(mesh, blend_shapes.size());
 	}
 }
 
@@ -998,9 +1005,8 @@ void ArrayMesh::_set_surfaces(const Array &p_surfaces) {
 			}
 		}
 
-		if (d.has("blend_shapes") && d.has("blend_shape_count")) {
+		if (d.has("blend_shapes")) {
 			surface.blend_shape_data = d["blend_shapes"];
-			surface.blend_shape_count = d["blend_shape_count"];
 		}
 
 		Ref<Material> material;
@@ -1020,15 +1026,7 @@ void ArrayMesh::_set_surfaces(const Array &p_surfaces) {
 		if (d.has("2d")) {
 			_2d = d["2d"];
 		}
-		/*
-		print_line("format: " + itos(surface.format));
-		print_line("aabb: " + surface.aabb);
-		print_line("array size: " + itos(surface.vertex_data.size()));
-		print_line("vertex count: " + itos(surface.vertex_count));
-		print_line("index size: " + itos(surface.index_data.size()));
-		print_line("index count: " + itos(surface.index_count));
-		print_line("primitive: " + itos(surface.primitive));
-*/
+
 		surface_data.push_back(surface);
 		surface_materials.push_back(material);
 		surface_names.push_back(name);
@@ -1044,7 +1042,7 @@ void ArrayMesh::_set_surfaces(const Array &p_surfaces) {
 	} else {
 		// if mesh does not exist (first time this is loaded, most likely),
 		// we can create it with a single call, which is a lot more efficient and thread friendly
-		mesh = RS::get_singleton()->mesh_create_from_surfaces(surface_data);
+		mesh = RS::get_singleton()->mesh_create_from_surfaces(surface_data, blend_shapes.size());
 		RS::get_singleton()->mesh_set_blend_shape_mode(mesh, (RS::BlendShapeMode)blend_shape_mode);
 	}
 
@@ -1056,7 +1054,6 @@ void ArrayMesh::_set_surfaces(const Array &p_surfaces) {
 		s.aabb = surface_data[i].aabb;
 		if (i == 0) {
 			aabb = s.aabb;
-			blend_shapes.resize(surface_data[i].blend_shape_count);
 		} else {
 			aabb.merge_with(s.aabb);
 		}
@@ -1080,18 +1077,7 @@ bool ArrayMesh::_get(const StringName &p_name, Variant &r_ret) const {
 	}
 
 	String sname = p_name;
-
-	if (p_name == "blend_shape/names") {
-		Vector<String> sk;
-		for (int i = 0; i < blend_shapes.size(); i++) {
-			sk.push_back(blend_shapes[i]);
-		}
-		r_ret = sk;
-		return true;
-	} else if (p_name == "blend_shape/mode") {
-		r_ret = get_blend_shape_mode();
-		return true;
-	} else if (sname.begins_with("surface_")) {
+	if (sname.begins_with("surface_")) {
 		int sl = sname.find("/");
 		if (sl == -1) {
 			return false;
@@ -1114,11 +1100,6 @@ void ArrayMesh::_get_property_list(List<PropertyInfo> *p_list) const {
 		return;
 	}
 
-	if (blend_shapes.size()) {
-		p_list->push_back(PropertyInfo(Variant::PACKED_STRING_ARRAY, "blend_shape/names", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL));
-		p_list->push_back(PropertyInfo(Variant::INT, "blend_shape/mode", PROPERTY_HINT_ENUM, "Normalized,Relative"));
-	}
-
 	for (int i = 0; i < surfaces.size(); i++) {
 		p_list->push_back(PropertyInfo(Variant::STRING, "surface_" + itos(i + 1) + "/name", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_EDITOR));
 		if (surfaces[i].is_2d) {
@@ -1144,7 +1125,7 @@ void ArrayMesh::_recompute_aabb() {
 #ifndef _MSC_VER
 #warning need to add binding to add_surface using future MeshSurfaceData object
 #endif
-void ArrayMesh::add_surface(uint32_t p_format, PrimitiveType p_primitive, const Vector<uint8_t> &p_array, const Vector<uint8_t> &p_attribute_array, const Vector<uint8_t> &p_skin_array, int p_vertex_count, const Vector<uint8_t> &p_index_array, int p_index_count, const AABB &p_aabb, const Vector<uint8_t> &p_blend_shape_data, uint32_t p_blend_shape_count, const Vector<AABB> &p_bone_aabbs, const Vector<RS::SurfaceData::LOD> &p_lods) {
+void ArrayMesh::add_surface(uint32_t p_format, PrimitiveType p_primitive, const Vector<uint8_t> &p_array, const Vector<uint8_t> &p_attribute_array, const Vector<uint8_t> &p_skin_array, int p_vertex_count, const Vector<uint8_t> &p_index_array, int p_index_count, const AABB &p_aabb, const Vector<uint8_t> &p_blend_shape_data, const Vector<AABB> &p_bone_aabbs, const Vector<RS::SurfaceData::LOD> &p_lods) {
 	_create_if_empty();
 
 	Surface s;
@@ -1169,7 +1150,6 @@ void ArrayMesh::add_surface(uint32_t p_format, PrimitiveType p_primitive, const
 	sd.index_count = p_index_count;
 	sd.index_data = p_index_array;
 	sd.blend_shape_data = p_blend_shape_data;
-	sd.blend_shape_count = p_blend_shape_count;
 	sd.bone_aabbs = p_bone_aabbs;
 	sd.lods = p_lods;
 
@@ -1198,7 +1178,7 @@ void ArrayMesh::add_surface_from_arrays(PrimitiveType p_primitive, const Array &
 	print_line("primitive: " + itos(surface.primitive));
 	*/
 
-	add_surface(surface.format, PrimitiveType(surface.primitive), surface.vertex_data, surface.attribute_data, surface.skin_data, surface.vertex_count, surface.index_data, surface.index_count, surface.aabb, surface.blend_shape_data, surface.blend_shape_count, surface.bone_aabbs, surface.lods);
+	add_surface(surface.format, PrimitiveType(surface.primitive), surface.vertex_data, surface.attribute_data, surface.skin_data, surface.vertex_count, surface.index_data, surface.index_count, surface.aabb, surface.blend_shape_data, surface.bone_aabbs, surface.lods);
 }
 
 Array ArrayMesh::surface_get_arrays(int p_surface) const {
@@ -1234,7 +1214,10 @@ void ArrayMesh::add_blend_shape(const StringName &p_name) {
 	}
 
 	blend_shapes.push_back(name);
-	//RS::get_singleton()->mesh_set_blend_shape_count(mesh, blend_shapes.size());
+
+	if (mesh.is_valid()) {
+		RS::get_singleton()->mesh_set_blend_shape_count(mesh, blend_shapes.size());
+	}
 }
 
 int ArrayMesh::get_blend_shape_count() const {
@@ -1250,6 +1233,10 @@ void ArrayMesh::clear_blend_shapes() {
 	ERR_FAIL_COND_MSG(surfaces.size(), "Can't set shape key count if surfaces are already created.");
 
 	blend_shapes.clear();
+
+	if (mesh.is_valid()) {
+		RS::get_singleton()->mesh_set_blend_shape_count(mesh, 0);
+	}
 }
 
 void ArrayMesh::set_blend_shape_mode(BlendShapeMode p_mode) {
@@ -1609,9 +1596,13 @@ void ArrayMesh::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_custom_aabb", "aabb"), &ArrayMesh::set_custom_aabb);
 	ClassDB::bind_method(D_METHOD("get_custom_aabb"), &ArrayMesh::get_custom_aabb);
 
+	ClassDB::bind_method(D_METHOD("_set_blend_shape_names", "blend_shape_names"), &ArrayMesh::_set_blend_shape_names);
+	ClassDB::bind_method(D_METHOD("_get_blend_shape_names"), &ArrayMesh::_get_blend_shape_names);
+
 	ClassDB::bind_method(D_METHOD("_set_surfaces", "surfaces"), &ArrayMesh::_set_surfaces);
 	ClassDB::bind_method(D_METHOD("_get_surfaces"), &ArrayMesh::_get_surfaces);
 
+	ADD_PROPERTY(PropertyInfo(Variant::PACKED_STRING_ARRAY, "_blend_shape_names", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "_set_blend_shape_names", "_get_blend_shape_names");
 	ADD_PROPERTY(PropertyInfo(Variant::ARRAY, "_surfaces", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "_set_surfaces", "_get_surfaces");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "blend_shape_mode", PROPERTY_HINT_ENUM, "Normalized,Relative"), "set_blend_shape_mode", "get_blend_shape_mode");
 	ADD_PROPERTY(PropertyInfo(Variant::AABB, "custom_aabb", PROPERTY_HINT_NONE, ""), "set_custom_aabb", "get_custom_aabb");

+ 4 - 1
scene/resources/mesh.h

@@ -171,6 +171,9 @@ class ArrayMesh : public Mesh {
 	GDCLASS(ArrayMesh, Mesh);
 	RES_BASE_EXTENSION("mesh");
 
+	PackedStringArray _get_blend_shape_names() const;
+	void _set_blend_shape_names(const PackedStringArray &p_names);
+
 	Array _get_surfaces() const;
 	void _set_surfaces(const Array &p_data);
 
@@ -208,7 +211,7 @@ protected:
 public:
 	void add_surface_from_arrays(PrimitiveType p_primitive, const Array &p_arrays, const Array &p_blend_shapes = Array(), const Dictionary &p_lods = Dictionary(), uint32_t p_flags = 0);
 
-	void add_surface(uint32_t p_format, PrimitiveType p_primitive, const Vector<uint8_t> &p_array, const Vector<uint8_t> &p_attribute_array, const Vector<uint8_t> &p_skin_array, int p_vertex_count, const Vector<uint8_t> &p_index_array, int p_index_count, const AABB &p_aabb, const Vector<uint8_t> &p_blend_shape_data = Vector<uint8_t>(), uint32_t p_blend_shape_count = 0, const Vector<AABB> &p_bone_aabbs = Vector<AABB>(), const Vector<RS::SurfaceData::LOD> &p_lods = Vector<RS::SurfaceData::LOD>());
+	void add_surface(uint32_t p_format, PrimitiveType p_primitive, const Vector<uint8_t> &p_array, const Vector<uint8_t> &p_attribute_array, const Vector<uint8_t> &p_skin_array, int p_vertex_count, const Vector<uint8_t> &p_index_array, int p_index_count, const AABB &p_aabb, const Vector<uint8_t> &p_blend_shape_data = Vector<uint8_t>(), const Vector<AABB> &p_bone_aabbs = Vector<AABB>(), const Vector<RS::SurfaceData::LOD> &p_lods = Vector<RS::SurfaceData::LOD>());
 
 	Array surface_get_arrays(int p_surface) const override;
 	Array surface_get_blend_shape_arrays(int p_surface) const override;

+ 2 - 2
scene/resources/surface_tool.cpp

@@ -541,7 +541,7 @@ Array SurfaceTool::commit_to_arrays() {
 					ERR_CONTINUE(v.bones.size() != count);
 
 					for (int j = 0; j < count; j++) {
-						w[idx + j] = v.bones[j];
+						w[idx * count + j] = v.bones[j];
 					}
 				}
 
@@ -561,7 +561,7 @@ Array SurfaceTool::commit_to_arrays() {
 					ERR_CONTINUE(v.weights.size() != count);
 
 					for (int j = 0; j < count; j++) {
-						w[idx + j] = v.weights[j];
+						w[idx * count + j] = v.weights[j];
 					}
 				}
 

+ 5 - 1
servers/rendering/renderer_rd/renderer_scene_render_forward.cpp

@@ -1079,7 +1079,11 @@ void RendererSceneRenderForward::_render_list(RenderingDevice::DrawListID p_draw
 
 		switch (e->instance->base_type) {
 			case RS::INSTANCE_MESH: {
-				storage->mesh_surface_get_arrays_and_format(e->instance->base, e->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, index_array_rd, vertex_format);
+				if (e->instance->mesh_instance.is_valid()) { //skeleton and blend shape
+					storage->mesh_instance_surface_get_arrays_and_format(e->instance->mesh_instance, e->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, index_array_rd, vertex_format);
+				} else {
+					storage->mesh_surface_get_arrays_and_format(e->instance->base, e->surface_index, pipeline->get_vertex_input_mask(), vertex_array_rd, index_array_rd, vertex_format);
+				}
 			} break;
 			case RS::INSTANCE_MULTIMESH: {
 				RID mesh = storage->multimesh_get_mesh(e->instance->base);

+ 350 - 21
servers/rendering/renderer_rd/renderer_storage_rd.cpp

@@ -2392,13 +2392,23 @@ RID RendererStorageRD::mesh_create() {
 	return mesh_owner.make_rid(Mesh());
 }
 
+void RendererStorageRD::mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count) { // Stores the blend shape count on the mesh identified by p_mesh.
+	ERR_FAIL_COND(p_blend_shape_count < 0); // Negative counts are rejected.
+
+	Mesh *mesh = mesh_owner.getornull(p_mesh);
+	ERR_FAIL_COND(!mesh); // p_mesh did not resolve to a mesh.
+
+	ERR_FAIL_COND(mesh->surface_count > 0); // The count cannot change once the mesh has surfaces.
+
+	mesh->blend_shape_count = p_blend_shape_count;
+}
+
 /// Returns stride
 void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) {
 	Mesh *mesh = mesh_owner.getornull(p_mesh);
 	ERR_FAIL_COND(!mesh);
 
 	//ensure blend shape consistency
-	ERR_FAIL_COND(mesh->blend_shape_count && p_surface.blend_shape_count != mesh->blend_shape_count);
 	ERR_FAIL_COND(mesh->blend_shape_count && p_surface.bone_aabbs.size() != mesh->bone_aabbs.size());
 
 #ifdef DEBUG_ENABLED
@@ -2453,7 +2463,7 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su
 					case RS::ARRAY_BONES: {
 						//uses a separate array
 						bool use_8 = p_surface.format & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS;
-						skin_stride += sizeof(int16_t) * (use_8 ? 8 : 4);
+						skin_stride += sizeof(int16_t) * (use_8 ? 16 : 8);
 					} break;
 				}
 			}
@@ -2461,6 +2471,11 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su
 
 		int expected_size = stride * p_surface.vertex_count;
 		ERR_FAIL_COND_MSG(expected_size != p_surface.vertex_data.size(), "Size of vertex data provided (" + itos(p_surface.vertex_data.size()) + ") does not match expected (" + itos(expected_size) + ")");
+
+		int bs_expected_size = expected_size * mesh->blend_shape_count;
+
+		ERR_FAIL_COND_MSG(bs_expected_size != p_surface.blend_shape_data.size(), "Size of blend shape data provided (" + itos(p_surface.blend_shape_data.size()) + ") does not match expected (" + itos(bs_expected_size) + ")");
+
 		int expected_attrib_size = attrib_stride * p_surface.vertex_count;
 		ERR_FAIL_COND_MSG(expected_attrib_size != p_surface.attribute_data.size(), "Size of attribute data provided (" + itos(p_surface.attribute_data.size()) + ") does not match expected (" + itos(expected_attrib_size) + ")");
 
@@ -2477,15 +2492,25 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su
 	s->format = p_surface.format;
 	s->primitive = p_surface.primitive;
 
-	s->vertex_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.vertex_data.size(), p_surface.vertex_data);
+	bool use_as_storage = (p_surface.skin_data.size() || mesh->blend_shape_count > 0);
+
+	s->vertex_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.vertex_data.size(), p_surface.vertex_data, use_as_storage);
+	s->vertex_buffer_size = p_surface.vertex_data.size();
+
 	if (p_surface.attribute_data.size()) {
 		s->attribute_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.attribute_data.size(), p_surface.attribute_data);
 	}
 	if (p_surface.skin_data.size()) {
-		s->skin_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.skin_data.size(), p_surface.skin_data);
+		s->skin_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.skin_data.size(), p_surface.skin_data, use_as_storage);
+		s->skin_buffer_size = p_surface.skin_data.size();
 	}
+
 	s->vertex_count = p_surface.vertex_count;
 
+	if (p_surface.format & RS::ARRAY_FORMAT_BONES) {
+		mesh->has_bone_weights = true;
+	}
+
 	if (p_surface.index_count) {
 		bool is_index_16 = p_surface.vertex_count <= 65536;
 
@@ -2507,17 +2532,45 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su
 
 	s->aabb = p_surface.aabb;
 	s->bone_aabbs = p_surface.bone_aabbs; //only really useful for returning them.
-#if 0
-	for (int i = 0; i < p_surface.blend_shapes.size(); i++) {
-		if (p_surface.blend_shapes[i].size() != p_surface.vertex_data.size()) {
-			memdelete(s);
-			ERR_FAIL_COND(p_surface.blend_shapes[i].size() != p_surface.vertex_data.size());
+
+	if (mesh->blend_shape_count > 0) {
+		s->blend_shape_buffer = RD::get_singleton()->storage_buffer_create(p_surface.blend_shape_data.size(), p_surface.blend_shape_data);
+	}
+
+	if (use_as_storage) {
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.binding = 0;
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			u.ids.push_back(s->vertex_buffer);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 1;
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			if (s->skin_buffer.is_valid()) {
+				u.ids.push_back(s->skin_buffer);
+			} else {
+				u.ids.push_back(default_rd_storage_buffer);
+			}
+			uniforms.push_back(u);
 		}
-		RID vertex_buffer = RD::get_singleton()->vertex_buffer_create(p_surface.blend_shapes[i].size(), p_surface.blend_shapes[i]);
-		s->blend_shapes.push_back(vertex_buffer);
+		{
+			RD::Uniform u;
+			u.binding = 2;
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			if (s->blend_shape_buffer.is_valid()) {
+				u.ids.push_back(s->blend_shape_buffer);
+			} else {
+				u.ids.push_back(default_rd_storage_buffer);
+			}
+			uniforms.push_back(u);
+		}
+
+		s->uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SURFACE);
 	}
-#endif
-	mesh->blend_shape_count = p_surface.blend_shape_count;
 
 	if (mesh->surface_count == 0) {
 		mesh->bone_aabbs = p_surface.bone_aabbs;
@@ -2535,6 +2588,12 @@ void RendererStorageRD::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_su
 	mesh->surfaces[mesh->surface_count] = s;
 	mesh->surface_count++;
 
+	for (List<MeshInstance *>::Element *E = mesh->instances.front(); E; E = E->next()) {
+		//update instances
+		MeshInstance *mi = E->get();
+		_mesh_instance_add_surface(mi, mesh, mesh->surface_count - 1);
+	}
+
 	mesh->instance_dependency.instance_notify_changed(true, true);
 
 	mesh->material_cache.clear();
@@ -2792,16 +2851,223 @@ void RendererStorageRD::mesh_clear(RID p_mesh) {
 	mesh->surfaces = nullptr;
 	mesh->surface_count = 0;
 	mesh->material_cache.clear();
+	//clear instance data
+	for (List<MeshInstance *>::Element *E = mesh->instances.front(); E; E = E->next()) {
+		MeshInstance *mi = E->get();
+		_mesh_instance_clear(mi);
+	}
 	mesh->instance_dependency.instance_notify_changed(true, true);
+	mesh->has_bone_weights = false;
+}
+
+// Returns true when the mesh has at least one blend shape, or when it has bone
+// weights and the caller reports a skeleton (p_has_skeleton). Returns false
+// (with an error) when p_mesh does not resolve to a mesh.
+bool RendererStorageRD::mesh_needs_instance(RID p_mesh, bool p_has_skeleton) {
+	Mesh *mesh = mesh_owner.getornull(p_mesh);
+	ERR_FAIL_COND_V(!mesh, false);
+
+	if (mesh->blend_shape_count > 0) {
+		return true;
+	}
+	const bool skinned = mesh->has_bone_weights && p_has_skeleton;
+	return skinned;
+}
+
+/* MESH INSTANCE */
+
+// Creates a MeshInstance for the mesh p_base, adds one instance surface per
+// existing mesh surface, registers the instance in the mesh's instance list and
+// returns its RID. Returns an empty RID (with an error) when p_base is not a mesh.
+RID RendererStorageRD::mesh_instance_create(RID p_base) {
+	Mesh *mesh = mesh_owner.getornull(p_base);
+	ERR_FAIL_COND_V(!mesh, RID());
+
+	MeshInstance *instance = memnew(MeshInstance);
+	instance->mesh = mesh;
+
+	// Mirror every surface the base mesh already has.
+	uint32_t surface_idx = 0;
+	while (surface_idx < mesh->surface_count) {
+		_mesh_instance_add_surface(instance, mesh, surface_idx);
+		surface_idx++;
+	}
+
+	// Keep the list element so the instance can later be located in the list.
+	instance->I = mesh->instances.push_back(instance);
+	instance->dirty = true;
+
+	return mesh_instance_owner.make_rid(instance);
+}
+// Assigns p_skeleton to the mesh instance. When the skeleton actually changes,
+// the cached skeleton version is reset to 0 and the instance is marked dirty.
+// Fails with an error when p_mesh_instance does not resolve to an instance.
+void RendererStorageRD::mesh_instance_set_skeleton(RID p_mesh_instance, RID p_skeleton) {
+	MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance);
+	// The lookup can come back null (the sibling functions check for it);
+	// bail out instead of dereferencing a null pointer.
+	ERR_FAIL_COND(!mi);
+	if (mi->skeleton == p_skeleton) {
+		return; // Same skeleton: nothing to invalidate.
+	}
+	mi->skeleton = p_skeleton;
+	mi->skeleton_version = 0;
+	mi->dirty = true;
 }
 
-void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surface *s, uint32_t p_input_mask) {
-	uint32_t version = s->version_count;
-	s->version_count++;
-	s->versions = (Mesh::Surface::Version *)memrealloc(s->versions, sizeof(Mesh::Surface::Version) * s->version_count);
+void RendererStorageRD::mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int p_shape, float p_weight) { // Sets the weight of blend shape p_shape on the given mesh instance.
+	MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance);
+	ERR_FAIL_COND(!mi); // p_mesh_instance did not resolve to an instance.
+	ERR_FAIL_INDEX(p_shape, (int)mi->blend_weights.size()); // p_shape must index an existing weight.
+	mi->blend_weights[p_shape] = p_weight;
+	mi->weights_dirty = true;
+	// Only the CPU-side value changes here; weights_dirty flags it so the buffer is uploaded later.
+}
+
+// Releases everything the instance holds for its surfaces: the per-surface
+// vertex buffers, the vertex arrays of every cached version and the version
+// storage itself, plus the blend weights buffer. The surface list and the
+// CPU-side weights are emptied and the dirty/version bookkeeping is reset.
+void RendererStorageRD::_mesh_instance_clear(MeshInstance *mi) {
+	for (uint32_t i = 0; i < mi->surfaces.size(); i++) {
+		if (mi->surfaces[i].vertex_buffer.is_valid()) {
+			RD::get_singleton()->free(mi->surfaces[i].vertex_buffer);
+		}
+		if (mi->surfaces[i].versions) {
+			for (uint32_t j = 0; j < mi->surfaces[i].version_count; j++) {
+				RD::get_singleton()->free(mi->surfaces[i].versions[j].vertex_array);
+			}
+			memfree(mi->surfaces[i].versions);
+		}
+	}
+	mi->surfaces.clear();
+
+	if (mi->blend_weights_buffer.is_valid()) {
+		RD::get_singleton()->free(mi->blend_weights_buffer);
+		// Drop the stale handle. _mesh_instance_add_surface() recreates the
+		// buffer (and resizes blend_weights) only when this RID is null, so
+		// leaving the freed RID here would make later surfaces bind it.
+		mi->blend_weights_buffer = RID();
+	}
+	mi->blend_weights.clear();
+	mi->weights_dirty = false;
+	mi->skeleton_version = 0;
+}
+
+void RendererStorageRD::_mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface) { // Appends the per-instance state for surface p_surface of mesh to mi and marks mi dirty.
+	if (mesh->blend_shape_count > 0 && mi->blend_weights_buffer.is_null()) { // Weights buffer not created yet: create it once, sized to the mesh's blend shape count.
+		mi->blend_weights.resize(mesh->blend_shape_count);
+		for (uint32_t i = 0; i < mi->blend_weights.size(); i++) {
+			mi->blend_weights[i] = 0; // All weights start at zero.
+		}
+		mi->blend_weights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * mi->blend_weights.size(), mi->blend_weights.to_byte_array());
+		mi->weights_dirty = true;
+	}
 
-	Mesh::Surface::Version &v = s->versions[version];
+	MeshInstance::Surface s;
+	if (mesh->blend_shape_count > 0 || (mesh->surfaces[p_surface]->format & RS::ARRAY_FORMAT_BONES)) {
+		// The mesh has blend shapes or this surface has bones: give the instance its own vertex buffer, the same size as the mesh surface's, created without data and with the storage flag.
+		s.vertex_buffer = RD::get_singleton()->vertex_buffer_create(mesh->surfaces[p_surface]->vertex_buffer_size, Vector<uint8_t>(), true);
 
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.binding = 1; // Binding 1: the instance's own vertex buffer.
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			u.ids.push_back(s.vertex_buffer);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 2; // Binding 2: the blend weights, or the default storage buffer when there are none.
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			if (mi->blend_weights_buffer.is_valid()) {
+				u.ids.push_back(mi->blend_weights_buffer);
+			} else {
+				u.ids.push_back(default_rd_storage_buffer);
+			}
+			uniforms.push_back(u);
+		}
+		s.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_INSTANCE);
+	}
+
+	mi->surfaces.push_back(s); // A default (empty) surface entry is still pushed when no per-instance buffer was created.
+	mi->dirty = true;
+}
+
+// Queues the mesh instance for the next update_mesh_instances() pass when it
+// needs one: its arrays are dirty, its blend weights changed, or its skeleton's
+// version differs from the one last recorded on the instance.
+// Fails with an error when p_mesh_instance does not resolve to an instance.
+void RendererStorageRD::mesh_instance_check_for_update(RID p_mesh_instance) {
+	MeshInstance *mi = mesh_instance_owner.getornull(p_mesh_instance);
+	// The lookup can come back null (the sibling functions check for it);
+	// bail out instead of dereferencing a null pointer.
+	ERR_FAIL_COND(!mi);
+
+	bool needs_update = mi->dirty;
+
+	// Changed weights go on their own list so the buffer gets uploaded, and
+	// they also force the arrays to be recomputed.
+	if (mi->weights_dirty && !mi->weight_update_list.in_list()) {
+		dirty_mesh_instance_weights.add(&mi->weight_update_list);
+		needs_update = true;
+	}
+
+	if (mi->array_update_list.in_list()) {
+		return; // Already queued for an array update.
+	}
+
+	// Only consult the skeleton when nothing else already requires an update.
+	if (!needs_update && mi->skeleton.is_valid()) {
+		Skeleton *sk = skeleton_owner.getornull(mi->skeleton);
+		if (sk && sk->version != mi->skeleton_version) {
+			needs_update = true;
+		}
+	}
+
+	if (needs_update) {
+		dirty_mesh_instance_arrays.add(&mi->array_update_list);
+	}
+}
+
+void RendererStorageRD::update_mesh_instances() { // Uploads pending blend weights, then records one compute dispatch per surface of every queued mesh instance.
+	while (dirty_mesh_instance_weights.first()) { // Drain the weights list, uploading each instance's weights to its buffer.
+		MeshInstance *mi = dirty_mesh_instance_weights.first()->self();
+
+		if (mi->blend_weights_buffer.is_valid()) {
+			RD::get_singleton()->buffer_update(mi->blend_weights_buffer, 0, mi->blend_weights.size() * sizeof(float), mi->blend_weights.ptr(), true);
+		}
+		dirty_mesh_instance_weights.remove(&mi->weight_update_list);
+		mi->weights_dirty = false;
+	}
+	if (dirty_mesh_instance_arrays.first() == nullptr) {
+		return; // Nothing to do: no instance is queued for an array update.
+	}
+
+	// Process skeletons and blend shapes inside a single compute list.
+	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
+
+	while (dirty_mesh_instance_arrays.first()) {
+		MeshInstance *mi = dirty_mesh_instance_arrays.first()->self();
+
+		Skeleton *sk = skeleton_owner.getornull(mi->skeleton); // May be null; handled below.
+
+		for (uint32_t i = 0; i < mi->surfaces.size(); i++) {
+			if (mi->surfaces[i].uniform_set == RID() || mi->mesh->surfaces[i]->uniform_set == RID()) {
+				continue; // Either side has no uniform set, so this surface is not processed.
+			}
+
+			bool array_is_2d = mi->mesh->surfaces[i]->format & RS::ARRAY_FLAG_USE_2D_VERTICES;
+
+			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, skeleton_shader.pipeline[array_is_2d ? SkeletonShader::SHADER_MODE_2D : SkeletonShader::SHADER_MODE_3D]);
+
+			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mi->surfaces[i].uniform_set, SkeletonShader::UNIFORM_SET_INSTANCE);
+			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mi->mesh->surfaces[i]->uniform_set, SkeletonShader::UNIFORM_SET_SURFACE);
+			if (sk && sk->uniform_set_mi.is_valid()) {
+				RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sk->uniform_set_mi, SkeletonShader::UNIFORM_SET_SKELETON);
+			} else { // No usable skeleton uniform set: bind the default one.
+				RD::get_singleton()->compute_list_bind_uniform_set(compute_list, skeleton_shader.default_skeleton_uniform_set, SkeletonShader::UNIFORM_SET_SKELETON);
+			}
+
+			SkeletonShader::PushConstant push_constant;
+
+			push_constant.has_normal = mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_NORMAL;
+			push_constant.has_tangent = mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_TANGENT;
+			push_constant.has_skeleton = sk != nullptr && sk->use_2d == array_is_2d && (mi->mesh->surfaces[i]->format & RS::ARRAY_FORMAT_BONES); // Requires the skeleton and the surface to agree on 2D vs 3D.
+			push_constant.has_blend_shape = mi->mesh->blend_shape_count > 0;
+
+			push_constant.vertex_count = mi->mesh->surfaces[i]->vertex_count;
+			push_constant.vertex_stride = (mi->mesh->surfaces[i]->vertex_buffer_size / mi->mesh->surfaces[i]->vertex_count) / 4; // Bytes per vertex divided by 4 - presumably a stride in 32-bit words; confirm against the shader.
+			push_constant.skin_stride = (mi->mesh->surfaces[i]->skin_buffer_size / mi->mesh->surfaces[i]->vertex_count) / 4; // Same computation for the skin buffer.
+			push_constant.skin_weight_offset = (mi->mesh->surfaces[i]->format & RS::ARRAY_FLAG_USE_8_BONE_WEIGHTS) ? 4 : 2; // NOTE(review): units not visible here; confirm against the shader.
+
+			push_constant.blend_shape_count = mi->mesh->blend_shape_count;
+			push_constant.normalized_blend_shapes = mi->mesh->blend_shape_mode == RS::BLEND_SHAPE_MODE_NORMALIZED;
+			push_constant.pad0 = 0;
+			push_constant.pad1 = 0;
+
+			RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SkeletonShader::PushConstant));
+
+			// Dispatch without a barrier, so all of the work is done at the same time.
+			RD::get_singleton()->compute_list_dispatch_threads(compute_list, push_constant.vertex_count, 1, 1, 64, 1, 1);
+		}
+
+		mi->dirty = false;
+		if (sk) {
+			mi->skeleton_version = sk->version; // Record the version that was just processed.
+		}
+		dirty_mesh_instance_arrays.remove(&mi->array_update_list);
+	}
+
+	RD::get_singleton()->compute_list_end();
+}
+
+void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint32_t p_input_mask, MeshInstance::Surface *mis) {
 	Vector<RD::VertexAttribute> attributes;
 	Vector<RID> buffers;
 
@@ -2873,7 +3139,11 @@ void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surf
 						stride += sizeof(float) * 3;
 					}
 
-					buffer = s->vertex_buffer;
+					if (mis) {
+						buffer = mis->vertex_buffer;
+					} else {
+						buffer = s->vertex_buffer;
+					}
 
 				} break;
 				case RS::ARRAY_NORMAL: {
@@ -2882,14 +3152,22 @@ void RendererStorageRD::_mesh_surface_generate_version_for_input_mask(Mesh::Surf
 					vd.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32;
 
 					stride += sizeof(uint32_t);
-					buffer = s->vertex_buffer;
+					if (mis) {
+						buffer = mis->vertex_buffer;
+					} else {
+						buffer = s->vertex_buffer;
+					}
 				} break;
 				case RS::ARRAY_TANGENT: {
 					vd.offset = stride;
 
 					vd.format = RD::DATA_FORMAT_A2B10G10R10_UNORM_PACK32;
 					stride += sizeof(uint32_t);
-					buffer = s->vertex_buffer;
+					if (mis) {
+						buffer = mis->vertex_buffer;
+					} else {
+						buffer = s->vertex_buffer;
+					}
 				} break;
 				case RS::ARRAY_COLOR: {
 					vd.offset = attribute_stride;
@@ -4847,6 +5125,7 @@ void RendererStorageRD::skeleton_allocate(RID p_skeleton, int p_bones, bool p_2d
 		RD::get_singleton()->free(skeleton->buffer);
 		skeleton->buffer = RID();
 		skeleton->data.resize(0);
+		skeleton->uniform_set_mi = RID();
 	}
 
 	if (skeleton->size) {
@@ -4855,6 +5134,18 @@ void RendererStorageRD::skeleton_allocate(RID p_skeleton, int p_bones, bool p_2d
 		zeromem(skeleton->data.ptrw(), skeleton->data.size() * sizeof(float));
 
 		_skeleton_make_dirty(skeleton);
+
+		{
+			Vector<RD::Uniform> uniforms;
+			{
+				RD::Uniform u;
+				u.binding = 0;
+				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+				u.ids.push_back(skeleton->buffer);
+				uniforms.push_back(u);
+			}
+			skeleton->uniform_set_mi = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SKELETON);
+		}
 	}
 }
 
@@ -4977,6 +5268,7 @@ void RendererStorageRD::_update_dirty_skeletons() {
 		skeleton_dirty_list = skeleton->dirty_list;
 
 		skeleton->instance_dependency.instance_notify_changed(true, false);
+		skeleton->version++;
 
 		skeleton->dirty = false;
 		skeleton->dirty_list = nullptr;
@@ -7810,7 +8102,18 @@ bool RendererStorageRD::free(RID p_rid) {
 		mesh_clear(p_rid);
 		Mesh *mesh = mesh_owner.getornull(p_rid);
 		mesh->instance_dependency.instance_notify_deleted(p_rid);
+		if (mesh->instances.size()) {
+			ERR_PRINT("deleting mesh with active instances");
+		}
 		mesh_owner.free(p_rid);
+	} else if (mesh_instance_owner.owns(p_rid)) {
+		MeshInstance *mi = mesh_instance_owner.getornull(p_rid);
+		_mesh_instance_clear(mi);
+		mi->mesh->instances.erase(mi->I);
+		mi->I = nullptr;
+		mesh_instance_owner.free(p_rid);
+		memdelete(mi);
+
 	} else if (multimesh_owner.owns(p_rid)) {
 		_update_dirty_multimeshes();
 		multimesh_allocate(p_rid, 0, RS::MULTIMESH_TRANSFORM_2D);
@@ -8517,6 +8820,30 @@ RendererStorageRD::RendererStorageRD() {
 			rt_sdf.pipelines[i] = RD::get_singleton()->compute_pipeline_create(rt_sdf.shader.version_get_shader(rt_sdf.shader_version, i));
 		}
 	}
+	{
+		Vector<String> skeleton_modes;
+		skeleton_modes.push_back("\n#define MODE_2D\n");
+		skeleton_modes.push_back("");
+
+		skeleton_shader.shader.initialize(skeleton_modes);
+		skeleton_shader.version = skeleton_shader.shader.version_create();
+		for (int i = 0; i < SkeletonShader::SHADER_MODE_MAX; i++) {
+			skeleton_shader.version_shader[i] = skeleton_shader.shader.version_get_shader(skeleton_shader.version, i);
+			skeleton_shader.pipeline[i] = RD::get_singleton()->compute_pipeline_create(skeleton_shader.version_shader[i]);
+		}
+
+		{
+			Vector<RD::Uniform> uniforms;
+			{
+				RD::Uniform u;
+				u.binding = 0;
+				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+				u.ids.push_back(default_rd_storage_buffer);
+				uniforms.push_back(u);
+			}
+			skeleton_shader.default_skeleton_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, skeleton_shader.version_shader[0], SkeletonShader::UNIFORM_SET_SKELETON);
+		}
+	}
 }
 
 RendererStorageRD::~RendererStorageRD() {
@@ -8546,6 +8873,8 @@ RendererStorageRD::~RendererStorageRD() {
 	particles_shader.copy_shader.version_free(particles_shader.copy_shader_version);
 	rt_sdf.shader.version_free(rt_sdf.shader_version);
 
+	skeleton_shader.shader.version_free(skeleton_shader.version);
+
 	RenderingServer::get_singleton()->free(particles_shader.default_material);
 	RenderingServer::get_singleton()->free(particles_shader.default_shader);
 

+ 140 - 4
servers/rendering/renderer_rd/renderer_storage_rd.h

@@ -31,6 +31,8 @@
 #ifndef RENDERING_SERVER_STORAGE_RD_H
 #define RENDERING_SERVER_STORAGE_RD_H
 
+#include "core/templates/list.h"
+#include "core/templates/local_vector.h"
 #include "core/templates/rid_owner.h"
 #include "servers/rendering/renderer_compositor.h"
 #include "servers/rendering/renderer_rd/effects_rd.h"
@@ -39,9 +41,9 @@
 #include "servers/rendering/renderer_rd/shaders/giprobe_sdf.glsl.gen.h"
 #include "servers/rendering/renderer_rd/shaders/particles.glsl.gen.h"
 #include "servers/rendering/renderer_rd/shaders/particles_copy.glsl.gen.h"
+#include "servers/rendering/renderer_rd/shaders/skeleton.glsl.gen.h"
 #include "servers/rendering/renderer_scene_render.h"
 #include "servers/rendering/rendering_device.h"
-
 class RendererStorageRD : public RendererStorage {
 public:
 	static _FORCE_INLINE_ void store_transform(const Transform &p_mtx, float *p_array) {
@@ -377,6 +379,8 @@ private:
 
 	/* Mesh */
 
+	struct MeshInstance;
+
 	struct Mesh {
 		struct Surface {
 			RS::PrimitiveType primitive = RS::PRIMITIVE_POINTS;
@@ -386,6 +390,8 @@ private:
 			RID attribute_buffer;
 			RID skin_buffer;
 			uint32_t vertex_count = 0;
+			uint32_t vertex_buffer_size = 0;
+			uint32_t skin_buffer_size = 0;
 
 			// A different pipeline needs to be allocated
 			// depending on the inputs available in the
@@ -433,6 +439,8 @@ private:
 
 			uint32_t particles_render_index = 0;
 			uint64_t particles_render_pass = 0;
+
+			RID uniform_set;
 		};
 
 		uint32_t blend_shape_count = 0;
@@ -443,17 +451,90 @@ private:
 
 		Vector<AABB> bone_aabbs;
 
+		bool has_bone_weights = false;
+
 		AABB aabb;
 		AABB custom_aabb;
 
 		Vector<RID> material_cache;
 
+		List<MeshInstance *> instances;
+
 		RendererStorage::InstanceDependency instance_dependency;
 	};
 
 	mutable RID_Owner<Mesh> mesh_owner;
 
-	void _mesh_surface_generate_version_for_input_mask(Mesh::Surface *s, uint32_t p_input_mask);
+	struct MeshInstance { // storage-side state of one instanced mesh (the scene only creates it when skeleton/blend shapes exist)
+		Mesh *mesh = nullptr; // base mesh; free() unlinks this instance from mesh->instances through I
+		RID skeleton;
+		struct Surface {
+			RID vertex_buffer; // used in place of the mesh surface's vertex buffer for vertex/normal/tangent attributes
+			RID uniform_set;
+
+			Mesh::Surface::Version *versions = nullptr; //allocated on demand
+			uint32_t version_count = 0;
+		};
+		LocalVector<Surface> surfaces;
+		LocalVector<float> blend_weights;
+
+		RID blend_weights_buffer;
+		List<MeshInstance *>::Element *I = nullptr; //used to erase itself
+		uint64_t skeleton_version = 0; // Skeleton::version seen by the last array update
+		bool dirty = false; // cleared once the compute dispatches for this instance have been recorded
+		bool weights_dirty = false;
+		SelfList<MeshInstance> weight_update_list; // presumably the link in dirty_mesh_instance_weights - confirm against the .cpp
+		SelfList<MeshInstance> array_update_list; // link in dirty_mesh_instance_arrays
+		MeshInstance() :
+				weight_update_list(this), array_update_list(this) {}
+	};
+
+	void _mesh_instance_clear(MeshInstance *mi);
+	void _mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint32_t p_surface);
+
+	mutable RID_PtrOwner<MeshInstance> mesh_instance_owner;
+
+	SelfList<MeshInstance>::List dirty_mesh_instance_weights;
+	SelfList<MeshInstance>::List dirty_mesh_instance_arrays;
+
+	struct SkeletonShader {
+		struct PushConstant { // uploaded as the push constant of skeleton.glsl; fields mirror its `Params` block in order
+			uint32_t has_normal; // the has_* flags and normalized_blend_shapes are declared as `bool` on the shader side
+			uint32_t has_tangent;
+			uint32_t has_skeleton;
+			uint32_t has_blend_shape;
+
+			uint32_t vertex_count;
+			uint32_t vertex_stride; // in 32-bit words (per-vertex byte size / 4)
+			uint32_t skin_stride; // in 32-bit words (per-vertex byte size / 4)
+			uint32_t skin_weight_offset; // 4 with ARRAY_FLAG_USE_8_BONE_WEIGHTS, otherwise 2
+
+			uint32_t blend_shape_count;
+			uint32_t normalized_blend_shapes;
+			uint32_t pad0;
+			uint32_t pad1;
+		};
+
+		enum { // descriptor set indices; they correspond to the `set = N` qualifiers in skeleton.glsl
+			UNIFORM_SET_INSTANCE = 0,
+			UNIFORM_SET_SURFACE = 1,
+			UNIFORM_SET_SKELETON = 2,
+		};
+		enum { // indexes version_shader[] and pipeline[]; order follows the mode strings given to shader.initialize()
+			SHADER_MODE_2D,
+			SHADER_MODE_3D,
+			SHADER_MODE_MAX
+		};
+
+		SkeletonShaderRD shader;
+		RID version;
+		RID version_shader[SHADER_MODE_MAX];
+		RID pipeline[SHADER_MODE_MAX];
+
+		RID default_skeleton_uniform_set; // UNIFORM_SET_SKELETON set built around default_rd_storage_buffer in the constructor
+	} skeleton_shader;
+
+	void _mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint32_t p_input_mask, MeshInstance::Surface *mis = nullptr);
 
 	RID mesh_default_rd_buffers[DEFAULT_RD_BUFFER_MAX];
 
@@ -826,6 +907,9 @@ private:
 		Transform2D base_transform_2d;
 
 		RID uniform_set_3d;
+		RID uniform_set_mi;
+
+		uint64_t version = 1;
 
 		RendererStorage::InstanceDependency instance_dependency;
 	};
@@ -1280,6 +1364,8 @@ public:
 
 	virtual RID mesh_create();
 
+	virtual void mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count);
+
 	/// Return stride
 	virtual void mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface);
 
@@ -1304,6 +1390,16 @@ public:
 
 	virtual void mesh_clear(RID p_mesh);
 
+	virtual bool mesh_needs_instance(RID p_mesh, bool p_has_skeleton);
+
+	/* MESH INSTANCE */
+
+	virtual RID mesh_instance_create(RID p_base);
+	virtual void mesh_instance_set_skeleton(RID p_mesh_instance, RID p_skeleton);
+	virtual void mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int p_shape, float p_weight);
+	virtual void mesh_instance_check_for_update(RID p_mesh_instance);
+	virtual void update_mesh_instances();
+
 	_FORCE_INLINE_ const RID *mesh_get_surface_count_and_materials(RID p_mesh, uint32_t &r_surface_count) {
 		Mesh *mesh = mesh_owner.getornull(p_mesh);
 		ERR_FAIL_COND_V(!mesh, nullptr);
@@ -1353,9 +1449,11 @@ public:
 			return;
 		}
 
-		uint32_t version = s->version_count; //gets added at the end
+		uint32_t version = s->version_count;
+		s->version_count++;
+		s->versions = (Mesh::Surface::Version *)memrealloc(s->versions, sizeof(Mesh::Surface::Version) * s->version_count);
 
-		_mesh_surface_generate_version_for_input_mask(s, p_input_mask);
+		_mesh_surface_generate_version_for_input_mask(s->versions[version], s, p_input_mask);
 
 		r_vertex_format = s->versions[version].vertex_format;
 		r_vertex_array_rd = s->versions[version].vertex_array;
@@ -1363,6 +1461,44 @@ public:
 		s->version_lock.unlock();
 	}
 
+	// Returns, for one surface of a mesh instance, the vertex array and vertex format
+	// matching the requested input mask, generating and caching that version on first use.
+	// The index array is taken from the base mesh surface.
+	_FORCE_INLINE_ void mesh_instance_surface_get_arrays_and_format(RID p_mesh_instance, uint32_t p_surface_index, uint32_t p_input_mask, RID &r_vertex_array_rd, RID &r_index_array_rd, RD::VertexFormatID &r_vertex_format) {
+		MeshInstance *instance = mesh_instance_owner.getornull(p_mesh_instance);
+		ERR_FAIL_COND(!instance);
+		Mesh *base_mesh = instance->mesh;
+		ERR_FAIL_UNSIGNED_INDEX(p_surface_index, base_mesh->surface_count);
+
+		MeshInstance::Surface *instance_surface = &instance->surfaces[p_surface_index];
+		Mesh::Surface *base_surface = base_mesh->surfaces[p_surface_index];
+
+		r_index_array_rd = base_surface->index_array;
+
+		// Held while the version list is searched and, if needed, extended.
+		base_surface->version_lock.lock();
+
+		// Only a handful of versions are expected per surface, so a linear scan is used.
+		uint32_t version_index = 0;
+		while (version_index < instance_surface->version_count && instance_surface->versions[version_index].input_mask != p_input_mask) {
+			version_index++;
+		}
+
+		if (version_index == instance_surface->version_count) {
+			// No version for this input mask yet:
+			// grow the array by one slot and generate the new version in it.
+			instance_surface->version_count++;
+			instance_surface->versions = (Mesh::Surface::Version *)memrealloc(instance_surface->versions, sizeof(Mesh::Surface::Version) * instance_surface->version_count);
+			_mesh_surface_generate_version_for_input_mask(instance_surface->versions[version_index], base_surface, p_input_mask, instance_surface);
+		}
+
+		// version_index now refers to the cached or freshly generated version.
+		r_vertex_format = instance_surface->versions[version_index].vertex_format;
+		r_vertex_array_rd = instance_surface->versions[version_index].vertex_array;
+
+		base_surface->version_lock.unlock();
+	}
+
 	_FORCE_INLINE_ RID mesh_get_default_rd_buffer(DefaultRDBuffer p_buffer) {
 		ERR_FAIL_INDEX_V(p_buffer, DEFAULT_RD_BUFFER_MAX, RID());
 		return mesh_default_rd_buffers[p_buffer];

+ 1 - 0
servers/rendering/renderer_rd/shaders/SCsub

@@ -41,3 +41,4 @@ if "RD_GLSL" in env["BUILDERS"]:
     env.RD_GLSL("particles.glsl")
     env.RD_GLSL("particles_copy.glsl")
     env.RD_GLSL("sort.glsl")
+    env.RD_GLSL("skeleton.glsl")

+ 199 - 0
servers/rendering/renderer_rd/shaders/skeleton.glsl

@@ -0,0 +1,199 @@
+#[compute]
+
+#version 450
+
+VERSION_DEFINES
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; // one invocation per vertex, 64 per workgroup
+
+layout(set = 0, binding = 1, std430) buffer restrict writeonly DstVertexData { // output: transformed vertices, same per-vertex layout as SrcVertexData
+	uint data[];
+}
+dst_vertices;
+
+layout(set = 0, binding = 2, std430) buffer restrict readonly BlendShapeWeights { // one weight per blend shape, indexed by shape
+	float data[];
+}
+blend_shape_weights;
+
+layout(set = 1, binding = 0, std430) buffer restrict readonly SrcVertexData { // per vertex: 3 float words of position, then optional packed normal and packed tangent
+	uint data[];
+}
+src_vertices;
+
+layout(set = 1, binding = 1, std430) buffer restrict readonly BoneWeightData { // per vertex: 16-bit bone indices packed two per word, then unorm16 weights packed two per word
+	uint data[];
+}
+src_bone_weights;
+
+layout(set = 1, binding = 2, std430) buffer restrict readonly BlendShapeData { // vertex data of every blend shape, addressed as (vertex_count * shape + vertex) * vertex_stride
+	uint data[];
+}
+src_blend_shapes;
+
+layout(set = 2, binding = 0, std430) buffer restrict readonly SkeletonData { // three vec4 per bone, read at bone index * 3
+	vec4 data[];
+}
+bone_transforms;
+
+layout(push_constant, binding = 0, std430) uniform Params { // filled from SkeletonShader::PushConstant on the C++ side
+	bool has_normal;
+	bool has_tangent;
+	bool has_skeleton;
+	bool has_blend_shape;
+
+	uint vertex_count;
+	uint vertex_stride; // in 32-bit words
+	uint skin_stride; // in 32-bit words
+	uint skin_weight_offset; // word offset from a vertex's bone indices to its weights; 4 selects the 8-weight path in main()
+
+	uint blend_shape_count;
+	bool normalized_blend_shapes;
+	uint pad0;
+	uint pad1;
+}
+params;
+
+vec4 decode_abgr_2_10_10_10(uint base) { // unpacks 10:10:10:2 bits (x in the lowest bits) and remaps each channel from [0, 1] to [-1, 1]
+	uvec4 channels = uvec4(base, base >> 10, base >> 20, base >> 30) & uvec4(0x3FF, 0x3FF, 0x3FF, 0x3);
+	return (vec4(channels) / vec4(1023.0, 1023.0, 1023.0, 3.0)) * 2.0 - 1.0;
+}
+
+uint encode_abgr_2_10_10_10(vec4 base) { // remaps each channel from [-1, 1] to [0, 1], quantizes to 10:10:10:2 bits (clamped) and packs x into the lowest bits
+	ivec4 quantized = clamp(ivec4((base * 0.5 + 0.5) * vec4(1023.0, 1023.0, 1023.0, 3.0)), ivec4(0), ivec4(0x3FF, 0x3FF, 0x3FF, 0x3));
+	return uint(quantized.x) | (uint(quantized.y) << 10) | (uint(quantized.z) << 20) | (uint(quantized.w) << 30);
+}
+
+void main() { // one invocation per vertex: apply blend shapes, then skinning, and write the result to dst_vertices
+	uint index = gl_GlobalInvocationID.x;
+	if (index >= params.vertex_count) { // surplus invocations of the last 64-wide group have no vertex to process
+		return;
+	}
+
+	uint src_offset = index * params.vertex_stride;
+
+#ifdef MODE_2D
+	vec2 vertex = uintBitsToFloat(uvec2(src_vertices.data[src_offset + 0], src_vertices.data[src_offset + 1])); // NOTE: the 2D variant only loads the position; nothing is written back
+#else
+	vec3 vertex;
+	vec3 normal;
+	vec4 tangent;
+
+	vertex = uintBitsToFloat(uvec3(src_vertices.data[src_offset + 0], src_vertices.data[src_offset + 1], src_vertices.data[src_offset + 2]));
+
+	src_offset += 3;
+
+	if (params.has_normal) {
+		normal = decode_abgr_2_10_10_10(src_vertices.data[src_offset]).rgb;
+		src_offset++;
+	}
+
+	if (params.has_tangent) {
+		tangent = decode_abgr_2_10_10_10(src_vertices.data[src_offset]);
+	}
+
+	if (params.has_blend_shape) {
+		float blend_total = 0.0;
+		vec3 blend_vertex = vec3(0.0);
+		vec3 blend_normal = vec3(0.0);
+		vec3 blend_tangent = vec3(0.0);
+
+		for (uint i = 0; i < params.blend_shape_count; i++) {
+			float w = blend_shape_weights.data[i];
+			if (w > 0.0001) { // shapes with a zero, near-zero or negative weight are skipped
+				uint base_offset = (params.vertex_count * i + index) * params.vertex_stride;
+
+				blend_vertex += uintBitsToFloat(uvec3(src_blend_shapes.data[base_offset + 0], src_blend_shapes.data[base_offset + 1], src_blend_shapes.data[base_offset + 2])) * w;
+
+				base_offset += 3;
+
+				if (params.has_normal) {
+					blend_normal += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb * w;
+					base_offset++;
+				}
+
+				if (params.has_tangent) {
+					blend_tangent += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb * w; // weighted like the vertex and normal contributions
+				}
+
+				blend_total += w;
+			}
+		}
+
+		if (params.normalized_blend_shapes) { // the base mesh contributes whatever weight the shapes leave over
+			vertex = (1.0 - blend_total) * vertex;
+			normal = (1.0 - blend_total) * normal;
+			tangent.rgb = (1.0 - blend_total) * tangent.rgb;
+		}
+
+		vertex += blend_vertex;
+		normal = normalize(normal + blend_normal); // replace, not accumulate: the base normal is already part of the sum
+		tangent.rgb = normalize(tangent.rgb + blend_tangent); // tangent.a is left untouched
+	}
+
+	if (params.has_skeleton) {
+		uint skin_offset = params.skin_stride * index;
+
+		uvec2 bones = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]);
+		uvec2 bones_01 = uvec2(bones.x & 0xFFFF, bones.x >> 16) * 3; //pre-add xform offset
+		uvec2 bones_23 = uvec2(bones.y & 0xFFFF, bones.y >> 16) * 3;
+
+		skin_offset += params.skin_weight_offset;
+
+		uvec2 weights = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]);
+
+		vec2 weights_01 = unpackUnorm2x16(weights.x);
+		vec2 weights_23 = unpackUnorm2x16(weights.y);
+
+		mat4 m = mat4(bone_transforms.data[bones_01.x], bone_transforms.data[bones_01.x + 1], bone_transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x;
+		m += mat4(bone_transforms.data[bones_01.y], bone_transforms.data[bones_01.y + 1], bone_transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y;
+		m += mat4(bone_transforms.data[bones_23.x], bone_transforms.data[bones_23.x + 1], bone_transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x;
+		m += mat4(bone_transforms.data[bones_23.y], bone_transforms.data[bones_23.y + 1], bone_transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y;
+
+		if (params.skin_weight_offset == 4) {
+			//using 8 bones/weights
+			skin_offset = params.skin_stride * index + 2; // bones 4-7 sit two words after bones 0-3
+
+			bones = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]);
+			bones_01 = uvec2(bones.x & 0xFFFF, bones.x >> 16) * 3; //pre-add xform offset
+			bones_23 = uvec2(bones.y & 0xFFFF, bones.y >> 16) * 3;
+
+			skin_offset += params.skin_weight_offset;
+
+			weights = uvec2(src_bone_weights.data[skin_offset + 0], src_bone_weights.data[skin_offset + 1]);
+
+			weights_01 = unpackUnorm2x16(weights.x);
+			weights_23 = unpackUnorm2x16(weights.y);
+
+			m += mat4(bone_transforms.data[bones_01.x], bone_transforms.data[bones_01.x + 1], bone_transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x;
+			m += mat4(bone_transforms.data[bones_01.y], bone_transforms.data[bones_01.y + 1], bone_transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y;
+			m += mat4(bone_transforms.data[bones_23.x], bone_transforms.data[bones_23.x + 1], bone_transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x;
+			m += mat4(bone_transforms.data[bones_23.y], bone_transforms.data[bones_23.y + 1], bone_transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y;
+		}
+
+		//reverse order because it's transposed
+		vertex = (vec4(vertex, 1.0) * m).xyz;
+		normal = normalize((vec4(normal, 0.0) * m).xyz);
+		tangent.xyz = normalize((vec4(tangent.xyz, 0.0) * m).xyz);
+	}
+
+	uint dst_offset = index * params.vertex_stride; // output uses the same per-vertex layout as the source
+
+	uvec3 uvertex = floatBitsToUint(vertex);
+	dst_vertices.data[dst_offset + 0] = uvertex.x;
+	dst_vertices.data[dst_offset + 1] = uvertex.y;
+	dst_vertices.data[dst_offset + 2] = uvertex.z;
+
+	dst_offset += 3;
+
+	if (params.has_normal) {
+		dst_vertices.data[dst_offset] = encode_abgr_2_10_10_10(vec4(normal, 0.0));
+		dst_offset++;
+	}
+
+	if (params.has_tangent) {
+		dst_vertices.data[dst_offset] = encode_abgr_2_10_10_10(tangent);
+	}
+
+#endif
+}

+ 66 - 13
servers/rendering/renderer_scene_cull.cpp

@@ -370,6 +370,22 @@ RID RendererSceneCull::instance_create() {
 	return instance_rid;
 }
 
+// Creates or frees the storage-side mesh instance so that it exists exactly when storage asks for one, then (re)assigns the skeleton.
+void RendererSceneCull::_instance_update_mesh_instance(Instance *p_instance) {
+	const bool wanted = RSG::storage->mesh_needs_instance(p_instance->base, p_instance->skeleton.is_valid());
+	const bool present = p_instance->mesh_instance.is_valid();
+	if (wanted && !present) {
+		p_instance->mesh_instance = RSG::storage->mesh_instance_create(p_instance->base);
+	} else if (!wanted && present) {
+		RSG::storage->free(p_instance->mesh_instance);
+		p_instance->mesh_instance = RID();
+	}
+
+	if (p_instance->mesh_instance.is_valid()) { // re-checked: the RID may have just been created or cleared above
+		RSG::storage->mesh_instance_set_skeleton(p_instance->mesh_instance, p_instance->skeleton);
+	}
+}
+
 void RendererSceneCull::instance_set_base(RID p_instance, RID p_base) {
 	Instance *instance = instance_owner.getornull(p_instance);
 	ERR_FAIL_COND(!instance);
@@ -384,6 +400,11 @@ void RendererSceneCull::instance_set_base(RID p_instance, RID p_base) {
 			instance->octree_id = 0;
 		}
 
+		if (instance->mesh_instance.is_valid()) {
+			RSG::storage->free(instance->mesh_instance);
+			instance->mesh_instance = RID();
+		}
+
 		switch (instance->base_type) {
 			case RS::INSTANCE_LIGHT: {
 				InstanceLightData *light = static_cast<InstanceLightData *>(instance->base_data);
@@ -450,7 +471,6 @@ void RendererSceneCull::instance_set_base(RID p_instance, RID p_base) {
 			instance->base_data = nullptr;
 		}
 
-		instance->blend_values.clear();
 		instance->materials.clear();
 	}
 
@@ -479,9 +499,7 @@ void RendererSceneCull::instance_set_base(RID p_instance, RID p_base) {
 			case RS::INSTANCE_PARTICLES: {
 				InstanceGeometryData *geom = memnew(InstanceGeometryData);
 				instance->base_data = geom;
-				if (instance->base_type == RS::INSTANCE_MESH) {
-					instance->blend_values.resize(RSG::storage->mesh_get_blend_shape_count(p_base));
-				}
+
 			} break;
 			case RS::INSTANCE_REFLECTION_PROBE: {
 				InstanceReflectionProbeData *reflection_probe = memnew(InstanceReflectionProbeData);
@@ -520,6 +538,10 @@ void RendererSceneCull::instance_set_base(RID p_instance, RID p_base) {
 
 		instance->base = p_base;
 
+		if (instance->base_type == RS::INSTANCE_MESH) {
+			_instance_update_mesh_instance(instance);
+		}
+
 		//forcefully update the dependency now, so if for some reason it gets removed, we can immediately clear it
 		RSG::storage->base_update_dependency(p_base, instance);
 	}
@@ -662,8 +684,9 @@ void RendererSceneCull::instance_set_blend_shape_weight(RID p_instance, int p_sh
 		_update_dirty_instance(instance);
 	}
 
-	ERR_FAIL_INDEX(p_shape, instance->blend_values.size());
-	instance->blend_values.write[p_shape] = p_weight;
+	if (instance->mesh_instance.is_valid()) {
+		RSG::storage->mesh_instance_set_blend_shape_weight(instance->mesh_instance, p_shape, p_weight);
+	}
 }
 
 void RendererSceneCull::instance_set_surface_material(RID p_instance, int p_surface, RID p_material) {
@@ -777,6 +800,9 @@ void RendererSceneCull::instance_attach_skeleton(RID p_instance, RID p_skeleton)
 		//update the dependency now, so if cleared, we remove it
 		RSG::storage->skeleton_update_dependency(p_skeleton, instance);
 	}
+
+	_instance_update_mesh_instance(instance);
+
 	_instance_queue_update(instance, true, true);
 }
 
@@ -1571,6 +1597,10 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
 					if (j == 0 || max > cull_max) {
 						cull_max = max;
 					}
+
+					if (instance->mesh_instance.is_valid()) {
+						RSG::storage->mesh_instance_check_for_update(instance->mesh_instance);
+					}
 				}
 
 				if (cull_max > z_max) {
@@ -1671,6 +1701,8 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
 					scene_render->light_instance_set_shadow_transform(light->instance, ortho_camera, ortho_transform, z_max - z_min_cam, distances[i + 1], i, radius * 2.0 / texture_size, bias_scale * aspect_bias_scale * min_distance_bias_scale, z_max, uv_scale);
 				}
 
+				RSG::storage->update_mesh_instances();
+
 				scene_render->render_shadow(light->instance, p_shadow_atlas, i, (RendererSceneRender::InstanceBase **)instance_shadow_cull_result, cull_count);
 			}
 
@@ -1711,9 +1743,15 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
 
 							instance->depth = near_plane.distance_to(instance->transform.origin);
 							instance->depth_layer = 0;
+
+							if (instance->mesh_instance.is_valid()) {
+								RSG::storage->mesh_instance_check_for_update(instance->mesh_instance);
+							}
 						}
 					}
 
+					RSG::storage->update_mesh_instances();
+
 					scene_render->light_instance_set_shadow_transform(light->instance, CameraMatrix(), light_transform, radius, 0, i, 0);
 					scene_render->render_shadow(light->instance, p_shadow_atlas, i, (RendererSceneRender::InstanceBase **)instance_shadow_cull_result, cull_count);
 				}
@@ -1763,9 +1801,13 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
 							}
 							instance->depth = near_plane.distance_to(instance->transform.origin);
 							instance->depth_layer = 0;
+							if (instance->mesh_instance.is_valid()) {
+								RSG::storage->mesh_instance_check_for_update(instance->mesh_instance);
+							}
 						}
 					}
 
+					RSG::storage->update_mesh_instances();
 					scene_render->light_instance_set_shadow_transform(light->instance, cm, xform, radius, 0, i, 0);
 					scene_render->render_shadow(light->instance, p_shadow_atlas, i, (RendererSceneRender::InstanceBase **)instance_shadow_cull_result, cull_count);
 				}
@@ -1800,9 +1842,15 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
 					}
 					instance->depth = near_plane.distance_to(instance->transform.origin);
 					instance->depth_layer = 0;
+
+					if (instance->mesh_instance.is_valid()) {
+						RSG::storage->mesh_instance_check_for_update(instance->mesh_instance);
+					}
 				}
 			}
 
+			RSG::storage->update_mesh_instances();
+
 			scene_render->light_instance_set_shadow_transform(light->instance, cm, light_transform, radius, 0, 0, 0);
 			scene_render->render_shadow(light->instance, p_shadow_atlas, 0, (RendererSceneRender::InstanceBase **)instance_shadow_cull_result, cull_count);
 
@@ -2143,6 +2191,10 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 				}
 			}
 
+			if (ins->mesh_instance.is_valid()) {
+				RSG::storage->mesh_instance_check_for_update(ins->mesh_instance);
+			}
+
 			ins->depth = near_plane.distance_to(ins->transform.origin);
 			ins->depth_layer = CLAMP(int(ins->depth * 16 / z_far), 0, 15);
 		}
@@ -2159,6 +2211,8 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 		ins->last_frame_pass = frame_number;
 	}
 
+	RSG::storage->update_mesh_instances();
+
 	/* STEP 5 - PROCESS LIGHTS */
 
 	RID *directional_light_ptr = &light_instance_cull_result[light_cull_count];
@@ -2344,6 +2398,9 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 				} else if ((1 << ins->base_type) & RS::INSTANCE_GEOMETRY_MASK) {
 					if (ins->baked_light) {
 						keep = true;
+						if (ins->mesh_instance.is_valid()) {
+							RSG::storage->mesh_instance_check_for_update(ins->mesh_instance);
+						}
 					}
 				}
 
@@ -2355,6 +2412,8 @@ void RendererSceneCull::_prepare_scene(const Transform p_cam_transform, const Ca
 				}
 			}
 
+			RSG::storage->update_mesh_instances();
+
 			scene_render->render_sdfgi(p_render_buffers, i, (RendererSceneRender::InstanceBase **)instance_shadow_cull_result, sdfgi_cull_count);
 			//have to save updated cascades, then update static lights.
 		}
@@ -2795,13 +2854,7 @@ void RendererSceneCull::_update_dirty_instance(Instance *p_instance) {
 			int new_mat_count = RSG::storage->mesh_get_surface_count(p_instance->base);
 			p_instance->materials.resize(new_mat_count);
 
-			int new_blend_shape_count = RSG::storage->mesh_get_blend_shape_count(p_instance->base);
-			if (new_blend_shape_count != p_instance->blend_values.size()) {
-				p_instance->blend_values.resize(new_blend_shape_count);
-				for (int i = 0; i < new_blend_shape_count; i++) {
-					p_instance->blend_values.write[i] = 0;
-				}
-			}
+			_instance_update_mesh_instance(p_instance);
 		}
 
 		if ((1 << p_instance->base_type) & RS::INSTANCE_GEOMETRY_MASK) {

+ 2 - 0
servers/rendering/renderer_scene_cull.h

@@ -138,6 +138,8 @@ public:
 	static void *_instance_pair(void *p_self, OctreeElementID, Instance *p_A, int, OctreeElementID, Instance *p_B, int);
 	static void _instance_unpair(void *p_self, OctreeElementID, Instance *p_A, int, OctreeElementID, Instance *p_B, int, void *);
 
+	static void _instance_update_mesh_instance(Instance *p_instance);
+
 	virtual RID scenario_create();
 
 	virtual void scenario_set_debug(RID p_scenario, RS::ScenarioDebugMode p_debug_mode);

+ 1 - 3
servers/rendering/renderer_scene_render.h

@@ -135,7 +135,7 @@ public:
 		RID skeleton;
 		RID material_override;
 
-		RID instance_data;
+		RID mesh_instance; //only used for meshes and when skeleton/blendshapes exist
 
 		Transform transform;
 
@@ -149,8 +149,6 @@ public:
 		Vector<RID> reflection_probe_instances;
 		Vector<RID> gi_probe_instances;
 
-		Vector<float> blend_values;
-
 		RS::ShadowCastingSetting cast_shadows;
 
 		//fit in 32 bits

+ 12 - 0
servers/rendering/renderer_storage.h

@@ -187,6 +187,8 @@ public:
 
 	virtual RID mesh_create() = 0;
 
+	virtual void mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count) = 0;
+
 	/// Returns stride
 	virtual void mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) = 0;
 
@@ -211,6 +213,16 @@ public:
 
 	virtual void mesh_clear(RID p_mesh) = 0;
 
+	virtual bool mesh_needs_instance(RID p_mesh, bool p_has_skeleton) = 0;
+
+	/* MESH INSTANCE */
+
+	virtual RID mesh_instance_create(RID p_base) = 0;
+	virtual void mesh_instance_set_skeleton(RID p_mesh_instance, RID p_skeleton) = 0;
+	virtual void mesh_instance_set_blend_shape_weight(RID p_mesh_instance, int p_shape, float p_weight) = 0;
+	virtual void mesh_instance_check_for_update(RID p_mesh_instance) = 0;
+	virtual void update_mesh_instances() = 0;
+
 	/* MULTIMESH API */
 
 	virtual RID multimesh_create() = 0;

+ 1 - 1
servers/rendering/rendering_device.cpp

@@ -270,7 +270,7 @@ void RenderingDevice::_bind_methods() {
 
 	ClassDB::bind_method(D_METHOD("sampler_create", "state"), &RenderingDevice::_sampler_create);
 
-	ClassDB::bind_method(D_METHOD("vertex_buffer_create", "size_bytes", "data"), &RenderingDevice::vertex_buffer_create, DEFVAL(Vector<uint8_t>()));
+	ClassDB::bind_method(D_METHOD("vertex_buffer_create", "size_bytes", "data", "use_as_storage"), &RenderingDevice::vertex_buffer_create, DEFVAL(Vector<uint8_t>()), DEFVAL(false));
 	ClassDB::bind_method(D_METHOD("vertex_format_create", "vertex_descriptions"), &RenderingDevice::_vertex_format_create);
 
 	ClassDB::bind_method(D_METHOD("index_buffer_create", "size_indices", "format", "data"), &RenderingDevice::index_buffer_create, DEFVAL(Vector<uint8_t>()), DEFVAL(false));

+ 1 - 1
servers/rendering/rendering_device.h

@@ -564,7 +564,7 @@ public:
 			frequency = VERTEX_FREQUENCY_VERTEX;
 		}
 	};
-	virtual RID vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>()) = 0;
+	virtual RID vertex_buffer_create(uint32_t p_size_bytes, const Vector<uint8_t> &p_data = Vector<uint8_t>(), bool p_use_as_storage = false) = 0;
 
 	typedef int64_t VertexFormatID;
 

+ 4 - 1
servers/rendering/rendering_server_default.h

@@ -231,14 +231,17 @@ public:
 
 	/* MESH API */
 
-	virtual RID mesh_create_from_surfaces(const Vector<SurfaceData> &p_surfaces) {
+	virtual RID mesh_create_from_surfaces(const Vector<SurfaceData> &p_surfaces, int p_blend_shape_count = 0) {
 		RID mesh = mesh_create();
+		mesh_set_blend_shape_count(mesh, p_blend_shape_count);
 		for (int i = 0; i < p_surfaces.size(); i++) {
 			mesh_add_surface(mesh, p_surfaces[i]);
 		}
 		return mesh;
 	}
 
+	BIND2(mesh_set_blend_shape_count, RID, int)
+
 	BIND0R(RID, mesh_create)
 
 	BIND2(mesh_add_surface, RID, const SurfaceData &)

+ 4 - 2
servers/rendering/rendering_server_wrap_mt.h

@@ -143,10 +143,12 @@ public:
 
 	/* MESH API */
 
-	virtual RID mesh_create_from_surfaces(const Vector<SurfaceData> &p_surfaces) {
-		return rendering_server->mesh_create_from_surfaces(p_surfaces);
+	virtual RID mesh_create_from_surfaces(const Vector<SurfaceData> &p_surfaces, int p_blend_shape_count = 0) {
+		return rendering_server->mesh_create_from_surfaces(p_surfaces, p_blend_shape_count);
 	}
 
+	FUNC2(mesh_set_blend_shape_count, RID, int)
+
 	FUNCRID(mesh)
 
 	FUNC2(mesh_add_surface, RID, const SurfaceData &)

+ 8 - 7
servers/rendering_server.cpp

@@ -620,6 +620,8 @@ Error RenderingServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint
 
 		r_bone_aabb.resize(total_bones);
 
+		int weight_count = (p_format & ARRAY_FLAG_USE_8_BONE_WEIGHTS) ? 8 : 4;
+
 		if (first) {
 			for (int i = 0; i < total_bones; i++) {
 				r_bone_aabb.write[i].size = Vector3(-1, -1, -1); //negative means unused
@@ -632,7 +634,7 @@ Error RenderingServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint
 
 		bool any_valid = false;
 
-		if (vertices.size() && bones.size() == vertices.size() * 4 && weights.size() == bones.size()) {
+		if (vertices.size() && bones.size() == vertices.size() * weight_count && weights.size() == bones.size()) {
 			int vs = vertices.size();
 			const Vector3 *rv = vertices.ptr();
 			const int *rb = bones.ptr();
@@ -642,9 +644,9 @@ Error RenderingServer::_surface_set_data(Array p_arrays, uint32_t p_format, uint
 
 			for (int i = 0; i < vs; i++) {
 				Vector3 v = rv[i];
-				for (int j = 0; j < 4; j++) {
-					int idx = rb[i * 4 + j];
-					float w = rw[i * 4 + j];
+				for (int j = 0; j < weight_count; j++) {
+					int idx = rb[i * weight_count + j];
+					float w = rw[i * weight_count + j];
 					if (w == 0) {
 						continue; //break;
 					}
@@ -992,7 +994,6 @@ Error RenderingServer::mesh_create_surface_data_from_arrays(SurfaceData *r_surfa
 	surface_data.vertex_count = array_len;
 	surface_data.index_data = index_array;
 	surface_data.index_count = index_array_len;
-	surface_data.blend_shape_count = blend_shape_count;
 	surface_data.blend_shape_data = blend_shape_data;
 	surface_data.bone_aabbs = bone_aabb;
 	surface_data.lods = lods;
@@ -1311,10 +1312,10 @@ Array RenderingServer::mesh_surface_get_blend_shape_arrays(RID p_mesh, int p_sur
 
 		uint32_t blend_shape_count = blend_shape_data.size() / divisor;
 
-		ERR_FAIL_COND_V(blend_shape_count != sd.blend_shape_count, Array());
+		ERR_FAIL_COND_V(blend_shape_count != (uint32_t)mesh_get_blend_shape_count(p_mesh), Array());
 
 		Array blend_shape_array;
-		blend_shape_array.resize(blend_shape_count);
+		blend_shape_array.resize(mesh_get_blend_shape_count(p_mesh));
 		for (uint32_t i = 0; i < blend_shape_count; i++) {
 			Vector<uint8_t> bs_data = blend_shape_data.subarray(i * divisor, (i + 1) * divisor - 1);
 			Vector<uint8_t> unused;

+ 3 - 3
servers/rendering_server.h

@@ -282,8 +282,6 @@ public:
 		Vector<uint8_t> index_data;
 		uint32_t index_count = 0;
 
-		uint32_t blend_shape_count = 0;
-
 		AABB aabb;
 		struct LOD {
 			float edge_length;
@@ -297,9 +295,11 @@ public:
 		RID material;
 	};
 
-	virtual RID mesh_create_from_surfaces(const Vector<SurfaceData> &p_surfaces) = 0;
+	virtual RID mesh_create_from_surfaces(const Vector<SurfaceData> &p_surfaces, int p_blend_shape_count = 0) = 0; // The blend shape count is supplied once per mesh here; it defaults to 0, so existing one-argument callers still compile.
 	virtual RID mesh_create() = 0;
 
+	virtual void mesh_set_blend_shape_count(RID p_mesh, int p_blend_shape_count) = 0; // Per-mesh blend shape count setter; takes over from the SurfaceData::blend_shape_count field removed in this change.
+
 	virtual uint32_t mesh_surface_get_format_offset(uint32_t p_format, int p_vertex_len, int p_array_index) const;
 	virtual uint32_t mesh_surface_get_format_vertex_stride(uint32_t p_format, int p_vertex_len) const;
 	virtual uint32_t mesh_surface_get_format_attribute_stride(uint32_t p_format, int p_vertex_len) const;