Browse Source

Merge pull request #51363 from parulina/gles2-blendshapes-perf

Rémi Verschelde 4 years ago
parent
commit
33c9dd8845

+ 12 - 0
drivers/dummy/rasterizer_dummy.h

@@ -146,6 +146,7 @@ public:
 		Vector<DummySurface> surfaces;
 		int blend_shape_count;
 		VS::BlendShapeMode blend_shape_mode;
+		PoolRealArray blend_shape_values;
 	};
 
 	mutable RID_Owner<DummyTexture> texture_owner;
@@ -336,6 +337,17 @@ public:
 		return m->blend_shape_mode;
 	}
 
+	void mesh_set_blend_shape_values(RID p_mesh, PoolVector<float> p_values) { // Dummy backend: only records p_values on the mesh.
+		DummyMesh *m = mesh_owner.getornull(p_mesh);
+		ERR_FAIL_COND(!m); // getornull() yielded no mesh for p_mesh: report and leave without storing.
+		m->blend_shape_values = p_values;
+	}
+	PoolVector<float> mesh_get_blend_shape_values(RID p_mesh) const { // Returns the weights last stored by mesh_set_blend_shape_values().
+		DummyMesh *m = mesh_owner.getornull(p_mesh);
+		ERR_FAIL_COND_V(!m, PoolRealArray()); // No mesh for p_mesh: report and hand back an empty array.
+		return m->blend_shape_values;
+	}
+
 	void mesh_surface_update_region(RID p_mesh, int p_surface, int p_offset, const PoolVector<uint8_t> &p_data) {}
 
 	void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material) {}

+ 4 - 246
drivers/gles2/rasterizer_scene_gles2.cpp

@@ -1385,239 +1385,6 @@ bool RasterizerSceneGLES2::_setup_material(RasterizerStorageGLES2::Material *p_m
 	return shader_rebind;
 }
 
-void static _calculate_blend_shape_buffer(RasterizerSceneGLES2::RenderList::Element *p_element, PoolVector<float> &transform_buffer) {
-	RasterizerStorageGLES2::Surface *s = static_cast<RasterizerStorageGLES2::Surface *>(p_element->geometry);
-	if (!s->blend_shape_data.empty()) {
-		if (transform_buffer.size() < s->array_byte_size) {
-			transform_buffer.resize(s->array_byte_size);
-		}
-		for (int i = 0; i < VS::ARRAY_MAX - 1; i++) {
-			if (s->attribs[i].enabled) {
-				const float *p_weights = p_element->instance->blend_values.ptr();
-
-				PoolVector<float>::Write write = transform_buffer.write();
-				PoolVector<uint8_t>::Read read = s->data.read();
-				float attrib_array[4] = { 0.0 };
-
-				// Read all attributes
-				for (int j = 0; j < s->array_len; j++) {
-					size_t offset = s->attribs[i].offset + (j * s->attribs[i].stride);
-					float base_weight = 1.0;
-
-					if (s->mesh->blend_shape_mode == VS::BLEND_SHAPE_MODE_NORMALIZED) {
-						for (int ti = 0; ti < s->blend_shape_data.size(); ti++) {
-							base_weight -= p_weights[ti];
-						}
-					}
-
-					// Set the base
-					switch (i) {
-						case VS::ARRAY_VERTEX: {
-							if (s->format & VS::ARRAY_COMPRESS_VERTEX) {
-								const uint16_t *v = (const uint16_t *)(read.ptr() + offset);
-								attrib_array[0] = Math::halfptr_to_float(&v[0]) * base_weight;
-								attrib_array[1] = Math::halfptr_to_float(&v[1]) * base_weight;
-								attrib_array[2] = Math::halfptr_to_float(&v[2]) * base_weight;
-							} else {
-								const float *v = (const float *)(read.ptr() + offset);
-								attrib_array[0] = v[0] * base_weight;
-								attrib_array[1] = v[1] * base_weight;
-								attrib_array[2] = v[2] * base_weight;
-							}
-						} break;
-						case VS::ARRAY_NORMAL: {
-							if (s->format & VS::ARRAY_COMPRESS_NORMAL) {
-								const int8_t *v = (const int8_t *)(read.ptr() + offset);
-								attrib_array[0] = (v[0] / 127.0) * base_weight;
-								attrib_array[1] = (v[1] / 127.0) * base_weight;
-								attrib_array[2] = (v[2] / 127.0) * base_weight;
-							} else {
-								const float *v = (const float *)(read.ptr() + offset);
-								attrib_array[0] = v[0] * base_weight;
-								attrib_array[1] = v[1] * base_weight;
-								attrib_array[2] = v[2] * base_weight;
-							}
-						} break;
-						case VS::ARRAY_TANGENT: {
-							if (s->format & VS::ARRAY_COMPRESS_TANGENT) {
-								const int8_t *v = (const int8_t *)(read.ptr() + offset);
-								attrib_array[0] = (v[0] / 127.0) * base_weight;
-								attrib_array[1] = (v[1] / 127.0) * base_weight;
-								attrib_array[2] = (v[2] / 127.0) * base_weight;
-								attrib_array[3] = (v[3] / 127.0) * base_weight;
-							} else {
-								const float *v = (const float *)(read.ptr() + offset);
-								attrib_array[0] = v[0] * base_weight;
-								attrib_array[1] = v[1] * base_weight;
-								attrib_array[2] = v[2] * base_weight;
-								attrib_array[3] = v[3] * base_weight;
-							}
-						} break;
-						case VS::ARRAY_COLOR: {
-							if (s->format & VS::ARRAY_COMPRESS_COLOR) {
-								const uint8_t *v = (const uint8_t *)(read.ptr() + offset);
-								attrib_array[0] = (v[0] / 255.0) * base_weight;
-								attrib_array[1] = (v[1] / 255.0) * base_weight;
-								attrib_array[2] = (v[2] / 255.0) * base_weight;
-								attrib_array[3] = (v[3] / 255.0) * base_weight;
-							} else {
-								const float *v = (const float *)(read.ptr() + offset);
-								attrib_array[0] = v[0] * base_weight;
-								attrib_array[1] = v[1] * base_weight;
-								attrib_array[2] = v[2] * base_weight;
-								attrib_array[3] = v[3] * base_weight;
-							}
-						} break;
-						case VS::ARRAY_TEX_UV: {
-							if (s->format & VS::ARRAY_COMPRESS_TEX_UV) {
-								const uint16_t *v = (const uint16_t *)(read.ptr() + offset);
-								attrib_array[0] = Math::halfptr_to_float(&v[0]) * base_weight;
-								attrib_array[1] = Math::halfptr_to_float(&v[1]) * base_weight;
-							} else {
-								const float *v = (const float *)(read.ptr() + offset);
-								attrib_array[0] = v[0] * base_weight;
-								attrib_array[1] = v[1] * base_weight;
-							}
-						} break;
-						case VS::ARRAY_TEX_UV2: {
-							if (s->format & VS::ARRAY_COMPRESS_TEX_UV2) {
-								const uint16_t *v = (const uint16_t *)(read.ptr() + offset);
-								attrib_array[0] = Math::halfptr_to_float(&v[0]) * base_weight;
-								attrib_array[1] = Math::halfptr_to_float(&v[1]) * base_weight;
-							} else {
-								const float *v = (const float *)(read.ptr() + offset);
-								attrib_array[0] = v[0] * base_weight;
-								attrib_array[1] = v[1] * base_weight;
-							}
-						} break;
-						case VS::ARRAY_WEIGHTS: {
-							if (s->format & VS::ARRAY_COMPRESS_WEIGHTS) {
-								const uint16_t *v = (const uint16_t *)(read.ptr() + offset);
-								attrib_array[0] = (v[0] / 65535.0) * base_weight;
-								attrib_array[1] = (v[1] / 65535.0) * base_weight;
-								attrib_array[2] = (v[2] / 65535.0) * base_weight;
-								attrib_array[3] = (v[3] / 65535.0) * base_weight;
-							} else {
-								const float *v = (const float *)(read.ptr() + offset);
-								attrib_array[0] = v[0] * base_weight;
-								attrib_array[1] = v[1] * base_weight;
-								attrib_array[2] = v[2] * base_weight;
-								attrib_array[3] = v[3] * base_weight;
-							}
-						} break;
-					}
-
-					// Add all blend shapes
-					for (int ti = 0; ti < s->blend_shape_data.size(); ti++) {
-						PoolVector<uint8_t>::Read blend = s->blend_shape_data[ti].read();
-						float weight = p_weights[ti];
-						if (Math::is_zero_approx(weight)) {
-							continue;
-						}
-
-						switch (i) {
-							case VS::ARRAY_VERTEX: {
-								if (s->format & VS::ARRAY_COMPRESS_VERTEX) {
-									const uint16_t *v = (const uint16_t *)(blend.ptr() + offset);
-									attrib_array[0] += Math::halfptr_to_float(&v[0]) * weight;
-									attrib_array[1] += Math::halfptr_to_float(&v[1]) * weight;
-									attrib_array[2] += Math::halfptr_to_float(&v[2]) * weight;
-								} else {
-									const float *v = (const float *)(blend.ptr() + offset);
-									attrib_array[0] += v[0] * weight;
-									attrib_array[1] += v[1] * weight;
-									attrib_array[2] += v[2] * weight;
-								}
-							} break;
-							case VS::ARRAY_NORMAL: {
-								if (s->format & VS::ARRAY_COMPRESS_NORMAL) {
-									const int8_t *v = (const int8_t *)(blend.ptr() + offset);
-									attrib_array[0] += (float(v[0]) / 127.0) * weight;
-									attrib_array[1] += (float(v[1]) / 127.0) * weight;
-									attrib_array[2] += (float(v[2]) / 127.0) * weight;
-								} else {
-									const float *v = (const float *)(blend.ptr() + offset);
-									attrib_array[0] += v[0] * weight;
-									attrib_array[1] += v[1] * weight;
-									attrib_array[2] += v[2] * weight;
-								}
-							} break;
-							case VS::ARRAY_TANGENT: {
-								if (s->format & VS::ARRAY_COMPRESS_TANGENT) {
-									const int8_t *v = (const int8_t *)(read.ptr() + offset);
-									attrib_array[0] += (float(v[0]) / 127.0) * weight;
-									attrib_array[1] += (float(v[1]) / 127.0) * weight;
-									attrib_array[2] += (float(v[2]) / 127.0) * weight;
-									attrib_array[3] = (float(v[3]) / 127.0);
-								} else {
-									const float *v = (const float *)(read.ptr() + offset);
-									attrib_array[0] += v[0] * weight;
-									attrib_array[1] += v[1] * weight;
-									attrib_array[2] += v[2] * weight;
-									attrib_array[3] = v[3];
-								}
-							} break;
-							case VS::ARRAY_COLOR: {
-								if (s->format & VS::ARRAY_COMPRESS_COLOR) {
-									const uint8_t *v = (const uint8_t *)(blend.ptr() + offset);
-									attrib_array[0] += (v[0] / 255.0) * weight;
-									attrib_array[1] += (v[1] / 255.0) * weight;
-									attrib_array[2] += (v[2] / 255.0) * weight;
-									attrib_array[3] += (v[3] / 255.0) * weight;
-								} else {
-									const float *v = (const float *)(blend.ptr() + offset);
-									attrib_array[0] += v[0] * weight;
-									attrib_array[1] += v[1] * weight;
-									attrib_array[2] += v[2] * weight;
-									attrib_array[3] += v[3] * weight;
-								}
-							} break;
-							case VS::ARRAY_TEX_UV: {
-								if (s->format & VS::ARRAY_COMPRESS_TEX_UV) {
-									const uint16_t *v = (const uint16_t *)(blend.ptr() + offset);
-									attrib_array[0] += Math::halfptr_to_float(&v[0]) * weight;
-									attrib_array[1] += Math::halfptr_to_float(&v[1]) * weight;
-								} else {
-									const float *v = (const float *)(blend.ptr() + offset);
-									attrib_array[0] += v[0] * weight;
-									attrib_array[1] += v[1] * weight;
-								}
-							} break;
-							case VS::ARRAY_TEX_UV2: {
-								if (s->format & VS::ARRAY_COMPRESS_TEX_UV2) {
-									const uint16_t *v = (const uint16_t *)(blend.ptr() + offset);
-									attrib_array[0] += Math::halfptr_to_float(&v[0]) * weight;
-									attrib_array[1] += Math::halfptr_to_float(&v[1]) * weight;
-								} else {
-									const float *v = (const float *)(blend.ptr() + offset);
-									attrib_array[0] += v[0] * weight;
-									attrib_array[1] += v[1] * weight;
-								}
-							} break;
-							case VS::ARRAY_WEIGHTS: {
-								if (s->format & VS::ARRAY_COMPRESS_WEIGHTS) {
-									const uint16_t *v = (const uint16_t *)(blend.ptr() + offset);
-									attrib_array[0] += (v[0] / 65535.0) * weight;
-									attrib_array[1] += (v[1] / 65535.0) * weight;
-									attrib_array[2] += (v[2] / 65535.0) * weight;
-									attrib_array[3] += (v[3] / 65535.0) * weight;
-								} else {
-									const float *v = (const float *)(blend.ptr() + offset);
-									attrib_array[0] += v[0] * weight;
-									attrib_array[1] += v[1] * weight;
-									attrib_array[2] += v[2] * weight;
-									attrib_array[3] += v[3] * weight;
-								}
-							} break;
-						}
-					}
-					memcpy(&write[offset], attrib_array, sizeof(float) * s->attribs[i].size);
-				}
-			}
-		}
-	}
-}
-
 void RasterizerSceneGLES2::_setup_geometry(RenderList::Element *p_element, RasterizerStorageGLES2::Skeleton *p_skeleton) {
 	switch (p_element->instance->base_type) {
 		case VS::INSTANCE_MESH: {
@@ -1627,25 +1394,16 @@ void RasterizerSceneGLES2::_setup_geometry(RenderList::Element *p_element, Raste
 				glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s->index_id);
 			}
 
-			if (!s->blend_shape_data.empty()) {
-				_calculate_blend_shape_buffer(p_element, storage->resources.blend_shapes_transform_cpu_buffer);
-				storage->_update_blend_shape_transform_buffer(storage->resources.blend_shapes_transform_cpu_buffer, s->array_byte_size);
-			}
-
 			for (int i = 0; i < VS::ARRAY_MAX - 1; i++) {
 				if (s->attribs[i].enabled) {
-					if (!s->blend_shape_data.empty() && (i != VS::ARRAY_BONES)) {
-						glBindBuffer(GL_ARRAY_BUFFER, storage->resources.blend_shape_transform_buffer);
-
-						glEnableVertexAttribArray(i);
+					glEnableVertexAttribArray(i);
 
-						glVertexAttribPointer(s->attribs[i].index, s->attribs[i].size, GL_FLOAT, GL_FALSE, s->attribs[i].stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(s->attribs[i].offset * sizeof(float)));
+					if (!s->blend_shape_data.empty() && i != VS::ARRAY_BONES && s->blend_shape_buffer_size > 0) {
+						glBindBuffer(GL_ARRAY_BUFFER, s->blend_shape_buffer_id);
+						glVertexAttribPointer(s->attribs[i].index, s->attribs[i].size, GL_FLOAT, GL_FALSE, 8 * 4 * sizeof(float), CAST_INT_TO_UCHAR_PTR(i * 4 * sizeof(float)));
 
 					} else {
 						glBindBuffer(GL_ARRAY_BUFFER, s->vertex_id);
-
-						glEnableVertexAttribArray(i);
-
 						glVertexAttribPointer(s->attribs[i].index, s->attribs[i].size, s->attribs[i].type, s->attribs[i].normalized, s->attribs[i].stride, CAST_INT_TO_UCHAR_PTR(s->attribs[i].offset));
 					}
 				} else {

+ 254 - 35
drivers/gles2/rasterizer_storage_gles2.cpp

@@ -2560,24 +2560,11 @@ void RasterizerStorageGLES2::mesh_add_surface(RID p_mesh, uint32_t p_format, VS:
 		}
 
 		// TODO generate wireframes
-	}
-
-	{
-		// blend shapes
-
-		for (int i = 0; i < p_blend_shapes.size(); i++) {
-			Surface::BlendShape mt;
-
-			PoolVector<uint8_t>::Read vr = p_blend_shapes[i].read();
 
-			surface->total_data_size += array_size;
-
-			glGenBuffers(1, &mt.vertex_id);
-			glBindBuffer(GL_ARRAY_BUFFER, mt.vertex_id);
-			glBufferData(GL_ARRAY_BUFFER, array_size, vr.ptr(), GL_STATIC_DRAW);
-			glBindBuffer(GL_ARRAY_BUFFER, 0);
-
-			surface->blend_shapes.push_back(mt);
+		// Make one blend shape buffer per surface
+		{
+			surface->blend_shape_buffer_size = 0;
+			glGenBuffers(1, &surface->blend_shape_buffer_id);
 		}
 	}
 
@@ -2596,6 +2583,9 @@ void RasterizerStorageGLES2::mesh_set_blend_shape_count(RID p_mesh, int p_amount
 
 	mesh->blend_shape_count = p_amount;
 	mesh->instance_change_notify(true, false);
+	if (!mesh->update_list.in_list()) {
+		blend_shapes_update_list.add(&mesh->update_list);
+	}
 }
 
 int RasterizerStorageGLES2::mesh_get_blend_shape_count(RID p_mesh) const {
@@ -2609,6 +2599,9 @@ void RasterizerStorageGLES2::mesh_set_blend_shape_mode(RID p_mesh, VS::BlendShap
 	ERR_FAIL_COND(!mesh);
 
 	mesh->blend_shape_mode = p_mode;
+	if (!mesh->update_list.in_list()) {
+		blend_shapes_update_list.add(&mesh->update_list);
+	}
 }
 
 VS::BlendShapeMode RasterizerStorageGLES2::mesh_get_blend_shape_mode(RID p_mesh) const {
@@ -2618,6 +2611,23 @@ VS::BlendShapeMode RasterizerStorageGLES2::mesh_get_blend_shape_mode(RID p_mesh)
 	return mesh->blend_shape_mode;
 }
 
+// Stores a new set of blend shape weights on the mesh and makes sure the mesh
+// is queued on blend_shapes_update_list, which update_dirty_blend_shapes()
+// drains.
+void RasterizerStorageGLES2::mesh_set_blend_shape_values(RID p_mesh, PoolVector<float> p_values) {
+	Mesh *mesh = mesh_owner.getornull(p_mesh);
+	ERR_FAIL_COND(!mesh);
+
+	mesh->blend_shape_values = p_values;
+
+	// A mesh that is already queued must not be added a second time.
+	if (mesh->update_list.in_list()) {
+		return;
+	}
+	blend_shapes_update_list.add(&mesh->update_list);
+}
+
+PoolVector<float> RasterizerStorageGLES2::mesh_get_blend_shape_values(RID p_mesh) const { // Returns the weights currently stored on the mesh.
+	const Mesh *mesh = mesh_owner.getornull(p_mesh);
+	ERR_FAIL_COND_V(!mesh, PoolVector<float>()); // No mesh for p_mesh: report and hand back an empty array.
+
+	return mesh->blend_shape_values;
+}
+
 void RasterizerStorageGLES2::mesh_surface_update_region(RID p_mesh, int p_surface, int p_offset, const PoolVector<uint8_t> &p_data) {
 	Mesh *mesh = mesh_owner.getornull(p_mesh);
 
@@ -2756,9 +2766,7 @@ void RasterizerStorageGLES2::mesh_remove_surface(RID p_mesh, int p_surface) {
 		glDeleteBuffers(1, &surface->index_id);
 	}
 
-	for (int i = 0; i < surface->blend_shapes.size(); i++) {
-		glDeleteBuffers(1, &surface->blend_shapes[i].vertex_id);
-	}
+	glDeleteBuffers(1, &surface->blend_shape_buffer_id);
 
 	info.vertex_mem -= surface->total_data_size;
 
@@ -3740,23 +3748,233 @@ void RasterizerStorageGLES2::skeleton_set_base_transform_2d(RID p_skeleton, cons
 	skeleton->base_transform_2d = p_base_transform;
 }
 
-void RasterizerStorageGLES2::_update_blend_shape_transform_buffer(const PoolVector<float> &p_data, size_t p_size) {
-	glBindBuffer(GL_ARRAY_BUFFER, resources.blend_shape_transform_buffer);
+// Drains blend_shapes_update_list. For every queued mesh, each surface that has
+// blend shape data gets its blended vertex attributes recomputed on the CPU and
+// uploaded to that surface's blend_shape_buffer_id.
+//
+// Layout of the generated buffer (all values are floats): every vertex owns
+// 8 * 4 floats, and attribute index i of vertex j starts at float offset
+// i * 4 + j * 8 * 4. Compressed source attributes are expanded to floats while
+// blending. Slots of attributes that are not enabled are left untouched.
+//
+// Blending: each attribute starts as base * base_weight, where base_weight is 1
+// minus the sum of the weights in BLEND_SHAPE_MODE_NORMALIZED and 1 otherwise;
+// every blend shape with a non-zero weight then adds shape * weight. The
+// tangent's fourth component is the exception: it is overwritten with the
+// shape's value instead of being accumulated.
+void RasterizerStorageGLES2::update_dirty_blend_shapes() {
+	while (blend_shapes_update_list.first()) {
+		Mesh *mesh = blend_shapes_update_list.first()->self();
+		for (int is = 0; is < mesh->surfaces.size(); is++) {
+			RasterizerStorageGLES2::Surface *s = mesh->surfaces[is];
+			if (!s->blend_shape_data.empty()) {
+				// The CPU staging buffer is shared by all surfaces and only ever grows.
+				PoolVector<float> &transform_buffer = resources.blend_shape_transform_cpu_buffer;
+				size_t buffer_size = s->array_len * 8 * 4;
+				if (resources.blend_shape_transform_cpu_buffer_size < buffer_size) {
+					resources.blend_shape_transform_cpu_buffer_size = buffer_size;
+					transform_buffer.resize(buffer_size);
+				}
 
-	uint32_t buffer_size = p_size * sizeof(float);
+				PoolVector<uint8_t>::Read read = s->data.read();
+				PoolVector<float>::Write write = transform_buffer.write();
+				float base_weight = 1.0;
+
+				// Only blend shapes that have both a weight and stored data can be
+				// applied; clamping keeps blend_shape_data[ti] in bounds when the
+				// number of weights does not match the surface.
+				int shape_count = mesh->blend_shape_values.size();
+				if (shape_count > s->blend_shape_data.size()) {
+					shape_count = s->blend_shape_data.size();
+				}
+				// Taken once per surface instead of once per vertex and attribute.
+				PoolVector<float>::Read weights = mesh->blend_shape_values.read();
 
-	if (p_size > resources.blend_shape_transform_buffer_size) {
-		// new requested buffer is bigger, so resizing the GPU buffer
+				if (s->mesh->blend_shape_mode == VS::BLEND_SHAPE_MODE_NORMALIZED) {
+					for (int ti = 0; ti < shape_count; ti++) {
+						base_weight -= weights[ti];
+					}
+				}
 
-		resources.blend_shape_transform_buffer_size = p_size;
+				for (int i = 0; i < VS::ARRAY_MAX - 1; i++) {
+					if (s->attribs[i].enabled) {
+						// Read all attributes
+						for (int j = 0; j < s->array_len; j++) {
+							// Source offset is in bytes (read is a byte array).
+							size_t offset = s->attribs[i].offset + (j * s->attribs[i].stride);
+							const float *rd = (const float *)(read.ptr() + offset);
+
+							// Destination offset is in floats (write is a float array).
+							size_t offset_write = i * 4 + (j * 8 * 4);
+							float *wr = (float *)(write.ptr() + offset_write);
+
+							// Set the base
+							switch (i) {
+								case VS::ARRAY_VERTEX: {
+									if (s->format & VS::ARRAY_COMPRESS_VERTEX) {
+										wr[0] = Math::halfptr_to_float(&((uint16_t *)rd)[0]) * base_weight;
+										wr[1] = Math::halfptr_to_float(&((uint16_t *)rd)[1]) * base_weight;
+										wr[2] = Math::halfptr_to_float(&((uint16_t *)rd)[2]) * base_weight;
+									} else {
+										wr[0] = rd[0] * base_weight;
+										wr[1] = rd[1] * base_weight;
+										wr[2] = rd[2] * base_weight;
+									}
+								} break;
+								case VS::ARRAY_NORMAL: {
+									if (s->format & VS::ARRAY_COMPRESS_NORMAL) {
+										wr[0] = (((int8_t *)rd)[0] / 127.0) * base_weight;
+										wr[1] = (((int8_t *)rd)[1] / 127.0) * base_weight;
+										wr[2] = (((int8_t *)rd)[2] / 127.0) * base_weight;
+									} else {
+										wr[0] = rd[0] * base_weight;
+										wr[1] = rd[1] * base_weight;
+										wr[2] = rd[2] * base_weight;
+									}
+								} break;
+								case VS::ARRAY_TANGENT: {
+									if (s->format & VS::ARRAY_COMPRESS_TANGENT) {
+										wr[0] = (((int8_t *)rd)[0] / 127.0) * base_weight;
+										wr[1] = (((int8_t *)rd)[1] / 127.0) * base_weight;
+										wr[2] = (((int8_t *)rd)[2] / 127.0) * base_weight;
+										wr[3] = (((int8_t *)rd)[3] / 127.0) * base_weight;
+									} else {
+										wr[0] = rd[0] * base_weight;
+										wr[1] = rd[1] * base_weight;
+										wr[2] = rd[2] * base_weight;
+										wr[3] = rd[3] * base_weight;
+									}
+								} break;
+								case VS::ARRAY_COLOR: {
+									if (s->format & VS::ARRAY_COMPRESS_COLOR) {
+										wr[0] = (((uint8_t *)rd)[0] / 255.0) * base_weight;
+										wr[1] = (((uint8_t *)rd)[1] / 255.0) * base_weight;
+										wr[2] = (((uint8_t *)rd)[2] / 255.0) * base_weight;
+										wr[3] = (((uint8_t *)rd)[3] / 255.0) * base_weight;
+									} else {
+										wr[0] = rd[0] * base_weight;
+										wr[1] = rd[1] * base_weight;
+										wr[2] = rd[2] * base_weight;
+										wr[3] = rd[3] * base_weight;
+									}
+								} break;
+								case VS::ARRAY_TEX_UV: {
+									if (s->format & VS::ARRAY_COMPRESS_TEX_UV) {
+										wr[0] = Math::halfptr_to_float(&((uint16_t *)rd)[0]) * base_weight;
+										wr[1] = Math::halfptr_to_float(&((uint16_t *)rd)[1]) * base_weight;
+									} else {
+										wr[0] = rd[0] * base_weight;
+										wr[1] = rd[1] * base_weight;
+									}
+								} break;
+								case VS::ARRAY_TEX_UV2: {
+									if (s->format & VS::ARRAY_COMPRESS_TEX_UV2) {
+										wr[0] = Math::halfptr_to_float(&((uint16_t *)rd)[0]) * base_weight;
+										wr[1] = Math::halfptr_to_float(&((uint16_t *)rd)[1]) * base_weight;
+									} else {
+										wr[0] = rd[0] * base_weight;
+										wr[1] = rd[1] * base_weight;
+									}
+								} break;
+								case VS::ARRAY_WEIGHTS: {
+									if (s->format & VS::ARRAY_COMPRESS_WEIGHTS) {
+										wr[0] = (((uint16_t *)rd)[0] / 65535.0) * base_weight;
+										wr[1] = (((uint16_t *)rd)[1] / 65535.0) * base_weight;
+										wr[2] = (((uint16_t *)rd)[2] / 65535.0) * base_weight;
+										wr[3] = (((uint16_t *)rd)[3] / 65535.0) * base_weight;
+									} else {
+										wr[0] = rd[0] * base_weight;
+										wr[1] = rd[1] * base_weight;
+										wr[2] = rd[2] * base_weight;
+										wr[3] = rd[3] * base_weight;
+									}
+								} break;
+							}
+
+							// Add all blend shapes
+							for (int ti = 0; ti < shape_count; ti++) {
+								// Skip inactive shapes before touching their data.
+								float weight = weights[ti];
+								if (Math::is_zero_approx(weight)) {
+									continue;
+								}
+
+								// Shape data is read at the same byte offset as the base attribute.
+								PoolVector<uint8_t>::Read blend = s->blend_shape_data[ti].read();
+								const float *br = (const float *)(blend.ptr() + offset);
+
+								switch (i) {
+									case VS::ARRAY_VERTEX: {
+										if (s->format & VS::ARRAY_COMPRESS_VERTEX) {
+											wr[0] += Math::halfptr_to_float(&((uint16_t *)br)[0]) * weight;
+											wr[1] += Math::halfptr_to_float(&((uint16_t *)br)[1]) * weight;
+											wr[2] += Math::halfptr_to_float(&((uint16_t *)br)[2]) * weight;
+										} else {
+											wr[0] += br[0] * weight;
+											wr[1] += br[1] * weight;
+											wr[2] += br[2] * weight;
+										}
+									} break;
+									case VS::ARRAY_NORMAL: {
+										if (s->format & VS::ARRAY_COMPRESS_NORMAL) {
+											wr[0] += (float(((int8_t *)br)[0]) / 127.0) * weight;
+											wr[1] += (float(((int8_t *)br)[1]) / 127.0) * weight;
+											wr[2] += (float(((int8_t *)br)[2]) / 127.0) * weight;
+										} else {
+											wr[0] += br[0] * weight;
+											wr[1] += br[1] * weight;
+											wr[2] += br[2] * weight;
+										}
+									} break;
+									case VS::ARRAY_TANGENT: {
+										// The fourth component is replaced, not accumulated.
+										if (s->format & VS::ARRAY_COMPRESS_TANGENT) {
+											wr[0] += (float(((int8_t *)br)[0]) / 127.0) * weight;
+											wr[1] += (float(((int8_t *)br)[1]) / 127.0) * weight;
+											wr[2] += (float(((int8_t *)br)[2]) / 127.0) * weight;
+											wr[3] = (float(((int8_t *)br)[3]) / 127.0);
+										} else {
+											wr[0] += br[0] * weight;
+											wr[1] += br[1] * weight;
+											wr[2] += br[2] * weight;
+											wr[3] = br[3];
+										}
+									} break;
+									case VS::ARRAY_COLOR: {
+										if (s->format & VS::ARRAY_COMPRESS_COLOR) {
+											wr[0] += (((uint8_t *)br)[0] / 255.0) * weight;
+											wr[1] += (((uint8_t *)br)[1] / 255.0) * weight;
+											wr[2] += (((uint8_t *)br)[2] / 255.0) * weight;
+											wr[3] += (((uint8_t *)br)[3] / 255.0) * weight;
+										} else {
+											wr[0] += br[0] * weight;
+											wr[1] += br[1] * weight;
+											wr[2] += br[2] * weight;
+											wr[3] += br[3] * weight;
+										}
+									} break;
+									case VS::ARRAY_TEX_UV: {
+										if (s->format & VS::ARRAY_COMPRESS_TEX_UV) {
+											wr[0] += Math::halfptr_to_float(&((uint16_t *)br)[0]) * weight;
+											wr[1] += Math::halfptr_to_float(&((uint16_t *)br)[1]) * weight;
+										} else {
+											wr[0] += br[0] * weight;
+											wr[1] += br[1] * weight;
+										}
+									} break;
+									case VS::ARRAY_TEX_UV2: {
+										if (s->format & VS::ARRAY_COMPRESS_TEX_UV2) {
+											wr[0] += Math::halfptr_to_float(&((uint16_t *)br)[0]) * weight;
+											wr[1] += Math::halfptr_to_float(&((uint16_t *)br)[1]) * weight;
+										} else {
+											wr[0] += br[0] * weight;
+											wr[1] += br[1] * weight;
+										}
+									} break;
+									case VS::ARRAY_WEIGHTS: {
+										if (s->format & VS::ARRAY_COMPRESS_WEIGHTS) {
+											wr[0] += (((uint16_t *)br)[0] / 65535.0) * weight;
+											wr[1] += (((uint16_t *)br)[1] / 65535.0) * weight;
+											wr[2] += (((uint16_t *)br)[2] / 65535.0) * weight;
+											wr[3] += (((uint16_t *)br)[3] / 65535.0) * weight;
+										} else {
+											wr[0] += br[0] * weight;
+											wr[1] += br[1] * weight;
+											wr[2] += br[2] * weight;
+											wr[3] += br[3] * weight;
+										}
+									} break;
+								}
+							}
+						}
+					}
+				}
 
-		glBufferData(GL_ARRAY_BUFFER, buffer_size, p_data.read().ptr(), GL_DYNAMIC_DRAW);
-	} else {
-		// this may not be best, it could be better to use glBufferData in both cases.
-		buffer_orphan_and_upload(resources.blend_shape_transform_buffer_size, 0, buffer_size, p_data.read().ptr(), GL_ARRAY_BUFFER, true);
+				// Store size and send changed blend shape render to GL
+				// blend_shape_buffer_size is kept in floats; GL sizes below are in bytes.
+				glBindBuffer(GL_ARRAY_BUFFER, s->blend_shape_buffer_id);
+				if (buffer_size > s->blend_shape_buffer_size) {
+					s->blend_shape_buffer_size = buffer_size;
+					glBufferData(GL_ARRAY_BUFFER, buffer_size * sizeof(float), transform_buffer.read().ptr(), GL_DYNAMIC_DRAW);
+				} else {
+					// NOTE(review): the first argument is a float count while the third is a
+					// byte count. Presumably buffer_orphan_and_upload() expects both in bytes;
+					// confirm against its definition and scale by sizeof(float) if so.
+					buffer_orphan_and_upload(s->blend_shape_buffer_size, 0, buffer_size * sizeof(float), transform_buffer.read().ptr(), GL_ARRAY_BUFFER, true);
+				}
+				glBindBuffer(GL_ARRAY_BUFFER, 0);
+			}
+		}
+		blend_shapes_update_list.remove(blend_shapes_update_list.first());
 	}
-
-	glBindBuffer(GL_ARRAY_BUFFER, 0);
 }
 
 void RasterizerStorageGLES2::_update_skeleton_transform_buffer(const PoolVector<float> &p_data, size_t p_size) {
@@ -6231,10 +6449,10 @@ void RasterizerStorageGLES2::initialize() {
 		resources.skeleton_transform_buffer_size = 0;
 		glGenBuffers(1, &resources.skeleton_transform_buffer);
 	}
-	// blend shape buffer
+
+	// blend buffer
 	{
-		resources.blend_shape_transform_buffer_size = 0;
-		glGenBuffers(1, &resources.blend_shape_transform_buffer);
+		resources.blend_shape_transform_cpu_buffer_size = 0;
 	}
 
 	// radical inverse vdc cache texture
@@ -6315,6 +6533,7 @@ void RasterizerStorageGLES2::_copy_screen() {
 void RasterizerStorageGLES2::update_dirty_resources() {
 	update_dirty_shaders();
 	update_dirty_materials();
+	update_dirty_blend_shapes();
 	update_dirty_skeletons();
 	update_dirty_multimeshes();
 	update_dirty_captures();

+ 18 - 13
drivers/gles2/rasterizer_storage_gles2.h

@@ -125,10 +125,8 @@ public:
 		GLuint skeleton_transform_buffer;
 		PoolVector<float> skeleton_transform_cpu_buffer;
 
-		size_t blend_shape_transform_buffer_size;
-		GLuint blend_shape_transform_buffer;
-		PoolVector<float> blend_shapes_transform_cpu_buffer;
-
+		size_t blend_shape_transform_cpu_buffer_size;
+		PoolVector<float> blend_shape_transform_cpu_buffer;
 	} resources;
 
 	mutable struct Shaders {
@@ -640,13 +638,6 @@ public:
 		GLuint vertex_id;
 		GLuint index_id;
 
-		struct BlendShape {
-			GLuint vertex_id;
-			GLuint array_id;
-		};
-
-		Vector<BlendShape> blend_shapes;
-
 		AABB aabb;
 
 		int array_len;
@@ -665,8 +656,12 @@ public:
 
 		PoolVector<uint8_t> data;
 		PoolVector<uint8_t> index_data;
+
 		Vector<PoolVector<uint8_t>> blend_shape_data;
 
+		GLuint blend_shape_buffer_id;
+		size_t blend_shape_buffer_size;
+
 		int total_data_size;
 
 		Surface() :
@@ -690,6 +685,9 @@ public:
 
 		int blend_shape_count;
 		VS::BlendShapeMode blend_shape_mode;
+		PoolRealArray blend_shape_values;
+
+		SelfList<Mesh> update_list;
 
 		AABB custom_aabb;
 
@@ -708,11 +706,14 @@ public:
 
 		Mesh() :
 				blend_shape_count(0),
-				blend_shape_mode(VS::BLEND_SHAPE_MODE_NORMALIZED) {
+				blend_shape_mode(VS::BLEND_SHAPE_MODE_NORMALIZED),
+				blend_shape_values(PoolRealArray()), // starts out with no weights
+				update_list(this) { // list node referring back to this mesh; it is what gets added to blend_shapes_update_list
 		}
 	};
 
 	mutable RID_Owner<Mesh> mesh_owner;
+	SelfList<Mesh>::List blend_shapes_update_list;
 
 	virtual RID mesh_create();
 
@@ -724,6 +725,9 @@ public:
 	virtual void mesh_set_blend_shape_mode(RID p_mesh, VS::BlendShapeMode p_mode);
 	virtual VS::BlendShapeMode mesh_get_blend_shape_mode(RID p_mesh) const;
 
+	virtual void mesh_set_blend_shape_values(RID p_mesh, PoolVector<float> p_values);
+	virtual PoolVector<float> mesh_get_blend_shape_values(RID p_mesh) const;
+
 	virtual void mesh_surface_update_region(RID p_mesh, int p_surface, int p_offset, const PoolVector<uint8_t> &p_data);
 
 	virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material);
@@ -751,6 +755,8 @@ public:
 	virtual AABB mesh_get_aabb(RID p_mesh, RID p_skeleton) const;
 	virtual void mesh_clear(RID p_mesh);
 
+	void update_dirty_blend_shapes();
+
 	/* MULTIMESH API */
 
 	struct MultiMesh : public GeometryOwner {
@@ -912,7 +918,6 @@ public:
 	virtual Transform2D skeleton_bone_get_transform_2d(RID p_skeleton, int p_bone) const;
 	virtual void skeleton_set_base_transform_2d(RID p_skeleton, const Transform2D &p_base_transform);
 
-	void _update_blend_shape_transform_buffer(const PoolVector<float> &p_data, size_t p_size);
 	void _update_skeleton_transform_buffer(const PoolVector<float> &p_data, size_t p_size);
 
 	/* Light API */

+ 1 - 1
drivers/gles3/rasterizer_scene_gles3.cpp

@@ -1266,7 +1266,7 @@ void RasterizerSceneGLES3::_setup_geometry(RenderList::Element *e, const Transfo
 
 			if (s->blend_shapes.size() && e->instance->blend_values.size()) {
 				//blend shapes, use transform feedback
-				storage->mesh_render_blend_shapes(s, e->instance->blend_values.ptr());
+				storage->mesh_render_blend_shapes(s, e->instance->blend_values.read().ptr());
 				//rebind shader
 				state.scene_shader.bind();
 #ifdef DEBUG_ENABLED

+ 12 - 0
drivers/gles3/rasterizer_storage_gles3.cpp

@@ -3849,6 +3849,18 @@ VS::BlendShapeMode RasterizerStorageGLES3::mesh_get_blend_shape_mode(RID p_mesh)
 	return mesh->blend_shape_mode;
 }
 
+void RasterizerStorageGLES3::mesh_set_blend_shape_values(RID p_mesh, PoolVector<float> p_values) { // Only records p_values on the mesh; nothing else is updated here.
+	Mesh *mesh = mesh_owner.getornull(p_mesh);
+	ERR_FAIL_COND(!mesh); // No mesh for p_mesh: report and leave without storing.
+	mesh->blend_shape_values = p_values;
+}
+
+PoolVector<float> RasterizerStorageGLES3::mesh_get_blend_shape_values(RID p_mesh) const { // Returns the weights currently stored on the mesh.
+	const Mesh *mesh = mesh_owner.getornull(p_mesh);
+	ERR_FAIL_COND_V(!mesh, PoolVector<float>()); // No mesh for p_mesh: report and hand back an empty array.
+	return mesh->blend_shape_values;
+}
+
 void RasterizerStorageGLES3::mesh_surface_update_region(RID p_mesh, int p_surface, int p_offset, const PoolVector<uint8_t> &p_data) {
 	Mesh *mesh = mesh_owner.getornull(p_mesh);
 	ERR_FAIL_COND(!mesh);

+ 4 - 0
drivers/gles3/rasterizer_storage_gles3.h

@@ -715,6 +715,7 @@ public:
 		Vector<Surface *> surfaces;
 		int blend_shape_count;
 		VS::BlendShapeMode blend_shape_mode;
+		PoolRealArray blend_shape_values;
 		AABB custom_aabb;
 		mutable uint64_t last_pass;
 		SelfList<MultiMesh>::List multimeshes;
@@ -746,6 +747,9 @@ public:
 	virtual void mesh_set_blend_shape_mode(RID p_mesh, VS::BlendShapeMode p_mode);
 	virtual VS::BlendShapeMode mesh_get_blend_shape_mode(RID p_mesh) const;
 
+	virtual void mesh_set_blend_shape_values(RID p_mesh, PoolVector<float> p_values);
+	virtual PoolVector<float> mesh_get_blend_shape_values(RID p_mesh) const;
+
 	virtual void mesh_surface_update_region(RID p_mesh, int p_surface, int p_offset, const PoolVector<uint8_t> &p_data);
 
 	virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material);

+ 4 - 1
servers/visual/rasterizer.h

@@ -101,7 +101,7 @@ public:
 		Vector<RID> reflection_probe_instances;
 		Vector<RID> gi_probe_instances;
 
-		Vector<float> blend_values;
+		PoolVector<float> blend_values;
 
 		VS::ShadowCastingSetting cast_shadows;
 
@@ -284,6 +284,9 @@ public:
 	virtual void mesh_set_blend_shape_mode(RID p_mesh, VS::BlendShapeMode p_mode) = 0;
 	virtual VS::BlendShapeMode mesh_get_blend_shape_mode(RID p_mesh) const = 0;
 
+	virtual void mesh_set_blend_shape_values(RID p_mesh, PoolVector<float> p_values) = 0;
+	virtual PoolVector<float> mesh_get_blend_shape_values(RID p_mesh) const = 0;
+
 	virtual void mesh_surface_update_region(RID p_mesh, int p_surface, int p_offset, const PoolVector<uint8_t> &p_data) = 0;
 
 	virtual void mesh_surface_set_material(RID p_mesh, int p_surface, RID p_material) = 0;

+ 4 - 3
servers/visual/visual_server_scene.cpp

@@ -518,7 +518,7 @@ void VisualServerScene::instance_set_base(RID p_instance, RID p_base) {
 			instance->base_data = nullptr;
 		}
 
-		instance->blend_values.clear();
+		instance->blend_values = PoolRealArray();
 
 		for (int i = 0; i < instance->materials.size(); i++) {
 			if (instance->materials[i].is_valid()) {
@@ -714,7 +714,8 @@ void VisualServerScene::instance_set_blend_shape_weight(RID p_instance, int p_sh
 	}
 
 	ERR_FAIL_INDEX(p_shape, instance->blend_values.size());
-	instance->blend_values.write[p_shape] = p_weight;
+	instance->blend_values.write().ptr()[p_shape] = p_weight;
+	VSG::storage->mesh_set_blend_shape_values(instance->base, instance->blend_values);
 }
 
 void VisualServerScene::instance_set_surface_material(RID p_instance, int p_surface, RID p_material) {
@@ -3806,7 +3807,7 @@ void VisualServerScene::_update_dirty_instance(Instance *p_instance) {
 			if (new_blend_shape_count != p_instance->blend_values.size()) {
 				p_instance->blend_values.resize(new_blend_shape_count);
 				for (int i = 0; i < new_blend_shape_count; i++) {
-					p_instance->blend_values.write[i] = 0;
+					p_instance->blend_values.write().ptr()[i] = 0;
 				}
 			}
 		}