Browse Source

Merge pull request #62057 from clayjohn/GLES3-compressed-multimesh

Implement MultiMesh in 3D and flesh out MultiMesh functions
Rémi Verschelde 3 years ago
parent
commit
ccd9378aac

+ 23 - 24
drivers/gles3/rasterizer_canvas_gles3.cpp

@@ -689,6 +689,10 @@ void RasterizerCanvasGLES3::_render_item(RID p_render_target, const Item *p_item
 				_bind_instance_data_buffer(1);
 				glBindVertexArray(pb->vertex_array);
 
+				if (pb->color_disabled) {
+					glVertexAttrib4f(RS::ARRAY_COLOR, pb->color.r, pb->color.g, pb->color.b, pb->color.a);
+				}
+
 				if (pb->index_buffer != 0) {
 					glDrawElements(prim[polygon->primitive], pb->count, GL_UNSIGNED_INT, nullptr);
 				} else {
@@ -698,6 +702,11 @@ void RasterizerCanvasGLES3::_render_item(RID p_render_target, const Item *p_item
 				state.fences[state.current_buffer] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 
 				state.current_buffer = (state.current_buffer + 1) % state.canvas_instance_data_buffers.size();
+
+				if (pb->color_disabled) {
+					// Reset so this doesn't pollute other draw calls.
+					glVertexAttrib4f(RS::ARRAY_COLOR, 1.0, 1.0, 1.0, 1.0);
+				}
 			} break;
 
 			case Item::Command::TYPE_PRIMITIVE: {
@@ -758,7 +767,6 @@ void RasterizerCanvasGLES3::_render_item(RID p_render_target, const Item *p_item
 				GLuint multimesh_buffer = 0;
 				uint32_t multimesh_stride = 0;
 				uint32_t multimesh_color_offset = 0;
-				uint32_t multimesh_custom_data_offset = 0;
 				bool multimesh_uses_color = false;
 				bool multimesh_uses_custom_data = false;
 
@@ -788,7 +796,6 @@ void RasterizerCanvasGLES3::_render_item(RID p_render_target, const Item *p_item
 					multimesh_buffer = mesh_storage->multimesh_get_gl_buffer(multimesh);
 					multimesh_stride = mesh_storage->multimesh_get_stride(multimesh);
 					multimesh_color_offset = mesh_storage->multimesh_get_color_offset(multimesh);
-					multimesh_custom_data_offset = mesh_storage->multimesh_get_custom_data_offset(multimesh);
 					multimesh_uses_color = mesh_storage->multimesh_uses_colors(multimesh);
 					multimesh_uses_custom_data = mesh_storage->multimesh_uses_custom_data(multimesh);
 				}
@@ -854,22 +861,17 @@ void RasterizerCanvasGLES3::_render_item(RID p_render_target, const Item *p_item
 					if (instance_count > 1) {
 						// Bind instance buffers.
 						glBindBuffer(GL_ARRAY_BUFFER, multimesh_buffer);
-						glEnableVertexAttribArray(5);
-						glVertexAttribPointer(5, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0));
-						glVertexAttribDivisor(5, 1);
-						glEnableVertexAttribArray(6);
-						glVertexAttribPointer(6, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(4 * 4));
-						glVertexAttribDivisor(6, 1);
-
-						if (multimesh_uses_color) {
-							glEnableVertexAttribArray(7);
-							glVertexAttribPointer(7, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(multimesh_color_offset * sizeof(float)));
-							glVertexAttribDivisor(7, 1);
-						}
-						if (multimesh_uses_custom_data) {
-							glEnableVertexAttribArray(8);
-							glVertexAttribPointer(8, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(multimesh_custom_data_offset * sizeof(float)));
-							glVertexAttribDivisor(8, 1);
+						glEnableVertexAttribArray(1);
+						glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0));
+						glVertexAttribDivisor(1, 1);
+						glEnableVertexAttribArray(2);
+						glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(4 * 4));
+						glVertexAttribDivisor(2, 1);
+
+						if (multimesh_uses_color || multimesh_uses_custom_data) {
+							glEnableVertexAttribArray(5);
+							glVertexAttribIPointer(5, 4, GL_UNSIGNED_INT, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(multimesh_color_offset * sizeof(float)));
+							glVertexAttribDivisor(5, 1);
 						}
 					}
 
@@ -1268,11 +1270,7 @@ RendererCanvasRender::PolygonID RasterizerCanvasGLES3::request_polygon(const Vec
 		}
 
 		// Next add colors
-		if (p_colors.size() == 1) {
-			glDisableVertexAttribArray(RS::ARRAY_COLOR);
-			Color m = p_colors[0];
-			glVertexAttrib4f(RS::ARRAY_COLOR, m.r, m.g, m.b, m.a);
-		} else if ((uint32_t)p_colors.size() == vertex_count) {
+		if ((uint32_t)p_colors.size() == vertex_count) {
 			glEnableVertexAttribArray(RS::ARRAY_COLOR);
 			glVertexAttribPointer(RS::ARRAY_COLOR, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(base_offset * sizeof(float)));
 
@@ -1287,7 +1285,8 @@ RendererCanvasRender::PolygonID RasterizerCanvasGLES3::request_polygon(const Vec
 			base_offset += 4;
 		} else {
 			glDisableVertexAttribArray(RS::ARRAY_COLOR);
-			glVertexAttrib4f(RS::ARRAY_COLOR, 1.0, 1.0, 1.0, 1.0);
+			pb.color_disabled = true;
+			pb.color = p_colors.size() == 1 ? p_colors[0] : Color(1.0, 1.0, 1.0, 1.0);
 		}
 
 		if ((uint32_t)p_uvs.size() == vertex_count) {

+ 2 - 0
drivers/gles3/rasterizer_canvas_gles3.h

@@ -238,6 +238,8 @@ public:
 		GLuint vertex_array;
 		GLuint index_buffer;
 		int count;
+		bool color_disabled = false;
+		Color color;
 	};
 
 	struct {

+ 60 - 16
drivers/gles3/rasterizer_scene_gles3.cpp

@@ -491,7 +491,7 @@ void RasterizerSceneGLES3::_geometry_instance_update(GeometryInstance *p_geometr
 				}
 			}
 
-			ginstance->instance_count = 1;
+			ginstance->instance_count = -1;
 
 		} break;
 
@@ -2200,6 +2200,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
 	GLES3::SceneMaterialData *prev_material_data = nullptr;
 	GLES3::SceneShaderData *prev_shader = nullptr;
 	GeometryInstanceGLES3 *prev_inst = nullptr;
+	SceneShaderGLES3::ShaderVariant prev_variant = SceneShaderGLES3::ShaderVariant::MODE_COLOR;
 
 	SceneShaderGLES3::ShaderVariant shader_variant = SceneShaderGLES3::MODE_COLOR; // Assigned to silence wrong -Wmaybe-initialized.
 
@@ -2386,12 +2387,11 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
 			prev_vertex_array_gl = vertex_array_gl;
 		}
 
-		bool use_index_buffer = false;
+		bool use_index_buffer = index_array_gl != 0;
 		if (prev_index_array_gl != index_array_gl) {
 			if (index_array_gl != 0) {
 				// Bind index each time so we can use LODs
 				glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_array_gl);
-				use_index_buffer = true;
 			}
 			prev_index_array_gl = index_array_gl;
 		}
@@ -2406,8 +2406,13 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
 			prev_material_data = material_data;
 		}
 
-		if (prev_shader != shader) {
-			material_storage->shaders.scene_shader.version_bind_shader(shader->version, shader_variant);
+		SceneShaderGLES3::ShaderVariant instance_variant = shader_variant;
+		if (inst->instance_count > 0) {
+			instance_variant = SceneShaderGLES3::ShaderVariant(1 + int(shader_variant));
+		}
+
+		if (prev_shader != shader || prev_variant != instance_variant) {
+			material_storage->shaders.scene_shader.version_bind_shader(shader->version, instance_variant);
 			float opaque_prepass_threshold = 0.0;
 			if (p_pass_mode == PASS_MODE_DEPTH) {
 				opaque_prepass_threshold = 0.99;
@@ -2415,33 +2420,69 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
 				opaque_prepass_threshold = 0.1;
 			}
 
-			material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::OPAQUE_PREPASS_THRESHOLD, opaque_prepass_threshold, shader->version, shader_variant);
+			material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::OPAQUE_PREPASS_THRESHOLD, opaque_prepass_threshold, shader->version, instance_variant);
 
 			prev_shader = shader;
+			prev_variant = instance_variant;
 		}
 
-		if (prev_inst != inst) {
+		if (prev_inst != inst || prev_shader != shader || prev_variant != instance_variant) {
 			// Rebind the light indices.
-			material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::OMNI_LIGHT_COUNT, inst->omni_light_count, shader->version, shader_variant);
-			material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::SPOT_LIGHT_COUNT, inst->spot_light_count, shader->version, shader_variant);
+			material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::OMNI_LIGHT_COUNT, inst->omni_light_count, shader->version, instance_variant);
+			material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::SPOT_LIGHT_COUNT, inst->spot_light_count, shader->version, instance_variant);
 
 			if (inst->omni_light_count) {
-				glUniform1uiv(material_storage->shaders.scene_shader.version_get_uniform(SceneShaderGLES3::OMNI_LIGHT_INDICES, shader->version, shader_variant), inst->omni_light_count, inst->omni_light_gl_cache.ptr());
+				glUniform1uiv(material_storage->shaders.scene_shader.version_get_uniform(SceneShaderGLES3::OMNI_LIGHT_INDICES, shader->version, instance_variant), inst->omni_light_count, inst->omni_light_gl_cache.ptr());
 			}
 
 			if (inst->spot_light_count) {
-				glUniform1uiv(material_storage->shaders.scene_shader.version_get_uniform(SceneShaderGLES3::SPOT_LIGHT_INDICES, shader->version, shader_variant), inst->spot_light_count, inst->spot_light_gl_cache.ptr());
+				glUniform1uiv(material_storage->shaders.scene_shader.version_get_uniform(SceneShaderGLES3::SPOT_LIGHT_INDICES, shader->version, instance_variant), inst->spot_light_count, inst->spot_light_gl_cache.ptr());
 			}
 
 			prev_inst = inst;
 		}
 
-		material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, world_transform, shader->version, shader_variant);
-
-		if (use_index_buffer) {
-			glDrawElements(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(mesh_surface), mesh_storage->mesh_surface_get_index_type(mesh_surface), 0);
+		material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, world_transform, shader->version, instance_variant);
+		if (inst->instance_count > 0) {
+			// Using MultiMesh.
+			// Bind instance buffers.
+
+			GLuint multimesh_buffer = mesh_storage->multimesh_get_gl_buffer(inst->data->base);
+			glBindBuffer(GL_ARRAY_BUFFER, multimesh_buffer);
+			uint32_t multimesh_stride = mesh_storage->multimesh_get_stride(inst->data->base);
+			glEnableVertexAttribArray(12);
+			glVertexAttribPointer(12, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0));
+			glVertexAttribDivisor(12, 1);
+			glEnableVertexAttribArray(13);
+			glVertexAttribPointer(13, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(4 * 4));
+			glVertexAttribDivisor(13, 1);
+			glEnableVertexAttribArray(14);
+			glVertexAttribPointer(14, 4, GL_FLOAT, GL_FALSE, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(4 * 8));
+			glVertexAttribDivisor(14, 1);
+
+			if (mesh_storage->multimesh_uses_colors(inst->data->base) || mesh_storage->multimesh_uses_custom_data(inst->data->base)) {
+				glEnableVertexAttribArray(15);
+				glVertexAttribIPointer(15, 4, GL_UNSIGNED_INT, multimesh_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(mesh_storage->multimesh_get_color_offset(inst->data->base) * sizeof(float)));
+				glVertexAttribDivisor(15, 1);
+			}
+			if (use_index_buffer) {
+				glDrawElementsInstanced(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(mesh_surface), mesh_storage->mesh_surface_get_index_type(mesh_surface), 0, inst->instance_count);
+			} else {
+				glDrawArraysInstanced(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(mesh_surface), inst->instance_count);
+			}
 		} else {
-			glDrawArrays(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(mesh_surface));
+			// Using regular Mesh.
+			if (use_index_buffer) {
+				glDrawElements(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(mesh_surface), mesh_storage->mesh_surface_get_index_type(mesh_surface), 0);
+			} else {
+				glDrawArrays(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(mesh_surface));
+			}
+		}
+		if (inst->instance_count > 0) {
+			glDisableVertexAttribArray(12);
+			glDisableVertexAttribArray(13);
+			glDisableVertexAttribArray(14);
+			glDisableVertexAttribArray(15);
 		}
 	}
 }
@@ -2811,6 +2852,9 @@ void sky() {
 #ifdef GLES_OVER_GL
 	glEnable(_EXT_TEXTURE_CUBE_MAP_SEAMLESS);
 #endif
+
+	// MultiMesh may read from color when color is disabled, so make sure that the color defaults to white instead of black.
+	glVertexAttrib4f(RS::ARRAY_COLOR, 1.0, 1.0, 1.0, 1.0);
 }
 
 RasterizerSceneGLES3::~RasterizerSceneGLES3() {

+ 1 - 1
drivers/gles3/rasterizer_scene_gles3.h

@@ -277,7 +277,7 @@ private:
 		int32_t shader_parameters_offset = -1;
 
 		uint32_t layer_mask = 1;
-		uint32_t instance_count = 0;
+		int32_t instance_count = 0;
 
 		RID mesh_instance;
 		bool can_sdfgi = false;

+ 25 - 0
drivers/gles3/rasterizer_storage_gles3.cpp

@@ -38,6 +38,8 @@
 #include "rasterizer_scene_gles3.h"
 #include "servers/rendering/shader_language.h"
 
+/* MISC */
+
 void RasterizerStorageGLES3::base_update_dependency(RID p_base, DependencyTracker *p_instance) {
 	if (GLES3::MeshStorage::get_singleton()->owns_mesh(p_base)) {
 		GLES3::Mesh *mesh = GLES3::MeshStorage::get_singleton()->get_mesh(p_base);
@@ -54,6 +56,29 @@ void RasterizerStorageGLES3::base_update_dependency(RID p_base, DependencyTracke
 	}
 }
 
+// Reads back the first `p_buffer_size` bytes of the GL buffer object `p_buffer` into CPU memory.
+//
+// `p_target` is the binding point used for the readback (the callers visible in this change
+// pass GL_ARRAY_BUFFER or GL_ELEMENT_ARRAY_BUFFER). Whenever the buffer gets bound here, the
+// binding for `p_target` is reset to 0 again before returning, on success and on failure alike.
+//
+// Returns the copied bytes, or an empty Vector if `p_buffer_size` is 0 or the buffer could
+// not be mapped (in which case an error is reported through ERR_FAIL_NULL_V).
+Vector<uint8_t> RasterizerStorageGLES3::buffer_get_data(GLenum p_target, GLuint p_buffer, uint32_t p_buffer_size) {
+	Vector<uint8_t> ret;
+	if (p_buffer_size == 0) {
+		// Nothing to read. Mapping a zero-length range is a GL error, so return before touching any GL state.
+		return ret;
+	}
+	ret.resize(p_buffer_size);
+	glBindBuffer(p_target, p_buffer);
+
+#if defined(__EMSCRIPTEN__)
+	// NOTE(review): presumably buffer mapping is not usable on the web platform, hence the direct readback - confirm.
+	{
+		uint8_t *w = ret.ptrw();
+		glGetBufferSubData(p_target, 0, p_buffer_size, w);
+	}
+#else
+	void *data = glMapBufferRange(p_target, 0, p_buffer_size, GL_MAP_READ_BIT);
+	if (data == nullptr) {
+		// Do not leave p_buffer bound to p_target when bailing out just below.
+		glBindBuffer(p_target, 0);
+	}
+	ERR_FAIL_NULL_V(data, Vector<uint8_t>());
+	{
+		uint8_t *w = ret.ptrw();
+		memcpy(w, data, p_buffer_size);
+	}
+	glUnmapBuffer(p_target);
+#endif
+	glBindBuffer(p_target, 0);
+	return ret;
+}
+
 /* VOXEL GI API */
 
 RID RasterizerStorageGLES3::voxel_gi_allocate() {

+ 3 - 18
drivers/gles3/rasterizer_storage_gles3.h

@@ -98,6 +98,9 @@ public:
 		}
 	}
 
+	// Buffer size is specified in bytes.
+	static Vector<uint8_t> buffer_get_data(GLenum p_target, GLuint p_buffer, uint32_t p_buffer_size);
+
 	struct Resources {
 		GLuint mipmap_blur_fbo;
 		GLuint mipmap_blur_color;
@@ -295,28 +298,10 @@ public:
 		return String();
 	}
 
-	//bool validate_framebuffer(); // Validate currently bound framebuffer, does not touch global state
-	String get_framebuffer_error(GLenum p_status);
-
 	RasterizerStorageGLES3();
 	~RasterizerStorageGLES3();
 };
 
-inline String RasterizerStorageGLES3::get_framebuffer_error(GLenum p_status) {
-#if defined(DEBUG_ENABLED) && defined(GLES_OVER_GL)
-	if (p_status == GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT) {
-		return "GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT";
-	} else if (p_status == GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT) {
-		return "GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT";
-	} else if (p_status == GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER) {
-		return "GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER";
-	} else if (p_status == GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER) {
-		return "GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER";
-	}
-#endif
-	return itos(p_status);
-}
-
 #endif // GLES3_ENABLED
 
 #endif // RASTERIZER_STORAGE_OPENGL_H

+ 5 - 5
drivers/gles3/shaders/canvas.glsl

@@ -23,10 +23,9 @@ layout(location = 11) in vec4 weight_attrib;
 
 #ifdef USE_INSTANCING
 
-layout(location = 5) in highp vec4 instance_xform0;
-layout(location = 6) in highp vec4 instance_xform1;
-layout(location = 7) in lowp vec4 instance_color;
-layout(location = 8) in highp vec4 instance_custom_data;
+layout(location = 1) in highp vec4 instance_xform0;
+layout(location = 2) in highp vec4 instance_xform1;
+layout(location = 5) in highp uvec4 instance_color_custom_data; // Color packed into xy, custom_data packed into zw for compatibility with 3D
 
 #endif
 
@@ -98,8 +97,9 @@ void main() {
 	vec4 bone_weights = weight_attrib;
 
 #ifdef USE_INSTANCING
+	vec4 instance_color = vec4(unpackHalf2x16(instance_color_custom_data.x), unpackHalf2x16(instance_color_custom_data.y));
 	color *= instance_color;
-	instance_custom = instance_custom_data;
+	instance_custom = vec4(unpackHalf2x16(instance_color_custom_data.z), unpackHalf2x16(instance_color_custom_data.w));
 #endif
 
 #else

+ 22 - 2
drivers/gles3/shaders/scene.glsl

@@ -2,8 +2,11 @@
 #[modes]
 
 mode_color = #define BASE_PASS
+mode_color_instancing = #define BASE_PASS \n#define USE_INSTANCING
 mode_additive = #define USE_ADDITIVE_LIGHTING
+mode_additive_instancing = #define USE_ADDITIVE_LIGHTING \n#define USE_INSTANCING
 mode_depth = #define MODE_RENDER_DEPTH
+mode_depth_instancing = #define MODE_RENDER_DEPTH \n#define USE_INSTANCING
 
 #[specializations]
 
@@ -43,8 +46,6 @@ ARRAY_CUSTOM2 = 8,
 ARRAY_CUSTOM3 = 9,
 ARRAY_BONES = 10, // RGBA16UI (x2 if 8 weights)
 ARRAY_WEIGHTS = 11, // RGBA16UNORM (x2 if 8 weights)
-ARRAY_INDEX = 12, // 16 or 32 bits depending on length > 0xFFFF.
-ARRAY_MAX = 13
 */
 
 /* INPUT ATTRIBS */
@@ -96,6 +97,13 @@ layout(location = 10) in uvec4 bone_attrib;
 layout(location = 11) in vec4 weight_attrib;
 #endif
 
+#ifdef USE_INSTANCING
+layout(location = 12) in highp vec4 instance_xform0;
+layout(location = 13) in highp vec4 instance_xform1;
+layout(location = 14) in highp vec4 instance_xform2;
+layout(location = 15) in highp uvec4 instance_color_custom_data; // Color packed into xy, Custom data into zw.
+#endif
+
 layout(std140) uniform GlobalVariableData { //ubo:1
 	vec4 global_variables[MAX_GLOBAL_VARIABLES];
 };
@@ -195,6 +203,10 @@ void main() {
 	highp vec3 vertex = vertex_attrib;
 
 	highp mat4 model_matrix = world_transform;
+#ifdef USE_INSTANCING
+	highp mat4 m = mat4(instance_xform0, instance_xform1, instance_xform2, vec4(0.0, 0.0, 0.0, 1.0));
+	model_matrix = model_matrix * transpose(m);
+#endif
 
 #ifdef NORMAL_USED
 	vec3 normal = normal_attrib * 2.0 - 1.0;
@@ -209,6 +221,10 @@ void main() {
 
 #if defined(COLOR_USED)
 	color_interp = color_attrib;
+#ifdef USE_INSTANCING
+	vec4 instance_color = vec4(unpackHalf2x16(instance_color_custom_data.x), unpackHalf2x16(instance_color_custom_data.y));
+	color_interp *= instance_color;
+#endif
 #endif
 
 #if defined(UV_USED)
@@ -229,7 +245,11 @@ void main() {
 	highp mat4 projection_matrix = scene_data.projection_matrix;
 	highp mat4 inv_projection_matrix = scene_data.inv_projection_matrix;
 
+#ifdef USE_INSTANCING
+	vec4 instance_custom = vec4(unpackHalf2x16(instance_color_custom_data.z), unpackHalf2x16(instance_color_custom_data.w));
+#else
 	vec4 instance_custom = vec4(0.0);
+#endif
 
 	// Using world coordinates
 #if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED)

+ 148 - 122
drivers/gles3/storage/mesh_storage.cpp

@@ -31,6 +31,7 @@
 #ifdef GLES3_ENABLED
 
 #include "mesh_storage.h"
+#include "../rasterizer_storage_gles3.h"
 #include "material_storage.h"
 
 using namespace GLES3;
@@ -230,6 +231,7 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface)
 				glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); //unbind
 				s->lods[i].edge_length = p_surface.lods[i].edge_length;
 				s->lods[i].index_count = p_surface.lods[i].index_data.size() / (is_index_16 ? 2 : 4);
+				s->lods[i].index_buffer_size = p_surface.lods[i].index_data.size();
 			}
 		}
 	}
@@ -333,48 +335,10 @@ RS::SurfaceData MeshStorage::mesh_get_surface(RID p_mesh, int p_surface) const {
 
 	RS::SurfaceData sd;
 	sd.format = s.format;
-	{
-		Vector<uint8_t> ret;
-		ret.resize(s.vertex_buffer_size);
-		glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffer);
-
-#if defined(__EMSCRIPTEN__)
-		{
-			uint8_t *w = ret.ptrw();
-			glGetBufferSubData(GL_ARRAY_BUFFER, 0, s.vertex_buffer_size, w);
-		}
-#else
-		void *data = glMapBufferRange(GL_ARRAY_BUFFER, 0, s.vertex_buffer_size, GL_MAP_READ_BIT);
-		ERR_FAIL_NULL_V(data, RS::SurfaceData());
-		{
-			uint8_t *w = ret.ptrw();
-			memcpy(w, data, s.vertex_buffer_size);
-		}
-		glUnmapBuffer(GL_ARRAY_BUFFER);
-#endif
-		sd.vertex_data = ret;
-	}
+	sd.vertex_data = RasterizerStorageGLES3::buffer_get_data(GL_ARRAY_BUFFER, s.vertex_buffer, s.vertex_buffer_size);
 
 	if (s.attribute_buffer != 0) {
-		Vector<uint8_t> ret;
-		ret.resize(s.attribute_buffer_size);
-		glBindBuffer(GL_ARRAY_BUFFER, s.attribute_buffer);
-
-#if defined(__EMSCRIPTEN__)
-		{
-			uint8_t *w = ret.ptrw();
-			glGetBufferSubData(GL_ARRAY_BUFFER, 0, s.attribute_buffer_size, w);
-		}
-#else
-		void *data = glMapBufferRange(GL_ARRAY_BUFFER, 0, s.attribute_buffer_size, GL_MAP_READ_BIT);
-		ERR_FAIL_NULL_V(data, RS::SurfaceData());
-		{
-			uint8_t *w = ret.ptrw();
-			memcpy(w, data, s.attribute_buffer_size);
-		}
-		glUnmapBuffer(GL_ARRAY_BUFFER);
-#endif
-		sd.attribute_data = ret;
+		sd.attribute_data = RasterizerStorageGLES3::buffer_get_data(GL_ARRAY_BUFFER, s.attribute_buffer, s.attribute_buffer_size);
 	}
 
 	sd.vertex_count = s.vertex_count;
@@ -382,33 +346,14 @@ RS::SurfaceData MeshStorage::mesh_get_surface(RID p_mesh, int p_surface) const {
 	sd.primitive = s.primitive;
 
 	if (sd.index_count) {
-		Vector<uint8_t> ret;
-		ret.resize(s.index_buffer_size);
-		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s.index_buffer);
-
-#if defined(__EMSCRIPTEN__)
-		{
-			uint8_t *w = ret.ptrw();
-			glGetBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, s.index_buffer_size, w);
-		}
-#else
-		void *data = glMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, s.index_buffer_size, GL_MAP_READ_BIT);
-		ERR_FAIL_NULL_V(data, RS::SurfaceData());
-		{
-			uint8_t *w = ret.ptrw();
-			memcpy(w, data, s.index_buffer_size);
-		}
-		glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
-#endif
-		sd.index_data = ret;
-		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
+		sd.index_data = RasterizerStorageGLES3::buffer_get_data(GL_ELEMENT_ARRAY_BUFFER, s.index_buffer, s.index_buffer_size);
 	}
 
 	sd.aabb = s.aabb;
 	for (uint32_t i = 0; i < s.lod_count; i++) {
 		RS::SurfaceData::LOD lod;
 		lod.edge_length = s.lods[i].edge_length;
-		//lod.index_data = RD::get_singleton()->buffer_get_data(s.lods[i].index_buffer);
+		lod.index_data = RasterizerStorageGLES3::buffer_get_data(GL_ELEMENT_ARRAY_BUFFER, s.lods[i].index_buffer, s.lods[i].index_buffer_size);
 		sd.lods.push_back(lod);
 	}
 
@@ -723,17 +668,6 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V
 	for (int i = 0; i < RS::ARRAY_INDEX; i++) {
 		if (!attribs[i].enabled) {
 			glDisableVertexAttribArray(i);
-			if (s->format & RS::ARRAY_FLAG_USE_2D_VERTICES) {
-				if (i == RS::ARRAY_COLOR) {
-					glVertexAttrib4f(i, 1, 1, 1, 1);
-				} else if (i == RS::ARRAY_TEX_UV) {
-					glVertexAttrib2f(i, 1, 1);
-				} else if (i == RS::ARRAY_BONES) {
-					glVertexAttrib4f(i, 1, 1, 1, 1);
-				} else if (i == RS::ARRAY_WEIGHTS) {
-					glVertexAttrib4f(i, 1, 1, 1, 1);
-				}
-			}
 			continue;
 		}
 		if (i <= RS::ARRAY_TANGENT) {
@@ -949,8 +883,8 @@ void MeshStorage::multimesh_allocate_data(RID p_multimesh, int p_instances, RS::
 	multimesh->uses_colors = p_use_colors;
 	multimesh->color_offset_cache = p_transform_format == RS::MULTIMESH_TRANSFORM_2D ? 8 : 12;
 	multimesh->uses_custom_data = p_use_custom_data;
-	multimesh->custom_data_offset_cache = multimesh->color_offset_cache + (p_use_colors ? 4 : 0);
-	multimesh->stride_cache = multimesh->custom_data_offset_cache + (p_use_custom_data ? 4 : 0);
+	multimesh->custom_data_offset_cache = multimesh->color_offset_cache + (p_use_colors ? 2 : 0);
+	multimesh->stride_cache = multimesh->custom_data_offset_cache + (p_use_custom_data ? 2 : 0);
 	multimesh->buffer_set = false;
 
 	multimesh->data_cache = Vector<float>();
@@ -977,7 +911,7 @@ int MeshStorage::multimesh_get_instance_count(RID p_multimesh) const {
 void MeshStorage::multimesh_set_mesh(RID p_multimesh, RID p_mesh) {
 	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
 	ERR_FAIL_COND(!multimesh);
-	if (multimesh->mesh == p_mesh) {
+	if (multimesh->mesh == p_mesh || p_mesh.is_null()) {
 		return;
 	}
 	multimesh->mesh = p_mesh;
@@ -990,13 +924,12 @@ void MeshStorage::multimesh_set_mesh(RID p_multimesh, RID p_mesh) {
 		//we have a data cache, just mark it dirty
 		_multimesh_mark_all_dirty(multimesh, false, true);
 	} else if (multimesh->instances) {
-		//need to re-create AABB unfortunately, calling this has a penalty
+		// Need to re-create AABB. Unfortunately, calling this has a penalty.
 		if (multimesh->buffer_set) {
-			// TODO add a function to RasterizerStorage to get data from a buffer
-			//Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer);
-			//const uint8_t *r = buffer.ptr();
-			//const float *data = (const float *)r;
-			//_multimesh_re_create_aabb(multimesh, data, multimesh->instances);
+			Vector<uint8_t> buffer = RasterizerStorageGLES3::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer, multimesh->instances * multimesh->stride_cache * sizeof(float));
+			const uint8_t *r = buffer.ptr();
+			const float *data = (const float *)r;
+			_multimesh_re_create_aabb(multimesh, data, multimesh->instances);
 		}
 	}
 
@@ -1017,10 +950,11 @@ void MeshStorage::_multimesh_make_local(MultiMesh *multimesh) const {
 		float *w = multimesh->data_cache.ptrw();
 
 		if (multimesh->buffer_set) {
-			//Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer);
+			Vector<uint8_t> buffer = RasterizerStorageGLES3::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer, multimesh->instances * multimesh->stride_cache * sizeof(float));
+
 			{
-				//	const uint8_t *r = buffer.ptr();
-				//	memcpy(w, r, buffer.size());
+				const uint8_t *r = buffer.ptr();
+				memcpy(w, r, buffer.size());
 			}
 		} else {
 			memset(w, 0, (size_t)multimesh->instances * multimesh->stride_cache * sizeof(float));
@@ -1186,14 +1120,12 @@ void MeshStorage::multimesh_instance_set_color(RID p_multimesh, int p_index, con
 	_multimesh_make_local(multimesh);
 
 	{
+		// Colors are packed into 2 floats.
 		float *w = multimesh->data_cache.ptrw();
 
 		float *dataptr = w + p_index * multimesh->stride_cache + multimesh->color_offset_cache;
-
-		dataptr[0] = p_color.r;
-		dataptr[1] = p_color.g;
-		dataptr[2] = p_color.b;
-		dataptr[3] = p_color.a;
+		uint16_t val[4] = { Math::make_half_float(p_color.r), Math::make_half_float(p_color.g), Math::make_half_float(p_color.b), Math::make_half_float(p_color.a) };
+		memcpy(dataptr, val, 2 * 4);
 	}
 
 	_multimesh_mark_dirty(multimesh, p_index, false);
@@ -1211,11 +1143,8 @@ void MeshStorage::multimesh_instance_set_custom_data(RID p_multimesh, int p_inde
 		float *w = multimesh->data_cache.ptrw();
 
 		float *dataptr = w + p_index * multimesh->stride_cache + multimesh->custom_data_offset_cache;
-
-		dataptr[0] = p_color.r;
-		dataptr[1] = p_color.g;
-		dataptr[2] = p_color.b;
-		dataptr[3] = p_color.a;
+		uint16_t val[4] = { Math::make_half_float(p_color.r), Math::make_half_float(p_color.g), Math::make_half_float(p_color.b), Math::make_half_float(p_color.a) };
+		memcpy(dataptr, val, 2 * 4);
 	}
 
 	_multimesh_mark_dirty(multimesh, p_index, false);
@@ -1306,11 +1235,12 @@ Color MeshStorage::multimesh_instance_get_color(RID p_multimesh, int p_index) co
 		const float *r = multimesh->data_cache.ptr();
 
 		const float *dataptr = r + p_index * multimesh->stride_cache + multimesh->color_offset_cache;
-
-		c.r = dataptr[0];
-		c.g = dataptr[1];
-		c.b = dataptr[2];
-		c.a = dataptr[3];
+		uint16_t raw_data[4];
+		memcpy(raw_data, dataptr, 2 * 4);
+		c.r = Math::half_to_float(raw_data[0]);
+		c.g = Math::half_to_float(raw_data[1]);
+		c.b = Math::half_to_float(raw_data[2]);
+		c.a = Math::half_to_float(raw_data[3]);
 	}
 
 	return c;
@@ -1329,11 +1259,12 @@ Color MeshStorage::multimesh_instance_get_custom_data(RID p_multimesh, int p_ind
 		const float *r = multimesh->data_cache.ptr();
 
 		const float *dataptr = r + p_index * multimesh->stride_cache + multimesh->custom_data_offset_cache;
-
-		c.r = dataptr[0];
-		c.g = dataptr[1];
-		c.b = dataptr[2];
-		c.a = dataptr[3];
+		uint16_t raw_data[4];
+		memcpy(raw_data, dataptr, 2 * 4);
+		c.r = Math::half_to_float(raw_data[0]);
+		c.g = Math::half_to_float(raw_data[1]);
+		c.b = Math::half_to_float(raw_data[2]);
+		c.a = Math::half_to_float(raw_data[3]);
 	}
 
 	return c;
@@ -1342,19 +1273,66 @@ Color MeshStorage::multimesh_instance_get_custom_data(RID p_multimesh, int p_ind
 void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_buffer) {
 	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
 	ERR_FAIL_COND(!multimesh);
-	ERR_FAIL_COND(p_buffer.size() != (multimesh->instances * (int)multimesh->stride_cache));
 
-	{
+	if (multimesh->uses_colors || multimesh->uses_custom_data) {
+		// Color and custom need to be packed so copy buffer to data_cache and pack.
+
+		_multimesh_make_local(multimesh);
+		multimesh->data_cache = p_buffer;
+
+		float *w = multimesh->data_cache.ptrw();
+		uint32_t old_stride = multimesh->xform_format == RS::MULTIMESH_TRANSFORM_2D ? 8 : 12;
+		old_stride += multimesh->uses_colors ? 4 : 0;
+		old_stride += multimesh->uses_custom_data ? 4 : 0;
+		for (int i = 0; i < multimesh->instances; i++) {
+			{
+				float *dataptr = w + i * old_stride;
+				float *newptr = w + i * multimesh->stride_cache;
+				float vals[8] = { dataptr[0], dataptr[1], dataptr[2], dataptr[3], dataptr[4], dataptr[5], dataptr[6], dataptr[7] };
+				memcpy(newptr, vals, 8 * 4);
+			}
+
+			if (multimesh->xform_format == RS::MULTIMESH_TRANSFORM_3D) {
+				float *dataptr = w + i * old_stride + 8;
+				float *newptr = w + i * multimesh->stride_cache + 8;
+				float vals[8] = { dataptr[0], dataptr[1], dataptr[2], dataptr[3] };
+				memcpy(newptr, vals, 4 * 4);
+			}
+
+			if (multimesh->uses_colors) {
+				float *dataptr = w + i * old_stride + (multimesh->xform_format == RS::MULTIMESH_TRANSFORM_2D ? 8 : 12);
+				float *newptr = w + i * multimesh->stride_cache + multimesh->color_offset_cache;
+				uint16_t val[4] = { Math::make_half_float(dataptr[0]), Math::make_half_float(dataptr[1]), Math::make_half_float(dataptr[2]), Math::make_half_float(dataptr[3]) };
+				memcpy(newptr, val, 2 * 4);
+			}
+			if (multimesh->uses_custom_data) {
+				float *dataptr = w + i * old_stride + (multimesh->xform_format == RS::MULTIMESH_TRANSFORM_2D ? 8 : 12) + (multimesh->uses_colors ? 4 : 0);
+				float *newptr = w + i * multimesh->stride_cache + multimesh->custom_data_offset_cache;
+				uint16_t val[4] = { Math::make_half_float(dataptr[0]), Math::make_half_float(dataptr[1]), Math::make_half_float(dataptr[2]), Math::make_half_float(dataptr[3]) };
+				memcpy(newptr, val, 2 * 4);
+			}
+		}
+
+		multimesh->data_cache.resize(multimesh->instances * (int)multimesh->stride_cache);
+		const float *r = multimesh->data_cache.ptr();
+		glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer);
+		glBufferData(GL_ARRAY_BUFFER, multimesh->data_cache.size() * sizeof(float), r, GL_STATIC_DRAW);
+		glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+	} else {
+		// Only Transform is being used, so we can upload directly.
+		ERR_FAIL_COND(p_buffer.size() != (multimesh->instances * (int)multimesh->stride_cache));
 		const float *r = p_buffer.ptr();
 		glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer);
 		glBufferData(GL_ARRAY_BUFFER, p_buffer.size() * sizeof(float), r, GL_STATIC_DRAW);
 		glBindBuffer(GL_ARRAY_BUFFER, 0);
-		multimesh->buffer_set = true;
 	}
 
-	if (multimesh->data_cache.size()) {
+	multimesh->buffer_set = true;
+
+	if (multimesh->data_cache.size() || multimesh->uses_colors || multimesh->uses_custom_data) {
 		//if we have a data cache, just update it
-		multimesh->data_cache = p_buffer;
+		multimesh->data_cache = multimesh->data_cache;
 		{
 			//clear dirty since nothing will be dirty anymore
 			uint32_t data_cache_dirty_region_count = (multimesh->instances - 1) / MULTIMESH_DIRTY_REGION_SIZE + 1;
@@ -1367,7 +1345,7 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_b
 		_multimesh_mark_all_dirty(multimesh, false, true); //update AABB
 	} else if (multimesh->mesh.is_valid()) {
 		//if we have a mesh set, we need to re-generate the AABB from the new data
-		const float *data = p_buffer.ptr();
+		const float *data = multimesh->data_cache.ptr();
 
 		_multimesh_re_create_aabb(multimesh, data, multimesh->instances);
 		multimesh->dependency.changed_notify(RendererStorage::DEPENDENCY_CHANGED_AABB);
@@ -1377,22 +1355,71 @@ void MeshStorage::multimesh_set_buffer(RID p_multimesh, const Vector<float> &p_b
 Vector<float> MeshStorage::multimesh_get_buffer(RID p_multimesh) const {
 	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
 	ERR_FAIL_COND_V(!multimesh, Vector<float>());
+	Vector<float> ret; // Buffer in its stored layout (stride_cache floats per instance); expanded below when colors/custom data are used.
 	if (multimesh->buffer == 0) {
 		return Vector<float>();
 	} else if (multimesh->data_cache.size()) {
-		return multimesh->data_cache;
+		ret = multimesh->data_cache;
 	} else {
-		//get from memory
+		// Buffer not cached, so fetch from GPU memory. This can be a stalling operation, avoid whenever possible.
 
-		//Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(multimesh->buffer);
-		Vector<float> ret;
+		Vector<uint8_t> buffer = RasterizerStorageGLES3::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer, multimesh->instances * multimesh->stride_cache * sizeof(float));
 		ret.resize(multimesh->instances * multimesh->stride_cache);
-		//{
-		//	float *w = ret.ptrw();
-		//	const uint8_t *r = buffer.ptr();
-		//	memcpy(w, r, buffer.size());
-		//}
+		{
+			float *w = ret.ptrw();
+			const uint8_t *r = buffer.ptr();
+			memcpy(w, r, buffer.size()); // The requested byte size above matches the element count ret was resized to.
+		}
+	}
+	if (multimesh->uses_colors || multimesh->uses_custom_data) {
+		// Need to decompress buffer: color and custom data are stored as four half floats each and are expanded back to four 32-bit floats per instance.
+		uint32_t new_stride = multimesh->xform_format == RS::MULTIMESH_TRANSFORM_2D ? 8 : 12;
+		new_stride += multimesh->uses_colors ? 4 : 0;
+		new_stride += multimesh->uses_custom_data ? 4 : 0;
+
+		Vector<float> decompressed;
+		decompressed.resize(multimesh->instances * (int)new_stride);
+		float *w = decompressed.ptrw();
+		const float *r = ret.ptr();
 
+		for (int i = 0; i < multimesh->instances; i++) {
+			{ // The first 8 transform floats are present in both the 2D and 3D formats.
+				float *newptr = w + i * new_stride;
+				const float *oldptr = r + i * multimesh->stride_cache;
+				float vals[8] = { oldptr[0], oldptr[1], oldptr[2], oldptr[3], oldptr[4], oldptr[5], oldptr[6], oldptr[7] };
+				memcpy(newptr, vals, sizeof(vals));
+			}
+
+			if (multimesh->xform_format == RS::MULTIMESH_TRANSFORM_3D) { // 3D transforms carry 4 more floats (12 in total).
+				float *newptr = w + i * new_stride + 8;
+				const float *oldptr = r + i * multimesh->stride_cache + 8;
+				float vals[4] = { oldptr[0], oldptr[1], oldptr[2], oldptr[3] };
+				memcpy(newptr, vals, sizeof(vals));
+			}
+
+			if (multimesh->uses_colors) {
+				float *newptr = w + i * new_stride + (multimesh->xform_format == RS::MULTIMESH_TRANSFORM_2D ? 8 : 12);
+				const float *oldptr = r + i * multimesh->stride_cache + multimesh->color_offset_cache;
+				uint16_t raw_data[4];
+				memcpy(raw_data, oldptr, sizeof(raw_data)); // Four packed half floats (8 bytes).
+				newptr[0] = Math::half_to_float(raw_data[0]);
+				newptr[1] = Math::half_to_float(raw_data[1]);
+				newptr[2] = Math::half_to_float(raw_data[2]);
+				newptr[3] = Math::half_to_float(raw_data[3]);
+			}
+			if (multimesh->uses_custom_data) {
+				float *newptr = w + i * new_stride + (multimesh->xform_format == RS::MULTIMESH_TRANSFORM_2D ? 8 : 12) + (multimesh->uses_colors ? 4 : 0);
+				const float *oldptr = r + i * multimesh->stride_cache + multimesh->custom_data_offset_cache;
+				uint16_t raw_data[4];
+				memcpy(raw_data, oldptr, sizeof(raw_data)); // Four packed half floats (8 bytes).
+				newptr[0] = Math::half_to_float(raw_data[0]);
+				newptr[1] = Math::half_to_float(raw_data[1]);
+				newptr[2] = Math::half_to_float(raw_data[2]);
+				newptr[3] = Math::half_to_float(raw_data[3]);
+			}
+		}
+		return decompressed;
+	} else {
 		return ret;
 	}
 }
@@ -1439,7 +1466,7 @@ void MeshStorage::_update_dirty_multimeshes() {
 				if (multimesh->data_cache_used_dirty_regions > 32 || multimesh->data_cache_used_dirty_regions > visible_region_count / 2) {
 					// If there too many dirty regions, or represent the majority of regions, just copy all, else transfer cost piles up too much
 					glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer);
-					glBufferData(GL_ARRAY_BUFFER, MIN(visible_region_count * region_size, multimesh->instances * (uint32_t)multimesh->stride_cache * (uint32_t)sizeof(float)), data, GL_STATIC_DRAW);
+					glBufferData(GL_ARRAY_BUFFER, MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache * sizeof(float)), data, GL_STATIC_DRAW);
 					glBindBuffer(GL_ARRAY_BUFFER, 0);
 				} else {
 					// Not that many regions? update them all
@@ -1463,8 +1490,7 @@ void MeshStorage::_update_dirty_multimeshes() {
 				multimesh->data_cache_used_dirty_regions = 0;
 			}
 
-			if (multimesh->aabb_dirty) {
-				//aabb is dirty..
+			if (multimesh->aabb_dirty && multimesh->mesh.is_valid()) {
 				_multimesh_re_create_aabb(multimesh, data, visible_instances);
 				multimesh->aabb_dirty = false;
 				multimesh->dependency.changed_notify(RendererStorage::DEPENDENCY_CHANGED_AABB);

+ 1 - 0
drivers/gles3/storage/mesh_storage.h

@@ -90,6 +90,7 @@ struct Mesh {
 		struct LOD {
 			float edge_length = 0.0;
 			uint32_t index_count = 0;
+			uint32_t index_buffer_size = 0; // NOTE(review): presumably the size of index_buffer's GL storage - confirm the unit where it is assigned. index_buffer below is the only member without a default initializer.
 			GLuint index_buffer;
 		};