Browse Source

Merge pull request #67335 from clayjohn/GLES3-2d-lights

Add 2D lights to OpenGL3 canvas renderer
Rémi Verschelde 2 years ago
parent
commit
367a9b8056

+ 1 - 1
drivers/gles3/effects/copy_effects.cpp

@@ -114,7 +114,7 @@ CopyEffects::~CopyEffects() {
 	copy.shader.version_free(copy.shader_version);
 	copy.shader.version_free(copy.shader_version);
 }
 }
 
 
-void CopyEffects::copy_to_rect(const Rect2i &p_rect) {
+void CopyEffects::copy_to_rect(const Rect2 &p_rect) {
 	copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION);
 	copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION);
 	copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION);
 	copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION);
 	glBindVertexArray(quad_array);
 	glBindVertexArray(quad_array);

+ 1 - 1
drivers/gles3/effects/copy_effects.h

@@ -61,7 +61,7 @@ public:
 	~CopyEffects();
 	~CopyEffects();
 
 
 	// These functions assume that a framebuffer and texture are bound already. They only manage the shader, uniforms, and vertex array.
 	// These functions assume that a framebuffer and texture are bound already. They only manage the shader, uniforms, and vertex array.
-	void copy_to_rect(const Rect2i &p_rect);
+	void copy_to_rect(const Rect2 &p_rect);
 	void copy_screen();
 	void copy_screen();
 	void bilinear_blur(GLuint p_source_texture, int p_mipmap_count, const Rect2i &p_region);
 	void bilinear_blur(GLuint p_source_texture, int p_mipmap_count, const Rect2i &p_region);
 	void set_color(const Color &p_color, const Rect2i &p_region);
 	void set_color(const Color &p_color, const Rect2i &p_region);

+ 272 - 30
drivers/gles3/rasterizer_canvas_gles3.cpp

@@ -115,7 +115,7 @@ void RasterizerCanvasGLES3::_update_transform_to_mat4(const Transform3D &p_trans
 	p_mat4[15] = 1;
 	p_mat4[15] = 1;
 }
 }
 
 
-void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) {
+void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_light_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) {
 	GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton();
 	GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton();
 	GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton();
 	GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton();
 
 
@@ -144,9 +144,173 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_
 		}
 		}
 	}
 	}
 
 
-	// TODO: Setup Directional Lights
+	// Set up directional lights, if any exist.
 
 
-	// TODO: Setup lights
+	uint32_t light_count = 0;
+	uint32_t directional_light_count = 0;
+	{
+		Light *l = p_directional_light_list;
+		uint32_t index = 0;
+
+		while (l) {
+			if (index == data.max_lights_per_render) {
+				l->render_index_cache = -1;
+				l = l->next_ptr;
+				continue;
+			}
+
+			CanvasLight *clight = canvas_light_owner.get_or_null(l->light_internal);
+			if (!clight) { //unused or invalid texture
+				l->render_index_cache = -1;
+				l = l->next_ptr;
+				ERR_CONTINUE(!clight);
+			}
+
+			Vector2 canvas_light_dir = l->xform_cache.columns[1].normalized();
+
+			state.light_uniforms[index].position[0] = -canvas_light_dir.x;
+			state.light_uniforms[index].position[1] = -canvas_light_dir.y;
+
+			//_update_transform_2d_to_mat2x4(clight->shadow.directional_xform, state.light_uniforms[index].shadow_matrix);
+
+			state.light_uniforms[index].height = l->height; //0..1 here
+
+			for (int i = 0; i < 4; i++) {
+				state.light_uniforms[index].shadow_color[i] = uint8_t(CLAMP(int32_t(l->shadow_color[i] * 255.0), 0, 255));
+				state.light_uniforms[index].color[i] = l->color[i];
+			}
+
+			state.light_uniforms[index].color[3] = l->energy; //use alpha for energy, so base color can go separate
+
+			/*
+			if (state.shadow_fb.is_valid()) {
+				state.light_uniforms[index].shadow_pixel_size = (1.0 / state.shadow_texture_size) * (1.0 + l->shadow_smooth);
+				state.light_uniforms[index].shadow_z_far_inv = 1.0 / clight->shadow.z_far;
+				state.light_uniforms[index].shadow_y_ofs = clight->shadow.y_offset;
+			} else {
+				state.light_uniforms[index].shadow_pixel_size = 1.0;
+				state.light_uniforms[index].shadow_z_far_inv = 1.0;
+				state.light_uniforms[index].shadow_y_ofs = 0;
+			}
+			*/
+
+			state.light_uniforms[index].flags = l->blend_mode << LIGHT_FLAGS_BLEND_SHIFT;
+			state.light_uniforms[index].flags |= l->shadow_filter << LIGHT_FLAGS_FILTER_SHIFT;
+			/*
+			if (clight->shadow.enabled) {
+				state.light_uniforms[index].flags |= LIGHT_FLAGS_HAS_SHADOW;
+			}
+			*/
+
+			l->render_index_cache = index;
+
+			index++;
+			l = l->next_ptr;
+		}
+
+		light_count = index;
+		directional_light_count = light_count;
+		state.using_directional_lights = directional_light_count > 0;
+	}
+
+	// Set up positional lights, if any exist.
+
+	{
+		Light *l = p_light_list;
+		uint32_t index = light_count;
+
+		while (l) {
+			if (index == data.max_lights_per_render) {
+				l->render_index_cache = -1;
+				l = l->next_ptr;
+				continue;
+			}
+
+			CanvasLight *clight = canvas_light_owner.get_or_null(l->light_internal);
+			if (!clight) { //unused or invalid texture
+				l->render_index_cache = -1;
+				l = l->next_ptr;
+				ERR_CONTINUE(!clight);
+			}
+			Transform2D to_light_xform = (p_canvas_transform * l->light_shader_xform).affine_inverse();
+
+			Vector2 canvas_light_pos = p_canvas_transform.xform(l->xform.get_origin()); //convert light position to canvas coordinates, as all computation is done in canvas coords to avoid precision loss
+			state.light_uniforms[index].position[0] = canvas_light_pos.x;
+			state.light_uniforms[index].position[1] = canvas_light_pos.y;
+
+			_update_transform_2d_to_mat2x4(to_light_xform, state.light_uniforms[index].matrix);
+			_update_transform_2d_to_mat2x4(l->xform_cache.affine_inverse(), state.light_uniforms[index].shadow_matrix);
+
+			state.light_uniforms[index].height = l->height * (p_canvas_transform.columns[0].length() + p_canvas_transform.columns[1].length()) * 0.5; //approximate height conversion to the canvas size, since all calculations are done in canvas coords to avoid precision loss
+			for (int i = 0; i < 4; i++) {
+				state.light_uniforms[index].shadow_color[i] = uint8_t(CLAMP(int32_t(l->shadow_color[i] * 255.0), 0, 255));
+				state.light_uniforms[index].color[i] = l->color[i];
+			}
+
+			state.light_uniforms[index].color[3] = l->energy; //use alpha for energy, so base color can go separate
+
+			/*
+				if (state.shadow_fb.is_valid()) {
+					state.light_uniforms[index].shadow_pixel_size = (1.0 / state.shadow_texture_size) * (1.0 + l->shadow_smooth);
+					state.light_uniforms[index].shadow_z_far_inv = 1.0 / clight->shadow.z_far;
+					state.light_uniforms[index].shadow_y_ofs = clight->shadow.y_offset;
+				} else {
+					state.light_uniforms[index].shadow_pixel_size = 1.0;
+					state.light_uniforms[index].shadow_z_far_inv = 1.0;
+					state.light_uniforms[index].shadow_y_ofs = 0;
+				}
+			*/
+			state.light_uniforms[index].flags = l->blend_mode << LIGHT_FLAGS_BLEND_SHIFT;
+			state.light_uniforms[index].flags |= l->shadow_filter << LIGHT_FLAGS_FILTER_SHIFT;
+			/*
+			if (clight->shadow.enabled) {
+				state.light_uniforms[index].flags |= LIGHT_FLAGS_HAS_SHADOW;
+			}
+			*/
+
+			if (clight->texture.is_valid()) {
+				Rect2 atlas_rect = GLES3::TextureStorage::get_singleton()->texture_atlas_get_texture_rect(clight->texture);
+				state.light_uniforms[index].atlas_rect[0] = atlas_rect.position.x;
+				state.light_uniforms[index].atlas_rect[1] = atlas_rect.position.y;
+				state.light_uniforms[index].atlas_rect[2] = atlas_rect.size.width;
+				state.light_uniforms[index].atlas_rect[3] = atlas_rect.size.height;
+
+			} else {
+				state.light_uniforms[index].atlas_rect[0] = 0;
+				state.light_uniforms[index].atlas_rect[1] = 0;
+				state.light_uniforms[index].atlas_rect[2] = 0;
+				state.light_uniforms[index].atlas_rect[3] = 0;
+			}
+
+			l->render_index_cache = index;
+
+			index++;
+			l = l->next_ptr;
+		}
+
+		light_count = index;
+	}
+
+	if (light_count > 0) {
+		glBindBufferBase(GL_UNIFORM_BUFFER, LIGHT_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].light_ubo);
+
+#ifdef WEB_ENABLED
+		glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(LightUniform) * light_count, state.light_uniforms);
+#else
+		// On Desktop and mobile we map the memory without synchronizing for maximum speed.
+		void *ubo = glMapBufferRange(GL_UNIFORM_BUFFER, 0, sizeof(LightUniform) * light_count, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
+		memcpy(ubo, state.light_uniforms, sizeof(LightUniform) * light_count);
+		glUnmapBuffer(GL_UNIFORM_BUFFER);
+#endif
+
+		GLuint texture_atlas = texture_storage->texture_atlas_get_texture();
+		if (texture_atlas == 0) {
+			GLES3::Texture *tex = texture_storage->get_texture(texture_storage->texture_gl_get_default(GLES3::DEFAULT_GL_TEXTURE_WHITE));
+			texture_atlas = tex->tex_id;
+		}
+		glActiveTexture(GL_TEXTURE0 + GLES3::Config::get_singleton()->max_texture_image_units - 2);
+		glBindTexture(GL_TEXTURE_2D, texture_atlas);
+	}
 
 
 	{
 	{
 		//update canvas state uniform buffer
 		//update canvas state uniform buffer
@@ -175,13 +339,12 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_
 		state_buffer.screen_pixel_size[0] = 1.0 / render_target_size.x;
 		state_buffer.screen_pixel_size[0] = 1.0 / render_target_size.x;
 		state_buffer.screen_pixel_size[1] = 1.0 / render_target_size.y;
 		state_buffer.screen_pixel_size[1] = 1.0 / render_target_size.y;
 
 
-		// TODO: temporary, this should be set at the top of this function
 		glViewport(0, 0, render_target_size.x, render_target_size.y);
 		glViewport(0, 0, render_target_size.x, render_target_size.y);
 
 
 		state_buffer.time = state.time;
 		state_buffer.time = state.time;
 		state_buffer.use_pixel_snap = p_snap_2d_vertices_to_pixel;
 		state_buffer.use_pixel_snap = p_snap_2d_vertices_to_pixel;
 
 
-		state_buffer.directional_light_count = 0; //directional_light_count;
+		state_buffer.directional_light_count = directional_light_count;
 
 
 		Vector2 canvas_scale = p_canvas_transform.get_scale();
 		Vector2 canvas_scale = p_canvas_transform.get_scale();
 
 
@@ -200,7 +363,7 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_
 		state_buffer.sdf_to_tex[3] = -sdf_tex_rect.position.y / sdf_tex_rect.size.height;
 		state_buffer.sdf_to_tex[3] = -sdf_tex_rect.position.y / sdf_tex_rect.size.height;
 
 
 		state_buffer.tex_to_sdf = 1.0 / ((canvas_scale.x + canvas_scale.y) * 0.5);
 		state_buffer.tex_to_sdf = 1.0 / ((canvas_scale.x + canvas_scale.y) * 0.5);
-		glBindBufferBase(GL_UNIFORM_BUFFER, BASE_UNIFORM_LOCATION, state.canvas_state_buffer);
+		glBindBufferBase(GL_UNIFORM_BUFFER, BASE_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].state_ubo);
 		glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), &state_buffer, GL_STREAM_DRAW);
 		glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), &state_buffer, GL_STREAM_DRAW);
 
 
 		GLuint global_buffer = material_storage->global_shader_parameters_get_uniform_buffer();
 		GLuint global_buffer = material_storage->global_shader_parameters_get_uniform_buffer();
@@ -442,7 +605,9 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
 
 
 		GLES3::CanvasMaterialData *material_data = state.canvas_instance_batches[i].material_data;
 		GLES3::CanvasMaterialData *material_data = state.canvas_instance_batches[i].material_data;
 		CanvasShaderGLES3::ShaderVariant variant = state.canvas_instance_batches[i].shader_variant;
 		CanvasShaderGLES3::ShaderVariant variant = state.canvas_instance_batches[i].shader_variant;
-		_bind_material(material_data, variant);
+		uint64_t specialization = 0;
+		specialization |= uint64_t(state.canvas_instance_batches[i].lights_disabled);
+		_bind_material(material_data, variant, specialization);
 
 
 		GLES3::CanvasShaderData::BlendMode blend_mode = state.canvas_instance_batches[i].blend_mode;
 		GLES3::CanvasShaderData::BlendMode blend_mode = state.canvas_instance_batches[i].blend_mode;
 
 
@@ -554,6 +719,38 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran
 
 
 	bool skipping = false;
 	bool skipping = false;
 
 
+	// TODO: consider making lights a per-batch property and then baking light operations in the shader for better performance.
+	uint32_t lights[4] = { 0, 0, 0, 0 };
+
+	uint16_t light_count = 0;
+
+	{
+		Light *light = p_lights;
+
+		while (light) {
+			if (light->render_index_cache >= 0 && p_item->light_mask & light->item_mask && p_item->z_final >= light->z_min && p_item->z_final <= light->z_max && p_item->global_rect_cache.intersects_transformed(light->xform_cache, light->rect_cache)) {
+				uint32_t light_index = light->render_index_cache;
+				lights[light_count >> 2] |= light_index << ((light_count & 3) * 8);
+
+				light_count++;
+
+				if (light_count == data.max_lights_per_item) {
+					break;
+				}
+			}
+			light = light->next_ptr;
+		}
+
+		base_flags |= light_count << FLAGS_LIGHT_COUNT_SHIFT;
+	}
+
+	bool lights_disabled = light_count == 0 && !state.using_directional_lights;
+
+	if (lights_disabled != state.canvas_instance_batches[state.current_batch_index].lights_disabled) {
+		_new_batch(r_batch_broken, r_index);
+		state.canvas_instance_batches[state.current_batch_index].lights_disabled = lights_disabled;
+	}
+
 	const Item::Command *c = p_item->commands;
 	const Item::Command *c = p_item->commands;
 	while (c) {
 	while (c) {
 		if (skipping && c->type != Item::Command::TYPE_ANIMATION_SLICE) {
 		if (skipping && c->type != Item::Command::TYPE_ANIMATION_SLICE) {
@@ -580,6 +777,11 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran
 		state.instance_data_array[r_index].pad[0] = 0.0;
 		state.instance_data_array[r_index].pad[0] = 0.0;
 		state.instance_data_array[r_index].pad[1] = 0.0;
 		state.instance_data_array[r_index].pad[1] = 0.0;
 
 
+		state.instance_data_array[r_index].lights[0] = lights[0];
+		state.instance_data_array[r_index].lights[1] = lights[1];
+		state.instance_data_array[r_index].lights[2] = lights[2];
+		state.instance_data_array[r_index].lights[3] = lights[3];
+
 		state.instance_data_array[r_index].flags = base_flags | (state.instance_data_array[r_index == 0 ? 0 : r_index - 1].flags & (FLAGS_DEFAULT_NORMAL_MAP_USED | FLAGS_DEFAULT_SPECULAR_MAP_USED)); //reset on each command for sanity, keep canvastexture binding config
 		state.instance_data_array[r_index].flags = base_flags | (state.instance_data_array[r_index == 0 ? 0 : r_index - 1].flags & (FLAGS_DEFAULT_NORMAL_MAP_USED | FLAGS_DEFAULT_SPECULAR_MAP_USED)); //reset on each command for sanity, keep canvastexture binding config
 
 
 		Color blend_color;
 		Color blend_color;
@@ -1140,25 +1342,41 @@ void RasterizerCanvasGLES3::_new_batch(bool &r_batch_broken, uint32_t &r_index)
 	_align_instance_data_buffer(r_index);
 	_align_instance_data_buffer(r_index);
 }
 }
 
 
-void RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant) {
+void RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization) {
 	if (p_material_data) {
 	if (p_material_data) {
 		if (p_material_data->shader_data->version.is_valid() && p_material_data->shader_data->valid) {
 		if (p_material_data->shader_data->version.is_valid() && p_material_data->shader_data->valid) {
 			// Bind uniform buffer and textures
 			// Bind uniform buffer and textures
 			p_material_data->bind_uniforms();
 			p_material_data->bind_uniforms();
-			GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant);
+			GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant, p_specialization);
 		} else {
 		} else {
-			GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant);
+			GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization);
 		}
 		}
 	} else {
 	} else {
-		GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant);
+		GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization);
 	}
 	}
 }
 }
 
 
 RID RasterizerCanvasGLES3::light_create() {
 RID RasterizerCanvasGLES3::light_create() {
-	return RID();
+	CanvasLight canvas_light;
+	return canvas_light_owner.make_rid(canvas_light);
 }
 }
 
 
 void RasterizerCanvasGLES3::light_set_texture(RID p_rid, RID p_texture) {
 void RasterizerCanvasGLES3::light_set_texture(RID p_rid, RID p_texture) {
+	GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton();
+
+	CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
+	ERR_FAIL_COND(!cl);
+	if (cl->texture == p_texture) {
+		return;
+	}
+	if (cl->texture.is_valid()) {
+		texture_storage->texture_remove_from_texture_atlas(cl->texture);
+	}
+	cl->texture = p_texture;
+
+	if (cl->texture.is_valid()) {
+		texture_storage->texture_add_to_texture_atlas(cl->texture);
+	}
 }
 }
 
 
 void RasterizerCanvasGLES3::light_set_use_shadow(RID p_rid, bool p_enable) {
 void RasterizerCanvasGLES3::light_set_use_shadow(RID p_rid, bool p_enable) {
@@ -1187,6 +1405,14 @@ void RasterizerCanvasGLES3::set_shadow_texture_size(int p_size) {
 }
 }
 
 
 bool RasterizerCanvasGLES3::free(RID p_rid) {
 bool RasterizerCanvasGLES3::free(RID p_rid) {
+	if (canvas_light_owner.owns(p_rid)) {
+		CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
+		ERR_FAIL_COND_V(!cl, false);
+		canvas_light_owner.free(p_rid);
+	} else {
+		return false;
+	}
+
 	return true;
 	return true;
 }
 }
 
 
@@ -1357,7 +1583,7 @@ void RasterizerCanvasGLES3::_prepare_canvas_texture(RID p_texture, RS::CanvasIte
 		state.instance_data_array[r_index].flags &= ~FLAGS_DEFAULT_SPECULAR_MAP_USED;
 		state.instance_data_array[r_index].flags &= ~FLAGS_DEFAULT_SPECULAR_MAP_USED;
 	}
 	}
 
 
-	if (!normal_map) {
+	if (normal_map) {
 		state.instance_data_array[r_index].flags |= FLAGS_DEFAULT_NORMAL_MAP_USED;
 		state.instance_data_array[r_index].flags |= FLAGS_DEFAULT_NORMAL_MAP_USED;
 	} else {
 	} else {
 		state.instance_data_array[r_index].flags &= ~FLAGS_DEFAULT_NORMAL_MAP_USED;
 		state.instance_data_array[r_index].flags &= ~FLAGS_DEFAULT_NORMAL_MAP_USED;
@@ -1567,13 +1793,23 @@ void RasterizerCanvasGLES3::free_polygon(PolygonID p_polygon) {
 // In theory allocations can reach as high as number of windows * 3 frames
 // In theory allocations can reach as high as number of windows * 3 frames
 // because OpenGL can start rendering subsequent frames before finishing the current one
 // because OpenGL can start rendering subsequent frames before finishing the current one
 void RasterizerCanvasGLES3::_allocate_instance_data_buffer() {
 void RasterizerCanvasGLES3::_allocate_instance_data_buffer() {
-	GLuint new_buffer;
-	glGenBuffers(1, &new_buffer);
-	glBindBuffer(GL_UNIFORM_BUFFER, new_buffer);
-	glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_DYNAMIC_DRAW);
+	GLuint new_buffers[3];
+	glGenBuffers(3, new_buffers);
+	// Batch UBO.
+	glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]);
+	glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
+	// Light uniform buffer.
+	glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]);
+	glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW);
+	// State buffer.
+	glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]);
+	glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW);
+
 	state.current_buffer = (state.current_buffer + 1);
 	state.current_buffer = (state.current_buffer + 1);
 	DataBuffer db;
 	DataBuffer db;
-	db.ubo = new_buffer;
+	db.ubo = new_buffers[0];
+	db.light_ubo = new_buffers[1];
+	db.state_ubo = new_buffers[2];
 	db.last_frame_used = RSG::rasterizer->get_frame_number();
 	db.last_frame_used = RSG::rasterizer->get_frame_number();
 	state.canvas_instance_data_buffers.insert(state.current_buffer, db);
 	state.canvas_instance_data_buffers.insert(state.current_buffer, db);
 	state.current_buffer = state.current_buffer % state.canvas_instance_data_buffers.size();
 	state.current_buffer = state.current_buffer % state.canvas_instance_data_buffers.size();
@@ -1755,12 +1991,21 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
 	state.canvas_instance_batches.reserve(200);
 	state.canvas_instance_batches.reserve(200);
 
 
 	for (int i = 0; i < 3; i++) {
 	for (int i = 0; i < 3; i++) {
-		GLuint new_buffer;
-		glGenBuffers(1, &new_buffer);
-		glBindBuffer(GL_UNIFORM_BUFFER, new_buffer);
-		glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_DYNAMIC_DRAW);
+		GLuint new_buffers[3];
+		glGenBuffers(3, new_buffers);
+		// Batch UBO.
+		glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]);
+		glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
+		// Light uniform buffer.
+		glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]);
+		glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW);
+		// State buffer.
+		glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]);
+		glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW);
 		DataBuffer db;
 		DataBuffer db;
-		db.ubo = new_buffer;
+		db.ubo = new_buffers[0];
+		db.light_ubo = new_buffers[1];
+		db.state_ubo = new_buffers[2];
 		db.last_frame_used = 0;
 		db.last_frame_used = 0;
 		db.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 		db.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 		state.canvas_instance_data_buffers[i] = db;
 		state.canvas_instance_data_buffers[i] = db;
@@ -1768,6 +2013,7 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
 	glBindBuffer(GL_UNIFORM_BUFFER, 0);
 	glBindBuffer(GL_UNIFORM_BUFFER, 0);
 
 
 	state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_ubo);
 	state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_ubo);
+	state.light_uniforms = memnew_arr(LightUniform, data.max_lights_per_render);
 
 
 	{
 	{
 		const uint32_t no_of_instances = data.max_instances_per_batch;
 		const uint32_t no_of_instances = data.max_instances_per_batch;
@@ -1794,14 +2040,9 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
 		delete[] indices;
 		delete[] indices;
 	}
 	}
 
 
-	glGenBuffers(1, &state.canvas_state_buffer);
-	glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_state_buffer);
-	glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW);
-	glBindBuffer(GL_UNIFORM_BUFFER, 0);
-
 	String global_defines;
 	String global_defines;
 	global_defines += "#define MAX_GLOBAL_SHADER_UNIFORMS 256\n"; // TODO: this is arbitrary for now
 	global_defines += "#define MAX_GLOBAL_SHADER_UNIFORMS 256\n"; // TODO: this is arbitrary for now
-	global_defines += "#define MAX_LIGHTS " + itos(data.max_instances_per_batch) + "\n";
+	global_defines += "#define MAX_LIGHTS " + itos(data.max_lights_per_render) + "\n";
 	global_defines += "#define MAX_DRAW_DATA_INSTANCES " + itos(data.max_instances_per_batch) + "\n";
 	global_defines += "#define MAX_DRAW_DATA_INSTANCES " + itos(data.max_instances_per_batch) + "\n";
 
 
 	GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.initialize(global_defines);
 	GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.initialize(global_defines);
@@ -1854,7 +2095,8 @@ RasterizerCanvasGLES3::~RasterizerCanvasGLES3() {
 	glDeleteVertexArrays(1, &data.canvas_quad_array);
 	glDeleteVertexArrays(1, &data.canvas_quad_array);
 
 
 	GLES3::TextureStorage::get_singleton()->canvas_texture_free(default_canvas_texture);
 	GLES3::TextureStorage::get_singleton()->canvas_texture_free(default_canvas_texture);
-	memfree(state.instance_data_array);
+	memdelete_arr(state.instance_data_array);
+	memdelete_arr(state.light_uniforms);
 }
 }
 
 
 #endif // GLES3_ENABLED
 #endif // GLES3_ENABLED

+ 41 - 4
drivers/gles3/rasterizer_canvas_gles3.h

@@ -96,6 +96,33 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender {
 		DEFAULT_MAX_LIGHTS_PER_RENDER = 256,
 		DEFAULT_MAX_LIGHTS_PER_RENDER = 256,
 	};
 	};
 
 
+	/******************/
+	/**** LIGHTING ****/
+	/******************/
+
+	struct CanvasLight { // Per-light data owned by this renderer; instances live in canvas_light_owner.
+		RID texture; // Light texture; light_set_texture() adds it to / removes it from the texture atlas.
+	};
+
+	RID_Owner<CanvasLight> canvas_light_owner;
+
+	struct LightUniform { // CPU-side copy of one light entry; an array of these is uploaded to the light UBO in canvas_render_items().
+		float matrix[8]; // Light to texture coordinate matrix, filled by _update_transform_2d_to_mat2x4() for positional lights.
+		float shadow_matrix[8]; // Light to shadow coordinate matrix; only written for positional lights (the directional write is commented out).
+		float color[4]; // Light color; element 3 (alpha) is overwritten with the light energy.
+
+		uint8_t shadow_color[4]; // Shadow color, each channel scaled by 255 and clamped to 0..255.
+		uint32_t flags; // Blend mode and shadow filter bits (shifted by LIGHT_FLAGS_BLEND_SHIFT / LIGHT_FLAGS_FILTER_SHIFT).
+		float shadow_pixel_size; // Not written yet: the shadow setup code that fills it is still commented out.
+		float height; // Light height: used as-is for directional lights, scaled by the canvas transform for positional lights.
+
+		float position[2]; // Canvas-space position for positional lights; negated light direction for directional lights.
+		float shadow_z_far_inv; // Not written yet: the shadow setup code that fills it is still commented out.
+		float shadow_y_ofs; // Not written yet: the shadow setup code that fills it is still commented out.
+
+		float atlas_rect[4]; // Position (x, y) and size (width, height) of the light texture in the atlas; zeros when the light has no texture.
+	};
+
 public:
 public:
 	enum {
 	enum {
 		BASE_UNIFORM_LOCATION = 0,
 		BASE_UNIFORM_LOCATION = 0,
@@ -184,8 +211,8 @@ public:
 
 
 		RID canvas_shader_default_version;
 		RID canvas_shader_default_version;
 
 
-		uint32_t max_lights_per_render;
-		uint32_t max_lights_per_item;
+		uint32_t max_lights_per_render = 256;
+		uint32_t max_lights_per_item = 16;
 		uint32_t max_instances_per_batch = 512;
 		uint32_t max_instances_per_batch = 512;
 		uint32_t max_instances_per_ubo = 16384;
 		uint32_t max_instances_per_ubo = 16384;
 		uint32_t max_instance_buffer_size = 16384 * 128;
 		uint32_t max_instance_buffer_size = 16384 * 128;
@@ -212,16 +239,22 @@ public:
 		const Item::Command *command = nullptr;
 		const Item::Command *command = nullptr;
 		Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch.
 		Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch.
 		uint32_t primitive_points = 0;
 		uint32_t primitive_points = 0;
+
+		bool lights_disabled = false;
 	};
 	};
 
 
+	// DataBuffer contains our per-frame data. I.e. the resources that are updated each frame.
+	// We track them and ensure that they don't get reused until at least 2 frames have passed
+	// to avoid the GPU stalling to wait for a resource to become available.
 	struct DataBuffer {
 	struct DataBuffer {
 		GLuint ubo = 0;
 		GLuint ubo = 0;
+		GLuint light_ubo = 0;
+		GLuint state_ubo = 0;
 		uint64_t last_frame_used = -3;
 		uint64_t last_frame_used = -3;
 		GLsync fence = GLsync();
 		GLsync fence = GLsync();
 	};
 	};
 
 
 	struct State {
 	struct State {
-		GLuint canvas_state_buffer;
 		LocalVector<DataBuffer> canvas_instance_data_buffers;
 		LocalVector<DataBuffer> canvas_instance_data_buffers;
 		LocalVector<Batch> canvas_instance_batches;
 		LocalVector<Batch> canvas_instance_batches;
 		uint32_t current_buffer = 0;
 		uint32_t current_buffer = 0;
@@ -230,6 +263,10 @@ public:
 
 
 		InstanceData *instance_data_array = nullptr;
 		InstanceData *instance_data_array = nullptr;
 
 
+		LightUniform *light_uniforms = nullptr;
+
+		bool using_directional_lights = false;
+
 		RID current_tex = RID();
 		RID current_tex = RID();
 		RS::CanvasItemTextureFilter current_filter_mode = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX;
 		RS::CanvasItemTextureFilter current_filter_mode = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX;
 		RS::CanvasItemTextureRepeat current_repeat_mode = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX;
 		RS::CanvasItemTextureRepeat current_repeat_mode = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX;
@@ -282,7 +319,7 @@ public:
 	void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, uint32_t &r_last_index, bool p_to_backbuffer = false);
 	void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, uint32_t &r_last_index, bool p_to_backbuffer = false);
 	void _record_item_commands(const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch);
 	void _record_item_commands(const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch);
 	void _render_batch(Light *p_lights, uint32_t p_index);
 	void _render_batch(Light *p_lights, uint32_t p_index);
-	void _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant);
+	void _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization);
 	void _new_batch(bool &r_batch_broken, uint32_t &r_index);
 	void _new_batch(bool &r_batch_broken, uint32_t &r_index);
 	void _add_to_batch(uint32_t &r_index, bool &r_batch_broken);
 	void _add_to_batch(uint32_t &r_index, bool &r_batch_broken);
 	void _allocate_instance_data_buffer();
 	void _allocate_instance_data_buffer();

+ 168 - 2
drivers/gles3/shaders/canvas.glsl

@@ -211,7 +211,7 @@ void main() {
 #include "canvas_uniforms_inc.glsl"
 #include "canvas_uniforms_inc.glsl"
 #include "stdlib_inc.glsl"
 #include "stdlib_inc.glsl"
 
 
-//uniform sampler2D atlas_texture; //texunit:-2
+uniform sampler2D atlas_texture; //texunit:-2
 //uniform sampler2D shadow_atlas_texture; //texunit:-3
 //uniform sampler2D shadow_atlas_texture; //texunit:-3
 uniform sampler2D screen_texture; //texunit:-4
 uniform sampler2D screen_texture; //texunit:-4
 uniform sampler2D sdf_texture; //texunit:-5
 uniform sampler2D sdf_texture; //texunit:-5
@@ -243,6 +243,77 @@ layout(std140) uniform MaterialUniforms{
 #endif
 #endif
 
 
 #GLOBALS
 #GLOBALS
+#ifndef DISABLE_LIGHTING
+#ifdef LIGHT_CODE_USED
+
+vec4 light_compute( // Wrapper around the custom light code; only compiled when LIGHT_CODE_USED is defined.
+		vec3 light_vertex,
+		vec3 light_position, // For directional lights the caller packs (direction.xy, height) here.
+		vec3 normal,
+		vec4 light_color,
+		float light_energy,
+		vec4 specular_shininess,
+		inout vec4 shadow_modulate,
+		vec2 screen_uv,
+		vec2 uv,
+		vec4 color, bool is_directional) {
+	vec4 light = vec4(0.0); // Return value; presumably assigned by the injected light code below (confirm against the shader compiler).
+	vec3 light_direction = vec3(0.0);
+
+	if (is_directional) {
+		light_direction = normalize(mix(vec3(light_position.xy, 0.0), vec3(0, 0, 1), light_position.z)); // Tilt the 2D direction towards +Z by the height stored in z.
+		light_position = vec3(0.0); // A directional light has no position, so clear it before the light code runs.
+	} else {
+		light_direction = normalize(light_position - light_vertex); // Unit vector from the shaded vertex to the light.
+	}
+
+#CODE : LIGHT
+
+	return light;
+}
+
+#endif
+
+vec3 light_normal_compute(vec3 light_vec, vec3 normal, vec3 base_color, vec3 light_color, vec4 specular_shininess, bool specular_shininess_used) { // Built-in normal-mapped lighting: diffuse term plus optional Blinn specular.
+	float cNdotL = max(0.0, dot(normal, light_vec)); // Clamped diffuse factor.
+
+	if (specular_shininess_used) {
+		// Blinn specular path.
+		vec3 view = vec3(0.0, 0.0, 1.0); // Fixed view vector along +Z; an approximation, but good enough here.
+		vec3 half_vec = normalize(view + light_vec);
+
+		float cNdotV = max(dot(normal, view), 0.0);
+		float cNdotH = max(dot(normal, half_vec), 0.0);
+		float cVdotH = max(dot(view, half_vec), 0.0); // Computed but not used below.
+		float cLdotH = max(dot(light_vec, half_vec), 0.0); // Computed but not used below.
+		float shininess = exp2(15.0 * specular_shininess.a + 1.0) * 0.25; // Specular exponent derived from the alpha channel.
+		float blinn = pow(cNdotH, shininess);
+		blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI)); // Scale factor that grows with the exponent.
+		float s = (blinn) / max(4.0 * cNdotV * cNdotL, 0.75); // Denominator is floored at 0.75, which also avoids division by zero.
+
+		return specular_shininess.rgb * light_color * s + light_color * base_color * cNdotL; // Specular plus diffuse.
+	} else {
+		return light_color * base_color * cNdotL; // Diffuse only.
+	}
+}
+
+void light_blend_compute(uint light_base, vec4 light_color, inout vec3 color) { // Blends one light's contribution into color using that light's blend mode.
+	uint blend_mode = light_array[light_base].flags & LIGHT_FLAGS_BLEND_MASK; // light_base indexes light_array; the mask isolates the blend bits.
+
+	switch (blend_mode) { // No default case: any other value leaves color unchanged.
+		case LIGHT_FLAGS_BLEND_MODE_ADD: {
+			color.rgb += light_color.rgb * light_color.a; // Additive, weighted by light alpha.
+		} break;
+		case LIGHT_FLAGS_BLEND_MODE_SUB: {
+			color.rgb -= light_color.rgb * light_color.a; // Subtractive, weighted by light alpha.
+		} break;
+		case LIGHT_FLAGS_BLEND_MODE_MIX: {
+			color.rgb = mix(color.rgb, light_color.rgb, light_color.a); // Interpolate towards the light color by light alpha.
+		} break;
+	}
+}
+
+#endif
 
 
 #ifdef USE_NINEPATCH
 #ifdef USE_NINEPATCH
 
 
@@ -353,7 +424,8 @@ void main() {
 		color *= texture(color_texture, uv);
 		color *= texture(color_texture, uv);
 	}
 	}
 
 
-	bool using_light = false;
+	uint light_count = (draw_data[draw_data_instance].flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights
+	bool using_light = light_count > 0u || directional_light_count > 0u;
 
 
 	vec3 normal;
 	vec3 normal;
 
 
@@ -414,11 +486,105 @@ void main() {
 #endif
 #endif
 	}
 	}
 
 
+	if (normal_used) {
+		//convert by item transform
+		normal.xy = mat2(normalize(draw_data[draw_data_instance].world_x), normalize(draw_data[draw_data_instance].world_y)) * normal.xy;
+		//convert by canvas transform
+		normal = normalize((canvas_normal_transform * vec4(normal, 0.0)).xyz);
+	}
+
+	vec4 base_color = color;
+
 #ifdef MODE_LIGHT_ONLY
 #ifdef MODE_LIGHT_ONLY
 	color = vec4(0.0);
 	color = vec4(0.0);
 #else
 #else
 	color *= canvas_modulation;
 	color *= canvas_modulation;
 #endif
 #endif
 
 
+#if !defined(DISABLE_LIGHTING) && !defined(MODE_UNSHADED)
+
+	// Directional Lights
+
+	for (uint i = 0u; i < directional_light_count; i++) {
+		uint light_base = i;
+
+		vec2 direction = light_array[light_base].position;
+		vec4 light_color = light_array[light_base].color;
+
+#ifdef LIGHT_CODE_USED
+
+		vec4 shadow_modulate = vec4(1.0);
+		light_color = light_compute(light_vertex, vec3(direction, light_array[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true);
+#else
+
+		if (normal_used) {
+			vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array[light_base].height));
+			light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
+		} else {
+			light_color.rgb *= base_color.rgb;
+		}
+#endif
+
+		light_blend_compute(light_base, light_color, color.rgb);
+	}
+
+	// Positional Lights
+
+	for (uint i = 0u; i < MAX_LIGHTS_PER_ITEM; i++) {
+		if (i >= light_count) {
+			break;
+		}
+		uint light_base;
+		if (i < 8u) {
+			if (i < 4u) {
+				light_base = draw_data[draw_data_instance].lights[0];
+			} else {
+				light_base = draw_data[draw_data_instance].lights[1];
+			}
+		} else {
+			if (i < 12u) {
+				light_base = draw_data[draw_data_instance].lights[2];
+			} else {
+				light_base = draw_data[draw_data_instance].lights[3];
+			}
+		}
+		light_base >>= (i & 3u) * 8u;
+		light_base &= uint(0xFF);
+
+		vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array[light_base].texture_matrix[0], light_array[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
+		vec2 tex_uv_atlas = tex_uv * light_array[light_base].atlas_rect.zw + light_array[light_base].atlas_rect.xy;
+		vec4 light_color = textureLod(atlas_texture, tex_uv_atlas, 0.0);
+		vec4 light_base_color = light_array[light_base].color;
+
+#ifdef LIGHT_CODE_USED
+
+		vec4 shadow_modulate = vec4(1.0);
+		vec3 light_position = vec3(light_array[light_base].position, light_array[light_base].height);
+
+		light_color.rgb *= light_base_color.rgb;
+		light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false);
+#else
+
+		light_color.rgb *= light_base_color.rgb * light_base_color.a;
+
+		if (normal_used) {
+			vec3 light_pos = vec3(light_array[light_base].position, light_array[light_base].height);
+			vec3 pos = light_vertex;
+			vec3 light_vec = normalize(light_pos - pos);
+
+			light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
+		} else {
+			light_color.rgb *= base_color.rgb;
+		}
+#endif
+		if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) {
+			//if outside the light texture, light color is zero
+			light_color.a = 0.0;
+		}
+
+		light_blend_compute(light_base, light_color, color.rgb);
+	}
+#endif
+
 	frag_color = color;
 	frag_color = color;
 }
 }

+ 21 - 0
drivers/gles3/shaders/canvas_uniforms_inc.glsl

@@ -94,6 +94,27 @@ layout(std140) uniform CanvasData { //ubo:0
 #define LIGHT_FLAGS_SHADOW_PCF5 uint(1 << 22)
 #define LIGHT_FLAGS_SHADOW_PCF5 uint(1 << 22)
 #define LIGHT_FLAGS_SHADOW_PCF13 uint(2 << 22)
 #define LIGHT_FLAGS_SHADOW_PCF13 uint(2 << 22)
 
 
+struct Light { // One 2D light as stored in the std140 LightData uniform block; member order defines the buffer layout (presumably mirrored by a CPU-side struct - confirm before reordering).
+	mat2x4 texture_matrix; // Light to texture coordinate matrix (transposed); its two rows turn a vertex position into light-texture UVs.
+	mat2x4 shadow_matrix; // Light to shadow coordinate matrix (transposed).
+	vec4 color; // rgb tints the light; alpha is used as the light energy.
+
+	uint shadow_color; // packed
+	uint flags; // Bit flags; the blend mode is extracted with LIGHT_FLAGS_BLEND_MASK. NOTE(review): previously described as "index to light texture" - confirm whether any bits still carry an index.
+	float shadow_pixel_size;
+	float height; // Used as the z component of the light position (and as the tilt factor for directional lights).
+
+	vec2 position; // Light position; for directional lights this holds the light direction instead.
+	float shadow_zfar_inv;
+	float shadow_y_ofs;
+
+	vec4 atlas_rect; // Sub-rectangle of atlas_texture holding this light's texture: xy = offset, zw = scale.
+};
+
+layout(std140) uniform LightData { //ubo:2
+	Light light_array[MAX_LIGHTS]; // Indexed by light_base in the fragment shader; directional lights are read from the first directional_light_count entries.
+};
+
 layout(std140) uniform DrawDataInstances { //ubo:3
 layout(std140) uniform DrawDataInstances { //ubo:3
 
 
 	DrawData draw_data[MAX_DRAW_DATA_INSTANCES];
 	DrawData draw_data[MAX_DRAW_DATA_INSTANCES];

+ 2 - 3
drivers/gles3/shaders/copy.glsl

@@ -2,7 +2,7 @@
 #[modes]
 #[modes]
 
 
 mode_default = #define MODE_SIMPLE_COPY
 mode_default = #define MODE_SIMPLE_COPY
-mode_copy_section = #define USE_COPY_SECTION
+mode_copy_section = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY
 mode_gaussian_blur = #define MODE_GAUSSIAN_BLUR
 mode_gaussian_blur = #define MODE_GAUSSIAN_BLUR
 mode_mipmap = #define MODE_MIPMAP
 mode_mipmap = #define MODE_MIPMAP
 mode_simple_color = #define MODE_SIMPLE_COLOR \n#define USE_COPY_SECTION
 mode_simple_color = #define MODE_SIMPLE_COLOR \n#define USE_COPY_SECTION
@@ -25,8 +25,7 @@ void main() {
 	gl_Position = vec4(vertex_attrib, 1.0, 1.0);
 	gl_Position = vec4(vertex_attrib, 1.0, 1.0);
 
 
 #ifdef USE_COPY_SECTION
 #ifdef USE_COPY_SECTION
-	gl_Position.xy = (copy_section.xy + (uv_interp.xy * 0.5 + 0.5) * copy_section.zw) * 2.0 - 1.0;
-	uv_interp = copy_section.xy + uv_interp * copy_section.zw;
+	gl_Position.xy = (copy_section.xy + uv_interp.xy * copy_section.zw) * 2.0 - 1.0;
 #endif
 #endif
 }
 }
 
 

+ 234 - 2
drivers/gles3/storage/texture_storage.cpp

@@ -197,6 +197,22 @@ TextureStorage::TextureStorage() {
 
 
 	glBindTexture(GL_TEXTURE_2D, 0);
 	glBindTexture(GL_TEXTURE_2D, 0);
 
 
+	{ // Atlas Texture initialize.
+		uint8_t pixel_data[4 * 4 * 4];
+		for (int i = 0; i < 16; i++) {
+			pixel_data[i * 4 + 0] = 0;
+			pixel_data[i * 4 + 1] = 0;
+			pixel_data[i * 4 + 2] = 0;
+			pixel_data[i * 4 + 3] = 255;
+		}
+
+		glGenTextures(1, &texture_atlas.texture);
+		glBindTexture(GL_TEXTURE_2D, texture_atlas.texture);
+		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 4, 4, 0, GL_RGBA, GL_UNSIGNED_BYTE, pixel_data);
+	}
+
+	glBindTexture(GL_TEXTURE_2D, 0);
+
 #ifdef GLES_OVER_GL
 #ifdef GLES_OVER_GL
 	glEnable(GL_PROGRAM_POINT_SIZE);
 	glEnable(GL_PROGRAM_POINT_SIZE);
 #endif
 #endif
@@ -207,6 +223,11 @@ TextureStorage::~TextureStorage() {
 	for (int i = 0; i < DEFAULT_GL_TEXTURE_MAX; i++) {
 	for (int i = 0; i < DEFAULT_GL_TEXTURE_MAX; i++) {
 		texture_free(default_gl_textures[i]);
 		texture_free(default_gl_textures[i]);
 	}
 	}
+
+	glDeleteTextures(1, &texture_atlas.texture);
+	texture_atlas.texture = 0;
+	glDeleteFramebuffers(1, &texture_atlas.framebuffer);
+	texture_atlas.framebuffer = 0;
 }
 }
 
 
 //TODO, move back to storage
 //TODO, move back to storage
@@ -653,7 +674,7 @@ void TextureStorage::texture_free(RID p_texture) {
 		}
 		}
 	}
 	}
 
 
-	//decal_atlas_remove_texture(p_texture);
+	texture_atlas_remove_texture(p_texture);
 
 
 	for (int i = 0; i < t->proxies.size(); i++) {
 	for (int i = 0; i < t->proxies.size(); i++) {
 		Texture *p = texture_owner.get_or_null(t->proxies[i]);
 		Texture *p = texture_owner.get_or_null(t->proxies[i]);
@@ -875,7 +896,7 @@ void TextureStorage::texture_replace(RID p_texture, RID p_by_texture) {
 	//delete last, so proxies can be updated
 	//delete last, so proxies can be updated
 	texture_owner.free(p_by_texture);
 	texture_owner.free(p_by_texture);
 
 
-	//decal_atlas_mark_dirty_on_texture(p_texture);
+	texture_atlas_mark_dirty_on_texture(p_texture);
 }
 }
 
 
 void TextureStorage::texture_set_size_override(RID p_texture, int p_width, int p_height) {
 void TextureStorage::texture_set_size_override(RID p_texture, int p_width, int p_height) {
@@ -1143,6 +1164,217 @@ RID TextureStorage::texture_create_radiance_cubemap(RID p_source, int p_resoluti
 	return RID();
 	return RID();
 }
 }
 
 
+/* TEXTURE ATLAS API */
+
+// Registers p_texture as a user of the shared texture atlas.
+// The first reference creates the entry and marks the atlas dirty so it gets
+// rebuilt; later references only increase the user count.
+void TextureStorage::texture_add_to_texture_atlas(RID p_texture) {
+	TextureAtlas::Texture *existing = texture_atlas.textures.getptr(p_texture);
+	if (existing) {
+		existing->users++;
+		return;
+	}
+
+	TextureAtlas::Texture entry;
+	entry.users = 1;
+	texture_atlas.textures[p_texture] = entry;
+	texture_atlas.dirty = true;
+}
+
+// Drops one reference to p_texture in the texture atlas and forgets the entry
+// once nobody uses it anymore. Fails with an error if the texture was never added.
+void TextureStorage::texture_remove_from_texture_atlas(RID p_texture) {
+	TextureAtlas::Texture *t = texture_atlas.textures.getptr(p_texture);
+	ERR_FAIL_COND(!t);
+	if (--t->users != 0) {
+		return;
+	}
+	// The atlas is intentionally not marked dirty: the stale copy inside it is
+	// harmless and disappears on the next rebuild.
+	texture_atlas.textures.erase(p_texture);
+}
+
+// Requests an atlas rebuild if p_texture is one of the textures stored in it.
+void TextureStorage::texture_atlas_mark_dirty_on_texture(RID p_texture) {
+	if (!texture_atlas.textures.has(p_texture)) {
+		return;
+	}
+	// The texture contents most likely changed, so the atlas copy is stale.
+	texture_atlas.dirty = true;
+}
+
+// Forgets p_texture regardless of its user count (used when the texture itself is freed).
+void TextureStorage::texture_atlas_remove_texture(RID p_texture) {
+	// erase() is a no-op for unknown keys, so no membership test is needed.
+	// The atlas is not marked dirty; the leftover pixels go away on the next rebuild.
+	texture_atlas.textures.erase(p_texture);
+}
+
+GLuint TextureStorage::texture_atlas_get_texture() const { // Returns the GL texture name currently backing the atlas (0 if the last rebuild failed to create it).
+	return texture_atlas.texture;
+}
+
+void TextureStorage::update_texture_atlas() { // Rebuilds the shared texture atlas (GL texture + framebuffer) from all registered textures, but only when it is marked dirty.
+	CopyEffects *copy_effects = CopyEffects::get_singleton();
+	ERR_FAIL_NULL(copy_effects);
+
+	if (!texture_atlas.dirty) {
+		return; //nothing to do
+	}
+
+	texture_atlas.dirty = false;
+	// Throw away the previous atlas; it is recreated from scratch below.
+	if (texture_atlas.texture != 0) {
+		glDeleteTextures(1, &texture_atlas.texture);
+		texture_atlas.texture = 0;
+		glDeleteFramebuffers(1, &texture_atlas.framebuffer);
+		texture_atlas.framebuffer = 0;
+	}
+
+	const int border = 2; // Size in pixels of one packing cell; also yields a border / 2 pixel margin in front of each texture.
+
+	if (texture_atlas.textures.size()) {
+		// Generate the atlas layout: one SortItem per registered texture.
+		Vector<TextureAtlas::SortItem> itemsv;
+		itemsv.resize(texture_atlas.textures.size());
+		int base_size = 8; // Atlas width in packing cells; grown below as needed.
+
+		int idx = 0;
+
+		for (const KeyValue<RID, TextureAtlas::Texture> &E : texture_atlas.textures) {
+			TextureAtlas::SortItem &si = itemsv.write[idx];
+
+			Texture *src_tex = get_texture(E.key); // NOTE(review): dereferenced without a null check; presumably safe because texture_free() removes the entry first - confirm.
+			// Footprint in cells (the +1 leaves room for the margin) and exact size in pixels.
+			si.size.width = (src_tex->width / border) + 1;
+			si.size.height = (src_tex->height / border) + 1;
+			si.pixel_size = Size2i(src_tex->width, src_tex->height);
+			// Make sure the atlas is at least as wide as the widest item.
+			if (base_size < si.size.width) {
+				base_size = nearest_power_of_2_templated(si.size.width);
+			}
+
+			si.texture = E.key;
+			idx++;
+		}
+
+		// Sort items by size, largest first (ordering defined by SortItem::operator<).
+		itemsv.sort();
+
+		// Attempt to create the atlas, widening it until the layout is acceptable.
+		int item_count = itemsv.size();
+		TextureAtlas::SortItem *items = itemsv.ptrw();
+
+		int atlas_height = 0;
+		// Each iteration packs every item at the current width; the width doubles until the result is at most twice as tall as it is wide.
+		while (true) {
+			Vector<int> v_offsetsv;
+			v_offsetsv.resize(base_size);
+			// v_offsets[c] is the current fill height (in cells) of column c.
+			int *v_offsets = v_offsetsv.ptrw();
+			memset(v_offsets, 0, sizeof(int) * base_size);
+
+			int max_height = 0;
+
+			for (int i = 0; i < item_count; i++) {
+				// Best fit: try every horizontal position and keep the one where the item lands lowest.
+				TextureAtlas::SortItem &si = items[i];
+				int best_idx = -1;
+				int best_height = 0x7FFFFFFF;
+				for (int j = 0; j <= base_size - si.size.width; j++) {
+					int height = 0; // Tallest column under the item at position j.
+					for (int k = 0; k < si.size.width; k++) {
+						int h = v_offsets[k + j];
+						if (h > height) {
+							height = h;
+							if (height > best_height) {
+								break; //already bad
+							}
+						}
+					}
+
+					if (height < best_height) {
+						best_height = height;
+						best_idx = j;
+					}
+				}
+
+				// Update: raise the covered columns to the top edge of the placed item.
+				for (int k = 0; k < si.size.width; k++) {
+					v_offsets[k + best_idx] = best_height + si.size.height;
+				}
+
+				si.pos.x = best_idx;
+				si.pos.y = best_height;
+
+				if (si.pos.y + si.size.height > max_height) {
+					max_height = si.pos.y + si.size.height;
+				}
+			}
+
+			if (max_height <= base_size * 2) {
+				atlas_height = max_height;
+				break; //good ratio, break;
+			}
+
+			base_size *= 2;
+		}
+		// Convert the layout from cells to pixels; the height is rounded up to a power of two.
+		texture_atlas.size.width = base_size * border;
+		texture_atlas.size.height = nearest_power_of_2_templated(atlas_height * border);
+		// Store each texture's placement as a rectangle normalized by the atlas size.
+		for (int i = 0; i < item_count; i++) {
+			TextureAtlas::Texture *t = texture_atlas.textures.getptr(items[i].texture);
+			t->uv_rect.position = items[i].pos * border + Vector2i(border / 2, border / 2); // Offset by the margin.
+			t->uv_rect.size = items[i].pixel_size;
+
+			t->uv_rect.position /= Size2(texture_atlas.size);
+			t->uv_rect.size /= Size2(texture_atlas.size);
+		}
+	} else {
+		texture_atlas.size.width = 4; // Nothing registered: fall back to a 4x4 atlas.
+		texture_atlas.size.height = 4;
+	}
+
+	{ // Atlas Texture initialize.
+		// TODO validate texture atlas size with maximum texture size
+		glGenTextures(1, &texture_atlas.texture);
+		glActiveTexture(GL_TEXTURE0);
+		glBindTexture(GL_TEXTURE_2D, texture_atlas.texture);
+		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, texture_atlas.size.width, texture_atlas.size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); // Storage only, no initial pixel data.
+
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 1); // NOTE(review): only level 0 is allocated above - confirm that a max level of 1 is intended.
+		// Framebuffer used to render the source textures into the atlas.
+		glGenFramebuffers(1, &texture_atlas.framebuffer);
+		glBindFramebuffer(GL_FRAMEBUFFER, texture_atlas.framebuffer);
+		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture_atlas.texture, 0);
+
+		GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
+		// On failure release both objects and give up; dirty was already cleared above, so no retry happens until the atlas is marked dirty again.
+		if (status != GL_FRAMEBUFFER_COMPLETE) {
+			glDeleteFramebuffers(1, &texture_atlas.framebuffer);
+			texture_atlas.framebuffer = 0;
+			glDeleteTextures(1, &texture_atlas.texture);
+			texture_atlas.texture = 0;
+			WARN_PRINT("Could not create texture atlas, status: " + get_framebuffer_error(status));
+			return;
+		}
+		glViewport(0, 0, texture_atlas.size.width, texture_atlas.size.height);
+		glClearColor(0.0, 0.0, 0.0, 0.0); // Start from fully transparent black.
+		glClear(GL_COLOR_BUFFER_BIT);
+		glBindTexture(GL_TEXTURE_2D, 0);
+	}
+
+	glDisable(GL_BLEND); // Presumably so the copies below overwrite the atlas instead of blending into it - confirm.
+
+	if (texture_atlas.textures.size()) {
+		for (const KeyValue<RID, TextureAtlas::Texture> &E : texture_atlas.textures) {
+			TextureAtlas::Texture *t = texture_atlas.textures.getptr(E.key);
+			Texture *src_tex = get_texture(E.key);
+			glActiveTexture(GL_TEXTURE0);
+			glBindTexture(GL_TEXTURE_2D, src_tex->tex_id);
+			copy_effects->copy_to_rect(t->uv_rect); // Draws the bound source texture into its rectangle; the atlas framebuffer is still bound from the block above.
+		}
+	}
+	glBindFramebuffer(GL_FRAMEBUFFER, 0);
+}
+
 /* DECAL API */
 /* DECAL API */
 
 
 RID TextureStorage::decal_allocate() {
 RID TextureStorage::decal_allocate() {

+ 51 - 0
drivers/gles3/storage/texture_storage.h

@@ -371,6 +371,38 @@ private:
 
 
 	Ref<Image> _get_gl_image_and_format(const Ref<Image> &p_image, Image::Format p_format, Image::Format &r_real_format, GLenum &r_gl_format, GLenum &r_gl_internal_format, GLenum &r_gl_type, bool &r_compressed, bool p_force_decompress) const;
 	Ref<Image> _get_gl_image_and_format(const Ref<Image> &p_image, Image::Format p_format, Image::Format &r_real_format, GLenum &r_gl_format, GLenum &r_gl_internal_format, GLenum &r_gl_type, bool &r_compressed, bool p_force_decompress) const;
 
 
+	/* TEXTURE ATLAS API */
+
+	// State of the shared texture atlas: the registered source textures, the GL
+	// objects backing the atlas and a dirty flag that triggers a rebuild.
+	struct TextureAtlas {
+		// One registered source texture.
+		struct Texture {
+			int users = 0; // Reference count; initialized so a default-constructed entry never holds garbage.
+			Rect2 uv_rect; // Placement inside the atlas, normalized by the atlas size.
+		};
+
+		// Scratch record used while packing textures into the atlas.
+		struct SortItem {
+			RID texture;
+			Size2i pixel_size; // Source texture size in pixels.
+			Size2i size; // Footprint in packing cells.
+			Point2i pos; // Assigned position in packing cells.
+
+			// Orders items from larger to smaller: by height first, then by width.
+			bool operator<(const SortItem &p_item) const {
+				if (size.height != p_item.size.height) {
+					return size.height > p_item.size.height;
+				}
+				return size.width > p_item.size.width;
+			}
+		};
+
+		HashMap<RID, Texture> textures;
+		bool dirty = true; // Set whenever the atlas must be rebuilt.
+
+		GLuint texture = 0;
+		GLuint framebuffer = 0;
+		Size2i size; // Atlas size in pixels.
+	} texture_atlas;
+
 	/* Render Target API */
 	/* Render Target API */
 
 
 	mutable RID_Owner<RenderTarget> render_target_owner;
 	mutable RID_Owner<RenderTarget> render_target_owner;
@@ -473,6 +505,25 @@ public:
 	void texture_bind(RID p_texture, uint32_t p_texture_no);
 	void texture_bind(RID p_texture, uint32_t p_texture_no);
 	RID texture_create_radiance_cubemap(RID p_source, int p_resolution = -1) const;
 	RID texture_create_radiance_cubemap(RID p_source, int p_resolution = -1) const;
 
 
+	/* TEXTURE ATLAS API */
+
+	void update_texture_atlas();
+
+	GLuint texture_atlas_get_texture() const;
+	// Returns the rectangle stored for p_texture in the texture atlas, or an
+	// empty Rect2 when the texture is not registered.
+	_FORCE_INLINE_ Rect2 texture_atlas_get_texture_rect(RID p_texture) {
+		const TextureAtlas::Texture *entry = texture_atlas.textures.getptr(p_texture);
+		return entry ? entry->uv_rect : Rect2();
+	}
+
+	void texture_add_to_texture_atlas(RID p_texture);
+	void texture_remove_from_texture_atlas(RID p_texture);
+	void texture_atlas_mark_dirty_on_texture(RID p_texture);
+	void texture_atlas_remove_texture(RID p_texture);
+
 	/* DECAL API */
 	/* DECAL API */
 
 
 	virtual RID decal_allocate() override;
 	virtual RID decal_allocate() override;

+ 1 - 0
drivers/gles3/storage/utilities.cpp

@@ -302,6 +302,7 @@ void Utilities::update_dirty_resources() {
 	MaterialStorage::get_singleton()->_update_queued_materials();
 	MaterialStorage::get_singleton()->_update_queued_materials();
 	//MeshStorage::get_singleton()->_update_dirty_skeletons();
 	//MeshStorage::get_singleton()->_update_dirty_skeletons();
 	MeshStorage::get_singleton()->_update_dirty_multimeshes();
 	MeshStorage::get_singleton()->_update_dirty_multimeshes();
+	TextureStorage::get_singleton()->update_texture_atlas();
 }
 }
 
 
 void Utilities::set_debug_generate_wireframes(bool p_generate) {
 void Utilities::set_debug_generate_wireframes(bool p_generate) {

+ 9 - 11
servers/rendering/renderer_rd/shaders/canvas.glsl

@@ -598,13 +598,11 @@ void main() {
 		normal = normalize((canvas_data.canvas_normal_transform * vec4(normal, 0.0)).xyz);
 		normal = normalize((canvas_data.canvas_normal_transform * vec4(normal, 0.0)).xyz);
 	}
 	}
 
 
-	vec3 base_color = color.rgb;
+	vec4 base_color = color;
 	if (bool(draw_data.flags & FLAGS_USING_LIGHT_MASK)) {
 	if (bool(draw_data.flags & FLAGS_USING_LIGHT_MASK)) {
 		color = vec4(0.0); //invisible by default due to using light mask
 		color = vec4(0.0); //invisible by default due to using light mask
 	}
 	}
 
 
-	vec4 original_color = color;
-
 #ifdef MODE_LIGHT_ONLY
 #ifdef MODE_LIGHT_ONLY
 	color = vec4(0.0);
 	color = vec4(0.0);
 #elif !defined(MODE_UNSHADED)
 #elif !defined(MODE_UNSHADED)
@@ -624,12 +622,14 @@ void main() {
 #ifdef LIGHT_CODE_USED
 #ifdef LIGHT_CODE_USED
 
 
 		vec4 shadow_modulate = vec4(1.0);
 		vec4 shadow_modulate = vec4(1.0);
-		light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, true);
+		light_color = light_compute(light_vertex, vec3(direction, light_array.data[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true);
 #else
 #else
 
 
 		if (normal_used) {
 		if (normal_used) {
 			vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array.data[light_base].height));
 			vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array.data[light_base].height));
-			light_color.rgb = light_normal_compute(light_vec, normal, base_color, light_color.rgb, specular_shininess, specular_shininess_used);
+			light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
+		} else {
+			light_color.rgb *= base_color.rgb;
 		}
 		}
 #endif
 #endif
 
 
@@ -646,8 +646,6 @@ void main() {
 			);
 			);
 		}
 		}
 
 
-		light_color.rgb *= original_color.rgb;
-
 		light_blend_compute(light_base, light_color, color.rgb);
 		light_blend_compute(light_base, light_color, color.rgb);
 	}
 	}
 
 
@@ -685,7 +683,7 @@ void main() {
 		vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height);
 		vec3 light_position = vec3(light_array.data[light_base].position, light_array.data[light_base].height);
 
 
 		light_color.rgb *= light_base_color.rgb;
 		light_color.rgb *= light_base_color.rgb;
-		light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, false);
+		light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false);
 #else
 #else
 
 
 		light_color.rgb *= light_base_color.rgb * light_base_color.a;
 		light_color.rgb *= light_base_color.rgb * light_base_color.a;
@@ -695,7 +693,9 @@ void main() {
 			vec3 pos = light_vertex;
 			vec3 pos = light_vertex;
 			vec3 light_vec = normalize(light_pos - pos);
 			vec3 light_vec = normalize(light_pos - pos);
 
 
-			light_color.rgb = light_normal_compute(light_vec, normal, base_color, light_color.rgb, specular_shininess, specular_shininess_used);
+			light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
+		} else {
+			light_color.rgb *= base_color.rgb;
 		}
 		}
 #endif
 #endif
 		if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) {
 		if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) {
@@ -743,8 +743,6 @@ void main() {
 			);
 			);
 		}
 		}
 
 
-		light_color.rgb *= original_color.rgb;
-
 		light_blend_compute(light_base, light_color, color.rgb);
 		light_blend_compute(light_base, light_color, color.rgb);
 	}
 	}
 #endif
 #endif