Browse Source

Use best fit normals for storing screen space normals

clayjohn 1 year ago
parent
commit
43cf21cb71

+ 1 - 0
drivers/gles3/rasterizer_scene_gles3.h

@@ -328,6 +328,7 @@ private:
 	};
 	};
 
 
 	enum {
 	enum {
+		INSTANCE_DATA_FLAGS_DYNAMIC = 1 << 3,
 		INSTANCE_DATA_FLAGS_NON_UNIFORM_SCALE = 1 << 4,
 		INSTANCE_DATA_FLAGS_NON_UNIFORM_SCALE = 1 << 4,
 		INSTANCE_DATA_FLAG_USE_GI_BUFFERS = 1 << 5,
 		INSTANCE_DATA_FLAG_USE_GI_BUFFERS = 1 << 5,
 		INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE = 1 << 7,
 		INSTANCE_DATA_FLAG_USE_LIGHTMAP_CAPTURE = 1 << 7,

+ 5 - 1
servers/rendering/renderer_rd/effects/copy_effects.cpp

@@ -533,7 +533,7 @@ void CopyEffects::copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuff
 	RD::get_singleton()->draw_list_draw(draw_list, true);
 	RD::get_singleton()->draw_list_draw(draw_list, true);
 }
 }
 
 
-void CopyEffects::copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y, bool p_force_luminance, bool p_alpha_to_zero, bool p_srgb, RID p_secondary, bool p_multiview, bool p_alpha_to_one, bool p_linear) {
+void CopyEffects::copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y, bool p_force_luminance, bool p_alpha_to_zero, bool p_srgb, RID p_secondary, bool p_multiview, bool p_alpha_to_one, bool p_linear, bool p_normal) {
 	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
 	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
 	ERR_FAIL_NULL(uniform_set_cache);
 	ERR_FAIL_NULL(uniform_set_cache);
 	MaterialStorage *material_storage = MaterialStorage::get_singleton();
 	MaterialStorage *material_storage = MaterialStorage::get_singleton();
@@ -564,6 +564,10 @@ void CopyEffects::copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffe
 		copy_to_fb.push_constant.luminance_multiplier = prefer_raster_effects ? 2.0 : 1.0;
 		copy_to_fb.push_constant.luminance_multiplier = prefer_raster_effects ? 2.0 : 1.0;
 	}
 	}
 
 
+	if (p_normal) {
+		copy_to_fb.push_constant.flags |= COPY_TO_FB_FLAG_NORMAL;
+	}
+
 	// setup our uniforms
 	// setup our uniforms
 	RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED);
 	RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED);
 
 

+ 2 - 1
servers/rendering/renderer_rd/effects/copy_effects.h

@@ -190,6 +190,7 @@ private:
 		COPY_TO_FB_FLAG_SRGB = (1 << 4),
 		COPY_TO_FB_FLAG_SRGB = (1 << 4),
 		COPY_TO_FB_FLAG_ALPHA_TO_ONE = (1 << 5),
 		COPY_TO_FB_FLAG_ALPHA_TO_ONE = (1 << 5),
 		COPY_TO_FB_FLAG_LINEAR = (1 << 6),
 		COPY_TO_FB_FLAG_LINEAR = (1 << 6),
+		COPY_TO_FB_FLAG_NORMAL = (1 << 7),
 	};
 	};
 
 
 	struct CopyToFbPushConstant {
 	struct CopyToFbPushConstant {
@@ -328,7 +329,7 @@ public:
 	void copy_cubemap_to_panorama(RID p_source_cube, RID p_dest_panorama, const Size2i &p_panorama_size, float p_lod, bool p_is_array);
 	void copy_cubemap_to_panorama(RID p_source_cube, RID p_dest_panorama, const Size2i &p_panorama_size, float p_lod, bool p_is_array);
 	void copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false);
 	void copy_depth_to_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false);
 	void copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far);
 	void copy_depth_to_rect_and_linearize(RID p_source_rd_texture, RID p_dest_texture, const Rect2i &p_rect, bool p_flip_y, float p_z_near, float p_z_far);
-	void copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false, bool p_force_luminance = false, bool p_alpha_to_zero = false, bool p_srgb = false, RID p_secondary = RID(), bool p_multiview = false, bool alpha_to_one = false, bool p_linear = false);
+	void copy_to_fb_rect(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2i &p_rect, bool p_flip_y = false, bool p_force_luminance = false, bool p_alpha_to_zero = false, bool p_srgb = false, RID p_secondary = RID(), bool p_multiview = false, bool alpha_to_one = false, bool p_linear = false, bool p_normal = false);
 	void copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y = false, bool p_panorama = false);
 	void copy_to_atlas_fb(RID p_source_rd_texture, RID p_dest_framebuffer, const Rect2 &p_uv_rect, RD::DrawListID p_draw_list, bool p_flip_y = false, bool p_panorama = false);
 	void copy_to_drawlist(RD::DrawListID p_draw_list, RD::FramebufferFormatID p_fb_format, RID p_source_rd_texture, bool p_linear = false);
 	void copy_to_drawlist(RD::DrawListID p_draw_list, RD::FramebufferFormatID p_fb_format, RID p_source_rd_texture, bool p_linear = false);
 	void copy_raster(RID p_source_texture, RID p_dest_framebuffer);
 	void copy_raster(RID p_source_texture, RID p_dest_framebuffer);

+ 56 - 5
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp

@@ -961,13 +961,17 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con
 					inst->gi_offset_cache = 0xFFFFFFFF;
 					inst->gi_offset_cache = 0xFFFFFFFF;
 				}
 				}
 			}
 			}
-
-			if (p_pass_mode == PASS_MODE_COLOR && p_using_motion_pass) {
+			if (p_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS || p_pass_mode == PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI || p_pass_mode == PASS_MODE_COLOR) {
 				bool transform_changed = inst->prev_transform_change_frame == frame;
 				bool transform_changed = inst->prev_transform_change_frame == frame;
 				bool has_mesh_instance = inst->mesh_instance.is_valid();
 				bool has_mesh_instance = inst->mesh_instance.is_valid();
 				bool uses_particles = inst->base_flags & INSTANCE_DATA_FLAG_PARTICLES;
 				bool uses_particles = inst->base_flags & INSTANCE_DATA_FLAG_PARTICLES;
 				bool is_multimesh_with_motion = !uses_particles && (inst->base_flags & INSTANCE_DATA_FLAG_MULTIMESH) && mesh_storage->_multimesh_uses_motion_vectors_offsets(inst->data->base);
 				bool is_multimesh_with_motion = !uses_particles && (inst->base_flags & INSTANCE_DATA_FLAG_MULTIMESH) && mesh_storage->_multimesh_uses_motion_vectors_offsets(inst->data->base);
-				uses_motion = transform_changed || has_mesh_instance || uses_particles || is_multimesh_with_motion;
+				bool is_dynamic = transform_changed || has_mesh_instance || uses_particles || is_multimesh_with_motion;
+				if (p_pass_mode == PASS_MODE_COLOR && p_using_motion_pass) {
+					uses_motion = is_dynamic;
+				} else if (is_dynamic) {
+					flags |= INSTANCE_DATA_FLAGS_DYNAMIC;
+				}
 			}
 			}
 		}
 		}
 		inst->flags_cache = flags;
 		inst->flags_cache = flags;
@@ -1779,11 +1783,11 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 			} break;
 			} break;
 			case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: {
 			case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: {
 				depth_framebuffer = rb_data->get_depth_fb(RenderBufferDataForwardClustered::DEPTH_FB_ROUGHNESS);
 				depth_framebuffer = rb_data->get_depth_fb(RenderBufferDataForwardClustered::DEPTH_FB_ROUGHNESS);
-				depth_pass_clear.push_back(Color(0.5, 0.5, 0.5, 0));
+				depth_pass_clear.push_back(Color(0, 0, 0, 0));
 			} break;
 			} break;
 			case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI: {
 			case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI: {
 				depth_framebuffer = rb_data->get_depth_fb(RenderBufferDataForwardClustered::DEPTH_FB_ROUGHNESS_VOXELGI);
 				depth_framebuffer = rb_data->get_depth_fb(RenderBufferDataForwardClustered::DEPTH_FB_ROUGHNESS_VOXELGI);
-				depth_pass_clear.push_back(Color(0.5, 0.5, 0.5, 0));
+				depth_pass_clear.push_back(Color(0, 0, 0, 0));
 				depth_pass_clear.push_back(Color(0, 0, 0, 0));
 				depth_pass_clear.push_back(Color(0, 0, 0, 0));
 			} break;
 			} break;
 			default: {
 			default: {
@@ -2980,6 +2984,14 @@ void RenderForwardClustered::_update_render_base_uniform_set() {
 			uniforms.push_back(u);
 			uniforms.push_back(u);
 		}
 		}
 
 
+		{
+			RD::Uniform u;
+			u.binding = 15;
+			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+			u.append_id(best_fit_normal.texture);
+			uniforms.push_back(u);
+		}
+
 		render_base_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, scene_shader.default_shader_rd, SCENE_UNIFORM_SET);
 		render_base_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, scene_shader.default_shader_rd, SCENE_UNIFORM_SET);
 	}
 	}
 }
 }
@@ -4230,6 +4242,44 @@ RenderForwardClustered::RenderForwardClustered() {
 		shadow_sampler = RD::get_singleton()->sampler_create(sampler);
 		shadow_sampler = RD::get_singleton()->sampler_create(sampler);
 	}
 	}
 
 
+	{
+		Vector<String> modes;
+		modes.push_back("\n");
+		best_fit_normal.shader.initialize(modes);
+		best_fit_normal.shader_version = best_fit_normal.shader.version_create();
+		best_fit_normal.pipeline = RD::get_singleton()->compute_pipeline_create(best_fit_normal.shader.version_get_shader(best_fit_normal.shader_version, 0));
+
+		RD::TextureFormat tformat;
+		tformat.format = RD::DATA_FORMAT_R8_UNORM;
+		tformat.width = 1024;
+		tformat.height = 1024;
+		tformat.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT;
+		tformat.texture_type = RD::TEXTURE_TYPE_2D;
+		best_fit_normal.texture = RD::get_singleton()->texture_create(tformat, RD::TextureView());
+
+		RID shader = best_fit_normal.shader.version_get_shader(best_fit_normal.shader_version, 0);
+		ERR_FAIL_COND(shader.is_null());
+
+		Vector<RD::Uniform> uniforms;
+
+		{
+			RD::Uniform u;
+			u.binding = 0;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.append_id(best_fit_normal.texture);
+			uniforms.push_back(u);
+		}
+		RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shader, 0);
+
+		RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
+		RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, best_fit_normal.pipeline);
+		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
+		RD::get_singleton()->compute_list_dispatch_threads(compute_list, tformat.width, tformat.height, 1);
+		RD::get_singleton()->compute_list_end();
+
+		best_fit_normal.shader.version_free(best_fit_normal.shader_version);
+	}
+
 	render_list_thread_threshold = GLOBAL_GET("rendering/limits/forward_renderer/threaded_render_minimum_instances");
 	render_list_thread_threshold = GLOBAL_GET("rendering/limits/forward_renderer/threaded_render_minimum_instances");
 
 
 	_update_shader_quality_settings();
 	_update_shader_quality_settings();
@@ -4263,6 +4313,7 @@ RenderForwardClustered::~RenderForwardClustered() {
 
 
 	RD::get_singleton()->free(shadow_sampler);
 	RD::get_singleton()->free(shadow_sampler);
 	RSG::light_storage->directional_shadow_atlas_set_size(0);
 	RSG::light_storage->directional_shadow_atlas_set_size(0);
+	RD::get_singleton()->free(best_fit_normal.texture);
 
 
 	{
 	{
 		for (const RID &rid : scene_state.uniform_buffers) {
 		for (const RID &rid : scene_state.uniform_buffers) {

+ 9 - 0
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h

@@ -40,6 +40,7 @@
 #include "servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h"
 #include "servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h"
 #include "servers/rendering/renderer_rd/pipeline_cache_rd.h"
 #include "servers/rendering/renderer_rd/pipeline_cache_rd.h"
 #include "servers/rendering/renderer_rd/renderer_scene_render_rd.h"
 #include "servers/rendering/renderer_rd/renderer_scene_render_rd.h"
+#include "servers/rendering/renderer_rd/shaders/forward_clustered/best_fit_normal.glsl.gen.h"
 #include "servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl.gen.h"
 #include "servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl.gen.h"
 #include "servers/rendering/renderer_rd/storage_rd/utilities.h"
 #include "servers/rendering/renderer_rd/storage_rd/utilities.h"
 
 
@@ -164,6 +165,13 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 	RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture, const RendererRD::MaterialStorage::Samplers &p_samplers);
 	RID _setup_sdfgi_render_pass_uniform_set(RID p_albedo_texture, RID p_emission_texture, RID p_emission_aniso_texture, RID p_geom_facing_texture, const RendererRD::MaterialStorage::Samplers &p_samplers);
 	RID _setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas = false, int p_index = 0);
 	RID _setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas = false, int p_index = 0);
 
 
+	struct BestFitNormal {
+		BestFitNormalShaderRD shader;
+		RID shader_version;
+		RID pipeline;
+		RID texture;
+	} best_fit_normal;
+
 	enum PassMode {
 	enum PassMode {
 		PASS_MODE_COLOR,
 		PASS_MODE_COLOR,
 		PASS_MODE_SHADOW,
 		PASS_MODE_SHADOW,
@@ -236,6 +244,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
 
 	// When changing any of these enums, remember to change the corresponding enums in the shader files as well.
 	// When changing any of these enums, remember to change the corresponding enums in the shader files as well.
 	enum {
 	enum {
+		INSTANCE_DATA_FLAGS_DYNAMIC = 1 << 3,
 		INSTANCE_DATA_FLAGS_NON_UNIFORM_SCALE = 1 << 4,
 		INSTANCE_DATA_FLAGS_NON_UNIFORM_SCALE = 1 << 4,
 		INSTANCE_DATA_FLAG_USE_GI_BUFFERS = 1 << 5,
 		INSTANCE_DATA_FLAG_USE_GI_BUFFERS = 1 << 5,
 		INSTANCE_DATA_FLAG_USE_SDFGI = 1 << 6,
 		INSTANCE_DATA_FLAG_USE_SDFGI = 1 << 6,

+ 1 - 0
servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h

@@ -386,6 +386,7 @@ protected:
 
 
 	// When changing any of these enums, remember to change the corresponding enums in the shader files as well.
 	// When changing any of these enums, remember to change the corresponding enums in the shader files as well.
 	enum {
 	enum {
+		INSTANCE_DATA_FLAGS_DYNAMIC = 1 << 3,
 		INSTANCE_DATA_FLAGS_NON_UNIFORM_SCALE = 1 << 4,
 		INSTANCE_DATA_FLAGS_NON_UNIFORM_SCALE = 1 << 4,
 		INSTANCE_DATA_FLAG_USE_GI_BUFFERS = 1 << 5,
 		INSTANCE_DATA_FLAG_USE_GI_BUFFERS = 1 << 5,
 		INSTANCE_DATA_FLAG_USE_SDFGI = 1 << 6,
 		INSTANCE_DATA_FLAG_USE_SDFGI = 1 << 6,

+ 1 - 1
servers/rendering/renderer_rd/renderer_scene_render_rd.cpp

@@ -773,7 +773,7 @@ void RendererSceneRenderRD::_render_buffers_debug_draw(const RenderDataRD *p_ren
 
 
 	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_NORMAL_BUFFER && _render_buffers_get_normal_texture(rb).is_valid()) {
 	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_NORMAL_BUFFER && _render_buffers_get_normal_texture(rb).is_valid()) {
 		Size2 rtsize = texture_storage->render_target_get_size(render_target);
 		Size2 rtsize = texture_storage->render_target_get_size(render_target);
-		copy_effects->copy_to_fb_rect(_render_buffers_get_normal_texture(rb), texture_storage->render_target_get_rd_framebuffer(render_target), Rect2(Vector2(), rtsize), false, false);
+		copy_effects->copy_to_fb_rect(_render_buffers_get_normal_texture(rb), texture_storage->render_target_get_rd_framebuffer(render_target), Rect2(Vector2(), rtsize), false, false, false, false, RID(), false, false, false, true);
 	}
 	}
 
 
 	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_OCCLUDERS) {
 	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_OCCLUDERS) {

+ 5 - 0
servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl

@@ -20,6 +20,7 @@
 #define FLAG_SRGB (1 << 4)
 #define FLAG_SRGB (1 << 4)
 #define FLAG_ALPHA_TO_ONE (1 << 5)
 #define FLAG_ALPHA_TO_ONE (1 << 5)
 #define FLAG_LINEAR (1 << 6)
 #define FLAG_LINEAR (1 << 6)
+#define FLAG_NORMAL (1 << 7)
 
 
 #ifdef MULTIVIEW
 #ifdef MULTIVIEW
 layout(location = 0) out vec3 uv_interp;
 layout(location = 0) out vec3 uv_interp;
@@ -77,6 +78,7 @@ void main() {
 #define FLAG_SRGB (1 << 4)
 #define FLAG_SRGB (1 << 4)
 #define FLAG_ALPHA_TO_ONE (1 << 5)
 #define FLAG_ALPHA_TO_ONE (1 << 5)
 #define FLAG_LINEAR (1 << 6)
 #define FLAG_LINEAR (1 << 6)
+#define FLAG_NORMAL (1 << 7)
 
 
 layout(push_constant, std430) uniform Params {
 layout(push_constant, std430) uniform Params {
 	vec4 section;
 	vec4 section;
@@ -192,6 +194,9 @@ void main() {
 	if (bool(params.flags & FLAG_LINEAR)) {
 	if (bool(params.flags & FLAG_LINEAR)) {
 		color.rgb = srgb_to_linear(color.rgb);
 		color.rgb = srgb_to_linear(color.rgb);
 	}
 	}
+	if (bool(params.flags & FLAG_NORMAL)) {
+		color.rgb = normalize(color.rgb * 2.0 - 1.0) * 0.5 + 0.5;
+	}
 
 
 	frag_color = color / params.luminance_multiplier;
 	frag_color = color / params.luminance_multiplier;
 #endif // MODE_SET_COLOR
 #endif // MODE_SET_COLOR

+ 5 - 1
servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl

@@ -65,8 +65,12 @@ void main() {
 	vec3 vertex = reconstructCSPosition(uv * vec2(params.screen_size), base_depth);
 	vec3 vertex = reconstructCSPosition(uv * vec2(params.screen_size), base_depth);
 
 
 	vec4 normal_roughness = imageLoad(source_normal_roughness, ssC);
 	vec4 normal_roughness = imageLoad(source_normal_roughness, ssC);
-	vec3 normal = normal_roughness.xyz * 2.0 - 1.0;
+	vec3 normal = normalize(normal_roughness.xyz * 2.0 - 1.0);
 	float roughness = normal_roughness.w;
 	float roughness = normal_roughness.w;
+	if (roughness > 0.5) {
+		roughness = 1.0 - roughness;
+	}
+	roughness /= (127.0 / 255.0);
 
 
 	// The roughness cutoff of 0.6 is chosen to match the roughness fadeout from GH-69828.
 	// The roughness cutoff of 0.6 is chosen to match the roughness fadeout from GH-69828.
 	if (roughness > 0.6) {
 	if (roughness > 0.6) {

+ 8 - 2
servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl

@@ -59,8 +59,13 @@ void main() {
 			color += texelFetch(source_ssr, ofs, 0);
 			color += texelFetch(source_ssr, ofs, 0);
 			float d = texelFetch(source_depth, ofs, 0).r;
 			float d = texelFetch(source_depth, ofs, 0).r;
 			vec4 nr = texelFetch(source_normal, ofs, 0);
 			vec4 nr = texelFetch(source_normal, ofs, 0);
-			normal.xyz += nr.xyz * 2.0 - 1.0;
-			normal.w += nr.w;
+			normal.xyz += normalize(nr.xyz * 2.0 - 1.0);
+			float roughness = normal.w;
+			if (roughness > 0.5) {
+				roughness = 1.0 - roughness;
+			}
+			roughness /= (127.0 / 255.0);
+			normal.w += roughness;
 
 
 			if (sc_multiview) {
 			if (sc_multiview) {
 				// we're doing a full unproject so we need the value as is.
 				// we're doing a full unproject so we need the value as is.
@@ -81,6 +86,7 @@ void main() {
 		depth /= 4.0;
 		depth /= 4.0;
 		normal.xyz = normalize(normal.xyz / 4.0) * 0.5 + 0.5;
 		normal.xyz = normalize(normal.xyz / 4.0) * 0.5 + 0.5;
 		normal.w /= 4.0;
 		normal.w /= 4.0;
+		normal.w = normal.w * (127.0 / 255.0);
 	} else {
 	} else {
 		ivec2 ofs = ssC << 1;
 		ivec2 ofs = ssC << 1;
 
 

+ 6 - 11
servers/rendering/renderer_rd/shaders/effects/ssao.glsl

@@ -158,21 +158,16 @@ vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p
 	return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0);
 	return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0);
 }
 }
 
 
-vec3 decode_normal(vec3 p_encoded_normal) {
-	vec3 normal = p_encoded_normal * 2.0 - 1.0;
-	return normal;
-}
-
 vec3 load_normal(ivec2 p_pos) {
 vec3 load_normal(ivec2 p_pos) {
-	vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz;
-	encoded_normal.z = 1.0 - encoded_normal.z;
-	return decode_normal(encoded_normal);
+	vec3 encoded_normal = normalize(imageLoad(source_normal, p_pos).xyz * 2.0 - 1.0);
+	encoded_normal.z = -encoded_normal.z;
+	return encoded_normal;
 }
 }
 
 
 vec3 load_normal(ivec2 p_pos, ivec2 p_offset) {
 vec3 load_normal(ivec2 p_pos, ivec2 p_offset) {
-	vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz;
-	encoded_normal.z = 1.0 - encoded_normal.z;
-	return decode_normal(encoded_normal);
+	vec3 encoded_normal = normalize(imageLoad(source_normal, p_pos + p_offset).xyz * 2.0 - 1.0);
+	encoded_normal.z = -encoded_normal.z;
+	return encoded_normal;
 }
 }
 
 
 // all vectors in viewspace
 // all vectors in viewspace

+ 6 - 11
servers/rendering/renderer_rd/shaders/effects/ssil.glsl

@@ -159,21 +159,16 @@ vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p
 	return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0);
 	return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0);
 }
 }
 
 
-vec3 decode_normal(vec3 p_encoded_normal) {
-	vec3 normal = p_encoded_normal * 2.0 - 1.0;
-	return normal;
-}
-
 vec3 load_normal(ivec2 p_pos) {
 vec3 load_normal(ivec2 p_pos) {
-	vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz;
-	encoded_normal.z = 1.0 - encoded_normal.z;
-	return decode_normal(encoded_normal);
+	vec3 encoded_normal = normalize(imageLoad(source_normal, p_pos).xyz * 2.0 - 1.0);
+	encoded_normal.z = -encoded_normal.z;
+	return encoded_normal;
 }
 }
 
 
 vec3 load_normal(ivec2 p_pos, ivec2 p_offset) {
 vec3 load_normal(ivec2 p_pos, ivec2 p_offset) {
-	vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz;
-	encoded_normal.z = 1.0 - encoded_normal.z;
-	return decode_normal(encoded_normal);
+	vec3 encoded_normal = normalize(imageLoad(source_normal, p_pos + p_offset).xyz * 2.0 - 1.0);
+	encoded_normal.z = -encoded_normal.z;
+	return encoded_normal;
 }
 }
 
 
 // all vectors in viewspace
 // all vectors in viewspace

+ 5 - 0
servers/rendering/renderer_rd/shaders/environment/gi.glsl

@@ -618,6 +618,11 @@ void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 ref
 	if (normal.length() > 0.5) {
 	if (normal.length() > 0.5) {
 		//valid normal, can do GI
 		//valid normal, can do GI
 		float roughness = normal_roughness.w;
 		float roughness = normal_roughness.w;
+		bool dynamic_object = roughness > 0.5;
+		if (dynamic_object) {
+			roughness = 1.0 - roughness;
+		}
+		roughness /= (127.0 / 255.0);
 		vec3 view = -normalize(mat3(scene_data.cam_transform) * (vertex - scene_data.eye_offset[gl_GlobalInvocationID.z].xyz));
 		vec3 view = -normalize(mat3(scene_data.cam_transform) * (vertex - scene_data.eye_offset[gl_GlobalInvocationID.z].xyz));
 		vertex = mat3(scene_data.cam_transform) * vertex;
 		vertex = mat3(scene_data.cam_transform) * vertex;
 		normal = normalize(mat3(scene_data.cam_transform) * normal);
 		normal = normalize(mat3(scene_data.cam_transform) * normal);

+ 1 - 1
servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl

@@ -492,7 +492,7 @@ void main() {
 
 
 		ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(z);
 		ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(z);
 
 
-		vec3 normal = imageLoad(source_normal, uv_xy).xyz * 2.0 - 1.0;
+		vec3 normal = normalize(imageLoad(source_normal, uv_xy).xyz * 2.0 - 1.0);
 		normal = vec3(params.x_dir) * normal.x * mix(1.0, -1.0, params.flip_x) + vec3(params.y_dir) * normal.y * mix(1.0, -1.0, params.flip_y) - vec3(params.z_dir) * normal.z;
 		normal = vec3(params.x_dir) * normal.x * mix(1.0, -1.0, params.flip_x) + vec3(params.y_dir) * normal.y * mix(1.0, -1.0, params.flip_y) - vec3(params.z_dir) * normal.z;
 
 
 		vec4 albedo = imageLoad(source_albedo, uv_xy);
 		vec4 albedo = imageLoad(source_albedo, uv_xy);

+ 43 - 0
servers/rendering/renderer_rd/shaders/forward_clustered/best_fit_normal.glsl

@@ -0,0 +1,43 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(r8, set = 0, binding = 0) uniform restrict writeonly image2D current_image;
+
+// This shader is used to generate a "best fit normal texture" as described by:
+// https://advances.realtimerendering.com/s2010/Kaplanyan-CryEngine3(SIGGRAPH%202010%20Advanced%20RealTime%20Rendering%20Course).pdf
+// This texture tells you what length of normal can be used to store a unit vector
+// with the least amount of error.
+
+vec3 quantize(vec3 c) {
+	return round(clamp(c * 0.5 + 0.5, 0.0, 1.0) * 255.0) * (1.0 / 255.0) * 2.0 - 1.0;
+}
+
+float find_minimum_error(vec3 normal) {
+	float min_error = 100000.0;
+	float t_best = 0.0;
+	for (float nstep = 1.5; nstep < 127.5; ++nstep) {
+		float t = nstep / 127.5;
+		vec3 vp = normal * t;
+		vec3 quantizedp = quantize(vp);
+		vec3 vdiff = (quantizedp - vp) / t;
+		float error = max(abs(vdiff.x), max(abs(vdiff.y), abs(vdiff.z)));
+		if (error < min_error) {
+			min_error = error;
+			t_best = t;
+		}
+	}
+	return t_best;
+}
+
+void main() {
+	vec2 uv = vec2(gl_GlobalInvocationID.xy) * vec2(1.0 / 1024.0) + vec2(0.5 / 1024.0);
+	uv.y *= uv.x;
+
+	vec3 dir = vec3(uv.x, uv.y, 1.0);
+	imageStore(current_image, ivec2(gl_GlobalInvocationID.xy), vec4(find_minimum_error(dir), 1.0, 1.0, 1.0));
+}

+ 36 - 6
servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl

@@ -868,6 +868,28 @@ uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) {
 
 
 #endif //!MODE_RENDER DEPTH
 #endif //!MODE_RENDER DEPTH
 
 
+#if defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_MATERIAL)
+// https://advances.realtimerendering.com/s2010/Kaplanyan-CryEngine3(SIGGRAPH%202010%20Advanced%20RealTime%20Rendering%20Course).pdf
+vec3 encode24(vec3 v) {
+	// Unsigned normal (handles most symmetry)
+	vec3 vNormalUns = abs(v);
+	// Get the major axis for our collapsed cubemap lookup
+	float maxNAbs = max(vNormalUns.z, max(vNormalUns.x, vNormalUns.y));
+	// Get the collapsed cubemap texture coordinates
+	vec2 vTexCoord = vNormalUns.z < maxNAbs ? (vNormalUns.y < maxNAbs ? vNormalUns.yz : vNormalUns.xz) : vNormalUns.xy;
+	vTexCoord /= maxNAbs;
+	vTexCoord = vTexCoord.x < vTexCoord.y ? vTexCoord.yx : vTexCoord.xy;
+	// Stretch:
+	vTexCoord.y /= vTexCoord.x;
+	float fFittingScale = texture(sampler2D(best_fit_normal_texture, SAMPLER_NEAREST_CLAMP), vTexCoord).r;
+	// Make vector touch unit cube
+	vec3 result = v / maxNAbs;
+	// scale the normal to get the best fit
+	result *= fFittingScale;
+	return result;
+}
+#endif // MODE_RENDER_NORMAL_ROUGHNESS
+
 void fragment_shader(in SceneData scene_data) {
 void fragment_shader(in SceneData scene_data) {
 	uint instance_index = instance_index_interp;
 	uint instance_index = instance_index_interp;
 
 
@@ -1519,18 +1541,18 @@ void fragment_shader(in SceneData scene_data) {
 			vec2 base_coord = screen_uv;
 			vec2 base_coord = screen_uv;
 			vec2 closest_coord = base_coord;
 			vec2 closest_coord = base_coord;
 #ifdef USE_MULTIVIEW
 #ifdef USE_MULTIVIEW
-			float closest_ang = dot(normal, textureLod(sampler2DArray(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), vec3(base_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0);
+			float closest_ang = dot(normal, normalize(textureLod(sampler2DArray(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), vec3(base_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0));
 #else // USE_MULTIVIEW
 #else // USE_MULTIVIEW
-			float closest_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), base_coord, 0.0).xyz * 2.0 - 1.0);
+			float closest_ang = dot(normal, normalize(textureLod(sampler2D(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), base_coord, 0.0).xyz * 2.0 - 1.0));
 #endif // USE_MULTIVIEW
 #endif // USE_MULTIVIEW
 
 
 			for (int i = 0; i < 4; i++) {
 			for (int i = 0; i < 4; i++) {
 				const vec2 neighbors[4] = vec2[](vec2(-1, 0), vec2(1, 0), vec2(0, -1), vec2(0, 1));
 				const vec2 neighbors[4] = vec2[](vec2(-1, 0), vec2(1, 0), vec2(0, -1), vec2(0, 1));
 				vec2 neighbour_coord = base_coord + neighbors[i] * scene_data.screen_pixel_size;
 				vec2 neighbour_coord = base_coord + neighbors[i] * scene_data.screen_pixel_size;
 #ifdef USE_MULTIVIEW
 #ifdef USE_MULTIVIEW
-				float neighbour_ang = dot(normal, textureLod(sampler2DArray(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), vec3(neighbour_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0);
+				float neighbour_ang = dot(normal, normalize(textureLod(sampler2DArray(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), vec3(neighbour_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0));
 #else // USE_MULTIVIEW
 #else // USE_MULTIVIEW
-				float neighbour_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), neighbour_coord, 0.0).xyz * 2.0 - 1.0);
+				float neighbour_ang = dot(normal, normalize(textureLod(sampler2D(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), neighbour_coord, 0.0).xyz * 2.0 - 1.0));
 #endif // USE_MULTIVIEW
 #endif // USE_MULTIVIEW
 				if (neighbour_ang > closest_ang) {
 				if (neighbour_ang > closest_ang) {
 					closest_ang = neighbour_ang;
 					closest_ang = neighbour_ang;
@@ -2302,7 +2324,7 @@ void fragment_shader(in SceneData scene_data) {
 	albedo_output_buffer.rgb = albedo;
 	albedo_output_buffer.rgb = albedo;
 	albedo_output_buffer.a = alpha;
 	albedo_output_buffer.a = alpha;
 
 
-	normal_output_buffer.rgb = normal * 0.5 + 0.5;
+	normal_output_buffer.rgb = encode24(normal) * 0.5 + 0.5;
 	normal_output_buffer.a = 0.0;
 	normal_output_buffer.a = 0.0;
 	depth_output_buffer.r = -vertex.z;
 	depth_output_buffer.r = -vertex.z;
 
 
@@ -2316,7 +2338,15 @@ void fragment_shader(in SceneData scene_data) {
 #endif
 #endif
 
 
 #ifdef MODE_RENDER_NORMAL_ROUGHNESS
 #ifdef MODE_RENDER_NORMAL_ROUGHNESS
-	normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness);
+	normal_roughness_output_buffer = vec4(encode24(normal) * 0.5 + 0.5, roughness);
+
+	// We encode the dynamic/static state into roughness.
+	// Values over 0.5 are dynamic, under 0.5 are static.
+	normal_roughness_output_buffer.w = normal_roughness_output_buffer.w * (127.0 / 255.0);
+	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_DYNAMIC)) {
+		normal_roughness_output_buffer.w = 1.0 - normal_roughness_output_buffer.w;
+	}
+	normal_roughness_output_buffer.w = normal_roughness_output_buffer.w;
 
 
 #ifdef MODE_RENDER_VOXEL_GI
 #ifdef MODE_RENDER_VOXEL_GI
 	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_VOXEL_GI)) { // process voxel_gi_instances
 	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_VOXEL_GI)) { // process voxel_gi_instances

+ 12 - 0
servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl

@@ -48,6 +48,7 @@ draw_call;
 
 
 layout(set = 0, binding = 2) uniform sampler shadow_sampler;
 layout(set = 0, binding = 2) uniform sampler shadow_sampler;
 
 
+#define INSTANCE_FLAGS_DYNAMIC (1 << 3)
 #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 4)
 #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 4)
 #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 5)
 #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 5)
 #define INSTANCE_FLAGS_USE_SDFGI (1 << 6)
 #define INSTANCE_FLAGS_USE_SDFGI (1 << 6)
@@ -163,6 +164,8 @@ sdfgi;
 
 
 layout(set = 0, binding = 14) uniform sampler DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP;
 layout(set = 0, binding = 14) uniform sampler DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP;
 
 
+layout(set = 0, binding = 15) uniform texture2D best_fit_normal_texture;
+
 /* Set 1: Render Pass (changes per render pass) */
 /* Set 1: Render Pass (changes per render pass) */
 
 
 layout(set = 1, binding = 0, std140) uniform SceneDataBlock {
 layout(set = 1, binding = 0, std140) uniform SceneDataBlock {
@@ -328,6 +331,15 @@ layout(set = 1, binding = 34) uniform texture2D ssil_buffer;
 
 
 #endif
 #endif
 
 
+vec4 normal_roughness_compatibility(vec4 p_normal_roughness) {
+	float roughness = p_normal_roughness.w;
+	if (roughness > 0.5) {
+		roughness = 1.0 - roughness;
+	}
+	roughness /= (127.0 / 255.0);
+	return vec4(normalize(p_normal_roughness.xyz * 2.0 - 1.0) * 0.5 + 0.5, roughness);
+}
+
 /* Set 2 Skeleton & Instancing (can change per item) */
 /* Set 2 Skeleton & Instancing (can change per item) */
 
 
 layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms {
 layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms {

+ 1 - 0
servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl

@@ -29,6 +29,7 @@ draw_call;
 
 
 layout(set = 0, binding = 2) uniform sampler shadow_sampler;
 layout(set = 0, binding = 2) uniform sampler shadow_sampler;
 
 
+#define INSTANCE_FLAGS_DYNAMIC (1 << 3)
 #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 4)
 #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 4)
 #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 5)
 #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 5)
 #define INSTANCE_FLAGS_USE_SDFGI (1 << 6)
 #define INSTANCE_FLAGS_USE_SDFGI (1 << 6)

+ 4 - 1
servers/rendering/shader_compiler.cpp

@@ -1191,6 +1191,7 @@ String ShaderCompiler::_dump_node_code(const SL::Node *p_node, int p_level, Gene
 					// we will add logic to automatically switch between
 					// we will add logic to automatically switch between
 					// sampler2D and sampler2D array and vec2 UV and vec3 UV.
 					// sampler2D and sampler2D array and vec2 UV and vec3 UV.
 					bool multiview_uv_needed = false;
 					bool multiview_uv_needed = false;
+					bool is_normal_roughness_texture = false;
 
 
 					for (int i = 1; i < onode->arguments.size(); i++) {
 					for (int i = 1; i < onode->arguments.size(); i++) {
 						if (i > 1) {
 						if (i > 1) {
@@ -1259,7 +1260,6 @@ String ShaderCompiler::_dump_node_code(const SL::Node *p_node, int p_level, Gene
 								// Need to map from texture to sampler in order to sample when using Vulkan GLSL.
 								// Need to map from texture to sampler in order to sample when using Vulkan GLSL.
 								String sampler_name;
 								String sampler_name;
 								bool is_depth_texture = false;
 								bool is_depth_texture = false;
-								bool is_normal_roughness_texture = false;
 
 
 								if (actions.custom_samplers.has(texture_uniform)) {
 								if (actions.custom_samplers.has(texture_uniform)) {
 									sampler_name = actions.custom_samplers[texture_uniform];
 									sampler_name = actions.custom_samplers[texture_uniform];
@@ -1340,6 +1340,9 @@ String ShaderCompiler::_dump_node_code(const SL::Node *p_node, int p_level, Gene
 					if (is_screen_texture && !texture_func_returns_data && actions.apply_luminance_multiplier) {
 					if (is_screen_texture && !texture_func_returns_data && actions.apply_luminance_multiplier) {
 						code = "(" + code + " * vec4(vec3(sc_luminance_multiplier), 1.0))";
 						code = "(" + code + " * vec4(vec3(sc_luminance_multiplier), 1.0))";
 					}
 					}
+					if (is_normal_roughness_texture) {
+						code = "normal_roughness_compatibility(" + code + ")";
+					}
 				} break;
 				} break;
 				case SL::OP_INDEX: {
 				case SL::OP_INDEX: {
 					code += _dump_node_code(onode->arguments[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning);
 					code += _dump_node_code(onode->arguments[0], p_level, r_gen_code, p_actions, p_default_actions, p_assigning);