Forráskód Böngészése

Save cluster render shader from being optimized out entirely

Pedro J. Estébanez 2 éve
szülő
commit
6465432570

+ 3 - 0
drivers/vulkan/rendering_device_vulkan.cpp

@@ -9380,6 +9380,9 @@ bool RenderingDeviceVulkan::has_feature(const Features p_feature) const {
 			VulkanContext::VRSCapabilities vrs_capabilities = context->get_vrs_capabilities();
 			return vrs_capabilities.attachment_vrs_supported && context->get_physical_device_features().shaderStorageImageExtendedFormats;
 		} break;
+		case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: {
+			return true;
+		} break;
 		default: {
 			return false;
 		}

+ 17 - 3
servers/rendering/renderer_rd/cluster_builder_rd.cpp

@@ -47,15 +47,29 @@ ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() {
 	}
 
 	{
+		RD::FramebufferFormatID fb_format;
+		RD::PipelineColorBlendState blend_state;
+		String defines;
+		if (RD::get_singleton()->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) {
+			fb_format = RD::get_singleton()->framebuffer_format_create_empty();
+			blend_state = RD::PipelineColorBlendState::create_disabled();
+		} else {
+			Vector<RD::AttachmentFormat> afs;
+			afs.push_back(RD::AttachmentFormat());
+			afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
+			fb_format = RD::get_singleton()->framebuffer_format_create(afs);
+			defines = "\n#define USE_ATTACHMENT\n";
+		}
+
 		Vector<String> versions;
 		versions.push_back("");
-		cluster_render.cluster_render_shader.initialize(versions);
+		cluster_render.cluster_render_shader.initialize(versions, defines);
 		cluster_render.shader_version = cluster_render.cluster_render_shader.version_create();
 		cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0);
-		cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
+		cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0);
 		RD::PipelineMultisampleState ms;
 		ms.sample_count = RD::TEXTURE_SAMPLES_4;
-		cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, RD::get_singleton()->framebuffer_format_create_empty(), vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), RD::PipelineColorBlendState(), 0);
+		cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), ms, RD::PipelineDepthStencilState(), blend_state, 0);
 	}
 	{
 		Vector<String> versions;

+ 14 - 4
servers/rendering/renderer_rd/shaders/cluster_render.glsl

@@ -100,6 +100,10 @@ layout(set = 0, binding = 3, std430) buffer restrict ClusterRender {
 }
 cluster_render;
 
+#ifdef USE_ATTACHMENT
+layout(location = 0) out vec4 frag_color;
+#endif
+
 void main() {
 	//convert from screen to cluster
 	uvec2 cluster = uvec2(gl_FragCoord.xy) >> state.screen_to_clusters_shift;
@@ -113,6 +117,8 @@ void main() {
 	uint usage_write_offset = cluster_offset + (element_index >> 5);
 	uint usage_write_bit = 1 << (element_index & 0x1F);
 
+	uint aux = 0;
+
 #ifdef USE_SUBGROUPS
 
 	uint cluster_thread_group_index;
@@ -138,7 +144,7 @@ void main() {
 		cluster_thread_group_index = subgroupBallotExclusiveBitCount(mask);
 
 		if (cluster_thread_group_index == 0) {
-			atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
+			aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
 		}
 	}
 #else
@@ -147,7 +153,7 @@ void main() {
 	if (!gl_HelperInvocation)
 #endif
 	{
-		atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
+		aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit);
 	}
 #endif
 	//find the current element in the depth usage list and mark the current depth as used
@@ -162,7 +168,7 @@ void main() {
 	if (!gl_HelperInvocation) {
 		z_write_bit = subgroupOr(z_write_bit); //merge all Zs
 		if (cluster_thread_group_index == 0) {
-			atomicOr(cluster_render.data[z_write_offset], z_write_bit);
+			aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
 		}
 	}
 #else
@@ -171,7 +177,11 @@ void main() {
 	if (!gl_HelperInvocation)
 #endif
 	{
-		atomicOr(cluster_render.data[z_write_offset], z_write_bit);
+		aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);
 	}
 #endif
+
+#ifdef USE_ATTACHMENT
+	frag_color = vec4(float(aux));
+#endif
 }

+ 2 - 0
servers/rendering/rendering_device.h

@@ -704,6 +704,8 @@ public:
 		SUPPORTS_MULTIVIEW,
 		SUPPORTS_FSR_HALF_FLOAT,
 		SUPPORTS_ATTACHMENT_VRS,
+		// If not supported, a fragment shader with only side effects (i.e., writes to buffers, but doesn't output to attachments) may be optimized down to a no-op by the GPU driver.
+		SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS,
 	};
 	virtual bool has_feature(const Features p_feature) const = 0;