Browse Source

Fixes and optimizations to mobile renderer

* Only apply final actions to attachments used in the last pass.
* Fixes to draw list final action (was using continue instead of read/drop).
* Profiling regions inside draw lists now properly throw errors.
* Ability to enable GPU profile printing from project settings (used for debugging).
reduz 4 years ago
parent
commit
ca117910da

+ 2 - 0
doc/classes/ProjectSettings.xml

@@ -423,6 +423,8 @@
 		<member name="debug/settings/stdout/print_fps" type="bool" setter="" getter="" default="false">
 			Print frames per second to standard output every second.
 		</member>
+		<member name="debug/settings/stdout/print_gpu_profile" type="bool" setter="" getter="" default="false">
+		</member>
 		<member name="debug/settings/stdout/verbose_stdout" type="bool" setter="" getter="" default="false">
 			Print more information to standard output when running. It displays information such as memory leaks, which scenes and resources are being loaded, etc.
 		</member>

+ 85 - 20
drivers/vulkan/rendering_device_vulkan.cpp

@@ -3299,13 +3299,25 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 		// Also, each UNDEFINED will do an immediate layout transition (write), s.t. we must ensure execution synchronization vs.
 		// the read. If this is a performance issue, one could track the actual last accessor of each resource, adding only that
 		// stage
+
 		switch (is_depth ? p_initial_depth_action : p_initial_action) {
 			case INITIAL_ACTION_CLEAR_REGION:
 			case INITIAL_ACTION_CLEAR: {
-				description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
-				description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
-				description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
-				dependency_from_external.srcStageMask |= reading_stages;
+				if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
+					description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+					description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
+					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+				} else if (p_attachments[i].usage_flags & TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+					description.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+					description.initialLayout = is_sampled ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : (is_storage ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
+					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+					dependency_from_external.srcStageMask |= reading_stages;
+				} else {
+					description.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+					description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+					description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; //don't care what is there
+					dependency_from_external.srcStageMask |= reading_stages;
+				}
 			} break;
 			case INITIAL_ACTION_KEEP: {
 				if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
@@ -3363,7 +3375,58 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 			}
 		}
 
-		switch (is_depth ? p_final_depth_action : p_final_action) {
+		bool used_last = false;
+
+		{
+			int last_pass = p_passes.size() - 1;
+
+			if (is_depth) {
+				//likely missing depth resolve?
+				if (p_passes[last_pass].depth_attachment == i) {
+					used_last = true;
+				}
+			} else {
+				if (p_passes[last_pass].resolve_attachments.size()) {
+					//if using resolve attachments, check resolve attachments
+					for (int j = 0; j < p_passes[last_pass].resolve_attachments.size(); j++) {
+						if (p_passes[last_pass].resolve_attachments[j] == i) {
+							used_last = true;
+							break;
+						}
+					}
+				} else {
+					for (int j = 0; j < p_passes[last_pass].color_attachments.size(); j++) {
+						if (p_passes[last_pass].color_attachments[j] == i) {
+							used_last = true;
+							break;
+						}
+					}
+				}
+			}
+
+			if (!used_last) {
+				for (int j = 0; j < p_passes[last_pass].preserve_attachments.size(); j++) {
+					if (p_passes[last_pass].preserve_attachments[j] == i) {
+						used_last = true;
+						break;
+					}
+				}
+			}
+		}
+
+		FinalAction final_action = p_final_action;
+		FinalAction final_depth_action = p_final_depth_action;
+
+		if (!used_last) {
+			if (is_depth) {
+				final_depth_action = FINAL_ACTION_DISCARD;
+
+			} else {
+				final_action = FINAL_ACTION_DISCARD;
+			}
+		}
+
+		switch (is_depth ? final_depth_action : final_action) {
 			case FINAL_ACTION_READ: {
 				if (p_attachments[i].usage_flags & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT) {
 					description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
@@ -3516,21 +3579,6 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 			resolve_references.push_back(reference);
 		}
 
-		LocalVector<uint32_t> &preserve_references = preserve_reference_array[i];
-
-		for (int j = 0; j < pass->preserve_attachments.size(); j++) {
-			int32_t attachment = pass->preserve_attachments[j];
-
-			ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused.");
-
-			ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ").");
-			ERR_FAIL_COND_V_MSG(attachment_last_pass[attachment] == i, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), it already was used for something else before in this pass.");
-
-			attachment_last_pass[attachment] = i;
-
-			preserve_references.push_back(attachment);
-		}
-
 		VkAttachmentReference &depth_stencil_reference = depth_reference_array[i];
 
 		if (pass->depth_attachment != FramebufferPass::ATTACHMENT_UNUSED) {
@@ -3554,6 +3602,22 @@ VkRenderPass RenderingDeviceVulkan::_render_pass_create(const Vector<AttachmentF
 			depth_stencil_reference.layout = VK_IMAGE_LAYOUT_UNDEFINED;
 		}
 
+		LocalVector<uint32_t> &preserve_references = preserve_reference_array[i];
+
+		for (int j = 0; j < pass->preserve_attachments.size(); j++) {
+			int32_t attachment = pass->preserve_attachments[j];
+
+			ERR_FAIL_COND_V_MSG(attachment == FramebufferPass::ATTACHMENT_UNUSED, VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + "). Preserve attachments can't be unused.");
+
+			ERR_FAIL_INDEX_V_MSG(attachment, p_attachments.size(), VK_NULL_HANDLE, "Invalid framebuffer format attachment(" + itos(attachment) + "), in pass (" + itos(i) + "), preserve attachment (" + itos(j) + ").");
+
+			if (attachment_last_pass[attachment] != i) {
+				//preserve can still be used to keep depth or color from being discarded after use
+				attachment_last_pass[attachment] = i;
+				preserve_references.push_back(attachment);
+			}
+		}
+
 		VkSubpassDescription &subpass = subpasses[i];
 		subpass.flags = 0;
 		subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
@@ -8864,6 +8928,7 @@ void RenderingDeviceVulkan::_free_rids(T &p_owner, const char *p_type) {
 }
 
 void RenderingDeviceVulkan::capture_timestamp(const String &p_name) {
+	ERR_FAIL_COND_MSG(draw_list != nullptr, "Capturing timestamps during draw list creation is not allowed. Offending timestap was: " + p_name);
 	ERR_FAIL_COND(frames[frame].timestamp_count >= max_timestamp_query_elements);
 
 	//this should be optional for profiling, else it will slow things down

+ 2 - 1
main/main.cpp

@@ -1352,6 +1352,7 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph
 					PROPERTY_HINT_RANGE, "0,1000,1"));
 
 	GLOBAL_DEF("debug/settings/stdout/print_fps", false);
+	GLOBAL_DEF("debug/settings/stdout/print_gpu_profile", false);
 	GLOBAL_DEF("debug/settings/stdout/verbose_stdout", false);
 
 	if (!OS::get_singleton()->_verbose_stdout) { // Not manually overridden.
@@ -1591,7 +1592,7 @@ Error Main::setup2(Thread::ID p_main_tid_override) {
 	rendering_server->init();
 	rendering_server->set_render_loop_enabled(!disable_render_loop);
 
-	if (profile_gpu) {
+	if (profile_gpu || (!editor && bool(GLOBAL_GET("debug/settings/stdout/print_gpu_profile")))) {
 		rendering_server->set_print_gpu_profile(true);
 	}
 

+ 14 - 12
servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp

@@ -519,7 +519,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 			using_subpass_post_process = false;
 		}
 
-		if (scene_state.used_screen_texture || scene_state.used_depth_texture) {
+		if (using_ssr || using_sss || scene_state.used_screen_texture || scene_state.used_depth_texture) {
 			// can't use our last two subpasses
 			using_subpass_transparent = false;
 			using_subpass_post_process = false;
@@ -679,17 +679,20 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 
 		_setup_environment(p_render_data, p_render_data->reflection_probe.is_valid(), screen_size, !p_render_data->reflection_probe.is_valid(), p_default_bg_color, p_render_data->render_buffers.is_valid());
 
-		RENDER_TIMESTAMP("Render Opaque Subpass");
+		if (using_subpass_transparent && using_subpass_post_process) {
+			RENDER_TIMESTAMP("Render Opaque + Transparent + Tonemap");
+		} else if (using_subpass_transparent) {
+			RENDER_TIMESTAMP("Render Opaque + Transparent");
+		} else {
+			RENDER_TIMESTAMP("Render Opaque");
+		}
 
 		RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, p_render_data, radiance_texture, true);
 
-		bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss;
-		bool can_continue_depth = !scene_state.used_depth_texture && !using_ssr && !using_sss;
+		bool can_continue_color = !using_subpass_transparent && !scene_state.used_screen_texture && !using_ssr && !using_sss;
+		bool can_continue_depth = !using_subpass_transparent && !scene_state.used_depth_texture && !using_ssr && !using_sss;
 
 		{
-			bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only);
-			bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only);
-
 			// regular forward for now
 			Vector<Color> c;
 			c.push_back(clear_color.to_linear()); // our render buffer
@@ -709,11 +712,11 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 				// secondary command buffers need more testing at this time
 				//multi threaded
 				thread_draw_lists.resize(RendererThreadPool::singleton->thread_work_pool.get_thread_count());
-				RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
+				RD::get_singleton()->draw_list_begin_split(framebuffer, thread_draw_lists.size(), thread_draw_lists.ptr(), keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
 				RendererThreadPool::singleton->thread_work_pool.do_work(thread_draw_lists.size(), this, &RenderForwardMobile::_render_list_thread_function, &render_list_params);
 			} else {
 				//single threaded
-				RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
+				RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, can_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CLEAR, can_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
 				_render_list(draw_list, fb_format, &render_list_params, 0, render_list_params.element_count);
 			}
 		}
@@ -721,8 +724,6 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 		RD::get_singleton()->draw_command_end_label(); //Render Opaque Subpass
 
 		if (draw_sky || draw_sky_fog_only) {
-			RENDER_TIMESTAMP("Render Sky Subpass");
-
 			RD::get_singleton()->draw_command_begin_label("Draw Sky Subpass");
 
 			RD::DrawListID draw_list = RD::get_singleton()->draw_list_switch_to_next_pass();
@@ -752,7 +753,6 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 		}
 
 		// transparent pass
-		RENDER_TIMESTAMP("Render Transparent Subpass");
 
 		RD::get_singleton()->draw_command_begin_label("Render Transparent Subpass");
 
@@ -789,6 +789,8 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 
 			RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_ALL);
 		} else {
+			RENDER_TIMESTAMP("Render Transparent");
+
 			framebuffer = render_buffer->color_fbs[FB_CONFIG_ONE_PASS];
 
 			// this may be needed if we re-introduced steps that change info, not sure which do so in the previous implementation

+ 0 - 1
servers/rendering/renderer_rd/renderer_scene_render_rd.cpp

@@ -2010,7 +2010,6 @@ void RendererSceneRenderRD::_post_process_subpass(RID p_source_texture, RID p_fr
 
 	bool can_use_effects = rb->width >= 8 && rb->height >= 8;
 
-	RENDER_TIMESTAMP("Tonemap");
 	RD::DrawListID draw_list = RD::get_singleton()->draw_list_switch_to_next_pass();
 
 	EffectsRD::TonemapSettings tonemap;

+ 10 - 10
servers/rendering/renderer_rd/shaders/light_data_inc.glsl

@@ -3,7 +3,7 @@
 #define LIGHT_BAKE_STATIC 2
 
 struct LightData { //this structure needs to be as packed as possible
-	vec3 position;
+	highp vec3 position;
 	float inv_radius;
 
 	vec3 direction;
@@ -17,8 +17,8 @@ struct LightData { //this structure needs to be as packed as possible
 	float specular_amount;
 	bool shadow_enabled;
 
-	vec4 atlas_rect; // rect in the shadow atlas
-	mat4 shadow_matrix;
+	highp vec4 atlas_rect; // rect in the shadow atlas
+	highp mat4 shadow_matrix;
 	float shadow_bias;
 	float shadow_normal_bias;
 	float transmittance_bias;
@@ -27,7 +27,7 @@ struct LightData { //this structure needs to be as packed as possible
 	uint mask;
 	float shadow_volumetric_fog_fade;
 	uint bake_mode;
-	vec4 projector_rect; //projector rect in srgb decal atlas
+	highp vec4 projector_rect; //projector rect in srgb decal atlas
 };
 
 #define REFLECTION_AMBIENT_DISABLED 0
@@ -69,13 +69,13 @@ struct DirectionalLightData {
 	vec4 shadow_bias;
 	vec4 shadow_normal_bias;
 	vec4 shadow_transmittance_bias;
-	vec4 shadow_z_range;
-	vec4 shadow_range_begin;
+	highp vec4 shadow_z_range;
+	highp vec4 shadow_range_begin;
 	vec4 shadow_split_offsets;
-	mat4 shadow_matrix1;
-	mat4 shadow_matrix2;
-	mat4 shadow_matrix3;
-	mat4 shadow_matrix4;
+	highp mat4 shadow_matrix1;
+	highp mat4 shadow_matrix2;
+	highp mat4 shadow_matrix3;
+	highp mat4 shadow_matrix4;
 	vec4 shadow_color1;
 	vec4 shadow_color2;
 	vec4 shadow_color3;

+ 19 - 15
servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl

@@ -59,27 +59,27 @@ layout(location = 11) in vec4 weight_attrib;
 
 /* Varyings */
 
-layout(location = 0) out vec3 vertex_interp;
+layout(location = 0) highp out vec3 vertex_interp;
 
 #ifdef NORMAL_USED
-layout(location = 1) out vec3 normal_interp;
+layout(location = 1) mediump out vec3 normal_interp;
 #endif
 
 #if defined(COLOR_USED)
-layout(location = 2) out vec4 color_interp;
+layout(location = 2) mediump out vec4 color_interp;
 #endif
 
 #ifdef UV_USED
-layout(location = 3) out vec2 uv_interp;
+layout(location = 3) mediump out vec2 uv_interp;
 #endif
 
 #if defined(UV2_USED) || defined(USE_LIGHTMAP)
-layout(location = 4) out vec2 uv2_interp;
+layout(location = 4) mediump out vec2 uv2_interp;
 #endif
 
 #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
-layout(location = 5) out vec3 tangent_interp;
-layout(location = 6) out vec3 binormal_interp;
+layout(location = 5) mediump out vec3 tangent_interp;
+layout(location = 6) mediump out vec3 binormal_interp;
 #endif
 
 #ifdef MATERIAL_UNIFORMS_USED
@@ -370,6 +370,10 @@ void main() {
 
 #VERSION_DEFINES
 
+//use medium precision for floats on mobile.
+
+precision mediump float;
+
 /* Specialization Constants */
 
 /* Specialization Constants (Toggles) */
@@ -395,32 +399,32 @@ layout(constant_id = 11) const bool sc_projector_use_mipmaps = true;
 
 /* Varyings */
 
-layout(location = 0) in vec3 vertex_interp;
+layout(location = 0) highp in vec3 vertex_interp;
 
 #ifdef NORMAL_USED
-layout(location = 1) in vec3 normal_interp;
+layout(location = 1) mediump in vec3 normal_interp;
 #endif
 
 #if defined(COLOR_USED)
-layout(location = 2) in vec4 color_interp;
+layout(location = 2) mediump in vec4 color_interp;
 #endif
 
 #ifdef UV_USED
-layout(location = 3) in vec2 uv_interp;
+layout(location = 3) mediump in vec2 uv_interp;
 #endif
 
 #if defined(UV2_USED) || defined(USE_LIGHTMAP)
-layout(location = 4) in vec2 uv2_interp;
+layout(location = 4) mediump in vec2 uv2_interp;
 #endif
 
 #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
-layout(location = 5) in vec3 tangent_interp;
-layout(location = 6) in vec3 binormal_interp;
+layout(location = 5) mediump in vec3 tangent_interp;
+layout(location = 6) mediump in vec3 binormal_interp;
 #endif
 
 #ifdef MODE_DUAL_PARABOLOID
 
-layout(location = 8) in float dp_clip;
+layout(location = 8) highp in float dp_clip;
 
 #endif
 

+ 8 - 8
servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl

@@ -16,12 +16,12 @@
 /* don't exceed 128 bytes!! */
 /* put instance data into our push content, not a array */
 layout(push_constant, binding = 0, std430) uniform DrawCall {
-	mat4 transform; // 64 - 64
+	highp mat4 transform; // 64 - 64
 	uint flags; // 04 - 68
 	uint instance_uniforms_ofs; //base offset in global buffer for instance variables	// 04 - 72
 	uint gi_offset; //GI information when using lightmapping (VCT or lightmap index)    // 04 - 76
 	uint layer_mask; // 04 - 80
-	vec4 lightmap_uv_scale; // 16 - 96 doubles as uv_offset when needed
+	highp vec4 lightmap_uv_scale; // 16 - 96 doubles as uv_offset when needed
 
 	uvec2 reflection_probes; // 08 - 104
 	uvec2 omni_lights; // 08 - 112
@@ -126,14 +126,14 @@ global_variables;
 /* Set 1: Render Pass (changes per render pass) */
 
 layout(set = 1, binding = 0, std140) uniform SceneData {
-	mat4 projection_matrix;
-	mat4 inv_projection_matrix;
-	mat4 camera_matrix;
-	mat4 inv_camera_matrix;
+	highp mat4 projection_matrix;
+	highp mat4 inv_projection_matrix;
+	highp mat4 camera_matrix;
+	highp mat4 inv_camera_matrix;
 
 	// only used for multiview
-	mat4 projection_matrix_view[MAX_VIEWS];
-	mat4 inv_projection_matrix_view[MAX_VIEWS];
+	highp mat4 projection_matrix_view[MAX_VIEWS];
+	highp mat4 inv_projection_matrix_view[MAX_VIEWS];
 
 	vec2 viewport_size;
 	vec2 screen_pixel_size;