1 gadu atpakaļ · cd39da2df7
--- a/COPYRIGHT.txt
+++ b/COPYRIGHT.txt
@@ -141,6 +141,11 @@ Comment: AMD FidelityFX Super Resolution
 
				 Copyright: 2021, Advanced Micro Devices, Inc.
			
 
				 License: Expat
			
 
				 
			
 
				+Files: ./thirdparty/amd-fsr2/
			
 
				+Comment: AMD FidelityFX Super Resolution 2
			
 
				+Copyright: 2022-2023, Advanced Micro Devices, Inc.
			
 
				+License: Expat
			
 
				+
			
 
				 Files: ./thirdparty/angle/
			
 
				 Comment: ANGLE
			
 
				 Copyright: 2018, The ANGLE Project Authors.
			
--- a/doc/classes/RenderSceneBuffersRD.xml
+++ b/doc/classes/RenderSceneBuffersRD.xml
@@ -130,6 +130,19 @@
 
				 				Returns the texture size of a given slice of a cached texture.
			
 
				 			</description>
			
 
				 		</method>
			
 
				+		<method name="get_texture_slice_view">
			
 
				+			<return type="RID" />
			
 
				+			<param index="0" name="context" type="StringName" />
			
 
				+			<param index="1" name="name" type="StringName" />
			
 
				+			<param index="2" name="layer" type="int" />
			
 
				+			<param index="3" name="mipmap" type="int" />
			
 
				+			<param index="4" name="layers" type="int" />
			
 
				+			<param index="5" name="mipmaps" type="int" />
			
 
				+			<param index="6" name="view" type="RDTextureView" />
			
 
				+			<description>
			
 
				+				Returns a specific view of a slice (layer or mipmap) for a cached texture.
			
 
				+			</description>
			
 
				+		</method>
			
 
				 		<method name="get_use_taa" qualifiers="const">
			
 
				 			<return type="bool" />
			
 
				 			<description>
			
--- a/doc/classes/RenderingServer.xml
+++ b/doc/classes/RenderingServer.xml
@@ -4507,7 +4507,10 @@
 
				 		<constant name="VIEWPORT_SCALING_3D_MODE_FSR" value="1" enum="ViewportScaling3DMode">
			
 
				 			Use AMD FidelityFX Super Resolution 1.0 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will be result in the viewport being upscaled using FSR. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling.
			
 
				 		</constant>
			
 
				-		<constant name="VIEWPORT_SCALING_3D_MODE_MAX" value="2" enum="ViewportScaling3DMode">
			
 
				+		<constant name="VIEWPORT_SCALING_3D_MODE_FSR2" value="2" enum="ViewportScaling3DMode">
			
 
				+			Use AMD FidelityFX Super Resolution 2.2 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR2. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR2 at native resolution as a TAA solution.
			
 
				+		</constant>
			
 
				+		<constant name="VIEWPORT_SCALING_3D_MODE_MAX" value="3" enum="ViewportScaling3DMode">
			
 
				 			Represents the size of the [enum ViewportScaling3DMode] enum.
			
 
				 		</constant>
			
 
				 		<constant name="VIEWPORT_UPDATE_DISABLED" value="0" enum="ViewportUpdateMode">
			
@@ -4708,6 +4711,9 @@
 
				 		<constant name="VIEWPORT_DEBUG_DRAW_MOTION_VECTORS" value="25" enum="ViewportDebugDraw">
			
 
				 			Draws the motion vectors buffer. This is used by temporal antialiasing to correct for motion that occurs during gameplay.
			
 
				 		</constant>
			
 
				+		<constant name="VIEWPORT_DEBUG_DRAW_INTERNAL_BUFFER" value="26" enum="ViewportDebugDraw">
			
 
				+			Draws the internal resolution buffer of the scene instead of the regular scene, so you can see the per-pixel output that will be used by post-processing effects.
			
 
				+		</constant>
			
 
				 		<constant name="VIEWPORT_VRS_DISABLED" value="0" enum="ViewportVRSMode">
			
 
				 			Variable rate shading is disabled.
			
 
				 		</constant>
			
--- a/doc/classes/Viewport.xml
+++ b/doc/classes/Viewport.xml
@@ -443,7 +443,10 @@
 
				 		<constant name="SCALING_3D_MODE_FSR" value="1" enum="Scaling3DMode">
			
 
				 			Use AMD FidelityFX Super Resolution 1.0 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member scaling_3d_scale]. Values less than [code]1.0[/code] will be result in the viewport being upscaled using FSR. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling.
			
 
				 		</constant>
			
 
				-		<constant name="SCALING_3D_MODE_MAX" value="2" enum="Scaling3DMode">
			
 
				+		<constant name="SCALING_3D_MODE_FSR2" value="2" enum="Scaling3DMode">
			
 
				+			Use AMD FidelityFX Super Resolution 2.2 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR2. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR2 at native resolution as a TAA solution.
			
 
				+		</constant>
			
 
				+		<constant name="SCALING_3D_MODE_MAX" value="3" enum="Scaling3DMode">
			
 
				 			Represents the size of the [enum Scaling3DMode] enum.
			
 
				 		</constant>
			
 
				 		<constant name="MSAA_DISABLED" value="0" enum="MSAA">
			
@@ -553,6 +556,9 @@
 
				 		</constant>
			
 
				 		<constant name="DEBUG_DRAW_MOTION_VECTORS" value="25" enum="DebugDraw">
			
 
				 		</constant>
			
 
				+		<constant name="DEBUG_DRAW_INTERNAL_BUFFER" value="26" enum="DebugDraw">
			
 
				+			Draws the internal resolution buffer of the scene before post-processing is applied.
			
 
				+		</constant>
			
 
				 		<constant name="DEFAULT_CANVAS_ITEM_TEXTURE_FILTER_NEAREST" value="0" enum="DefaultCanvasItemTextureFilter">
			
 
				 			The texture filter reads from the nearest pixel only. The simplest and fastest method of filtering, but the texture will look pixelized.
			
 
				 		</constant>
			
--- a/drivers/vulkan/rendering_device_vulkan.cpp
+++ b/drivers/vulkan/rendering_device_vulkan.cpp
@@ -5750,8 +5750,8 @@ RID RenderingDeviceVulkan::uniform_set_create(const Vector<Uniform> &p_uniforms,
 
				 				Buffer *buffer = uniform_buffer_owner.get_or_null(uniform.get_id(0));
			
 
				 				ERR_FAIL_NULL_V_MSG(buffer, RID(), "Uniform buffer supplied (binding: " + itos(uniform.binding) + ") is invalid.");
			
 
				 
			
 
				-				ERR_FAIL_COND_V_MSG(buffer->size != (uint32_t)set_uniform.length, RID(),
			
 
				-						"Uniform buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " does not match size of shader uniform: (" + itos(set_uniform.length) + ").");
			
 
				+				ERR_FAIL_COND_V_MSG(buffer->size < (uint32_t)set_uniform.length, RID(),
			
 
				+						"Uniform buffer supplied (binding: " + itos(uniform.binding) + ") size (" + itos(buffer->size) + " is smaller than size of shader uniform: (" + itos(set_uniform.length) + ").");
			
 
				 
			
 
				 				write.dstArrayElement = 0;
			
 
				 				write.descriptorCount = 1;
			
@@ -9562,6 +9562,14 @@ uint64_t RenderingDeviceVulkan::limit_get(Limit p_limit) const {
 
				 			VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities();
			
 
				 			return subgroup_capabilities.size;
			
 
				 		}
			
 
				+		case LIMIT_SUBGROUP_MIN_SIZE: {
			
 
				+			VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities();
			
 
				+			return subgroup_capabilities.min_size;
			
 
				+		}
			
 
				+		case LIMIT_SUBGROUP_MAX_SIZE: {
			
 
				+			VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities();
			
 
				+			return subgroup_capabilities.max_size;
			
 
				+		}
			
 
				 		case LIMIT_SUBGROUP_IN_SHADERS: {
			
 
				 			VulkanContext::SubgroupCapabilities subgroup_capabilities = context->get_subgroup_capabilities();
			
 
				 			return subgroup_capabilities.supported_stages_flags_rd();
			
--- a/drivers/vulkan/vulkan_context.cpp
+++ b/drivers/vulkan/vulkan_context.cpp
@@ -504,6 +504,7 @@ Error VulkanContext::_initialize_device_extensions() {
 
				 	register_requested_device_extension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false);
			
 
				 	register_requested_device_extension(VK_KHR_MAINTENANCE_2_EXTENSION_NAME, false);
			
 
				 	register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false);
			
 
				+	register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
			
 
				 
			
 
				 	if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) {
			
 
				 		register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true);
			
@@ -739,9 +740,12 @@ Error VulkanContext::_check_capabilities() {
 
				 	multiview_capabilities.max_view_count = 0;
			
 
				 	multiview_capabilities.max_instance_count = 0;
			
 
				 	subgroup_capabilities.size = 0;
			
 
				+	subgroup_capabilities.min_size = 0;
			
 
				+	subgroup_capabilities.max_size = 0;
			
 
				 	subgroup_capabilities.supportedStages = 0;
			
 
				 	subgroup_capabilities.supportedOperations = 0;
			
 
				 	subgroup_capabilities.quadOperationsInAllStages = false;
			
 
				+	subgroup_capabilities.size_control_is_supported = false;
			
 
				 	shader_capabilities.shader_float16_is_supported = false;
			
 
				 	shader_capabilities.shader_int8_is_supported = false;
			
 
				 	storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = false;
			
@@ -886,6 +890,7 @@ Error VulkanContext::_check_capabilities() {
 
				 			VkPhysicalDeviceFragmentShadingRatePropertiesKHR vrsProperties{};
			
 
				 			VkPhysicalDeviceMultiviewProperties multiviewProperties{};
			
 
				 			VkPhysicalDeviceSubgroupProperties subgroupProperties{};
			
 
				+			VkPhysicalDeviceSubgroupSizeControlProperties subgroupSizeControlProperties = {};
			
 
				 			VkPhysicalDeviceProperties2 physicalDeviceProperties{};
			
 
				 			void *nextptr = nullptr;
			
 
				 
			
@@ -894,6 +899,15 @@ Error VulkanContext::_check_capabilities() {
 
				 				subgroupProperties.pNext = nextptr;
			
 
				 
			
 
				 				nextptr = &subgroupProperties;
			
 
				+
			
 
				+				subgroup_capabilities.size_control_is_supported = is_device_extension_enabled(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
			
 
				+
			
 
				+				if (subgroup_capabilities.size_control_is_supported) {
			
 
				+					subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
			
 
				+					subgroupSizeControlProperties.pNext = nextptr;
			
 
				+
			
 
				+					nextptr = &subgroupSizeControlProperties;
			
 
				+				}
			
 
				 			}
			
 
				 
			
 
				 			if (multiview_capabilities.is_supported) {
			
@@ -916,6 +930,8 @@ Error VulkanContext::_check_capabilities() {
 
				 			device_properties_func(gpu, &physicalDeviceProperties);
			
 
				 
			
 
				 			subgroup_capabilities.size = subgroupProperties.subgroupSize;
			
 
				+			subgroup_capabilities.min_size = subgroupProperties.subgroupSize;
			
 
				+			subgroup_capabilities.max_size = subgroupProperties.subgroupSize;
			
 
				 			subgroup_capabilities.supportedStages = subgroupProperties.supportedStages;
			
 
				 			subgroup_capabilities.supportedOperations = subgroupProperties.supportedOperations;
			
 
				 			// Note: quadOperationsInAllStages will be true if:
			
@@ -923,6 +939,11 @@ Error VulkanContext::_check_capabilities() {
 
				 			// - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT.
			
 
				 			subgroup_capabilities.quadOperationsInAllStages = subgroupProperties.quadOperationsInAllStages;
			
 
				 
			
 
				+			if (subgroup_capabilities.size_control_is_supported && (subgroupSizeControlProperties.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT)) {
			
 
				+				subgroup_capabilities.min_size = subgroupSizeControlProperties.minSubgroupSize;
			
 
				+				subgroup_capabilities.max_size = subgroupSizeControlProperties.maxSubgroupSize;
			
 
				+			}
			
 
				+
			
 
				 			if (vrs_capabilities.pipeline_vrs_supported || vrs_capabilities.primitive_vrs_supported || vrs_capabilities.attachment_vrs_supported) {
			
 
				 				print_verbose("- Vulkan Variable Rate Shading supported:");
			
 
				 				if (vrs_capabilities.pipeline_vrs_supported) {
			
@@ -962,6 +983,8 @@ Error VulkanContext::_check_capabilities() {
 
				 
			
 
				 			print_verbose("- Vulkan subgroup:");
			
 
				 			print_verbose("  size: " + itos(subgroup_capabilities.size));
			
 
				+			print_verbose("  min size: " + itos(subgroup_capabilities.min_size));
			
 
				+			print_verbose("  max size: " + itos(subgroup_capabilities.max_size));
			
 
				 			print_verbose("  stages: " + subgroup_capabilities.supported_stages_desc());
			
 
				 			print_verbose("  supported ops: " + subgroup_capabilities.supported_operations_desc());
			
 
				 			if (subgroup_capabilities.quadOperationsInAllStages) {
			
--- a/drivers/vulkan/vulkan_context.h
+++ b/drivers/vulkan/vulkan_context.h
@@ -52,9 +52,12 @@ class VulkanContext {
 
				 public:
			
 
				 	struct SubgroupCapabilities {
			
 
				 		uint32_t size;
			
 
				+		uint32_t min_size;
			
 
				+		uint32_t max_size;
			
 
				 		VkShaderStageFlags supportedStages;
			
 
				 		VkSubgroupFeatureFlags supportedOperations;
			
 
				 		VkBool32 quadOperationsInAllStages;
			
 
				+		bool size_control_is_supported;
			
 
				 
			
 
				 		uint32_t supported_stages_flags_rd() const;
			
 
				 		String supported_stages_desc() const;
			
--- a/editor/plugins/node_3d_editor_plugin.cpp
+++ b/editor/plugins/node_3d_editor_plugin.cpp
@@ -3472,7 +3472,8 @@ void Node3DEditorViewport::_menu_option(int p_option) {
 
				 		case VIEW_DISPLAY_DEBUG_CLUSTER_DECALS:
			
 
				 		case VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES:
			
 
				 		case VIEW_DISPLAY_DEBUG_OCCLUDERS:
			
 
				-		case VIEW_DISPLAY_MOTION_VECTORS: {
			
 
				+		case VIEW_DISPLAY_MOTION_VECTORS:
			
 
				+		case VIEW_DISPLAY_INTERNAL_BUFFER: {
			
 
				 			static const int display_options[] = {
			
 
				 				VIEW_DISPLAY_NORMAL,
			
 
				 				VIEW_DISPLAY_WIREFRAME,
			
@@ -3500,6 +3501,7 @@ void Node3DEditorViewport::_menu_option(int p_option) {
 
				 				VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES,
			
 
				 				VIEW_DISPLAY_DEBUG_OCCLUDERS,
			
 
				 				VIEW_DISPLAY_MOTION_VECTORS,
			
 
				+				VIEW_DISPLAY_INTERNAL_BUFFER,
			
 
				 				VIEW_MAX
			
 
				 			};
			
 
				 			static const Viewport::DebugDraw debug_draw_modes[] = {
			
@@ -3529,6 +3531,7 @@ void Node3DEditorViewport::_menu_option(int p_option) {
 
				 				Viewport::DEBUG_DRAW_CLUSTER_REFLECTION_PROBES,
			
 
				 				Viewport::DEBUG_DRAW_OCCLUDERS,
			
 
				 				Viewport::DEBUG_DRAW_MOTION_VECTORS,
			
 
				+				Viewport::DEBUG_DRAW_INTERNAL_BUFFER,
			
 
				 			};
			
 
				 
			
 
				 			for (int idx = 0; display_options[idx] != VIEW_MAX; idx++) {
			
@@ -5112,6 +5115,7 @@ Node3DEditorViewport::Node3DEditorViewport(Node3DEditor *p_spatial_editor, int p
 
				 	display_submenu->add_radio_check_item(TTR("ReflectionProbe Cluster"), VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES);
			
 
				 	display_submenu->add_radio_check_item(TTR("Occlusion Culling Buffer"), VIEW_DISPLAY_DEBUG_OCCLUDERS);
			
 
				 	display_submenu->add_radio_check_item(TTR("Motion Vectors"), VIEW_DISPLAY_MOTION_VECTORS);
			
 
				+	display_submenu->add_radio_check_item(TTR("Internal Buffer"), VIEW_DISPLAY_INTERNAL_BUFFER);
			
 
				 
			
 
				 	display_submenu->set_name("display_advanced");
			
 
				 	view_menu->get_popup()->add_submenu_item(TTR("Display Advanced..."), "display_advanced", VIEW_DISPLAY_ADVANCED);
			
--- a/editor/plugins/node_3d_editor_plugin.h
+++ b/editor/plugins/node_3d_editor_plugin.h
@@ -156,6 +156,7 @@ class Node3DEditorViewport : public Control {
 
				 		VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES,
			
 
				 		VIEW_DISPLAY_DEBUG_OCCLUDERS,
			
 
				 		VIEW_DISPLAY_MOTION_VECTORS,
			
 
				+		VIEW_DISPLAY_INTERNAL_BUFFER,
			
 
				 		VIEW_DISPLAY_MAX,
			
 
				 		// > Keep in sync with menu.
			
 
				 
			
--- a/scene/main/viewport.cpp
+++ b/scene/main/viewport.cpp
@@ -4505,7 +4505,7 @@ void Viewport::_bind_methods() {
 
				 
			
 
				 #ifndef _3D_DISABLED
			
 
				 	ADD_GROUP("Scaling 3D", "");
			
 
				-	ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast)"), "set_scaling_3d_mode", "get_scaling_3d_mode");
			
 
				+	ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast),FSR 2.2 (Slow)"), "set_scaling_3d_mode", "get_scaling_3d_mode");
			
 
				 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "scaling_3d_scale", PROPERTY_HINT_RANGE, "0.25,2.0,0.01"), "set_scaling_3d_scale", "get_scaling_3d_scale");
			
 
				 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "texture_mipmap_bias", PROPERTY_HINT_RANGE, "-2,2,0.001"), "set_texture_mipmap_bias", "get_texture_mipmap_bias");
			
 
				 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "fsr_sharpness", PROPERTY_HINT_RANGE, "0,2,0.1"), "set_fsr_sharpness", "get_fsr_sharpness");
			
@@ -4556,6 +4556,7 @@ void Viewport::_bind_methods() {
 
				 
			
 
				 	BIND_ENUM_CONSTANT(SCALING_3D_MODE_BILINEAR);
			
 
				 	BIND_ENUM_CONSTANT(SCALING_3D_MODE_FSR);
			
 
				+	BIND_ENUM_CONSTANT(SCALING_3D_MODE_FSR2);
			
 
				 	BIND_ENUM_CONSTANT(SCALING_3D_MODE_MAX);
			
 
				 
			
 
				 	BIND_ENUM_CONSTANT(MSAA_DISABLED);
			
@@ -4603,6 +4604,7 @@ void Viewport::_bind_methods() {
 
				 	BIND_ENUM_CONSTANT(DEBUG_DRAW_CLUSTER_REFLECTION_PROBES);
			
 
				 	BIND_ENUM_CONSTANT(DEBUG_DRAW_OCCLUDERS)
			
 
				 	BIND_ENUM_CONSTANT(DEBUG_DRAW_MOTION_VECTORS)
			
 
				+	BIND_ENUM_CONSTANT(DEBUG_DRAW_INTERNAL_BUFFER);
			
 
				 
			
 
				 	BIND_ENUM_CONSTANT(DEFAULT_CANVAS_ITEM_TEXTURE_FILTER_NEAREST);
			
 
				 	BIND_ENUM_CONSTANT(DEFAULT_CANVAS_ITEM_TEXTURE_FILTER_LINEAR);
			
--- a/scene/main/viewport.h
+++ b/scene/main/viewport.h
@@ -98,6 +98,7 @@ public:
 
				 	enum Scaling3DMode {
			
 
				 		SCALING_3D_MODE_BILINEAR,
			
 
				 		SCALING_3D_MODE_FSR,
			
 
				+		SCALING_3D_MODE_FSR2,
			
 
				 		SCALING_3D_MODE_MAX
			
 
				 	};
			
 
				 
			
@@ -167,6 +168,7 @@ public:
 
				 		DEBUG_DRAW_CLUSTER_REFLECTION_PROBES,
			
 
				 		DEBUG_DRAW_OCCLUDERS,
			
 
				 		DEBUG_DRAW_MOTION_VECTORS,
			
 
				+		DEBUG_DRAW_INTERNAL_BUFFER,
			
 
				 	};
			
 
				 
			
 
				 	enum DefaultCanvasItemTextureFilter {
			
--- a/servers/rendering/renderer_rd/effects/SCsub
+++ b/servers/rendering/renderer_rd/effects/SCsub
@@ -2,4 +2,33 @@
 
				 
			
 
				 Import("env")
			
 
				 
			
 
				-env.add_source_files(env.servers_sources, "*.cpp")
			
 
				+env_effects = env.Clone()
			
 
				+
			
 
				+# Thirdparty source files
			
 
				+
			
 
				+thirdparty_obj = []
			
 
				+
			
 
				+thirdparty_dir = "#thirdparty/amd-fsr2/"
			
 
				+thirdparty_sources = ["ffx_assert.cpp", "ffx_fsr2.cpp"]
			
 
				+thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
			
 
				+
			
 
				+env_effects.Prepend(CPPPATH=[thirdparty_dir])
			
 
				+
			
 
				+# This flag doesn't actually control anything GCC specific in FSR2. It determines
			
 
				+# if symbols should be exported, which is not required for Godot.
			
 
				+env_effects.Append(CPPDEFINES=["FFX_GCC"])
			
 
				+
			
 
				+env_thirdparty = env_effects.Clone()
			
 
				+env_thirdparty.disable_warnings()
			
 
				+env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
			
 
				+env.servers_sources += thirdparty_obj
			
 
				+
			
 
				+# Godot source files
			
 
				+
			
 
				+module_obj = []
			
 
				+
			
 
				+env_effects.add_source_files(module_obj, "*.cpp")
			
 
				+env.servers_sources += module_obj
			
 
				+
			
 
				+# Needed to force rebuilding the module files when the thirdparty library is updated.
			
 
				+env.Depends(module_obj, thirdparty_obj)
			
--- a/servers/rendering/renderer_rd/effects/copy_effects.cpp
+++ b/servers/rendering/renderer_rd/effects/copy_effects.cpp
@@ -281,8 +281,8 @@ CopyEffects::CopyEffects(bool p_prefer_raster_effects) {
 
				 		ba.enable_blend = true;
			
 
				 		ba.src_color_blend_factor = RD::BLEND_FACTOR_ONE;
			
 
				 		ba.dst_color_blend_factor = RD::BLEND_FACTOR_ONE;
			
 
				-		ba.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE;
			
 
				-		ba.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE;
			
 
				+		ba.src_alpha_blend_factor = RD::BLEND_FACTOR_ZERO;
			
 
				+		ba.dst_alpha_blend_factor = RD::BLEND_FACTOR_ZERO;
			
 
				 		ba.color_blend_op = RD::BLEND_OP_ADD;
			
 
				 		ba.alpha_blend_op = RD::BLEND_OP_ADD;
			
 
				 
			
--- a/servers/rendering/renderer_rd/effects/debug_effects.cpp
+++ b/servers/rendering/renderer_rd/effects/debug_effects.cpp
@@ -340,25 +340,38 @@ void DebugEffects::draw_shadow_frustum(RID p_light, const Projection &p_cam_proj
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void DebugEffects::draw_motion_vectors(RID p_velocity, RID p_dest_fb, Size2i p_velocity_size) {
			
 
				+void DebugEffects::draw_motion_vectors(RID p_velocity, RID p_depth, RID p_dest_fb, const Projection &p_current_projection, const Transform3D &p_current_transform, const Projection &p_previous_projection, const Transform3D &p_previous_transform, Size2i p_resolution) {
			
 
				 	MaterialStorage *material_storage = MaterialStorage::get_singleton();
			
 
				 	ERR_FAIL_NULL(material_storage);
			
 
				 
			
 
				 	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
			
 
				 	ERR_FAIL_NULL(uniform_set_cache);
			
 
				 
			
 
				-	RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED);
			
 
				+	RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED);
			
 
				 	RD::Uniform u_source_velocity(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector<RID>({ default_sampler, p_velocity }));
			
 
				+	RD::Uniform u_source_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, Vector<RID>({ default_sampler, p_depth }));
			
 
				 
			
 
				 	RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_fb, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD);
			
 
				 	RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, motion_vectors.pipeline.get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_fb), false, RD::get_singleton()->draw_list_get_current_pass()));
			
 
				 
			
 
				-	motion_vectors.push_constant.velocity_resolution[0] = p_velocity_size.width;
			
 
				-	motion_vectors.push_constant.velocity_resolution[1] = p_velocity_size.height;
			
 
				+	Projection reprojection = p_previous_projection.flipped_y() * p_previous_transform.affine_inverse() * p_current_transform * p_current_projection.flipped_y().inverse();
			
 
				+	RendererRD::MaterialStorage::store_camera(reprojection, motion_vectors.push_constant.reprojection_matrix);
			
 
				+
			
 
				+	motion_vectors.push_constant.resolution[0] = p_resolution.width;
			
 
				+	motion_vectors.push_constant.resolution[1] = p_resolution.height;
			
 
				+	motion_vectors.push_constant.force_derive_from_depth = false;
			
 
				 
			
 
				 	RID shader = motion_vectors.shader.version_get_shader(motion_vectors.shader_version, 0);
			
 
				-	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_velocity), 0);
			
 
				+	RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_source_velocity, u_source_depth), 0);
			
 
				+	RD::get_singleton()->draw_list_set_push_constant(draw_list, &motion_vectors.push_constant, sizeof(MotionVectorsPushConstant));
			
 
				+	RD::get_singleton()->draw_list_draw(draw_list, false, 1u, 3u);
			
 
				+
			
 
				+#ifdef DRAW_DERIVATION_FROM_DEPTH_ON_TOP
			
 
				+	motion_vectors.push_constant.force_derive_from_depth = true;
			
 
				+
			
 
				 	RD::get_singleton()->draw_list_set_push_constant(draw_list, &motion_vectors.push_constant, sizeof(MotionVectorsPushConstant));
			
 
				 	RD::get_singleton()->draw_list_draw(draw_list, false, 1u, 3u);
			
 
				+#endif
			
 
				+
			
 
				 	RD::get_singleton()->draw_list_end();
			
 
				 }
			
--- a/servers/rendering/renderer_rd/effects/debug_effects.h
+++ b/servers/rendering/renderer_rd/effects/debug_effects.h
@@ -72,8 +72,10 @@ private:
 
				 	} shadow_frustum;
			
 
				 
			
 
				 	struct MotionVectorsPushConstant {
			
 
				-		float velocity_resolution[2];
			
 
				-		float pad[2];
			
 
				+		float reprojection_matrix[16];
			
 
				+		float resolution[2];
			
 
				+		uint32_t force_derive_from_depth;
			
 
				+		float pad;
			
 
				 	};
			
 
				 
			
 
				 	struct {
			
@@ -91,7 +93,7 @@ public:
 
				 	~DebugEffects();
			
 
				 
			
 
				 	void draw_shadow_frustum(RID p_light, const Projection &p_cam_projection, const Transform3D &p_cam_transform, RID p_dest_fb, const Rect2 p_rect);
			
 
				-	void draw_motion_vectors(RID p_velocity, RID p_dest_fb, Size2i p_velocity_size);
			
 
				+	void draw_motion_vectors(RID p_velocity, RID p_depth, RID p_dest_fb, const Projection &p_current_projection, const Transform3D &p_current_transform, const Projection &p_previous_projection, const Transform3D &p_previous_transform, Size2i p_resolution);
			
 
				 };
			
 
				 
			
 
				 } // namespace RendererRD
			
--- a/servers/rendering/renderer_rd/effects/fsr2.cpp
+++ b/servers/rendering/renderer_rd/effects/fsr2.cpp
@@ -0,0 +1,889 @@
 
				+/**************************************************************************/
			
 
				+/*  fsr2.cpp                                                              */
			
 
				+/**************************************************************************/
			
 
				+/*                         This file is part of:                          */
			
 
				+/*                             GODOT ENGINE                               */
			
 
				+/*                        https://godotengine.org                         */
			
 
				+/**************************************************************************/
			
 
				+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
			
 
				+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
			
 
				+/*                                                                        */
			
 
				+/* Permission is hereby granted, free of charge, to any person obtaining  */
			
 
				+/* a copy of this software and associated documentation files (the        */
			
 
				+/* "Software"), to deal in the Software without restriction, including    */
			
 
				+/* without limitation the rights to use, copy, modify, merge, publish,    */
			
 
				+/* distribute, sublicense, and/or sell copies of the Software, and to     */
			
 
				+/* permit persons to whom the Software is furnished to do so, subject to  */
			
 
				+/* the following conditions:                                              */
			
 
				+/*                                                                        */
			
 
				+/* The above copyright notice and this permission notice shall be         */
			
 
				+/* included in all copies or substantial portions of the Software.        */
			
 
				+/*                                                                        */
			
 
				+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
			
 
				+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
			
 
				+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
			
 
				+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
			
 
				+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
			
 
				+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
			
 
				+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
			
 
				+/**************************************************************************/
			
 
				+
			
 
				+#include "fsr2.h"
			
 
				+
			
 
				+#include "../storage_rd/material_storage.h"
			
 
				+#include "../uniform_set_cache_rd.h"
			
 
				+
			
 
				+using namespace RendererRD;
			
 
				+
			
 
				+#ifndef _MSC_VER
			
 
				+#include <wchar.h>
			
 
				+#define wcscpy_s wcscpy
			
 
				+#endif
			
 
				+
			
 
				+// Translates an FSR2 resource type into the RenderingDevice texture type.
				+// Any type other than a 1D, 2D or 3D texture yields RD::TEXTURE_TYPE_MAX.
				+static RD::TextureType ffx_resource_type_to_rd_texture_type(FfxResourceType p_type) {
				+	RD::TextureType rd_type = RD::TEXTURE_TYPE_MAX;
				+
				+	switch (p_type) {
				+		case FFX_RESOURCE_TYPE_TEXTURE1D: {
				+			rd_type = RD::TEXTURE_TYPE_1D;
				+		} break;
				+		case FFX_RESOURCE_TYPE_TEXTURE2D: {
				+			rd_type = RD::TEXTURE_TYPE_2D;
				+		} break;
				+		case FFX_RESOURCE_TYPE_TEXTURE3D: {
				+			rd_type = RD::TEXTURE_TYPE_3D;
				+		} break;
				+		default: {
				+			// Keep the TEXTURE_TYPE_MAX fallback.
				+		} break;
				+	}
				+
				+	return rd_type;
				+}
			
 
				+
			
 
				+// Translates a RenderingDevice texture type into the FSR2 resource type.
				+// Any type other than a 1D, 2D or 3D texture is reported as FFX_RESOURCE_TYPE_BUFFER.
				+static FfxResourceType rd_texture_type_to_ffx_resource_type(RD::TextureType p_type) {
				+	FfxResourceType ffx_type = FFX_RESOURCE_TYPE_BUFFER;
				+
				+	switch (p_type) {
				+		case RD::TEXTURE_TYPE_1D: {
				+			ffx_type = FFX_RESOURCE_TYPE_TEXTURE1D;
				+		} break;
				+		case RD::TEXTURE_TYPE_2D: {
				+			ffx_type = FFX_RESOURCE_TYPE_TEXTURE2D;
				+		} break;
				+		case RD::TEXTURE_TYPE_3D: {
				+			ffx_type = FFX_RESOURCE_TYPE_TEXTURE3D;
				+		} break;
				+		default: {
				+			// Keep the FFX_RESOURCE_TYPE_BUFFER fallback.
				+		} break;
				+	}
				+
				+	return ffx_type;
				+}
			
 
				+
			
 
				+// Translates an FSR2 surface format into the RenderingDevice data format.
				+// Formats that are not listed below yield RD::DATA_FORMAT_MAX.
				+static RD::DataFormat ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format) {
				+	RD::DataFormat rd_format = RD::DATA_FORMAT_MAX;
				+
				+	switch (p_format) {
				+		// The typeless variant maps to the same format as the float one.
				+		case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS:
				+		case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT: {
				+			rd_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT: {
				+			rd_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R16G16B16A16_UNORM: {
				+			rd_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R32G32_FLOAT: {
				+			rd_format = RD::DATA_FORMAT_R32G32_SFLOAT;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R32_UINT: {
				+			rd_format = RD::DATA_FORMAT_R32_UINT;
				+		} break;
				+		// The typeless variant maps to the same format as the unorm one.
				+		case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS:
				+		case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM: {
				+			rd_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R11G11B10_FLOAT: {
				+			rd_format = RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R16G16_FLOAT: {
				+			rd_format = RD::DATA_FORMAT_R16G16_SFLOAT;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R16G16_UINT: {
				+			rd_format = RD::DATA_FORMAT_R16G16_UINT;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R16_FLOAT: {
				+			rd_format = RD::DATA_FORMAT_R16_SFLOAT;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R16_UINT: {
				+			rd_format = RD::DATA_FORMAT_R16_UINT;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R16_UNORM: {
				+			rd_format = RD::DATA_FORMAT_R16_UNORM;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R16_SNORM: {
				+			rd_format = RD::DATA_FORMAT_R16_SNORM;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R8_UNORM: {
				+			rd_format = RD::DATA_FORMAT_R8_UNORM;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R8_UINT: {
				+			rd_format = RD::DATA_FORMAT_R8_UINT;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R8G8_UNORM: {
				+			rd_format = RD::DATA_FORMAT_R8G8_UNORM;
				+		} break;
				+		case FFX_SURFACE_FORMAT_R32_FLOAT: {
				+			rd_format = RD::DATA_FORMAT_R32_SFLOAT;
				+		} break;
				+		default: {
				+			// Keep the DATA_FORMAT_MAX fallback.
				+		} break;
				+	}
				+
				+	return rd_format;
				+}
			
 
				+
			
 
				+// Translates a RenderingDevice data format into the FSR2 surface format.
				+// Formats that are not listed below yield FFX_SURFACE_FORMAT_UNKNOWN.
				+static FfxSurfaceFormat rd_format_to_ffx_surface_format(RD::DataFormat p_format) {
				+	FfxSurfaceFormat ffx_format = FFX_SURFACE_FORMAT_UNKNOWN;
				+
				+	switch (p_format) {
				+		case RD::DATA_FORMAT_R32G32B32A32_SFLOAT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT;
				+		} break;
				+		case RD::DATA_FORMAT_R16G16B16A16_SFLOAT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT;
				+		} break;
				+		case RD::DATA_FORMAT_R16G16B16A16_UNORM: {
				+			ffx_format = FFX_SURFACE_FORMAT_R16G16B16A16_UNORM;
				+		} break;
				+		case RD::DATA_FORMAT_R32G32_SFLOAT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R32G32_FLOAT;
				+		} break;
				+		case RD::DATA_FORMAT_R32_UINT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R32_UINT;
				+		} break;
				+		case RD::DATA_FORMAT_R8G8B8A8_UNORM: {
				+			ffx_format = FFX_SURFACE_FORMAT_R8G8B8A8_UNORM;
				+		} break;
				+		case RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32: {
				+			ffx_format = FFX_SURFACE_FORMAT_R11G11B10_FLOAT;
				+		} break;
				+		case RD::DATA_FORMAT_R16G16_SFLOAT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R16G16_FLOAT;
				+		} break;
				+		case RD::DATA_FORMAT_R16G16_UINT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R16G16_UINT;
				+		} break;
				+		case RD::DATA_FORMAT_R16_SFLOAT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R16_FLOAT;
				+		} break;
				+		case RD::DATA_FORMAT_R16_UINT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R16_UINT;
				+		} break;
				+		case RD::DATA_FORMAT_R16_UNORM: {
				+			ffx_format = FFX_SURFACE_FORMAT_R16_UNORM;
				+		} break;
				+		case RD::DATA_FORMAT_R16_SNORM: {
				+			ffx_format = FFX_SURFACE_FORMAT_R16_SNORM;
				+		} break;
				+		case RD::DATA_FORMAT_R8_UNORM: {
				+			ffx_format = FFX_SURFACE_FORMAT_R8_UNORM;
				+		} break;
				+		case RD::DATA_FORMAT_R8_UINT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R8_UINT;
				+		} break;
				+		case RD::DATA_FORMAT_R8G8_UNORM: {
				+			ffx_format = FFX_SURFACE_FORMAT_R8G8_UNORM;
				+		} break;
				+		case RD::DATA_FORMAT_R32_SFLOAT: {
				+			ffx_format = FFX_SURFACE_FORMAT_R32_FLOAT;
				+		} break;
				+		default: {
				+			// Keep the FFX_SURFACE_FORMAT_UNKNOWN fallback.
				+		} break;
				+	}
				+
				+	return ffx_format;
				+}
			
 
				+
			
 
				+// Builds the RenderingDevice texture usage bits for a set of FSR2 resource usage flags.
				+// Sampling and updating are always allowed; the remaining bits depend on p_flags.
				+static uint32_t ffx_usage_to_rd_usage_flags(uint32_t p_flags) {
				+	const bool is_render_target = (p_flags & FFX_RESOURCE_USAGE_RENDERTARGET) != 0;
				+	const bool is_uav = (p_flags & FFX_RESOURCE_USAGE_UAV) != 0;
				+
				+	uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
				+	if (is_render_target) {
				+		usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
				+	}
				+
				+	if (is_uav) {
				+		// UAV resources are bound as storage images and may also take part in copies.
				+		usage_bits |= RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
				+	}
				+
				+	return usage_bits;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: initializes the per-context state kept in the interface's scratch buffer.
				+// Returns FFX_ERROR_BACKEND_API_ERROR if any of the uniform buffers could not be created.
				+static FfxErrorCode create_backend_context_rd(FfxFsr2Interface *p_backend_interface, FfxDevice p_device) {
				+	RenderingDevice *rd = RD::get_singleton();
				+	FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+
				+	// The device is common to all contexts, so only the pointer is kept.
				+	scratch.device = p_device;
				+
				+	// Fill the ring buffer with uniform buffers, each able to hold FFX_MAX_CONST_SIZE 32-bit values.
				+	// FIXME: This could be optimized to be a single memory block if it was possible for RD to create views into a particular memory range of a UBO.
				+	uint32_t i = 0;
				+	while (i < FSR2_UBO_RING_BUFFER_SIZE) {
				+		scratch.ubo_ring_buffer[i] = rd->uniform_buffer_create(FFX_MAX_CONST_SIZE * sizeof(uint32_t));
				+		ERR_FAIL_COND_V(scratch.ubo_ring_buffer[i].is_null(), FFX_ERROR_BACKEND_API_ERROR);
				+		i++;
				+	}
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: reports the capabilities stored on the effect's device.
				+// p_device is expected to point to an FSR2Effect::Device; p_backend_interface is unused.
				+static FfxErrorCode get_device_capabilities_rd(FfxFsr2Interface *p_backend_interface, FfxDeviceCapabilities *p_out_device_capabilities, FfxDevice p_device) {
				+	FSR2Effect::Device *effect_device = reinterpret_cast<FSR2Effect::Device *>(p_device);
				+	*p_out_device_capabilities = effect_device->capabilities;
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: frees every uniform buffer of the context's ring buffer.
				+static FfxErrorCode destroy_backend_context_rd(FfxFsr2Interface *p_backend_interface) {
				+	RenderingDevice *rd = RD::get_singleton();
				+	FSR2Context::Scratch *scratch = reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+
				+	for (uint32_t slot = 0; slot != FSR2_UBO_RING_BUFFER_SIZE; slot++) {
				+		rd->free(scratch->ubo_ring_buffer[slot]);
				+	}
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: creates a texture on the RenderingDevice for an internal FSR2 resource.
				+// On success the texture is stored in the context's resource storage and its index is written
				+// to p_out_resource. Non-default heap types and non-texture resources are rejected with
				+// FFX_ERROR_INVALID_ARGUMENT; a failed texture creation returns FFX_ERROR_BACKEND_API_ERROR.
				+static FfxErrorCode create_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxCreateResourceDescription *p_create_resource_description, FfxResourceInternal *p_out_resource) {
				+	// RD does not expose heap types. FSR2's base implementation only ever requests the default
				+	// heap on its own, so any other value is treated as an invalid argument.
				+	ERR_FAIL_COND_V(p_create_resource_description->heapType != FFX_HEAP_TYPE_DEFAULT, FFX_ERROR_INVALID_ARGUMENT);
				+
				+	// Work on a copy of the description as the mipmap count may need to be filled in.
				+	FfxResourceDescription res_desc = p_create_resource_description->resourceDescription;
				+
				+	// FSR2's base implementation never requests buffer creation.
				+	ERR_FAIL_COND_V(res_desc.type != FFX_RESOURCE_TYPE_TEXTURE1D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE2D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE3D, FFX_ERROR_INVALID_ARGUMENT);
				+
				+	// A mipmap count of zero means the count must be derived from the largest dimension.
				+	if (res_desc.mipCount == 0) {
				+		res_desc.mipCount = uint32_t(1 + floor(log2(MAX(MAX(res_desc.width, res_desc.height), res_desc.depth))));
				+	}
				+
				+	// Translate the description into an RD texture format.
				+	RD::TextureFormat texture_format;
				+	texture_format.width = res_desc.width;
				+	texture_format.height = res_desc.height;
				+	texture_format.depth = res_desc.depth;
				+	texture_format.mipmaps = res_desc.mipCount;
				+	texture_format.texture_type = ffx_resource_type_to_rd_texture_type(res_desc.type);
				+	texture_format.format = ffx_surface_format_to_rd_format(res_desc.format);
				+	texture_format.usage_bits = ffx_usage_to_rd_usage_flags(p_create_resource_description->usage);
				+
				+	// Initial data, when provided, is uploaded as a single block.
				+	Vector<PackedByteArray> initial_data;
				+	const uint32_t init_size = p_create_resource_description->initDataSize;
				+	if (init_size) {
				+		PackedByteArray init_bytes;
				+		init_bytes.resize(init_size);
				+		memcpy(init_bytes.ptrw(), p_create_resource_description->initData, init_size);
				+		initial_data.push_back(init_bytes);
				+	}
				+
				+	RenderingDevice *rd = RD::get_singleton();
				+	RID texture = rd->texture_create(texture_format, RD::TextureView(), initial_data);
				+	ERR_FAIL_COND_V(texture.is_null(), FFX_ERROR_BACKEND_API_ERROR);
				+
				+	rd->set_resource_name(texture, String(p_create_resource_description->name));
				+
				+	// The index returned by the storage is what FSR2 uses to reference the resource from now on.
				+	FSR2Context::Scratch *scratch = reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+	p_out_resource->internalIndex = scratch->resources.add(texture, false, p_create_resource_description->id, res_desc);
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: registers an externally provided resource for the duration of a dispatch.
				+// p_in_resource->resource is interpreted as a pointer to an RID. A null pointer is accepted and
				+// reported with the internal index -1; a null RID is rejected with FFX_ERROR_INVALID_ARGUMENT.
				+static FfxErrorCode register_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxResource *p_in_resource, FfxResourceInternal *p_out_resource) {
				+	const bool is_null_resource = p_in_resource->resource == nullptr;
				+	if (is_null_resource) {
				+		p_out_resource->internalIndex = -1;
				+		return FFX_OK;
				+	}
				+
				+	const RID &rid = *reinterpret_cast<const RID *>(p_in_resource->resource);
				+	ERR_FAIL_COND_V(rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
				+
				+	// Store the RID as a dynamic resource and hand back the storage index as its reference.
				+	FSR2Context::Scratch *scratch = reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+	p_out_resource->internalIndex = scratch->resources.add(rid, true, FSR2Context::RESOURCE_ID_DYNAMIC, p_in_resource->description);
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: removes every dynamic resource from the context's resource storage.
				+static FfxErrorCode unregister_resources_rd(FfxFsr2Interface *p_backend_interface) {
				+	FSR2Context::Scratch *scratch = reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+
				+	// Walk a snapshot of the list rather than the list itself
				+	// (presumably because remove() edits dynamic_list; confirm against the storage implementation).
				+	LocalVector<uint32_t> pending_removal = scratch->resources.dynamic_list;
				+	for (uint32_t n = 0; n < pending_removal.size(); n++) {
				+		scratch->resources.remove(pending_removal[n]);
				+	}
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: returns the stored description of a resource.
				+// The null resource (internal index -1) yields a value-initialized description.
				+static FfxResourceDescription get_resource_description_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) {
				+	if (p_resource.internalIndex == -1) {
				+		return {};
				+	}
				+
				+	FSR2Context::Scratch *scratch = reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+	return scratch->resources.descriptions[p_resource.internalIndex];
				+}
			
 
				+
			
 
				+// FSR2 backend callback: frees the texture behind a resource and removes it from the storage.
				+// The null resource (internal index -1) and entries without a valid RID are left untouched.
				+static FfxErrorCode destroy_resource_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) {
				+	if (p_resource.internalIndex == -1) {
				+		return FFX_OK;
				+	}
				+
				+	FSR2Context::Scratch *scratch = reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+	const RID stored_rid = scratch->resources.rids[p_resource.internalIndex];
				+	if (!stored_rid.is_valid()) {
				+		return FFX_OK;
				+	}
				+
				+	RD::get_singleton()->free(stored_rid);
				+	scratch->resources.remove(p_resource.internalIndex);
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: provides the pipeline state for a pass.
				+// The compute pipeline is created on first use and stored on the device's pass entry; later
				+// calls reuse it. The pass' binding tables are then copied into p_out_pipeline.
				+// Returns FFX_ERROR_BACKEND_API_ERROR if the shader or pipeline is unavailable and
				+// FFX_ERROR_OUT_OF_RANGE if a binding table exceeds the limits of FfxPipelineState.
				+static FfxErrorCode create_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxFsr2Pass p_pass, const FfxPipelineDescription *p_pipeline_description, FfxPipelineState *p_out_pipeline) {
				+	FSR2Context::Scratch *scratch = reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+	FSR2Effect::Device *device = reinterpret_cast<FSR2Effect::Device *>(scratch->device);
				+	FSR2Effect::Pass &effect_pass = device->passes[p_pass];
				+
				+	const bool needs_pipeline = effect_pass.pipeline.pipeline_rid.is_null();
				+	if (needs_pipeline) {
				+		// First request for this pass on the device: fetch the shader and build the pipeline.
				+		effect_pass.root_signature.shader_rid = effect_pass.shader->version_get_shader(effect_pass.shader_version, effect_pass.shader_variant);
				+		ERR_FAIL_COND_V(effect_pass.root_signature.shader_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR);
				+
				+		effect_pass.pipeline.pipeline_rid = RD::get_singleton()->compute_pipeline_create(effect_pass.root_signature.shader_rid);
				+		ERR_FAIL_COND_V(effect_pass.pipeline.pipeline_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR);
				+	}
				+
				+	// The pipeline and root signature pointers are not used as FSR2 intends: they carry the
				+	// addresses of the structures holding the pipeline and shader RIDs that RD needs.
				+	p_out_pipeline->rootSignature = reinterpret_cast<FfxRootSignature>(&effect_pass.root_signature);
				+	p_out_pipeline->pipeline = reinterpret_cast<FfxPipeline>(&effect_pass.pipeline);
				+
				+	// Sampled textures (SRVs).
				+	p_out_pipeline->srvCount = effect_pass.sampled_bindings.size();
				+	ERR_FAIL_COND_V(p_out_pipeline->srvCount > FFX_MAX_NUM_SRVS, FFX_ERROR_OUT_OF_RANGE);
				+	memcpy(p_out_pipeline->srvResourceBindings, effect_pass.sampled_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->srvCount);
				+
				+	// Storage images (UAVs).
				+	p_out_pipeline->uavCount = effect_pass.storage_bindings.size();
				+	ERR_FAIL_COND_V(p_out_pipeline->uavCount > FFX_MAX_NUM_UAVS, FFX_ERROR_OUT_OF_RANGE);
				+	memcpy(p_out_pipeline->uavResourceBindings, effect_pass.storage_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->uavCount);
				+
				+	// Uniform buffers (constant buffers).
				+	p_out_pipeline->constCount = effect_pass.uniform_bindings.size();
				+	ERR_FAIL_COND_V(p_out_pipeline->constCount > FFX_MAX_NUM_CONST_BUFFERS, FFX_ERROR_OUT_OF_RANGE);
				+	memcpy(p_out_pipeline->cbResourceBindings, effect_pass.uniform_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->constCount);
				+
				+	// When motion vectors are not provided at display resolution, the accumulate passes read
				+	// the dilated motion vectors instead, so the third SRV binding is renamed accordingly.
				+	const bool is_accumulate_pass = p_pass == FFX_FSR2_PASS_ACCUMULATE || p_pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN;
				+	const bool low_resolution_mvs = (p_pipeline_description->contextFlags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0;
				+	if (is_accumulate_pass && low_resolution_mvs) {
				+		wcscpy_s(p_out_pipeline->srvResourceBindings[2].name, L"r_dilated_motion_vectors");
				+	}
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+static FfxErrorCode destroy_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxPipelineState *p_pipeline) {
			
 
				+	// Intentionally a no-op: FSR2 asks for pipeline destruction whenever a context is destroyed, but the pipelines are stored on the device's passes (see create_pipeline_rd) and are not released here.
			
 
				+
			
 
				+	return FFX_OK;
			
 
				+}
			
 
				+
			
 
				+// FSR2 backend callback: queues a copy of the job description on the context.
				+// The queued jobs are run later by execute_gpu_jobs_rd().
				+static FfxErrorCode schedule_gpu_job_rd(FfxFsr2Interface *p_backend_interface, const FfxGpuJobDescription *p_job) {
				+	ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT);
				+	ERR_FAIL_NULL_V(p_job, FFX_ERROR_INVALID_ARGUMENT);
				+
				+	FSR2Context::Scratch *scratch = reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+	scratch->gpu_jobs.push_back(*p_job);
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// Runs a clear job: clears all mipmaps of the first layer of the target texture to the job's color.
				+// Buffers cannot be cleared this way and are rejected with FFX_ERROR_INVALID_ARGUMENT.
				+static FfxErrorCode execute_gpu_job_clear_float_rd(FSR2Context::Scratch &p_scratch, const FfxClearFloatJobDescription &p_job) {
				+	const int32_t target_index = p_job.target.internalIndex;
				+	FfxResourceDescription &desc = p_scratch.resources.descriptions[target_index];
				+	ERR_FAIL_COND_V(desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
				+
				+	const Color clear_color(p_job.color[0], p_job.color[1], p_job.color[2], p_job.color[3]);
				+	const RID target_texture = p_scratch.resources.rids[target_index];
				+	RD::get_singleton()->texture_clear(target_texture, clear_color, 0, desc.mipCount, 0, 1);
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// Runs a copy job: copies every mip level of the source texture into the same mip level of the
				+// destination texture. Buffers are not supported and are rejected with FFX_ERROR_INVALID_ARGUMENT.
				+static FfxErrorCode execute_gpu_job_copy_rd(FSR2Context::Scratch &p_scratch, const FfxCopyJobDescription &p_job) {
				+	RID src = p_scratch.resources.rids[p_job.src.internalIndex];
				+	RID dst = p_scratch.resources.rids[p_job.dst.internalIndex];
				+	FfxResourceDescription &src_desc = p_scratch.resources.descriptions[p_job.src.internalIndex];
				+	FfxResourceDescription &dst_desc = p_scratch.resources.descriptions[p_job.dst.internalIndex];
				+
				+	ERR_FAIL_COND_V(src_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
				+	ERR_FAIL_COND_V(dst_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
				+
				+	// Extent of the mip level being copied, starting with the base level.
				+	uint32_t mip_width = src_desc.width;
				+	uint32_t mip_height = src_desc.height;
				+	uint32_t mip_depth = src_desc.depth;
				+
				+	for (uint32_t mip_level = 0; mip_level < src_desc.mipCount; mip_level++) {
				+		// Only push the barriers on the last copy.
				+		// FIXME: This could be optimized if RenderingDevice was able to copy multiple mip levels in a single command.
				+		BitField<RD::BarrierMask> post_barrier = (mip_level == (src_desc.mipCount - 1)) ? RD::BARRIER_MASK_ALL_BARRIERS : RD::BARRIER_MASK_NO_BARRIER;
				+		RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(mip_width, mip_height, mip_depth), mip_level, mip_level, 0, 0, post_barrier);
				+
				+		// The copied region must fit inside the mip level it targets. Every level halves each
				+		// dimension of the previous one without going below a single texel, so using the base
				+		// level's extent for all levels would request an out-of-bounds region past level 0.
				+		mip_width = MAX(1u, mip_width >> 1);
				+		mip_height = MAX(1u, mip_height >> 1);
				+		mip_depth = MAX(1u, mip_depth >> 1);
				+	}
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// Runs a compute job: gathers the job's textures, storage images and constant buffers into
				+// uniform set 1, binds the two clamp samplers of the device as uniform set 0 and dispatches
				+// the pass' compute pipeline with the job's dimensions.
				+static FfxErrorCode execute_gpu_job_compute_rd(FSR2Context::Scratch &p_scratch, const FfxComputeJobDescription &p_job) {
				+	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
				+	ERR_FAIL_NULL_V(uniform_set_cache, FFX_ERROR_BACKEND_API_ERROR);
				+
				+	// The root signature and pipeline pointers carry the addresses set up by create_pipeline_rd().
				+	FSR2Effect::RootSignature &root_signature = *reinterpret_cast<FSR2Effect::RootSignature *>(p_job.pipeline.rootSignature);
				+	ERR_FAIL_COND_V(root_signature.shader_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
				+
				+	FSR2Effect::Pipeline &backend_pipeline = *reinterpret_cast<FSR2Effect::Pipeline *>(p_job.pipeline.pipeline);
				+	ERR_FAIL_COND_V(backend_pipeline.pipeline_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);
				+
				+	RenderingDevice *rd = RD::get_singleton();
				+	Vector<RD::Uniform> compute_uniforms;
				+
				+	// Sampled textures.
				+	for (uint32_t srv = 0; srv < p_job.pipeline.srvCount; srv++) {
				+		RID sampled_rid = p_scratch.resources.rids[p_job.srvs[srv].internalIndex];
				+		RD::Uniform sampled_uniform(RD::UNIFORM_TYPE_TEXTURE, p_job.pipeline.srvResourceBindings[srv].slotIndex, sampled_rid);
				+		compute_uniforms.push_back(sampled_uniform);
				+	}
				+
				+	// Storage images.
				+	for (uint32_t i = 0; i < p_job.pipeline.uavCount; i++) {
				+		RID image_rid = p_scratch.resources.rids[p_job.uavs[i].internalIndex];
				+		RD::Uniform storage_uniform;
				+		storage_uniform.uniform_type = RD::UNIFORM_TYPE_IMAGE;
				+		storage_uniform.binding = p_job.pipeline.uavResourceBindings[i].slotIndex;
				+
				+		if (p_job.uavMip[i] == 0) {
				+			// The base mip level is bound through the texture itself.
				+			storage_uniform.append_id(image_rid);
				+			compute_uniforms.push_back(storage_uniform);
				+			continue;
				+		}
				+
				+		// Other mip levels are bound through a shared slice texture, which is created the first
				+		// time it is needed and kept in the resource storage afterwards.
				+		LocalVector<RID> &mip_slice_rids = p_scratch.resources.mip_slice_rids[p_job.uavs[i].internalIndex];
				+		if (mip_slice_rids.is_empty()) {
				+			mip_slice_rids.resize(p_scratch.resources.descriptions[p_job.uavs[i].internalIndex].mipCount);
				+		}
				+
				+		ERR_FAIL_COND_V(p_job.uavMip[i] >= mip_slice_rids.size(), FFX_ERROR_INVALID_ARGUMENT);
				+
				+		if (mip_slice_rids[p_job.uavMip[i]].is_null()) {
				+			mip_slice_rids[p_job.uavMip[i]] = rd->texture_create_shared_from_slice(RD::TextureView(), image_rid, 0, p_job.uavMip[i]);
				+		}
				+
				+		ERR_FAIL_COND_V(mip_slice_rids[p_job.uavMip[i]].is_null(), FFX_ERROR_BACKEND_API_ERROR);
				+
				+		storage_uniform.append_id(mip_slice_rids[p_job.uavMip[i]]);
				+		compute_uniforms.push_back(storage_uniform);
				+	}
				+
				+	// Constant buffers: each one takes the next uniform buffer of the ring and is filled with the job's data.
				+	for (uint32_t cb = 0; cb < p_job.pipeline.constCount; cb++) {
				+		RID ubo_rid = p_scratch.ubo_ring_buffer[p_scratch.ubo_ring_buffer_index];
				+		p_scratch.ubo_ring_buffer_index = (p_scratch.ubo_ring_buffer_index + 1) % FSR2_UBO_RING_BUFFER_SIZE;
				+
				+		// Barriers are only pushed after the last buffer update.
				+		const bool is_last_update = cb == (p_job.pipeline.constCount - 1);
				+		BitField<RD::BarrierMask> post_barrier = is_last_update ? RD::BARRIER_MASK_ALL_BARRIERS : RD::BARRIER_MASK_NO_BARRIER;
				+		rd->buffer_update(ubo_rid, 0, p_job.cbs[cb].uint32Size * sizeof(uint32_t), p_job.cbs[cb].data, post_barrier);
				+
				+		RD::Uniform ubo_uniform(RD::UNIFORM_TYPE_UNIFORM_BUFFER, p_job.pipeline.cbResourceBindings[cb].slotIndex, ubo_rid);
				+		compute_uniforms.push_back(ubo_uniform);
				+	}
				+
				+	// Samplers live on the device and always use bindings 0 and 1 of uniform set 0.
				+	FSR2Effect::Device *device = reinterpret_cast<FSR2Effect::Device *>(p_scratch.device);
				+	RD::Uniform u_point_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 0, device->point_clamp_sampler);
				+	RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 1, device->linear_clamp_sampler);
				+
				+	RD::ComputeListID compute_list = rd->compute_list_begin();
				+	rd->compute_list_bind_compute_pipeline(compute_list, backend_pipeline.pipeline_rid);
				+	rd->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(root_signature.shader_rid, 0, u_point_clamp_sampler, u_linear_clamp_sampler), 0);
				+	rd->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache_vec(root_signature.shader_rid, 1, compute_uniforms), 1);
				+	rd->compute_list_dispatch(compute_list, p_job.dimensions[0], p_job.dimensions[1], p_job.dimensions[2]);
				+	rd->compute_list_end();
				+
				+	return FFX_OK;
				+}
			
 
				+
			
 
				+// FSR2 backend callback: runs the queued GPU jobs in the order they were scheduled.
				+// Execution stops at the first job that fails and its error code is returned. The queue is
				+// emptied in every case. p_command_list is unused.
				+static FfxErrorCode execute_gpu_jobs_rd(FfxFsr2Interface *p_backend_interface, FfxCommandList p_command_list) {
				+	ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT);
				+
				+	FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);
				+
				+	FfxErrorCode result = FFX_OK;
				+	for (const FfxGpuJobDescription &queued_job : scratch.gpu_jobs) {
				+		if (queued_job.jobType == FFX_GPU_JOB_CLEAR_FLOAT) {
				+			result = execute_gpu_job_clear_float_rd(scratch, queued_job.clearJobDescriptor);
				+		} else if (queued_job.jobType == FFX_GPU_JOB_COPY) {
				+			result = execute_gpu_job_copy_rd(scratch, queued_job.copyJobDescriptor);
				+		} else if (queued_job.jobType == FFX_GPU_JOB_COMPUTE) {
				+			result = execute_gpu_job_compute_rd(scratch, queued_job.computeJobDescriptor);
				+		} else {
				+			// Unknown job type.
				+			result = FFX_ERROR_INVALID_ARGUMENT;
				+		}
				+
				+		if (result != FFX_OK) {
				+			break;
				+		}
				+	}
				+
				+	scratch.gpu_jobs.clear();
				+
				+	return result;
				+}
			
 
				+
			
 
				+// Describes the texture behind p_rid as an FfxResource named p_name.
				+// The returned resource stores p_rid itself, not a copy of the RID it points to.
				+// A null RID yields a zero-initialized resource.
				+static FfxResource get_resource_rd(RID *p_rid, const wchar_t *p_name) {
				+	FfxResource res = {};
				+	if (p_rid->is_null()) {
				+		return res;
				+	}
				+
				+	wcscpy_s(res.name, p_name);
				+	res.resource = reinterpret_cast<void *>(p_rid);
				+
				+	// Fill in the description from the format the texture was created with.
				+	const RD::TextureFormat rd_format = RD::get_singleton()->texture_get_format(*p_rid);
				+	FfxResourceDescription &description = res.description;
				+	description.type = rd_texture_type_to_ffx_resource_type(rd_format.texture_type);
				+	description.format = rd_format_to_ffx_surface_format(rd_format.format);
				+	description.width = rd_format.width;
				+	description.height = rd_format.height;
				+	description.depth = rd_format.depth;
				+	description.mipCount = rd_format.mipmaps;
				+	description.flags = FFX_RESOURCE_FLAGS_NONE;
				+
				+	// Textures usable as a depth/stencil attachment are flagged as depth resources.
				+	res.isDepth = rd_format.usage_bits & RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
				+
				+	return res;
				+}
			
 
				+
			
 
				+FSR2Context::~FSR2Context() {
			
 
				+	ffxFsr2ContextDestroy(&fsr_context); // Destroy the wrapped FSR2 context together with this object.
			
 
				+}
			
 
				+
			
 
				+FSR2Effect::FSR2Effect() {
			
 
				+	FfxDeviceCapabilities &capabilities = device.capabilities;
			
 
				+	uint64_t default_subgroup_size = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_SIZE);
			
 
				+	capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1;
			
 
				+	capabilities.waveLaneCountMin = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MIN_SIZE);
			
 
				+	capabilities.waveLaneCountMax = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MAX_SIZE);
			
 
				+	capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_FSR_HALF_FLOAT);
			
 
				+	capabilities.raytracingSupported = false;
			
 
				+
			
 
				+	bool force_wave_64 = default_subgroup_size == 32 && capabilities.waveLaneCountMax == 64;
			
 
				+	bool use_lut = force_wave_64 || default_subgroup_size == 64;
			
 
				+
			
 
				+	String general_defines_base =
			
 
				+			"\n#define FFX_GPU\n"
			
 
				+			"\n#define FFX_GLSL 1\n"
			
 
				+			"\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n"
			
 
				+			"\n#define FFX_FSR2_OPTION_HDR_COLOR_INPUT 1\n"
			
 
				+			"\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n"
			
 
				+			"\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n";
			
 
				+
			
 
				+	if (use_lut) {
			
 
				+		general_defines_base += "\n#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1\n";
			
 
				+	}
			
 
				+
			
 
				+	String general_defines = general_defines_base;
			
 
				+	if (capabilities.fp16Supported) {
			
 
				+		general_defines += "\n#define FFX_HALF 1\n";
			
 
				+	}
			
 
				+
			
 
				+	Vector<String> modes;
			
 
				+	modes.push_back("");
			
 
				+
			
 
				+	// Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and
			
 
				+	// there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL
			
 
				+	// files included in FSR2 and mapping the macro bindings (#define FSR2_BIND_*) to their respective implementation names.
			
 
				+	//
			
 
				+	// It is not guaranteed these will remain consistent at all between versions of FSR2, so it'll be necessary to keep these
			
 
				+	// bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an
			
 
				+	// error if the bindings do not match.
			
 
				+
			
 
				+	{
			
 
				+		Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP];
			
 
				+		pass.shader = &shaders.depth_clip;
			
 
				+		pass.shader->initialize(modes, general_defines);
			
 
				+		pass.shader_version = pass.shader->version_create();
			
 
				+
			
 
				+		pass.sampled_bindings = {
			
 
				+			FfxResourceBinding{ 0, 0, L"r_reconstructed_previous_nearest_depth" },
			
 
				+			FfxResourceBinding{ 1, 0, L"r_dilated_motion_vectors" },
			
 
				+			FfxResourceBinding{ 2, 0, L"r_dilatedDepth" },
			
 
				+			FfxResourceBinding{ 3, 0, L"r_reactive_mask" },
			
 
				+			FfxResourceBinding{ 4, 0, L"r_transparency_and_composition_mask" },
			
 
				+			FfxResourceBinding{ 5, 0, L"r_prepared_input_color" },
			
 
				+			FfxResourceBinding{ 6, 0, L"r_previous_dilated_motion_vectors" },
			
 
				+			FfxResourceBinding{ 7, 0, L"r_input_motion_vectors" },
			
 
				+			FfxResourceBinding{ 8, 0, L"r_input_color_jittered" },
			
 
				+			FfxResourceBinding{ 9, 0, L"r_input_depth" },
			
 
				+			FfxResourceBinding{ 10, 0, L"r_input_exposure" }
			
 
				+		};
			
 
				+
			
 
				+		pass.storage_bindings = {
			
 
				+			// FSR2_BIND_UAV_DEPTH_CLIP (11) does not point to anything.
			
 
				+			FfxResourceBinding{ 12, 0, L"rw_dilated_reactive_masks" },
			
 
				+			FfxResourceBinding{ 13, 0, L"rw_prepared_input_color" }
			
 
				+		};
			
 
				+
			
 
				+		pass.uniform_bindings = {
			
 
				+			FfxResourceBinding{ 14, 0, L"cbFSR2" }
			
 
				+		};
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH];
			
 
				+		pass.shader = &shaders.reconstruct_previous_depth;
			
 
				+		pass.shader->initialize(modes, general_defines);
			
 
				+		pass.shader_version = pass.shader->version_create();
			
 
				+
			
 
				+		pass.sampled_bindings = {
			
 
				+			FfxResourceBinding{ 0, 0, L"r_input_motion_vectors" },
			
 
				+			FfxResourceBinding{ 1, 0, L"r_input_depth" },
			
 
				+			FfxResourceBinding{ 2, 0, L"r_input_color_jittered" },
			
 
				+			FfxResourceBinding{ 3, 0, L"r_input_exposure" },
			
 
				+			FfxResourceBinding{ 4, 0, L"r_luma_history" }
			
 
				+		};
			
 
				+
			
 
				+		pass.storage_bindings = {
			
 
				+			FfxResourceBinding{ 5, 0, L"rw_reconstructed_previous_nearest_depth" },
			
 
				+			FfxResourceBinding{ 6, 0, L"rw_dilated_motion_vectors" },
			
 
				+			FfxResourceBinding{ 7, 0, L"rw_dilatedDepth" },
			
 
				+			FfxResourceBinding{ 8, 0, L"rw_prepared_input_color" },
			
 
				+			FfxResourceBinding{ 9, 0, L"rw_luma_history" },
			
 
				+			// FSR2_BIND_UAV_LUMA_INSTABILITY (10) does not point to anything.
			
 
				+			FfxResourceBinding{ 11, 0, L"rw_lock_input_luma" }
			
 
				+		};
			
 
				+
			
 
				+		pass.uniform_bindings = {
			
 
				+			FfxResourceBinding{ 12, 0, L"cbFSR2" }
			
 
				+		};
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		Pass &pass = device.passes[FFX_FSR2_PASS_LOCK];
			
 
				+		pass.shader = &shaders.lock;
			
 
				+		pass.shader->initialize(modes, general_defines);
			
 
				+		pass.shader_version = pass.shader->version_create();
			
 
				+
			
 
				+		pass.sampled_bindings = {
			
 
				+			FfxResourceBinding{ 0, 0, L"r_lock_input_luma" }
			
 
				+		};
			
 
				+
			
 
				+		pass.storage_bindings = {
			
 
				+			FfxResourceBinding{ 1, 0, L"rw_new_locks" },
			
 
				+			FfxResourceBinding{ 2, 0, L"rw_reconstructed_previous_nearest_depth" }
			
 
				+		};
			
 
				+
			
 
				+		pass.uniform_bindings = {
			
 
				+			FfxResourceBinding{ 3, 0, L"cbFSR2" }
			
 
				+		};
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		Vector<String> accumulate_modes;
			
 
				+		accumulate_modes.push_back("\n");
			
 
				+		accumulate_modes.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
			
 
				+
			
 
				+		String general_defines_accumulate;
			
 
				+		if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") {
			
 
				+			// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
			
 
				+			general_defines_accumulate = general_defines_base;
			
 
				+		} else {
			
 
				+			general_defines_accumulate = general_defines;
			
 
				+		}
			
 
				+
			
 
				+		Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE];
			
 
				+		pass.shader = &shaders.accumulate;
			
 
				+		pass.shader->initialize(accumulate_modes, general_defines_accumulate);
			
 
				+		pass.shader_version = pass.shader->version_create();
			
 
				+
			
 
				+		pass.sampled_bindings = {
			
 
				+			FfxResourceBinding{ 0, 0, L"r_input_exposure" },
			
 
				+			FfxResourceBinding{ 1, 0, L"r_dilated_reactive_masks" },
			
 
				+			FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" },
			
 
				+			FfxResourceBinding{ 3, 0, L"r_internal_upscaled_color" },
			
 
				+			FfxResourceBinding{ 4, 0, L"r_lock_status" },
			
 
				+			FfxResourceBinding{ 5, 0, L"r_input_depth" },
			
 
				+			FfxResourceBinding{ 6, 0, L"r_prepared_input_color" },
			
 
				+			// FSR2_BIND_SRV_LUMA_INSTABILITY(7) does not point to anything.
			
 
				+			FfxResourceBinding{ 8, 0, L"r_lanczos_lut" },
			
 
				+			FfxResourceBinding{ 9, 0, L"r_upsample_maximum_bias_lut" },
			
 
				+			FfxResourceBinding{ 10, 0, L"r_imgMips" },
			
 
				+			FfxResourceBinding{ 11, 0, L"r_auto_exposure" },
			
 
				+			FfxResourceBinding{ 12, 0, L"r_luma_history" }
			
 
				+		};
			
 
				+
			
 
				+		pass.storage_bindings = {
			
 
				+			FfxResourceBinding{ 13, 0, L"rw_internal_upscaled_color" },
			
 
				+			FfxResourceBinding{ 14, 0, L"rw_lock_status" },
			
 
				+			FfxResourceBinding{ 15, 0, L"rw_upscaled_output" },
			
 
				+			FfxResourceBinding{ 16, 0, L"rw_new_locks" },
			
 
				+			FfxResourceBinding{ 17, 0, L"rw_luma_history" }
			
 
				+		};
			
 
				+
			
 
				+		pass.uniform_bindings = {
			
 
				+			FfxResourceBinding{ 18, 0, L"cbFSR2" }
			
 
				+		};
			
 
				+
			
 
				+		// Sharpen pass is a clone of the accumulate pass.
			
 
				+		Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN];
			
 
				+		sharpen_pass = pass;
			
 
				+		sharpen_pass.shader_variant = 1;
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		Pass &pass = device.passes[FFX_FSR2_PASS_RCAS];
			
 
				+		pass.shader = &shaders.rcas;
			
 
				+		pass.shader->initialize(modes, general_defines_base);
			
 
				+		pass.shader_version = pass.shader->version_create();
			
 
				+
			
 
				+		pass.sampled_bindings = {
			
 
				+			FfxResourceBinding{ 0, 0, L"r_input_exposure" },
			
 
				+			FfxResourceBinding{ 1, 0, L"r_rcas_input" }
			
 
				+		};
			
 
				+
			
 
				+		pass.storage_bindings = {
			
 
				+			FfxResourceBinding{ 2, 0, L"rw_upscaled_output" }
			
 
				+		};
			
 
				+
			
 
				+		pass.uniform_bindings = {
			
 
				+			FfxResourceBinding{ 3, 0, L"cbFSR2" },
			
 
				+			FfxResourceBinding{ 4, 0, L"cbRCAS" }
			
 
				+		};
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID];
			
 
				+		pass.shader = &shaders.compute_luminance_pyramid;
			
 
				+		pass.shader->initialize(modes, general_defines_base);
			
 
				+		pass.shader_version = pass.shader->version_create();
			
 
				+
			
 
				+		pass.sampled_bindings = {
			
 
				+			FfxResourceBinding{ 0, 0, L"r_input_color_jittered" }
			
 
				+		};
			
 
				+
			
 
				+		pass.storage_bindings = {
			
 
				+			FfxResourceBinding{ 1, 0, L"rw_spd_global_atomic" },
			
 
				+			FfxResourceBinding{ 2, 0, L"rw_img_mip_shading_change" },
			
 
				+			FfxResourceBinding{ 3, 0, L"rw_img_mip_5" },
			
 
				+			FfxResourceBinding{ 4, 0, L"rw_auto_exposure" }
			
 
				+		};
			
 
				+
			
 
				+		pass.uniform_bindings = {
			
 
				+			FfxResourceBinding{ 5, 0, L"cbFSR2" },
			
 
				+			FfxResourceBinding{ 6, 0, L"cbSPD" }
			
 
				+		};
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE];
			
 
				+		pass.shader = &shaders.autogen_reactive;
			
 
				+		pass.shader->initialize(modes, general_defines);
			
 
				+		pass.shader_version = pass.shader->version_create();
			
 
				+
			
 
				+		pass.sampled_bindings = {
			
 
				+			FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },
			
 
				+			FfxResourceBinding{ 1, 0, L"r_input_color_jittered" }
			
 
				+		};
			
 
				+
			
 
				+		pass.storage_bindings = {
			
 
				+			FfxResourceBinding{ 2, 0, L"rw_output_autoreactive" }
			
 
				+		};
			
 
				+
			
 
				+		pass.uniform_bindings = {
			
 
				+			FfxResourceBinding{ 3, 0, L"cbGenerateReactive" },
			
 
				+			FfxResourceBinding{ 4, 0, L"cbFSR2" }
			
 
				+		};
			
 
				+	}
			
 
				+
			
 
				+	{
			
 
				+		Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE];
			
 
				+		pass.shader = &shaders.tcr_autogen;
			
 
				+		pass.shader->initialize(modes, general_defines);
			
 
				+		pass.shader_version = pass.shader->version_create();
			
 
				+
			
 
				+		pass.sampled_bindings = {
			
 
				+			FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },
			
 
				+			FfxResourceBinding{ 1, 0, L"r_input_color_jittered" },
			
 
				+			FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" },
			
 
				+			FfxResourceBinding{ 3, 0, L"r_input_prev_color_pre_alpha" },
			
 
				+			FfxResourceBinding{ 4, 0, L"r_input_prev_color_post_alpha" },
			
 
				+			FfxResourceBinding{ 5, 0, L"r_reactive_mask" },
			
 
				+			FfxResourceBinding{ 6, 0, L"r_transparency_and_composition_mask" },
			
 
				+			FfxResourceBinding{ 13, 0, L"r_input_depth" }
			
 
				+		};
			
 
				+
			
 
				+		pass.storage_bindings = {
			
 
				+			FfxResourceBinding{ 7, 0, L"rw_output_autoreactive" },
			
 
				+			FfxResourceBinding{ 8, 0, L"rw_output_autocomposition" },
			
 
				+			FfxResourceBinding{ 9, 0, L"rw_output_prev_color_pre_alpha" },
			
 
				+			FfxResourceBinding{ 10, 0, L"rw_output_prev_color_post_alpha" }
			
 
				+		};
			
 
				+
			
 
				+		pass.uniform_bindings = {
			
 
				+			FfxResourceBinding{ 11, 0, L"cbFSR2" },
			
 
				+			FfxResourceBinding{ 12, 0, L"cbGenerateReactive" }
			
 
				+		};
			
 
				+	}
			
 
				+
			
 
				+	RD::SamplerState state;
			
 
				+	state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
			
 
				+	state.min_filter = RD::SAMPLER_FILTER_NEAREST;
			
 
				+	state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
			
 
				+	state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
			
 
				+	state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
			
 
				+	state.min_lod = -1000.0f;
			
 
				+	state.max_lod = 1000.0f;
			
 
				+	state.anisotropy_max = 1.0;
			
 
				+	device.point_clamp_sampler = RD::get_singleton()->sampler_create(state);
			
 
				+	ERR_FAIL_COND(device.point_clamp_sampler.is_null());
			
 
				+
			
 
				+	state.mag_filter = RD::SAMPLER_FILTER_LINEAR;
			
 
				+	state.min_filter = RD::SAMPLER_FILTER_LINEAR;
			
 
				+	device.linear_clamp_sampler = RD::get_singleton()->sampler_create(state);
			
 
				+	ERR_FAIL_COND(device.linear_clamp_sampler.is_null());
			
 
				+}
			
 
				+
			
 
				+// Releases the GPU objects owned by the effect: both clamp samplers, every pass pipeline and every
				+// pass shader version.
				+FSR2Effect::~FSR2Effect() {
				+	// The constructor bails out with ERR_FAIL_COND if a sampler cannot be created, so either RID may
				+	// still be null here. Only hand valid RIDs to the rendering device.
				+	if (device.point_clamp_sampler.is_valid()) {
				+		RD::get_singleton()->free(device.point_clamp_sampler);
				+		device.point_clamp_sampler = RID();
				+	}
				+	if (device.linear_clamp_sampler.is_valid()) {
				+		RD::get_singleton()->free(device.linear_clamp_sampler);
				+		device.linear_clamp_sampler = RID();
				+	}
				+
				+	// Release every pipeline before touching any shader version. The accumulate and sharpen passes
				+	// share a single shader version, so freeing versions in the same loop would release that version
				+	// before the sharpen pipeline has been released.
				+	// NOTE(review): pipelines are presumably created lazily from these shader versions elsewhere in
				+	// this file; a pass whose pipeline was never created keeps a null RID and is skipped - confirm.
				+	for (uint32_t i = 0; i < FFX_FSR2_PASS_COUNT; i++) {
				+		Pass &pass = device.passes[i];
				+		if (pass.pipeline.pipeline_rid.is_valid()) {
				+			RD::get_singleton()->free(pass.pipeline.pipeline_rid);
				+			pass.pipeline.pipeline_rid = RID();
				+		}
				+	}
				+
				+	for (uint32_t i = 0; i < FFX_FSR2_PASS_COUNT; i++) {
				+		// The sharpen pass is a copy of the accumulate pass and refers to the very same shader
				+		// version, which is released through the accumulate pass. Skip it to avoid a double free.
				+		if (i == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) {
				+			continue;
				+		}
				+
				+		Pass &pass = device.passes[i];
				+		if (pass.shader_version.is_valid()) {
				+			pass.shader->version_free(pass.shader_version);
				+			pass.shader_version = RID();
				+		}
				+	}
				+}
			
 
				+
			
 
				+// Allocates a new FSR2Context, describes it (HDR flag, render size, display size, device and
				+// backend callbacks) and asks the FSR2 library to create the matching context.
				+// Returns the context on success, or nullptr (after deleting the allocation) when creation fails.
				+FSR2Context *FSR2Effect::create_context(Size2i p_internal_size, Size2i p_target_size) {
				+	FSR2Context *new_context = memnew(RendererRD::FSR2Context);
				+
				+	FfxFsr2ContextDescription &desc = new_context->fsr_desc;
				+	desc.flags = FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE;
				+	desc.maxRenderSize.width = p_internal_size.x;
				+	desc.maxRenderSize.height = p_internal_size.y;
				+	desc.displaySize.width = p_target_size.x;
				+	desc.displaySize.height = p_target_size.y;
				+	desc.device = &device;
				+
				+	// Route every backend operation of the library through the RenderingDevice implementations.
				+	FfxFsr2Interface &backend = desc.callbacks;
				+	backend.fpCreateBackendContext = create_backend_context_rd;
				+	backend.fpGetDeviceCapabilities = get_device_capabilities_rd;
				+	backend.fpDestroyBackendContext = destroy_backend_context_rd;
				+	backend.fpCreateResource = create_resource_rd;
				+	backend.fpRegisterResource = register_resource_rd;
				+	backend.fpUnregisterResources = unregister_resources_rd;
				+	backend.fpGetResourceDescription = get_resource_description_rd;
				+	backend.fpDestroyResource = destroy_resource_rd;
				+	backend.fpCreatePipeline = create_pipeline_rd;
				+	backend.fpDestroyPipeline = destroy_pipeline_rd;
				+	backend.fpScheduleGpuJob = schedule_gpu_job_rd;
				+	backend.fpExecuteGpuJobs = execute_gpu_jobs_rd;
				+
				+	// The per-context scratch structure doubles as the backend's scratch buffer.
				+	backend.scratchBuffer = &new_context->scratch;
				+	backend.scratchBufferSize = sizeof(new_context->scratch);
				+
				+	if (ffxFsr2ContextCreate(&new_context->fsr_context, &new_context->fsr_desc) != FFX_OK) {
				+		memdelete(new_context);
				+		return nullptr;
				+	}
				+
				+	return new_context;
				+}
			
 
				+
			
 
				+// Fills an FSR2 dispatch description from p_params and runs one upscale dispatch on
				+// p_params.context. Fails with an error (and does nothing) if the context is null or the
				+// dispatch does not return FFX_OK.
				+void FSR2Effect::upscale(const Parameters &p_params) {
				+	// The context is dereferenced for the dispatch below; reject a missing one up front.
				+	ERR_FAIL_NULL(p_params.context);
				+
				+	// TODO: Transparency & Composition mask is not implemented.
				+	FfxFsr2DispatchDescription dispatch_desc = {};
				+
				+	// get_resource_rd() is handed the address of each RID, so mutable local copies are made.
				+	// NOTE(review): these locals presumably have to stay alive until the dispatch call returns - confirm.
				+	RID color = p_params.color;
				+	RID depth = p_params.depth;
				+	RID velocity = p_params.velocity;
				+	RID reactive = p_params.reactive;
				+	RID exposure = p_params.exposure;
				+	RID output = p_params.output;
				+	dispatch_desc.commandList = nullptr;
				+	dispatch_desc.color = get_resource_rd(&color, L"color");
				+	dispatch_desc.depth = get_resource_rd(&depth, L"depth");
				+	dispatch_desc.motionVectors = get_resource_rd(&velocity, L"velocity");
				+	dispatch_desc.reactive = get_resource_rd(&reactive, L"reactive");
				+	dispatch_desc.exposure = get_resource_rd(&exposure, L"exposure");
				+	dispatch_desc.transparencyAndComposition = {};
				+	dispatch_desc.output = get_resource_rd(&output, L"output");
				+	dispatch_desc.colorOpaqueOnly = {};
				+	dispatch_desc.jitterOffset.x = p_params.jitter.x;
				+	dispatch_desc.jitterOffset.y = p_params.jitter.y;
				+	// Motion vectors are scaled by the internal (render) resolution.
				+	dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width);
				+	dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height);
				+	dispatch_desc.reset = p_params.reset_accumulation;
				+	dispatch_desc.renderSize.width = p_params.internal_size.width;
				+	dispatch_desc.renderSize.height = p_params.internal_size.height;
				+	// Sharpening is only enabled for a sharpness above a small epsilon.
				+	dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f);
				+	dispatch_desc.sharpness = p_params.sharpness;
				+	dispatch_desc.frameTimeDelta = p_params.delta_time;
				+	dispatch_desc.preExposure = 1.0f;
				+	dispatch_desc.cameraNear = p_params.z_near;
				+	dispatch_desc.cameraFar = p_params.z_far;
				+	dispatch_desc.cameraFovAngleVertical = p_params.fovy;
				+	dispatch_desc.viewSpaceToMetersFactor = 1.0f;
				+	// Automatic reactive mask generation is disabled; the auto* values below are fixed at 1.0.
				+	dispatch_desc.enableAutoReactive = false;
				+	dispatch_desc.autoTcThreshold = 1.0f;
				+	dispatch_desc.autoTcScale = 1.0f;
				+	dispatch_desc.autoReactiveScale = 1.0f;
				+	dispatch_desc.autoReactiveMax = 1.0f;
				+
				+	RendererRD::MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix);
				+
				+	FfxErrorCode result = ffxFsr2ContextDispatch(&p_params.context->fsr_context, &dispatch_desc);
				+	ERR_FAIL_COND(result != FFX_OK);
				+}
			
--- a/servers/rendering/renderer_rd/effects/fsr2.h
+++ b/servers/rendering/renderer_rd/effects/fsr2.h
@@ -0,0 +1,199 @@
 
				+/**************************************************************************/
			
 
				+/*  fsr2.h                                                                */
			
 
				+/**************************************************************************/
			
 
				+/*                         This file is part of:                          */
			
 
				+/*                             GODOT ENGINE                               */
			
 
				+/*                        https://godotengine.org                         */
			
 
				+/**************************************************************************/
			
 
				+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
			
 
				+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
			
 
				+/*                                                                        */
			
 
				+/* Permission is hereby granted, free of charge, to any person obtaining  */
			
 
				+/* a copy of this software and associated documentation files (the        */
			
 
				+/* "Software"), to deal in the Software without restriction, including    */
			
 
				+/* without limitation the rights to use, copy, modify, merge, publish,    */
			
 
				+/* distribute, sublicense, and/or sell copies of the Software, and to     */
			
 
				+/* permit persons to whom the Software is furnished to do so, subject to  */
			
 
				+/* the following conditions:                                              */
			
 
				+/*                                                                        */
			
 
				+/* The above copyright notice and this permission notice shall be         */
			
 
				+/* included in all copies or substantial portions of the Software.        */
			
 
				+/*                                                                        */
			
 
				+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
			
 
				+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
			
 
				+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
			
 
				+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
			
 
				+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
			
 
				+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
			
 
				+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
			
 
				+/**************************************************************************/
			
 
				+
			
 
				+#ifndef FSR2_RD_H
			
 
				+#define FSR2_RD_H
			
 
				+
			
 
				+#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl.gen.h"
			
 
				+#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl.gen.h"
			
 
				+#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl.gen.h"
			
 
				+#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl.gen.h"
			
 
				+#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl.gen.h"
			
 
				+#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl.gen.h"
			
 
				+#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl.gen.h"
			
 
				+#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl.gen.h"
			
 
				+
			
 
				+// This flag doesn't actually control anything GCC specific in FSR2. It determines
			
 
				+// if symbols should be exported, which is not required for Godot.
			
 
				+#ifndef FFX_GCC
			
 
				+#define FFX_GCC
			
 
				+#endif
			
 
				+
			
 
				+#include "thirdparty/amd-fsr2/ffx_fsr2.h"
			
 
				+
			
 
				+// Sizing constants for the FSR2 backend. FSR2_UBO_RING_BUFFER_SIZE is the element count of
				+// FSR2Context::Scratch::ubo_ring_buffer and is derived from the three values above it.
				+// NOTE(review): the individual limits presumably mean "frames that may be in flight" and
				+// "uniform buffers per pass" - confirm against their use in fsr2.cpp.
				+#define FSR2_MAX_QUEUED_FRAMES (4)
				+#define FSR2_MAX_UNIFORM_BUFFERS (4)
				+#define FSR2_MAX_BUFFERED_DESCRIPTORS (FFX_FSR2_PASS_COUNT * FSR2_MAX_QUEUED_FRAMES)
				+#define FSR2_UBO_RING_BUFFER_SIZE (FSR2_MAX_BUFFERED_DESCRIPTORS * FSR2_MAX_UNIFORM_BUFFERS)
			
 
				+
			
 
				+namespace RendererRD {
			
 
				+// Per-viewport FSR2 state: the library context, the description it was created from and the
				+// scratch data shared with the backend callbacks.
				+class FSR2Context {
				+public:
				+	enum ResourceID : uint32_t {
				+		RESOURCE_ID_DYNAMIC = 0xFFFFFFFF
				+	};
				+
				+	// Table of resources stored as parallel arrays that are all indexed by the same resource index.
				+	struct Resources {
				+		LocalVector<RID> rids;
				+		LocalVector<LocalVector<RID>> mip_slice_rids;
				+		LocalVector<uint32_t> ids;
				+		LocalVector<FfxResourceDescription> descriptions;
				+		// Indices of the entries that were added with p_dynamic set.
				+		LocalVector<uint32_t> dynamic_list;
				+		// Indices released by remove() that add() hands out again before growing the arrays.
				+		LocalVector<uint32_t> free_list;
				+
				+		// Stores a resource and returns its index. A previously freed index is reused when one is
				+		// available; otherwise all parallel arrays grow by one element.
				+		uint32_t add(RID p_rid, bool p_dynamic, uint32_t p_id, FfxResourceDescription p_description) {
				+			uint32_t ret_index;
				+			if (free_list.is_empty()) {
				+				ret_index = rids.size();
				+				uint32_t new_size = ret_index + 1;
				+				rids.resize(new_size);
				+				mip_slice_rids.resize(new_size);
				+				ids.resize(new_size);
				+				descriptions.resize(new_size);
				+			} else {
				+				// Pop the most recently freed index.
				+				uint32_t end_index = free_list.size() - 1;
				+				ret_index = free_list[end_index];
				+				free_list.resize(end_index);
				+			}
				+
				+			rids[ret_index] = p_rid;
				+			mip_slice_rids[ret_index].clear();
				+			ids[ret_index] = p_id;
				+			descriptions[ret_index] = p_description;
				+
				+			if (p_dynamic) {
				+				dynamic_list.push_back(ret_index);
				+			}
				+
				+			return ret_index;
				+		}
				+
				+		// Clears the entry at p_index and queues the index for reuse by add().
				+		// The index is pushed to free_list unconditionally, so the same index must not be removed
				+		// twice without an add() in between.
				+		void remove(uint32_t p_index) {
				+			DEV_ASSERT(p_index < rids.size());
				+			free_list.push_back(p_index);
				+			rids[p_index] = RID();
				+			mip_slice_rids[p_index].clear();
				+			ids[p_index] = 0;
				+			descriptions[p_index] = {};
				+			dynamic_list.erase(p_index);
				+		}
				+
				+		// Number of slots in the table, including slots that are currently free.
				+		uint32_t size() const {
				+			return rids.size();
				+		}
				+	};
				+
				+	// Backend scratch data; FSR2Effect::create_context() passes its address and size to the
				+	// library as the scratch buffer.
				+	struct Scratch {
				+		Resources resources;
				+		LocalVector<FfxGpuJobDescription> gpu_jobs;
				+		RID ubo_ring_buffer[FSR2_UBO_RING_BUFFER_SIZE];
				+		uint32_t ubo_ring_buffer_index = 0;
				+		FfxDevice device = nullptr;
				+	};
				+
				+	Scratch scratch;
				+	// Value-initialized so that any field FSR2Effect::create_context() does not assign explicitly
				+	// starts out zeroed instead of indeterminate.
				+	FfxFsr2Context fsr_context = {};
				+	FfxFsr2ContextDescription fsr_desc = {};
				+
				+	~FSR2Context();
				+};
			
 
				+
			
 
				+// Owns the FSR2 shaders, the per-pass binding tables and the samplers, and exposes context
				+// creation and the upscale dispatch.
				+class FSR2Effect {
				+public:
				+	struct RootSignature {
				+		// Proxy structure to store the shader required by RD that uses the terminology used by the FSR2 API.
				+		RID shader_rid;
				+	};
				+
				+	struct Pipeline {
				+		RID pipeline_rid;
				+	};
				+
				+	// Everything needed to run one FSR2 pass: its shader, the shader version and variant to use,
				+	// the pipeline, and the binding tables for sampled, storage and uniform resources.
				+	struct Pass {
				+		// Points into FSR2Effect::shaders; defaults to null until the constructor assigns it.
				+		ShaderRD *shader = nullptr;
				+		RID shader_version;
				+		RootSignature root_signature;
				+		uint32_t shader_variant = 0;
				+		Pipeline pipeline;
				+		Vector<FfxResourceBinding> sampled_bindings;
				+		Vector<FfxResourceBinding> storage_bindings;
				+		Vector<FfxResourceBinding> uniform_bindings;
				+	};
				+
				+	// Device-wide state; its address is stored in the context description by create_context().
				+	struct Device {
				+		Pass passes[FFX_FSR2_PASS_COUNT];
				+		FfxDeviceCapabilities capabilities;
				+		RID point_clamp_sampler;
				+		RID linear_clamp_sampler;
				+	};
				+
				+	// Inputs for a single upscale() call.
				+	struct Parameters {
				+		// Context to dispatch on; defaults to null so a forgotten assignment is detectable.
				+		FSR2Context *context = nullptr;
				+		Size2i internal_size;
				+		RID color;
				+		RID depth;
				+		RID velocity;
				+		RID reactive;
				+		RID exposure;
				+		RID output;
				+		float z_near = 0.0f;
				+		float z_far = 0.0f;
				+		float fovy = 0.0f;
				+		Vector2 jitter;
				+		float delta_time = 0.0f;
				+		float sharpness = 0.0f;
				+		bool reset_accumulation = false;
				+		Projection reprojection;
				+	};
				+
				+	FSR2Effect();
				+	~FSR2Effect();
				+	// Returns a new context for the given sizes, or nullptr if the library fails to create one.
				+	FSR2Context *create_context(Size2i p_internal_size, Size2i p_target_size);
				+	void upscale(const Parameters &p_params);
				+
				+private:
				+	struct {
				+		Fsr2DepthClipPassShaderRD depth_clip;
				+		Fsr2ReconstructPreviousDepthPassShaderRD reconstruct_previous_depth;
				+		Fsr2LockPassShaderRD lock;
				+		Fsr2AccumulatePassShaderRD accumulate;
				+		// NOTE(review): the sharpen pass is set up as a copy of the accumulate pass (variant 1 of
				+		// the same shader), so this member may be unused - confirm before removing.
				+		Fsr2AccumulatePassShaderRD accumulate_sharpen;
				+		Fsr2RcasPassShaderRD rcas;
				+		Fsr2ComputeLuminancePyramidPassShaderRD compute_luminance_pyramid;
				+		Fsr2AutogenReactivePassShaderRD autogen_reactive;
				+		Fsr2TcrAutogenPassShaderRD tcr_autogen;
				+	} shaders;
				+
				+	Device device;
				+};
			
 
				+
			
 
				+} // namespace RendererRD
			
 
				+
			
 
				+#endif // FSR2_RD_H
			
--- a/servers/rendering/renderer_rd/effects/taa.cpp
+++ b/servers/rendering/renderer_rd/effects/taa.cpp
@@ -47,20 +47,6 @@ TAA::~TAA() {
 
				 	taa_shader.version_free(shader_version);
			
 
				 }
			
 
				 
			
 
				-void TAA::msaa_resolve(Ref<RenderSceneBuffersRD> p_render_buffers) {
			
 
				-	if (!p_render_buffers->has_velocity_buffer(true)) {
			
 
				-		// nothing to resolve
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	for (uint32_t v = 0; v < p_render_buffers->get_view_count(); v++) {
			
 
				-		RID velocity_buffer_msaa = p_render_buffers->get_velocity_buffer(true, v);
			
 
				-		RID velocity_buffer = p_render_buffers->get_velocity_buffer(false, v);
			
 
				-
			
 
				-		RD::get_singleton()->texture_resolve_multisample(velocity_buffer_msaa, velocity_buffer);
			
 
				-	}
			
 
				-}
			
 
				-
			
 
				 void TAA::resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) {
			
 
				 	UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();
			
 
				 	ERR_FAIL_NULL(uniform_set_cache);
			
--- a/servers/rendering/renderer_rd/effects/taa.h
+++ b/servers/rendering/renderer_rd/effects/taa.h
@@ -45,7 +45,6 @@ public:
 
				 	TAA();
			
 
				 	~TAA();
			
 
				 
			
 
				-	void msaa_resolve(Ref<RenderSceneBuffersRD> p_render_buffers);
			
 
				 	void process(Ref<RenderSceneBuffersRD> p_render_buffers, RD::DataFormat p_format, float p_z_near, float p_z_far);
			
 
				 
			
 
				 private:
			
--- a/servers/rendering/renderer_rd/environment/sky.cpp
+++ b/servers/rendering/renderer_rd/environment/sky.cpp
@@ -973,7 +973,7 @@ SkyRD::~SkyRD() {
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void SkyRD::setup_sky(RID p_env, Ref<RenderSceneBuffersRD> p_render_buffers, const PagedArray<RID> &p_lights, RID p_camera_attributes, uint32_t p_view_count, const Projection *p_view_projections, const Vector3 *p_view_eye_offsets, const Transform3D &p_cam_transform, const Projection &p_cam_projection, const Size2i p_screen_size, RendererSceneRenderRD *p_scene_render) {
			
 
				+void SkyRD::setup_sky(RID p_env, Ref<RenderSceneBuffersRD> p_render_buffers, const PagedArray<RID> &p_lights, RID p_camera_attributes, uint32_t p_view_count, const Projection *p_view_projections, const Vector3 *p_view_eye_offsets, const Transform3D &p_cam_transform, const Projection &p_cam_projection, const Size2i p_screen_size, Vector2 p_jitter, RendererSceneRenderRD *p_scene_render) {
			
 
				 	RendererRD::LightStorage *light_storage = RendererRD::LightStorage::get_singleton();
			
 
				 	RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton();
			
 
				 	ERR_FAIL_COND(p_env.is_null());
			
@@ -1173,18 +1173,21 @@ void SkyRD::setup_sky(RID p_env, Ref<RenderSceneBuffersRD> p_render_buffers, con
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	Projection correction;
			
 
				+	correction.add_jitter_offset(p_jitter);
			
 
				+
			
 
				 	sky_scene_state.view_count = p_view_count;
			
 
				 	sky_scene_state.cam_transform = p_cam_transform;
			
 
				-	sky_scene_state.cam_projection = p_cam_projection; // We only use this when rendering a single view.
			
 
				+	sky_scene_state.cam_projection = correction * p_cam_projection; // We only use this when rendering a single view.
			
 
				 
			
 
				 	// Our info in our UBO is only used if we're rendering stereo.
			
 
				 	for (uint32_t i = 0; i < p_view_count; i++) {
			
 
				-		Projection view_inv_projection = p_view_projections[i].inverse();
			
 
				+		Projection view_inv_projection = (correction * p_view_projections[i]).inverse();
			
 
				 		if (p_view_count > 1) {
			
 
				 			RendererRD::MaterialStorage::store_camera(p_cam_projection * view_inv_projection, sky_scene_state.ubo.combined_reprojection[i]);
			
 
				 		} else {
			
 
				 			Projection ident;
			
 
				-			RendererRD::MaterialStorage::store_camera(ident, sky_scene_state.ubo.combined_reprojection[i]);
			
 
				+			RendererRD::MaterialStorage::store_camera(correction, sky_scene_state.ubo.combined_reprojection[i]);
			
 
				 		}
			
 
				 
			
 
				 		RendererRD::MaterialStorage::store_camera(view_inv_projection, sky_scene_state.ubo.view_inv_projections[i]);
			
--- a/servers/rendering/renderer_rd/environment/sky.h
+++ b/servers/rendering/renderer_rd/environment/sky.h
@@ -294,7 +294,7 @@ public:
 
				 	void set_texture_format(RD::DataFormat p_texture_format);
			
 
				 	~SkyRD();
			
 
				 
			
 
				-	void setup_sky(RID p_env, Ref<RenderSceneBuffersRD> p_render_buffers, const PagedArray<RID> &p_lights, RID p_camera_attributes, uint32_t p_view_count, const Projection *p_view_projections, const Vector3 *p_view_eye_offsets, const Transform3D &p_cam_transform, const Projection &p_cam_projection, const Size2i p_screen_size, RendererSceneRenderRD *p_scene_render);
			
 
				+	void setup_sky(RID p_env, Ref<RenderSceneBuffersRD> p_render_buffers, const PagedArray<RID> &p_lights, RID p_camera_attributes, uint32_t p_view_count, const Projection *p_view_projections, const Vector3 *p_view_eye_offsets, const Transform3D &p_cam_transform, const Projection &p_cam_projection, const Size2i p_screen_size, Vector2 p_jitter, RendererSceneRenderRD *p_scene_render);
			
 
				 	void update_radiance_buffers(Ref<RenderSceneBuffersRD> p_render_buffers, RID p_env, const Vector3 &p_global_pos, double p_time, float p_luminance_multiplier = 1.0);
			
 
				 	void update_res_buffers(Ref<RenderSceneBuffersRD> p_render_buffers, RID p_env, double p_time, float p_luminance_multiplier = 1.0);
			
 
				 	void draw_sky(RD::DrawListID p_draw_list, Ref<RenderSceneBuffersRD> p_render_buffers, RID p_env, RID p_fb, double p_time, float p_luminance_multiplier = 1.0);
			
--- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
+++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
@@ -105,6 +105,12 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi()
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// Lazily creates the FSR2 context for these render buffers, sized from their internal and target
				+// sizes. Does nothing when a context already exists.
				+void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_fsr2(RendererRD::FSR2Effect *p_effect) {
				+	if (fsr2_context != nullptr) {
				+		return;
				+	}
				+
				+	fsr2_context = p_effect->create_context(render_buffers->get_internal_size(), render_buffers->get_target_size());
				+}
			
 
				+
			
 
				 void RenderForwardClustered::RenderBufferDataForwardClustered::free_data() {
			
 
				 	// JIC, should already have been cleared
			
 
				 	if (render_buffers) {
			
@@ -120,6 +126,11 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::free_data() {
 
				 		cluster_builder = nullptr;
			
 
				 	}
			
 
				 
			
 
				+	if (fsr2_context) {
			
 
				+		memdelete(fsr2_context);
			
 
				+		fsr2_context = nullptr;
			
 
				+	}
			
 
				+
			
 
				 	if (!render_sdfgi_uniform_set.is_null() && RD::get_singleton()->uniform_set_is_valid(render_sdfgi_uniform_set)) {
			
 
				 		RD::get_singleton()->free(render_sdfgi_uniform_set);
			
 
				 	}
			
@@ -230,6 +241,14 @@ RID RenderForwardClustered::RenderBufferDataForwardClustered::get_specular_only_
 
				 	return FramebufferCacheRD::get_singleton()->get_cache_multiview(render_buffers->get_view_count(), specular);
			
 
				 }
			
 
				 
			
 
				+RID RenderForwardClustered::RenderBufferDataForwardClustered::get_velocity_only_fb() { // Returns a framebuffer from FramebufferCacheRD that has the velocity texture as its only attachment.
			
 
				+	bool use_msaa = render_buffers->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED;
			
 
				+
			
 
				+	RID velocity = render_buffers->get_texture(RB_SCOPE_BUFFERS, use_msaa ? RB_TEX_VELOCITY_MSAA : RB_TEX_VELOCITY); // Pick the MSAA velocity texture when 3D MSAA is enabled, otherwise the regular one.
			
 
				+
			
 
				+	return FramebufferCacheRD::get_singleton()->get_cache_multiview(render_buffers->get_view_count(), velocity); // Requested with the render buffers' view count.
			
 
				+}
			
 
				+
			
 
				 void RenderForwardClustered::setup_render_buffer_data(Ref<RenderSceneBuffersRD> p_render_buffers) {
			
 
				 	Ref<RenderBufferDataForwardClustered> data;
			
 
				 	data.instantiate();
			
@@ -285,8 +304,10 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p
 
				 		const GeometryInstanceSurfaceDataCache *surf = p_params->elements[i];
			
 
				 		const RenderElementInfo &element_info = p_params->element_info[i];
			
 
				 
			
 
				-		if ((p_pass_mode == PASS_MODE_COLOR && !(p_color_pass_flags & COLOR_PASS_FLAG_TRANSPARENT)) && !(surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE)) {
			
 
				-			continue; // Objects with "Depth-prepass" transparency are included in both render lists, but should only be rendered in the transparent pass
			
 
				+		if (p_pass_mode == PASS_MODE_COLOR && surf->color_pass_inclusion_mask && (p_color_pass_flags & surf->color_pass_inclusion_mask) == 0) {
			
 
				+			// Some surfaces can be repeated in multiple render lists. We exclude them from being rendered on the color pass based on the
			
 
				+			// features supported by the pass compared to the inclusion mask.
			
 
				+			continue;
			
 
				 		}
			
 
				 
			
 
				 		if (surf->owner->instance_count == 0) {
			
@@ -582,7 +603,7 @@ void RenderForwardClustered::_render_list_with_threads(RenderListParameters *p_p
 
				 	}
			
 
				 }
			
 
				 
			
 
				-void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, bool p_flip_y, const Color &p_default_bg_color, bool p_opaque_render_buffers, bool p_pancake_shadows, int p_index) {
			
 
				+void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, bool p_flip_y, const Color &p_default_bg_color, bool p_opaque_render_buffers, bool p_apply_alpha_multiplier, bool p_pancake_shadows, int p_index) {
			
 
				 	RendererRD::LightStorage *light_storage = RendererRD::LightStorage::get_singleton();
			
 
				 
			
 
				 	Ref<RenderSceneBuffersRD> rd = p_render_data->render_buffers;
			
@@ -598,7 +619,7 @@ void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_dat
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	p_render_data->scene_data->update_ubo(scene_state.uniform_buffers[p_index], get_debug_draw_mode(), env, reflection_probe_instance, p_render_data->camera_attributes, p_flip_y, p_pancake_shadows, p_screen_size, p_default_bg_color, _render_buffers_get_luminance_multiplier(), p_opaque_render_buffers);
			
 
				+	p_render_data->scene_data->update_ubo(scene_state.uniform_buffers[p_index], get_debug_draw_mode(), env, reflection_probe_instance, p_render_data->camera_attributes, p_flip_y, p_pancake_shadows, p_screen_size, p_default_bg_color, _render_buffers_get_luminance_multiplier(), p_opaque_render_buffers, p_apply_alpha_multiplier);
			
 
				 
			
 
				 	// now do implementation UBO
			
 
				 
			
@@ -775,8 +796,9 @@ _FORCE_INLINE_ static uint32_t _indices_to_primitives(RS::PrimitiveType p_primit
 
				 	static const uint32_t subtractor[RS::PRIMITIVE_MAX] = { 0, 0, 1, 0, 1 };
			
 
				 	return (p_indices - subtractor[p_primitive]) / divisor[p_primitive];
			
 
				 }
			
 
				-void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, uint32_t p_color_pass_flags = 0, bool p_using_sdfgi, bool p_using_opaque_gi, bool p_append) {
			
 
				+void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_using_sdfgi, bool p_using_opaque_gi, bool p_using_motion_pass, bool p_append) {
			
 
				 	RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton();
			
 
				+	uint64_t frame = RSG::rasterizer->get_frame_number();
			
 
				 
			
 
				 	if (p_render_list == RENDER_LIST_OPAQUE) {
			
 
				 		scene_state.used_sss = false;
			
@@ -797,7 +819,9 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con
 
				 	if (!p_append) {
			
 
				 		rl->clear();
			
 
				 		if (p_render_list == RENDER_LIST_OPAQUE) {
			
 
				-			render_list[RENDER_LIST_ALPHA].clear(); //opaque fills alpha too
			
 
				+			// Opaque fills motion and alpha lists.
			
 
				+			render_list[RENDER_LIST_MOTION].clear();
			
 
				+			render_list[RENDER_LIST_ALPHA].clear();
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -827,6 +851,7 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con
 
				 		}
			
 
				 		bool uses_lightmap = false;
			
 
				 		bool uses_gi = false;
			
 
				+		bool uses_motion = false;
			
 
				 		float fade_alpha = 1.0;
			
 
				 
			
 
				 		if (inst->fade_near || inst->fade_far) {
			
@@ -914,6 +939,14 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con
 
				 					inst->gi_offset_cache = 0xFFFFFFFF;
			
 
				 				}
			
 
				 			}
			
 
				+
			
 
				+			if (p_pass_mode == PASS_MODE_COLOR && p_using_motion_pass) {
			
 
				+				bool transform_changed = inst->prev_transform_change_frame == frame;
			
 
				+				bool has_mesh_instance = inst->mesh_instance.is_valid();
			
 
				+				bool uses_particles = inst->base_flags & INSTANCE_DATA_FLAG_PARTICLES;
			
 
				+				bool is_multimesh_with_motion = !uses_particles && (inst->base_flags & INSTANCE_DATA_FLAG_MULTIMESH) && mesh_storage->_multimesh_uses_motion_vectors_offsets(inst->data->base);
			
 
				+				uses_motion = transform_changed || has_mesh_instance || uses_particles || is_multimesh_with_motion;
			
 
				+			}
			
 
				 		}
			
 
				 		inst->flags_cache = flags;
			
 
				 
			
@@ -990,11 +1023,18 @@ void RenderForwardClustered::_fill_render_list(RenderListType p_render_list, con
 
				 				if (!force_alpha && (surf->flags & (GeometryInstanceSurfaceDataCache::FLAG_PASS_DEPTH | GeometryInstanceSurfaceDataCache::FLAG_PASS_OPAQUE))) {
			
 
				 					rl->add_element(surf);
			
 
				 				}
			
 
				+
			
 
				 				if (force_alpha || (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_PASS_ALPHA)) {
			
 
				+					surf->color_pass_inclusion_mask = COLOR_PASS_FLAG_TRANSPARENT;
			
 
				 					render_list[RENDER_LIST_ALPHA].add_element(surf);
			
 
				 					if (uses_gi) {
			
 
				 						surf->sort.uses_forward_gi = 1;
			
 
				 					}
			
 
				+				} else if (p_using_motion_pass && (uses_motion || (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_MOTION_VECTOR))) {
			
 
				+					surf->color_pass_inclusion_mask = COLOR_PASS_FLAG_MOTION_VECTORS;
			
 
				+					render_list[RENDER_LIST_MOTION].add_element(surf);
			
 
				+				} else {
			
 
				+					surf->color_pass_inclusion_mask = 0;
			
 
				 				}
			
 
				 
			
 
				 				if (uses_lightmap) {
			
@@ -1580,16 +1620,24 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 
			
 
				 	RENDER_TIMESTAMP("Setup 3D Scene");
			
 
				 
			
 
				+	bool using_debug_mvs = get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_MOTION_VECTORS;
			
 
				+	bool using_taa = rb->get_use_taa();
			
 
				+	bool using_fsr2 = rb->get_scaling_3d_mode() == RS::VIEWPORT_SCALING_3D_MODE_FSR2;
			
 
				+
			
 
				 	// check if we need motion vectors
			
 
				-	if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_MOTION_VECTORS) {
			
 
				-		p_render_data->scene_data->calculate_motion_vectors = true;
			
 
				-	} else if (!is_reflection_probe && rb->get_use_taa()) {
			
 
				-		p_render_data->scene_data->calculate_motion_vectors = true;
			
 
				+	bool motion_vectors_required;
			
 
				+	if (using_debug_mvs) {
			
 
				+		motion_vectors_required = true;
			
 
				+	} else if (!is_reflection_probe && using_taa) {
			
 
				+		motion_vectors_required = true;
			
 
				+	} else if (!is_reflection_probe && using_fsr2) {
			
 
				+		motion_vectors_required = true;
			
 
				 	} else {
			
 
				-		p_render_data->scene_data->calculate_motion_vectors = false;
			
 
				+		motion_vectors_required = false;
			
 
				 	}
			
 
				 
			
 
				 	//p_render_data->scene_data->subsurface_scatter_width = subsurface_scatter_size;
			
 
				+	p_render_data->scene_data->calculate_motion_vectors = motion_vectors_required;
			
 
				 	p_render_data->scene_data->directional_light_count = 0;
			
 
				 	p_render_data->scene_data->opaque_prepass_threshold = 0.99f;
			
 
				 
			
@@ -1607,6 +1655,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 	bool using_voxelgi = false;
			
 
				 	bool reverse_cull = p_render_data->scene_data->cam_transform.basis.determinant() < 0;
			
 
				 	bool using_ssil = !is_reflection_probe && p_render_data->environment.is_valid() && environment_get_ssil_enabled(p_render_data->environment);
			
 
				+	bool using_motion_pass = rb_data.is_valid() && using_fsr2;
			
 
				 
			
 
				 	if (is_reflection_probe) {
			
 
				 		uint32_t resolution = light_storage->reflection_probe_instance_get_resolution(p_render_data->reflection_probe);
			
@@ -1625,7 +1674,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 	} else {
			
 
				 		screen_size = rb->get_internal_size();
			
 
				 
			
 
				-		if (rb->get_use_taa() || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_MOTION_VECTORS) {
			
 
				+		if (p_render_data->scene_data->calculate_motion_vectors) {
			
 
				 			color_pass_flags |= COLOR_PASS_FLAG_MOTION_VECTORS;
			
 
				 			scene_shader.enable_advanced_shader_group();
			
 
				 		}
			
@@ -1663,12 +1712,16 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 	_setup_voxelgis(*p_render_data->voxel_gi_instances);
			
 
				 	_setup_environment(p_render_data, is_reflection_probe, screen_size, !is_reflection_probe, p_default_bg_color, false);
			
 
				 
			
 
				-	_update_render_base_uniform_set(rb->get_samplers()); //may have changed due to the above (light buffer enlarged, as an example)
			
 
				+	_update_render_base_uniform_set(rb->get_samplers()); // May have changed due to the above (light buffer enlarged, as an example).
			
 
				 
			
 
				-	_fill_render_list(RENDER_LIST_OPAQUE, p_render_data, PASS_MODE_COLOR, color_pass_flags, using_sdfgi, using_sdfgi || using_voxelgi);
			
 
				+	_fill_render_list(RENDER_LIST_OPAQUE, p_render_data, PASS_MODE_COLOR, using_sdfgi, using_sdfgi || using_voxelgi, using_motion_pass);
			
 
				 	render_list[RENDER_LIST_OPAQUE].sort_by_key();
			
 
				+	render_list[RENDER_LIST_MOTION].sort_by_key();
			
 
				 	render_list[RENDER_LIST_ALPHA].sort_by_reverse_depth_and_priority();
			
 
				-	_fill_instance_data(RENDER_LIST_OPAQUE, p_render_data->render_info ? p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE] : (int *)nullptr);
			
 
				+
			
 
				+	int *render_info = p_render_data->render_info ? p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE] : (int *)nullptr;
			
 
				+	_fill_instance_data(RENDER_LIST_OPAQUE, render_info);
			
 
				+	_fill_instance_data(RENDER_LIST_MOTION, render_info);
			
 
				 	_fill_instance_data(RENDER_LIST_ALPHA);
			
 
				 
			
 
				 	RD::get_singleton()->draw_command_end_label();
			
@@ -1792,9 +1845,9 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 				correction.set_depth_correction(true);
			
 
				 				Projection projection = correction * p_render_data->scene_data->cam_projection;
			
 
				 
			
 
				-				sky.setup_sky(p_render_data->environment, rb, *p_render_data->lights, p_render_data->camera_attributes, 1, &projection, &eye_offset, p_render_data->scene_data->cam_transform, projection, screen_size, this);
			
 
				+				sky.setup_sky(p_render_data->environment, rb, *p_render_data->lights, p_render_data->camera_attributes, 1, &projection, &eye_offset, p_render_data->scene_data->cam_transform, projection, screen_size, Vector2(0.0f, 0.0f), this);
			
 
				 			} else {
			
 
				-				sky.setup_sky(p_render_data->environment, rb, *p_render_data->lights, p_render_data->camera_attributes, p_render_data->scene_data->view_count, p_render_data->scene_data->view_projection, p_render_data->scene_data->view_eye_offset, p_render_data->scene_data->cam_transform, p_render_data->scene_data->cam_projection, screen_size, this);
			
 
				+				sky.setup_sky(p_render_data->environment, rb, *p_render_data->lights, p_render_data->camera_attributes, p_render_data->scene_data->view_count, p_render_data->scene_data->view_projection, p_render_data->scene_data->view_eye_offset, p_render_data->scene_data->cam_transform, p_render_data->scene_data->cam_projection, screen_size, p_render_data->scene_data->taa_jitter, this);
			
 
				 			}
			
 
				 
			
 
				 			sky_energy_multiplier *= bg_energy_multiplier;
			
@@ -1892,37 +1945,71 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 	// Shadow pass can change the base uniform set samplers.
			
 
				 	_update_render_base_uniform_set(rb->get_samplers());
			
 
				 
			
 
				-	_setup_environment(p_render_data, is_reflection_probe, screen_size, !is_reflection_probe, p_default_bg_color, true);
			
 
				+	_setup_environment(p_render_data, is_reflection_probe, screen_size, !is_reflection_probe, p_default_bg_color, true, using_motion_pass);
			
 
				 
			
 
				 	RENDER_TIMESTAMP("Render Opaque Pass");
			
 
				 
			
 
				 	RID rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_OPAQUE, p_render_data, radiance_texture, true);
			
 
				-
			
 
				 	bool can_continue_color = !scene_state.used_screen_texture && !using_ssr && !using_sss;
			
 
				 	bool can_continue_depth = !(scene_state.used_depth_texture || scene_state.used_normal_texture) && !using_ssr && !using_sss;
			
 
				 
			
 
				 	{
			
 
				+		bool render_motion_pass = !render_list[RENDER_LIST_MOTION].elements.is_empty();
			
 
				 		bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only || debug_voxelgis || debug_sdfgi_probes);
			
 
				 		bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only || debug_voxelgis || debug_sdfgi_probes);
			
 
				+		RD::FinalAction final_color_action = will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ;
			
 
				+		RD::FinalAction final_depth_action = will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ;
			
 
				 
			
 
				-		Vector<Color> c;
			
 
				 		{
			
 
				-			Color cc = clear_color.srgb_to_linear();
			
 
				-			if (using_separate_specular || rb_data.is_valid()) {
			
 
				-				cc.a = 0; //subsurf scatter must be 0
			
 
				+			Vector<Color> c;
			
 
				+			{
			
 
				+				Color cc = clear_color.srgb_to_linear();
			
 
				+				if (using_separate_specular || rb_data.is_valid()) {
			
 
				+					// Effects that rely on separate specular, like subsurface scattering, must clear the alpha to zero.
			
 
				+					cc.a = 0;
			
 
				+				}
			
 
				+				c.push_back(cc);
			
 
				+
			
 
				+				if (rb_data.is_valid()) {
			
 
				+					c.push_back(Color(0, 0, 0, 0)); // Separate specular.
			
 
				+					c.push_back(Color(0, 0, 0, 0)); // Motion vector. Pushed to the clear color vector even if the framebuffer isn't bound.
			
 
				+				}
			
 
				 			}
			
 
				-			c.push_back(cc);
			
 
				 
			
 
				-			if (rb_data.is_valid()) {
			
 
				-				c.push_back(Color(0, 0, 0, 0)); // Separate specular
			
 
				-				c.push_back(Color(0, 0, 0, 0)); // Motion vectors
			
 
				+			uint32_t opaque_color_pass_flags = using_motion_pass ? (color_pass_flags & ~COLOR_PASS_FLAG_MOTION_VECTORS) : color_pass_flags;
			
 
				+			RID opaque_framebuffer = using_motion_pass ? rb_data->get_color_pass_fb(opaque_color_pass_flags) : color_framebuffer;
			
 
				+			RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, PASS_MODE_COLOR, opaque_color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count);
			
 
				+			_render_list_with_threads(&render_list_params, opaque_framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, render_motion_pass ? RD::FINAL_ACTION_CONTINUE : final_color_action, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP) : RD::INITIAL_ACTION_CLEAR, render_motion_pass ? RD::FINAL_ACTION_CONTINUE : final_depth_action, c, 1.0, 0);
			
 
				+		}
			
 
				+
			
 
				+		RD::get_singleton()->draw_command_end_label();
			
 
				+
			
 
				+		if (using_motion_pass) {
			
 
				+			Vector<Color> motion_vector_clear_colors;
			
 
				+			motion_vector_clear_colors.push_back(Color(-1, -1, 0, 0));
			
 
				+			RD::get_singleton()->draw_list_begin(rb_data->get_velocity_only_fb(), RD::INITIAL_ACTION_CLEAR, render_motion_pass ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE, motion_vector_clear_colors);
			
 
				+			RD::get_singleton()->draw_list_end();
			
 
				+		}
			
 
				+
			
 
				+		if (render_motion_pass) {
			
 
				+			RD::get_singleton()->draw_command_begin_label("Render Motion Pass");
			
 
				+
			
 
				+			RENDER_TIMESTAMP("Render Motion Pass");
			
 
				+
			
 
				+			rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_MOTION, p_render_data, radiance_texture, true);
			
 
				+
			
 
				+			RenderListParameters render_list_params(render_list[RENDER_LIST_MOTION].elements.ptr(), render_list[RENDER_LIST_MOTION].element_info.ptr(), render_list[RENDER_LIST_MOTION].elements.size(), reverse_cull, PASS_MODE_COLOR, color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count);
			
 
				+			_render_list_with_threads(&render_list_params, color_framebuffer, RD::INITIAL_ACTION_CONTINUE, final_color_action, RD::INITIAL_ACTION_CONTINUE, final_depth_action);
			
 
				+
			
 
				+			if (will_continue_color) {
			
 
				+				// Close the motion vectors framebuffer as it'll no longer be used.
			
 
				+				RD::get_singleton()->draw_list_begin(rb_data->get_velocity_only_fb(), RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE);
			
 
				+				RD::get_singleton()->draw_list_end();
			
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		RenderListParameters render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].element_info.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, PASS_MODE_COLOR, color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count);
			
 
				-		_render_list_with_threads(&render_list_params, color_framebuffer, keep_color ? RD::INITIAL_ACTION_KEEP : RD::INITIAL_ACTION_CLEAR, will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, depth_pre_pass ? (continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP) : RD::INITIAL_ACTION_CLEAR, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, c, 1.0, 0);
			
 
				 		if (will_continue_color && using_separate_specular) {
			
 
				-			// close the specular framebuffer, as it's no longer used
			
 
				+			// Close the specular framebuffer as it'll no longer be used.
			
 
				 			RD::get_singleton()->draw_list_begin(rb_data->get_specular_only_fb(), RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, RD::FINAL_ACTION_CONTINUE);
			
 
				 			RD::get_singleton()->draw_list_end();
			
 
				 		}
			
@@ -2052,6 +2139,11 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 
			
 
				 	{
			
 
				 		uint32_t transparent_color_pass_flags = (color_pass_flags | COLOR_PASS_FLAG_TRANSPARENT) & ~(COLOR_PASS_FLAG_SEPARATE_SPECULAR);
			
 
				+		if (using_motion_pass) {
			
 
				+			// Motion vectors on transparent draw calls are not required when using the reactive mask.
			
 
				+			transparent_color_pass_flags &= ~(COLOR_PASS_FLAG_MOTION_VECTORS);
			
 
				+		}
			
 
				+
			
 
				 		RID alpha_framebuffer = rb_data.is_valid() ? rb_data->get_color_pass_fb(transparent_color_pass_flags) : color_only_framebuffer;
			
 
				 		RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), false, PASS_MODE_COLOR, transparent_color_pass_flags, rb_data.is_null(), p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->scene_data->lod_distance_multiplier, p_render_data->scene_data->screen_mesh_lod_threshold, p_render_data->scene_data->view_count);
			
 
				 		_render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ);
			
@@ -2064,12 +2156,14 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 	RD::get_singleton()->draw_command_begin_label("Resolve");
			
 
				 
			
 
				 	if (rb_data.is_valid() && rb->get_msaa_3d() != RS::VIEWPORT_MSAA_DISABLED) {
			
 
				+		bool resolve_velocity_buffer = (using_taa || using_fsr2) && rb->has_velocity_buffer(true);
			
 
				 		for (uint32_t v = 0; v < rb->get_view_count(); v++) {
			
 
				 			RD::get_singleton()->texture_resolve_multisample(rb->get_color_msaa(v), rb->get_internal_texture(v));
			
 
				 			resolve_effects->resolve_depth(rb->get_depth_msaa(v), rb->get_depth_texture(v), rb->get_internal_size(), texture_multisamples[rb->get_msaa_3d()]);
			
 
				-		}
			
 
				-		if (taa && rb->get_use_taa()) {
			
 
				-			taa->msaa_resolve(rb);
			
 
				+
			
 
				+			if (resolve_velocity_buffer) {
			
 
				+				RD::get_singleton()->texture_resolve_multisample(rb->get_velocity_buffer(true, v), rb->get_velocity_buffer(false, v));
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 
			
@@ -2082,9 +2176,51 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co
 
				 	}
			
 
				 	RD::get_singleton()->draw_command_end_label();
			
 
				 
			
 
				-	if (rb_data.is_valid() && taa && rb->get_use_taa()) {
			
 
				-		RENDER_TIMESTAMP("TAA")
			
 
				-		taa->process(rb, _render_buffers_get_color_format(), p_render_data->scene_data->z_near, p_render_data->scene_data->z_far);
			
 
				+	if (rb_data.is_valid() && (using_fsr2 || using_taa)) {
			
 
				+		if (using_fsr2) {
			
 
				+			rb->ensure_upscaled();
			
 
				+			rb_data->ensure_fsr2(fsr2_effect);
			
 
				+
			
 
				+			RID exposure;
			
 
				+			if (RSG::camera_attributes->camera_attributes_uses_auto_exposure(p_render_data->camera_attributes)) {
			
 
				+				exposure = luminance->get_current_luminance_buffer(rb);
			
 
				+			}
			
 
				+
			
 
				+			RENDER_TIMESTAMP("FSR2");
			
 
				+			for (uint32_t v = 0; v < rb->get_view_count(); v++) {
			
 
				+				real_t fov = p_render_data->scene_data->cam_projection.get_fov();
			
 
				+				real_t aspect = p_render_data->scene_data->cam_projection.get_aspect();
			
 
				+				real_t fovy = p_render_data->scene_data->cam_projection.get_fovy(fov, aspect);
			
 
				+				Vector2 jitter = p_render_data->scene_data->taa_jitter * Vector2(rb->get_internal_size()) * 0.5f;
			
 
				+				RendererRD::FSR2Effect::Parameters params;
			
 
				+				params.context = rb_data->get_fsr2_context();
			
 
				+				params.internal_size = rb->get_internal_size();
			
 
				+				params.sharpness = CLAMP(1.0f - (rb->get_fsr_sharpness() / 2.0f), 0.0f, 1.0f);
			
 
				+				params.color = rb->get_internal_texture(v);
			
 
				+				params.depth = rb->get_depth_texture(v);
			
 
				+				params.velocity = rb->get_velocity_buffer(false, v);
			
 
				+				params.reactive = rb->get_internal_texture_reactive(v);
			
 
				+				params.exposure = exposure;
			
 
				+				params.output = rb->get_upscaled_texture(v);
			
 
				+				params.z_near = p_render_data->scene_data->z_near;
			
 
				+				params.z_far = p_render_data->scene_data->z_far;
			
 
				+				params.fovy = fovy;
			
 
				+				params.jitter = jitter;
			
 
				+				params.delta_time = float(time_step);
			
 
				+				params.reset_accumulation = false; // FIXME: The engine does not provide a way to reset the accumulation.
			
 
				+
			
 
				+				const Projection &prev_proj = p_render_data->scene_data->prev_cam_projection;
			
 
				+				const Projection &cur_proj = p_render_data->scene_data->cam_projection;
			
 
				+				const Transform3D &prev_transform = p_render_data->scene_data->prev_cam_transform;
			
 
				+				const Transform3D &cur_transform = p_render_data->scene_data->cam_transform;
			
 
				+				params.reprojection = prev_proj.flipped_y() * prev_transform.affine_inverse() * cur_transform * cur_proj.flipped_y().inverse();
			
 
				+
			
 
				+				fsr2_effect->upscale(params);
			
 
				+			}
			
 
				+		} else if (using_taa) {
			
 
				+			RENDER_TIMESTAMP("TAA");
			
 
				+			taa->process(rb, _render_buffers_get_color_format(), p_render_data->scene_data->z_near, p_render_data->scene_data->z_far);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if (rb_data.is_valid()) {
			
@@ -2357,7 +2493,7 @@ void RenderForwardClustered::_render_shadow_append(RID p_framebuffer, const Page
 
				 	render_data.instances = &p_instances;
			
 
				 	render_data.render_info = p_render_info;
			
 
				 
			
 
				-	_setup_environment(&render_data, true, Vector2(1, 1), !p_flip_y, Color(), false, p_use_pancake, shadow_pass_index);
			
 
				+	_setup_environment(&render_data, true, Vector2(1, 1), !p_flip_y, Color(), false, false, p_use_pancake, shadow_pass_index);
			
 
				 
			
 
				 	if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) {
			
 
				 		scene_data.screen_mesh_lod_threshold = 0.0;
			
@@ -2368,7 +2504,7 @@ void RenderForwardClustered::_render_shadow_append(RID p_framebuffer, const Page
 
				 	PassMode pass_mode = p_use_dp ? PASS_MODE_SHADOW_DP : PASS_MODE_SHADOW;
			
 
				 
			
 
				 	uint32_t render_list_from = render_list[RENDER_LIST_SECONDARY].elements.size();
			
 
				-	_fill_render_list(RENDER_LIST_SECONDARY, &render_data, pass_mode, 0, false, false, true);
			
 
				+	_fill_render_list(RENDER_LIST_SECONDARY, &render_data, pass_mode, false, false, false, true);
			
 
				 	uint32_t render_list_size = render_list[RENDER_LIST_SECONDARY].elements.size() - render_list_from;
			
 
				 	render_list[RENDER_LIST_SECONDARY].sort_by_key_range(render_list_from, render_list_size);
			
 
				 	_fill_instance_data(RENDER_LIST_SECONDARY, p_render_info ? p_render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_SHADOW] : (int *)nullptr, render_list_from, render_list_size, false);
			
@@ -2453,7 +2589,7 @@ void RenderForwardClustered::_render_particle_collider_heightfield(RID p_fb, con
 
				 
			
 
				 	_update_render_base_uniform_set(RendererRD::MaterialStorage::get_singleton()->samplers_rd_get_default());
			
 
				 
			
 
				-	_setup_environment(&render_data, true, Vector2(1, 1), true, Color(), false, false);
			
 
				+	_setup_environment(&render_data, true, Vector2(1, 1), true, Color(), false, false, false);
			
 
				 
			
 
				 	PassMode pass_mode = PASS_MODE_SHADOW;
			
 
				 
			
@@ -3475,6 +3611,10 @@ void RenderForwardClustered::_geometry_instance_add_surface_with_material(Geomet
 
				 		flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_PARTICLE_TRAILS;
			
 
				 	}
			
 
				 
			
 
				+	if (p_material->shader_data->is_animated()) {
			
 
				+		flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_MOTION_VECTOR;
			
 
				+	}
			
 
				+
			
 
				 	SceneShaderForwardClustered::MaterialData *material_shadow = nullptr;
			
 
				 	void *surface_shadow = nullptr;
			
 
				 	if (!p_material->shader_data->uses_particle_trails && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_position && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_prepass_alpha && !p_material->shader_data->uses_alpha_clip && !p_material->shader_data->uses_alpha_antialiasing && p_material->shader_data->cull_mode == SceneShaderForwardClustered::ShaderData::CULL_BACK && !p_material->shader_data->uses_point_size) {
			
@@ -3982,6 +4122,7 @@ RenderForwardClustered::RenderForwardClustered() {
 
				 
			
 
				 	resolve_effects = memnew(RendererRD::Resolve());
			
 
				 	taa = memnew(RendererRD::TAA);
			
 
				+	fsr2_effect = memnew(RendererRD::FSR2Effect);
			
 
				 	ss_effects = memnew(RendererRD::SSEffects);
			
 
				 }
			
 
				 
			
@@ -3996,6 +4137,11 @@ RenderForwardClustered::~RenderForwardClustered() {
 
				 		taa = nullptr;
			
 
				 	}
			
 
				 
			
 
				+	if (fsr2_effect) {
			
 
				+		memdelete(fsr2_effect);
			
 
				+		fsr2_effect = nullptr;
			
 
				+	}
			
 
				+
			
 
				 	if (resolve_effects != nullptr) {
			
 
				 		memdelete(resolve_effects);
			
 
				 		resolve_effects = nullptr;
			
--- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h
+++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h
@@ -33,6 +33,7 @@
 
				 
			
 
				 #include "core/templates/paged_allocator.h"
			
 
				 #include "servers/rendering/renderer_rd/cluster_builder_rd.h"
			
 
				+#include "servers/rendering/renderer_rd/effects/fsr2.h"
			
 
				 #include "servers/rendering/renderer_rd/effects/resolve.h"
			
 
				 #include "servers/rendering/renderer_rd/effects/ss_effects.h"
			
 
				 #include "servers/rendering/renderer_rd/effects/taa.h"
			
@@ -84,6 +85,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
				 
			
 
				 	enum RenderListType {
			
 
				 		RENDER_LIST_OPAQUE, //used for opaque objects
			
 
				+		RENDER_LIST_MOTION, //used for opaque objects with motion
			
 
				 		RENDER_LIST_ALPHA, //used for transparent objects
			
 
				 		RENDER_LIST_SECONDARY, //used for shadows and other objects
			
 
				 		RENDER_LIST_MAX
			
@@ -100,6 +102,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
				 
			
 
				 	private:
			
 
				 		RenderSceneBuffersRD *render_buffers = nullptr;
			
 
				+		RendererRD::FSR2Context *fsr2_context = nullptr;
			
 
				 
			
 
				 	public:
			
 
				 		ClusterBuilderRD *cluster_builder = nullptr;
			
@@ -140,10 +143,14 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
				 		RID get_voxelgi(uint32_t p_layer) { return render_buffers->get_texture_slice(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI, p_layer, 0); }
			
 
				 		RID get_voxelgi_msaa(uint32_t p_layer) { return render_buffers->get_texture_slice(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI_MSAA, p_layer, 0); }
			
 
				 
			
 
				+		void ensure_fsr2(RendererRD::FSR2Effect *p_effect);
			
 
				+		RendererRD::FSR2Context *get_fsr2_context() const { return fsr2_context; }
			
 
				+
			
 
				 		RID get_color_only_fb();
			
 
				 		RID get_color_pass_fb(uint32_t p_color_pass_flags);
			
 
				 		RID get_depth_fb(DepthFrameBufferType p_type = DEPTH_FB);
			
 
				 		RID get_specular_only_fb();
			
 
				+		RID get_velocity_only_fb();
			
 
				 
			
 
				 		virtual void configure(RenderSceneBuffersRD *p_render_buffers) override;
			
 
				 		virtual void free_data() override;
			
@@ -345,7 +352,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
				 
			
 
				 	static RenderForwardClustered *singleton;
			
 
				 
			
 
				-	void _setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, bool p_flip_y, const Color &p_default_bg_color, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false, int p_index = 0);
			
 
				+	void _setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, bool p_flip_y, const Color &p_default_bg_color, bool p_opaque_render_buffers = false, bool p_apply_alpha_multiplier = false, bool p_pancake_shadows = false, int p_index = 0);
			
 
				 	void _setup_voxelgis(const PagedArray<RID> &p_voxelgis);
			
 
				 	void _setup_lightmaps(const RenderDataRD *p_render_data, const PagedArray<RID> &p_lightmaps, const Transform3D &p_cam_transform);
			
 
				 
			
@@ -372,7 +379,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
				 
			
 
				 	void _update_instance_data_buffer(RenderListType p_render_list);
			
 
				 	void _fill_instance_data(RenderListType p_render_list, int *p_render_info = nullptr, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true);
			
 
				-	void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, uint32_t p_color_pass_flags, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, bool p_append = false);
			
 
				+	void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, bool p_using_motion_pass = false, bool p_append = false);
			
 
				 
			
 
				 	HashMap<Size2i, RID> sdfgi_framebuffer_size_cache;
			
 
				 
			
@@ -397,6 +404,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
				 			FLAG_USES_NORMAL_TEXTURE = 16384,
			
 
				 			FLAG_USES_DOUBLE_SIDED_SHADOWS = 32768,
			
 
				 			FLAG_USES_PARTICLE_TRAILS = 65536,
			
 
				+			FLAG_USES_MOTION_VECTOR = 131072,
			
 
				 		};
			
 
				 
			
 
				 		union {
			
@@ -424,6 +432,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
				 		RS::PrimitiveType primitive = RS::PRIMITIVE_MAX;
			
 
				 		uint32_t flags = 0;
			
 
				 		uint32_t surface_index = 0;
			
 
				+		uint32_t color_pass_inclusion_mask = 0;
			
 
				 
			
 
				 		void *surface = nullptr;
			
 
				 		RID material_uniform_set;
			
@@ -563,6 +572,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 
				 
			
 
				 	RendererRD::Resolve *resolve_effects = nullptr;
			
 
				 	RendererRD::TAA *taa = nullptr;
			
 
				+	RendererRD::FSR2Effect *fsr2_effect = nullptr;
			
 
				 	RendererRD::SSEffects *ss_effects = nullptr;
			
 
				 
			
 
				 	/* Cluster builder */
			
--- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp
+++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp
@@ -823,9 +823,9 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 
				 				correction.set_depth_correction(true);
			
 
				 				Projection projection = correction * p_render_data->scene_data->cam_projection;
			
 
				 
			
 
				-				sky.setup_sky(p_render_data->environment, p_render_data->render_buffers, *p_render_data->lights, p_render_data->camera_attributes, 1, &projection, &eye_offset, p_render_data->scene_data->cam_transform, projection, screen_size, this);
			
 
				+				sky.setup_sky(p_render_data->environment, p_render_data->render_buffers, *p_render_data->lights, p_render_data->camera_attributes, 1, &projection, &eye_offset, p_render_data->scene_data->cam_transform, projection, screen_size, Vector2(0.0f, 0.0f), this);
			
 
				 			} else {
			
 
				-				sky.setup_sky(p_render_data->environment, p_render_data->render_buffers, *p_render_data->lights, p_render_data->camera_attributes, p_render_data->scene_data->view_count, p_render_data->scene_data->view_projection, p_render_data->scene_data->view_eye_offset, p_render_data->scene_data->cam_transform, p_render_data->scene_data->cam_projection, screen_size, this);
			
 
				+				sky.setup_sky(p_render_data->environment, p_render_data->render_buffers, *p_render_data->lights, p_render_data->camera_attributes, p_render_data->scene_data->view_count, p_render_data->scene_data->view_projection, p_render_data->scene_data->view_eye_offset, p_render_data->scene_data->cam_transform, p_render_data->scene_data->cam_projection, screen_size, p_render_data->scene_data->taa_jitter, this);
			
 
				 			}
			
 
				 
			
 
				 			sky_energy_multiplier *= bg_energy_multiplier;
			
@@ -1908,7 +1908,7 @@ void RenderForwardMobile::_setup_environment(const RenderDataRD *p_render_data,
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	p_render_data->scene_data->update_ubo(scene_state.uniform_buffers[p_index], get_debug_draw_mode(), env, reflection_probe_instance, p_render_data->camera_attributes, p_flip_y, p_pancake_shadows, p_screen_size, p_default_bg_color, _render_buffers_get_luminance_multiplier(), p_opaque_render_buffers);
			
 
				+	p_render_data->scene_data->update_ubo(scene_state.uniform_buffers[p_index], get_debug_draw_mode(), env, reflection_probe_instance, p_render_data->camera_attributes, p_flip_y, p_pancake_shadows, p_screen_size, p_default_bg_color, _render_buffers_get_luminance_multiplier(), p_opaque_render_buffers, false);
			
 
				 }
			
 
				 
			
 
				 void RenderForwardMobile::_fill_element_info(RenderListType p_render_list, uint32_t p_offset, int32_t p_max_elements) {
			
--- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
+++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp
@@ -340,14 +340,16 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende
 
				 
			
 
				 	// Glow, auto exposure and DoF (if enabled).
			
 
				 
			
 
				-	Size2i internal_size = rb->get_internal_size();
			
 
				 	Size2i target_size = rb->get_target_size();
			
 
				-
			
 
				 	bool can_use_effects = target_size.x >= 8 && target_size.y >= 8; // FIXME I think this should check internal size, we do all our post processing at this size...
			
 
				 	bool can_use_storage = _render_buffers_can_be_storage();
			
 
				 
			
 
				+	bool use_fsr = fsr && can_use_effects && rb->get_scaling_3d_mode() == RS::VIEWPORT_SCALING_3D_MODE_FSR;
			
 
				+	bool use_upscaled_texture = rb->has_upscaled_texture() && rb->get_scaling_3d_mode() == RS::VIEWPORT_SCALING_3D_MODE_FSR2;
			
 
				+
			
 
				 	RID render_target = rb->get_render_target();
			
 
				-	RID internal_texture = rb->get_internal_texture();
			
 
				+	RID color_texture = use_upscaled_texture ? rb->get_upscaled_texture() : rb->get_internal_texture();
			
 
				+	Size2i color_size = use_upscaled_texture ? target_size : rb->get_internal_size();
			
 
				 
			
 
				 	if (can_use_effects && RSG::camera_attributes->camera_attributes_uses_dof(p_render_data->camera_attributes)) {
			
 
				 		RENDER_TIMESTAMP("Depth of Field");
			
@@ -358,14 +360,14 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende
 
				 		RendererRD::BokehDOF::BokehBuffers buffers;
			
 
				 
			
 
				 		// Textures we use
			
 
				-		buffers.base_texture_size = rb->get_internal_size();
			
 
				+		buffers.base_texture_size = color_size;
			
 
				 		buffers.secondary_texture = rb->get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_BLUR_0, 0, 0);
			
 
				 		buffers.half_texture[0] = rb->get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_BLUR_1, 0, 0);
			
 
				 		buffers.half_texture[1] = rb->get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_BLUR_0, 0, 1);
			
 
				 
			
 
				 		if (can_use_storage) {
			
 
				 			for (uint32_t i = 0; i < rb->get_view_count(); i++) {
			
 
				-				buffers.base_texture = rb->get_internal_texture(i);
			
 
				+				buffers.base_texture = use_upscaled_texture ? rb->get_upscaled_texture(i) : rb->get_internal_texture(i);
			
 
				 				buffers.depth_texture = rb->get_depth_texture(i);
			
 
				 
			
 
				 				// In stereo p_render_data->z_near and p_render_data->z_far can be offset for our combined frustum.
			
@@ -387,7 +389,7 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende
 
				 			buffers.base_weight_fb = rb->weight_buffers[0].fb;
			
 
				 
			
 
				 			for (uint32_t i = 0; i < rb->get_view_count(); i++) {
			
 
				-				buffers.base_texture = rb->get_internal_texture(i);
			
 
				+				buffers.base_texture = use_upscaled_texture ? rb->get_upscaled_texture(i) : rb->get_internal_texture(i);
			
 
				 				buffers.depth_texture = rb->get_depth_texture(i);
			
 
				 				buffers.base_fb = FramebufferCacheRD::get_singleton()->get_cache(buffers.base_texture); // TODO move this into bokeh_dof_raster, we can do this internally
			
 
				 
			
@@ -416,7 +418,7 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende
 
				 		double step = RSG::camera_attributes->camera_attributes_get_auto_exposure_adjust_speed(p_render_data->camera_attributes) * time_step;
			
 
				 		float auto_exposure_min_sensitivity = RSG::camera_attributes->camera_attributes_get_auto_exposure_min_sensitivity(p_render_data->camera_attributes);
			
 
				 		float auto_exposure_max_sensitivity = RSG::camera_attributes->camera_attributes_get_auto_exposure_max_sensitivity(p_render_data->camera_attributes);
			
 
				-		luminance->luminance_reduction(internal_texture, internal_size, luminance_buffers, auto_exposure_min_sensitivity, auto_exposure_max_sensitivity, step, set_immediate);
			
 
				+		luminance->luminance_reduction(color_texture, color_size, luminance_buffers, auto_exposure_min_sensitivity, auto_exposure_max_sensitivity, step, set_immediate);
			
 
				 
			
 
				 		// Swap final reduce with prev luminance.
			
 
				 
			
@@ -525,7 +527,7 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende
 
				 		}
			
 
				 
			
 
				 		tonemap.use_debanding = rb->get_use_debanding();
			
 
				-		tonemap.texture_size = Vector2i(rb->get_internal_size().x, rb->get_internal_size().y);
			
 
				+		tonemap.texture_size = Vector2i(color_size.x, color_size.y);
			
 
				 
			
 
				 		if (p_render_data->environment.is_valid()) {
			
 
				 			tonemap.tonemap_mode = environment_get_tone_mapper(p_render_data->environment);
			
@@ -555,7 +557,8 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende
 
				 		tonemap.convert_to_srgb = !texture_storage->render_target_is_using_hdr(render_target);
			
 
				 
			
 
				 		RID dest_fb;
			
 
				-		if (fsr && can_use_effects && rb->get_scaling_3d_mode() == RS::VIEWPORT_SCALING_3D_MODE_FSR) {
			
 
				+		bool use_intermediate_fb = use_fsr;
			
 
				+		if (use_intermediate_fb) {
			
 
				 			// If we use FSR to upscale we need to write our result into an intermediate buffer.
			
 
				 			// Note that this is cached so we only create the texture the first time.
			
 
				 			RID dest_texture = rb->create_texture(SNAME("Tonemapper"), SNAME("destination"), _render_buffers_get_color_format(), RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT);
			
@@ -567,12 +570,12 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende
 
				 			dest_fb = texture_storage->render_target_get_rd_framebuffer(render_target);
			
 
				 		}
			
 
				 
			
 
				-		tone_mapper->tonemapper(internal_texture, dest_fb, tonemap);
			
 
				+		tone_mapper->tonemapper(color_texture, dest_fb, tonemap);
			
 
				 
			
 
				 		RD::get_singleton()->draw_command_end_label();
			
 
				 	}
			
 
				 
			
 
				-	if (fsr && can_use_effects && rb->get_scaling_3d_mode() == RS::VIEWPORT_SCALING_3D_MODE_FSR) {
			
 
				+	if (use_fsr) {
			
 
				 		RD::get_singleton()->draw_command_begin_label("FSR 1.0 Upscale");
			
 
				 
			
 
				 		for (uint32_t v = 0; v < rb->get_view_count(); v++) {
			
@@ -732,6 +735,11 @@ void RendererSceneRenderRD::_render_buffers_debug_draw(const RenderDataRD *p_ren
 
				 		}
			
 
				 	}
			
 
				 
			
 
				+	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_INTERNAL_BUFFER) {
			
 
				+		Size2 rtsize = texture_storage->render_target_get_size(render_target);
			
 
				+		copy_effects->copy_to_fb_rect(rb->get_internal_texture(), texture_storage->render_target_get_rd_framebuffer(render_target), Rect2(Vector2(), rtsize), false, false);
			
 
				+	}
			
 
				+
			
 
				 	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_NORMAL_BUFFER && _render_buffers_get_normal_texture(rb).is_valid()) {
			
 
				 		Size2 rtsize = texture_storage->render_target_get_size(render_target);
			
 
				 		copy_effects->copy_to_fb_rect(_render_buffers_get_normal_texture(rb), texture_storage->render_target_get_rd_framebuffer(render_target), Rect2(Vector2(), rtsize), false, false);
			
@@ -745,7 +753,12 @@ void RendererSceneRenderRD::_render_buffers_debug_draw(const RenderDataRD *p_ren
 
				 	}
			
 
				 
			
 
				 	if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_MOTION_VECTORS && _render_buffers_get_velocity_texture(rb).is_valid()) {
			
 
				-		debug_effects->draw_motion_vectors(_render_buffers_get_velocity_texture(rb), texture_storage->render_target_get_rd_framebuffer(render_target), rb->get_internal_size());
			
 
				+		RID velocity = _render_buffers_get_velocity_texture(rb);
			
 
				+		RID depth = rb->get_depth_texture();
			
 
				+		RID dest_fb = texture_storage->render_target_get_rd_framebuffer(render_target);
			
 
				+		Size2i resolution = rb->get_internal_size();
			
 
				+
			
 
				+		debug_effects->draw_motion_vectors(velocity, depth, dest_fb, p_render_data->scene_data->cam_projection, p_render_data->scene_data->cam_transform, p_render_data->scene_data->prev_cam_projection, p_render_data->scene_data->prev_cam_transform, resolution);
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/servers/rendering/renderer_rd/shaders/effects/SCsub
+++ b/servers/rendering/renderer_rd/shaders/effects/SCsub
@@ -15,3 +15,5 @@ if "RD_GLSL" in env["BUILDERS"]:
 
				     # compile shaders
			
 
				     for glsl_file in glsl_files:
			
 
				         env.RD_GLSL(glsl_file)
			
 
				+
			
 
				+SConscript("fsr2/SCsub")
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub
@@ -0,0 +1,17 @@
 
				+#!/usr/bin/env python
			
 
				+
			
 
				+Import("env")
			
 
				+
			
 
				+if "RD_GLSL" in env["BUILDERS"]:
			
 
				+    # find all include files (this directory plus the shared ones in the parent effects directory)
			
 
				+    gl_include_files = [str(f) for f in Glob("*_inc.glsl")] + [str(f) for f in Glob("../*_inc.glsl")]
			
 
				+
			
 
				+    # find all shader code (all glsl files excluding our include files)
			
 
				+    glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files]
			
 
				+
			
 
				+    # make sure we recompile shaders if include files or the GLSL builder script change
			
 
				+    env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"])
			
 
				+
			
 
				+    # compile shaders (one RD_GLSL builder call per shader file)
			
 
				+    for glsl_file in glsl_files:
			
 
				+        env.RD_GLSL(glsl_file)
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl
@@ -0,0 +1,8 @@
 
				+#[compute]
			
 
				+
			
 
				+#version 450
			
 
				+
			
 
				+#VERSION_DEFINES
			
 
				+
			
 
				+#include "../motion_vector_inc.glsl"
			
 
				+#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl"
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl
@@ -0,0 +1,8 @@
 
				+#[compute]
			
 
				+
			
 
				+#version 450
			
 
				+
			
 
				+#VERSION_DEFINES
			
 
				+
			
 
				+#include "../motion_vector_inc.glsl"
			
 
				+#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl"
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl
@@ -0,0 +1,7 @@
 
				+#[compute]
			
 
				+
			
 
				+#version 450
			
 
				+
			
 
				+#VERSION_DEFINES
			
 
				+
			
 
				+#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl"
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl
@@ -0,0 +1,8 @@
 
				+#[compute]
			
 
				+
			
 
				+#version 450
			
 
				+
			
 
				+#VERSION_DEFINES
			
 
				+
			
 
				+#include "../motion_vector_inc.glsl"
			
 
				+#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl"
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl
@@ -0,0 +1,7 @@
 
				+#[compute]
			
 
				+
			
 
				+#version 450
			
 
				+
			
 
				+#VERSION_DEFINES
			
 
				+
			
 
				+#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl"
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl
@@ -0,0 +1,7 @@
 
				+#[compute]
			
 
				+
			
 
				+#version 450
			
 
				+
			
 
				+#VERSION_DEFINES
			
 
				+
			
 
				+#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl"
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl
@@ -0,0 +1,8 @@
 
				+#[compute]
			
 
				+
			
 
				+#version 450
			
 
				+
			
 
				+#VERSION_DEFINES
			
 
				+
			
 
				+#include "../motion_vector_inc.glsl"
			
 
				+#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl"
			
--- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl
@@ -0,0 +1,8 @@
 
				+#[compute]
			
 
				+
			
 
				+#version 450
			
 
				+
			
 
				+#VERSION_DEFINES
			
 
				+
			
 
				+#include "../motion_vector_inc.glsl"
			
 
				+#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl"
			
--- a/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl
@@ -0,0 +1,6 @@
 
				+vec2 derive_motion_vector(vec2 uv, float depth, mat4 reprojection_matrix) { // Returns the offset from uv to the position obtained by transforming (uv, depth) with reprojection_matrix.
			
 
				+	vec4 previous_pos_ndc = reprojection_matrix * vec4(uv * 2.0f - 1.0f, depth * 2.0f - 1.0f, 1.0f); // Remap uv and depth with x * 2 - 1 (presumably [0, 1] to NDC [-1, 1] - TODO confirm depth range) before reprojecting.
			
 
				+	return 0.5f + (previous_pos_ndc.xy / previous_pos_ndc.w) * 0.5f - uv; // Perspective divide, remap back with 0.5 + x * 0.5, then subtract uv so the result is a relative offset.
			
 
				+}
			
 
				+
			
 
				+#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(i, j, k) derive_motion_vector(i, j, k)
			
--- a/servers/rendering/renderer_rd/shaders/effects/motion_vectors.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/motion_vectors.glsl
@@ -18,14 +18,19 @@ void main() {
 
				 
			
 
				 #VERSION_DEFINES
			
 
				 
			
 
				+#include "motion_vector_inc.glsl"
			
 
				+
			
 
				 layout(location = 0) in vec2 uv_interp;
			
 
				 
			
 
				 layout(set = 0, binding = 0) uniform sampler2D source_velocity;
			
 
				+layout(set = 0, binding = 1) uniform sampler2D source_depth;
			
 
				 
			
 
				 layout(location = 0) out vec4 frag_color;
			
 
				 
			
 
				 layout(push_constant, std430) uniform Params {
			
 
				+	highp mat4 reprojection_matrix;
			
 
				 	vec2 resolution;
			
 
				+	bool force_derive_from_depth;
			
 
				 }
			
 
				 params;
			
 
				 
			
@@ -49,7 +54,14 @@ void main() {
 
				 	vec2 pos_pixel = uv_interp * params.resolution;
			
 
				 	vec2 cell_pos_pixel = floor(pos_pixel / cell_size) * cell_size + (cell_size * 0.5f);
			
 
				 	vec2 cell_pos_uv = cell_pos_pixel / params.resolution;
			
 
				-	vec2 cell_pos_previous_uv = cell_pos_uv + textureLod(source_velocity, cell_pos_uv, 0.0f).xy;
			
 
				+	vec2 cell_pos_velocity = textureLod(source_velocity, cell_pos_uv, 0.0f).xy;
			
 
				+	bool derive_velocity = params.force_derive_from_depth || all(lessThanEqual(cell_pos_velocity, vec2(-1.0f, -1.0f)));
			
 
				+	if (derive_velocity) {
			
 
				+		float depth = textureLod(source_depth, cell_pos_uv, 0.0f).x;
			
 
				+		cell_pos_velocity = derive_motion_vector(cell_pos_uv, depth, params.reprojection_matrix);
			
 
				+	}
			
 
				+
			
 
				+	vec2 cell_pos_previous_uv = cell_pos_uv + cell_pos_velocity;
			
 
				 
			
 
				 	// Draw the shapes.
			
 
				 	float epsilon = 1e-6f;
			
@@ -76,5 +88,10 @@ void main() {
 
				 		alpha = 0.0f;
			
 
				 	}
			
 
				 
			
 
				+	if (derive_velocity) {
			
 
				+		color = vec3(1.0f, 1.0f, 1.0f) - color;
			
 
				+		alpha *= 0.5f;
			
 
				+	}
			
 
				+
			
 
				 	frag_color = vec4(color, alpha);
			
 
				 }
			
--- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl
+++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl
@@ -2280,6 +2280,8 @@ void fragment_shader(in SceneData scene_data) {
 
				 
			
 
				 #else //MODE_SEPARATE_SPECULAR
			
 
				 
			
 
				+	alpha *= scene_data.pass_alpha_multiplier;
			
 
				+
			
 
				 #ifdef MODE_UNSHADED
			
 
				 	frag_color = vec4(albedo, alpha);
			
 
				 #else
			
--- a/servers/rendering/renderer_rd/shaders/scene_data_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/scene_data_inc.glsl
@@ -64,6 +64,6 @@ struct SceneData {
 
				 
			
 
				 	bool pancake_shadows;
			
 
				 	uint camera_visible_layers;
			
 
				-	uint pad2;
			
 
				+	float pass_alpha_multiplier;
			
 
				 	uint pad3;
			
 
				 };
			
--- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp
+++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp
@@ -1392,12 +1392,18 @@ void MeshStorage::_multimesh_get_motion_vectors_offsets(RID p_multimesh, uint32_
 
				 	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
			
 
				 	ERR_FAIL_COND(!multimesh);
			
 
				 	r_current_offset = multimesh->motion_vectors_current_offset;
			
 
				-	if (RSG::rasterizer->get_frame_number() - multimesh->motion_vectors_last_change >= 2) {
			
 
				+	if (!_multimesh_uses_motion_vectors(multimesh)) {
			
 
				 		multimesh->motion_vectors_previous_offset = multimesh->motion_vectors_current_offset;
			
 
				 	}
			
 
				 	r_prev_offset = multimesh->motion_vectors_previous_offset;
			
 
				 }
			
 
				 
			
 
				+bool MeshStorage::_multimesh_uses_motion_vectors_offsets(RID p_multimesh) { // RID-based wrapper: resolves p_multimesh and forwards to _multimesh_uses_motion_vectors(MultiMesh *).
			
 
				+	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
			
 
				+	ERR_FAIL_NULL_V(multimesh, false); // An RID that does not resolve to a multimesh yields false.
			
 
				+	return _multimesh_uses_motion_vectors(multimesh);
			
 
				+}
			
 
				+
			
 
				 int MeshStorage::multimesh_get_instance_count(RID p_multimesh) const {
			
 
				 	MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
			
 
				 	ERR_FAIL_COND_V(!multimesh, 0);
			
@@ -1500,6 +1506,10 @@ void MeshStorage::_multimesh_update_motion_vectors_data_cache(MultiMesh *multime
 
				 	}
			
 
				 }
			
 
				 
			
 
				+bool MeshStorage::_multimesh_uses_motion_vectors(MultiMesh *multimesh) { // True while the multimesh's motion_vectors_last_change is less than 2 frames behind the current frame number.
			
 
				+	return (RSG::rasterizer->get_frame_number() - multimesh->motion_vectors_last_change) < 2; // Inverse of the ">= 2" staleness test previously inlined in _multimesh_get_motion_vectors_offsets().
			
 
				+}
			
 
				+
			
 
				 void MeshStorage::_multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb) {
			
 
				 	uint32_t region_index = p_index / MULTIMESH_DIRTY_REGION_SIZE;
			
 
				 #ifdef DEBUG_ENABLED
			
--- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.h
+++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.h
@@ -244,6 +244,7 @@ private:
 
				 	_FORCE_INLINE_ void _multimesh_make_local(MultiMesh *multimesh) const;
			
 
				 	_FORCE_INLINE_ void _multimesh_enable_motion_vectors(MultiMesh *multimesh);
			
 
				 	_FORCE_INLINE_ void _multimesh_update_motion_vectors_data_cache(MultiMesh *multimesh);
			
 
				+	_FORCE_INLINE_ bool _multimesh_uses_motion_vectors(MultiMesh *multimesh);
			
 
				 	_FORCE_INLINE_ void _multimesh_mark_dirty(MultiMesh *multimesh, int p_index, bool p_aabb);
			
 
				 	_FORCE_INLINE_ void _multimesh_mark_all_dirty(MultiMesh *multimesh, bool p_data, bool p_aabb);
			
 
				 	_FORCE_INLINE_ void _multimesh_re_create_aabb(MultiMesh *multimesh, const float *p_data, int p_instances);
			
@@ -622,6 +623,8 @@ public:
 
				 
			
 
				 	void _update_dirty_multimeshes();
			
 
				 	void _multimesh_get_motion_vectors_offsets(RID p_multimesh, uint32_t &r_current_offset, uint32_t &r_prev_offset);
			
 
				+	bool _multimesh_uses_motion_vectors_offsets(RID p_multimesh);
			
 
				+	bool _multimesh_uses_motion_vectors(RID p_multimesh);
			
 
				 
			
 
				 	_FORCE_INLINE_ RS::MultimeshTransformFormat multimesh_get_transform_format(RID p_multimesh) const {
			
 
				 		MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh);
			
--- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp
+++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp
@@ -52,6 +52,7 @@ void RenderSceneBuffersRD::_bind_methods() {
 
				 	ClassDB::bind_method(D_METHOD("get_texture", "context", "name"), &RenderSceneBuffersRD::get_texture);
			
 
				 	ClassDB::bind_method(D_METHOD("get_texture_format", "context", "name"), &RenderSceneBuffersRD::_get_texture_format);
			
 
				 	ClassDB::bind_method(D_METHOD("get_texture_slice", "context", "name", "layer", "mipmap", "layers", "mipmaps"), &RenderSceneBuffersRD::get_texture_slice);
			
 
				+	ClassDB::bind_method(D_METHOD("get_texture_slice_view", "context", "name", "layer", "mipmap", "layers", "mipmaps", "view"), &RenderSceneBuffersRD::_get_texture_slice_view);
			
 
				 	ClassDB::bind_method(D_METHOD("get_texture_slice_size", "context", "name", "mipmap"), &RenderSceneBuffersRD::get_texture_slice_size);
			
 
				 	ClassDB::bind_method(D_METHOD("clear_context", "context"), &RenderSceneBuffersRD::clear_context);
			
 
				 
			
@@ -95,8 +96,8 @@ void RenderSceneBuffersRD::free_named_texture(NamedTexture &p_named_texture) {
 
				 void RenderSceneBuffersRD::update_samplers() {
			
 
				 	float computed_mipmap_bias = texture_mipmap_bias;
			
 
				 
			
 
				-	if (use_taa) {
			
 
				-		// Use negative mipmap LOD bias when TAA is enabled to compensate for loss of sharpness.
			
 
				+	if (use_taa || (scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2)) {
			
 
				+		// Use negative mipmap LOD bias when TAA or FSR2 is enabled to compensate for loss of sharpness.
			
 
				 		// This restores sharpness in still images to be roughly at the same level as without TAA,
			
 
				 		// but moving scenes will still be blurrier.
			
 
				 		computed_mipmap_bias -= 0.5;
			
@@ -388,6 +389,15 @@ Ref<RDTextureFormat> RenderSceneBuffersRD::_get_texture_format(const StringName
 
				 	return tf;
			
 
				 }
			
 
				 
			
 
				+RID RenderSceneBuffersRD::_get_texture_slice_view(const StringName &p_context, const StringName &p_texture_name, const uint32_t p_layer, const uint32_t p_mipmap, const uint32_t p_layers, const uint32_t p_mipmaps, const Ref<RDTextureView> p_view) { // Variant bound to ClassDB as "get_texture_slice_view": unwraps the RDTextureView reference and forwards to get_texture_slice_view().
			
 
				+	RD::TextureView texture_view; // Default-constructed view; kept as-is when p_view is not a valid reference.
			
 
				+	if (p_view.is_valid()) {
			
 
				+		texture_view = p_view->base; // Copy the RD::TextureView held by the RDTextureView wrapper.
			
 
				+	}
			
 
				+
			
 
				+	return get_texture_slice_view(p_context, p_texture_name, p_layer, p_mipmap, p_layers, p_mipmaps, texture_view);
			
 
				+}
			
 
				+
			
 
				 const RD::TextureFormat RenderSceneBuffersRD::get_texture_format(const StringName &p_context, const StringName &p_texture_name) const {
			
 
				 	NTKey key(p_context, p_texture_name);
			
 
				 
			
@@ -397,6 +407,10 @@ const RD::TextureFormat RenderSceneBuffersRD::get_texture_format(const StringNam
 
				 }
			
 
				 
			
 
				 RID RenderSceneBuffersRD::get_texture_slice(const StringName &p_context, const StringName &p_texture_name, const uint32_t p_layer, const uint32_t p_mipmap, const uint32_t p_layers, const uint32_t p_mipmaps) {
			
 
				+	return get_texture_slice_view(p_context, p_texture_name, p_layer, p_mipmap, p_layers, p_mipmaps, RD::TextureView()); // A plain slice is the default-view case of get_texture_slice_view().
			
 
				+}
			
 
				+
			
 
				+RID RenderSceneBuffersRD::get_texture_slice_view(const StringName &p_context, const StringName &p_texture_name, const uint32_t p_layer, const uint32_t p_mipmap, const uint32_t p_layers, const uint32_t p_mipmaps, RD::TextureView p_view) {
			
 
				 	NTKey key(p_context, p_texture_name);
			
 
				 
			
 
				 	// check if this is a known texture
			
@@ -413,19 +427,20 @@ RID RenderSceneBuffersRD::get_texture_slice(const StringName &p_context, const S
 
				 	ERR_FAIL_COND_V(p_mipmap + p_mipmaps > named_texture.format.mipmaps, RID());
			
 
				 
			
 
				 	// asking the whole thing? just return the original
			
 
				-	if (p_layer == 0 && p_mipmap == 0 && named_texture.format.array_layers == p_layers && named_texture.format.mipmaps == p_mipmaps) {
			
 
				+	RD::TextureView default_view = RD::TextureView();
			
 
				+	if (p_layer == 0 && p_mipmap == 0 && named_texture.format.array_layers == p_layers && named_texture.format.mipmaps == p_mipmaps && p_view == default_view) {
			
 
				 		return named_texture.texture;
			
 
				 	}
			
 
				 
			
 
				 	// see if we have this
			
 
				-	NTSliceKey slice_key(p_layer, p_layers, p_mipmap, p_mipmaps);
			
 
				+	NTSliceKey slice_key(p_layer, p_layers, p_mipmap, p_mipmaps, p_view);
			
 
				 	if (named_texture.slices.has(slice_key)) {
			
 
				 		return named_texture.slices[slice_key];
			
 
				 	}
			
 
				 
			
 
				 	// create our slice
			
 
				 	RID &slice = named_texture.slices[slice_key];
			
 
				-	slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), named_texture.texture, p_layer, p_mipmap, p_mipmaps, p_layers > 1 ? RD::TEXTURE_SLICE_2D_ARRAY : RD::TEXTURE_SLICE_2D, p_layers);
			
 
				+	slice = RD::get_singleton()->texture_create_shared_from_slice(p_view, named_texture.texture, p_layer, p_mipmap, p_mipmaps, p_layers > 1 ? RD::TEXTURE_SLICE_2D_ARRAY : RD::TEXTURE_SLICE_2D, p_layers);
			
 
				 
			
 
				 	Array arr;
			
 
				 	arr.push_back(p_context);
			
@@ -434,7 +449,12 @@ RID RenderSceneBuffersRD::get_texture_slice(const StringName &p_context, const S
 
				 	arr.push_back(itos(p_layers));
			
 
				 	arr.push_back(itos(p_mipmap));
			
 
				 	arr.push_back(itos(p_mipmaps));
			
 
				-	RD::get_singleton()->set_resource_name(slice, String("RenderBuffer {0}/{1}, layer {2}/{3}, mipmap {4}/{5}").format(arr));
			
 
				+	arr.push_back(itos(p_view.format_override));
			
 
				+	arr.push_back(itos(p_view.swizzle_r));
			
 
				+	arr.push_back(itos(p_view.swizzle_g));
			
 
				+	arr.push_back(itos(p_view.swizzle_b));
			
 
				+	arr.push_back(itos(p_view.swizzle_a));
			
 
				+	RD::get_singleton()->set_resource_name(slice, String("RenderBuffer {0}/{1}, layer {2}/{3}, mipmap {4}/{5}, view {6}/{7}/{8}/{9}/{10}").format(arr));
			
 
				 
			
 
				 	// and return our slice
			
 
				 	return slice;
			
@@ -479,7 +499,13 @@ void RenderSceneBuffersRD::allocate_blur_textures() {
 
				 		return;
			
 
				 	}
			
 
				 
			
 
				-	uint32_t mipmaps_required = Image::get_image_required_mipmaps(internal_size.x, internal_size.y, Image::FORMAT_RGBAH);
			
 
				+	Size2i blur_size = internal_size;
			
 
				+	if (scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2) {
			
 
				+		// The blur texture should be as big as the target size when using an upscaler.
			
 
				+		blur_size = target_size;
			
 
				+	}
			
 
				+
			
 
				+	uint32_t mipmaps_required = Image::get_image_required_mipmaps(blur_size.x, blur_size.y, Image::FORMAT_RGBAH);
			
 
				 
			
 
				 	uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
			
 
				 	if (can_be_storage) {
			
@@ -488,12 +514,12 @@ void RenderSceneBuffersRD::allocate_blur_textures() {
 
				 		usage_bits += RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
			
 
				 	}
			
 
				 
			
 
				-	create_texture(RB_SCOPE_BUFFERS, RB_TEX_BLUR_0, base_data_format, usage_bits, RD::TEXTURE_SAMPLES_1, internal_size, view_count, mipmaps_required);
			
 
				-	create_texture(RB_SCOPE_BUFFERS, RB_TEX_BLUR_1, base_data_format, usage_bits, RD::TEXTURE_SAMPLES_1, Size2i(internal_size.x >> 1, internal_size.y >> 1), view_count, mipmaps_required - 1);
			
 
				+	create_texture(RB_SCOPE_BUFFERS, RB_TEX_BLUR_0, base_data_format, usage_bits, RD::TEXTURE_SAMPLES_1, blur_size, view_count, mipmaps_required);
			
 
				+	create_texture(RB_SCOPE_BUFFERS, RB_TEX_BLUR_1, base_data_format, usage_bits, RD::TEXTURE_SAMPLES_1, Size2i(blur_size.x >> 1, blur_size.y >> 1), view_count, mipmaps_required - 1);
			
 
				 
			
 
				 	// if !can_be_storage we need a half width version
			
 
				 	if (!can_be_storage) {
			
 
				-		create_texture(RB_SCOPE_BUFFERS, RB_TEX_HALF_BLUR, base_data_format, usage_bits, RD::TEXTURE_SAMPLES_1, Size2i(internal_size.x >> 1, internal_size.y), 1, mipmaps_required);
			
 
				+		create_texture(RB_SCOPE_BUFFERS, RB_TEX_HALF_BLUR, base_data_format, usage_bits, RD::TEXTURE_SAMPLES_1, Size2i(blur_size.x >> 1, blur_size.y), 1, mipmaps_required);
			
 
				 	}
			
 
				 
			
 
				 	// TODO redo this:
			
@@ -502,8 +528,8 @@ void RenderSceneBuffersRD::allocate_blur_textures() {
 
				 
			
 
				 		RD::TextureFormat tf;
			
 
				 		tf.format = RD::DATA_FORMAT_R16_SFLOAT; // We could probably use DATA_FORMAT_R8_SNORM if we don't pre-multiply by blur_size but that depends on whether we can remove DEPTH_GAP
			
 
				-		tf.width = internal_size.x;
			
 
				-		tf.height = internal_size.y;
			
 
				+		tf.width = blur_size.x;
			
 
				+		tf.height = blur_size.y;
			
 
				 		tf.texture_type = RD::TEXTURE_TYPE_2D;
			
 
				 		tf.array_layers = 1; // Our DOF effect handles one eye per turn
			
 
				 		tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
			
@@ -603,6 +629,16 @@ RID RenderSceneBuffersRD::get_depth_texture(const uint32_t p_layer) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// Upscaled texture.
			
 
				+
			
 
				+void RenderSceneBuffersRD::ensure_upscaled() { // Creates the RB_TEX_COLOR_UPSCALED texture in the RB_SCOPE_BUFFERS context if it does not exist yet.
			
 
				+	if (!has_upscaled_texture()) { // Nothing to do when the texture has already been created.
			
 
				+		uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | (can_be_storage ? RD::TEXTURE_USAGE_STORAGE_BIT : 0) | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; // Storage usage is only requested when can_be_storage is set.
			
 
				+		usage_bits |= RD::TEXTURE_USAGE_INPUT_ATTACHMENT_BIT;
			
 
				+		create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_UPSCALED, base_data_format, usage_bits, RD::TEXTURE_SAMPLES_1, target_size); // Single-sampled, uses base_data_format and is sized to target_size.
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 // Velocity texture.
			
 
				 
			
 
				 void RenderSceneBuffersRD::ensure_velocity() {
			
--- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h
+++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h
@@ -31,6 +31,7 @@
 
				 #ifndef RENDER_SCENE_BUFFERS_RD_H
			
 
				 #define RENDER_SCENE_BUFFERS_RD_H
			
 
				 
			
 
				+#include "../effects/fsr2.h"
			
 
				 #include "../effects/vrs.h"
			
 
				 #include "../framebuffer_cache_rd.h"
			
 
				 #include "core/templates/hash_map.h"
			
@@ -47,6 +48,7 @@
 
				 #define RB_TEXTURE SNAME("texture")
			
 
				 #define RB_TEX_COLOR SNAME("color")
			
 
				 #define RB_TEX_COLOR_MSAA SNAME("color_msaa")
			
 
				+#define RB_TEX_COLOR_UPSCALED SNAME("color_upscaled")
			
 
				 #define RB_TEX_DEPTH SNAME("depth")
			
 
				 #define RB_TEX_DEPTH_MSAA SNAME("depth_msaa")
			
 
				 #define RB_TEX_VELOCITY SNAME("velocity")
			
@@ -114,9 +116,10 @@ private:
 
				 		uint32_t layers;
			
 
				 		uint32_t mipmap;
			
 
				 		uint32_t mipmaps;
			
 
				+		RD::TextureView texture_view;
			
 
				 
			
 
				 		bool operator==(const NTSliceKey &p_val) const {
			
 
				-			return (layer == p_val.layer) && (layers == p_val.layers) && (mipmap == p_val.mipmap) && (mipmaps == p_val.mipmaps);
			
 
				+			return (layer == p_val.layer) && (layers == p_val.layers) && (mipmap == p_val.mipmap) && (mipmaps == p_val.mipmaps) && (texture_view == p_val.texture_view); // The view is part of the key: keys that differ only in texture_view compare unequal.
			
 
				 		}
			
 
				 
			
 
				 		static uint32_t hash(const NTSliceKey &p_val) {
			
@@ -124,15 +127,21 @@ private:
 
				 			h = hash_murmur3_one_32(p_val.layers, h);
			
 
				 			h = hash_murmur3_one_32(p_val.mipmap, h);
			
 
				 			h = hash_murmur3_one_32(p_val.mipmaps, h);
			
 
				+			h = hash_murmur3_one_32(p_val.texture_view.format_override, h); // Pass the running hash as the seed; without it the layer/mipmap fields hashed above are discarded.
			
 
				+			h = hash_murmur3_one_32(p_val.texture_view.swizzle_r, h);
			
 
				+			h = hash_murmur3_one_32(p_val.texture_view.swizzle_g, h);
			
 
				+			h = hash_murmur3_one_32(p_val.texture_view.swizzle_b, h);
			
 
				+			h = hash_murmur3_one_32(p_val.texture_view.swizzle_a, h);
			
 
				 			return hash_fmix32(h);
			
 
				 		}
			
 
				 
			
 
				 		NTSliceKey() {}
			
 
				-		NTSliceKey(uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) {
			
 
				+		NTSliceKey(uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps, RD::TextureView p_texture_view) { // Builds a key from the slice range and the texture view used for it.
			
 
				 			layer = p_layer;
			
 
				 			layers = p_layers;
			
 
				 			mipmap = p_mipmap;
			
 
				 			mipmaps = p_mipmaps;
			
 
				+			texture_view = p_texture_view; // Stored by value; compared in operator== and mixed into hash().
			
 
				 		}
			
 
				 	};
			
 
				 
			
@@ -190,6 +199,7 @@ public:
 
				 	RID get_texture(const StringName &p_context, const StringName &p_texture_name) const;
			
 
				 	const RD::TextureFormat get_texture_format(const StringName &p_context, const StringName &p_texture_name) const;
			
 
				 	RID get_texture_slice(const StringName &p_context, const StringName &p_texture_name, const uint32_t p_layer, const uint32_t p_mipmap, const uint32_t p_layers = 1, const uint32_t p_mipmaps = 1);
			
 
				+	RID get_texture_slice_view(const StringName &p_context, const StringName &p_texture_name, const uint32_t p_layer, const uint32_t p_mipmap, const uint32_t p_layers = 1, const uint32_t p_mipmaps = 1, RD::TextureView p_view = RD::TextureView());
			
 
				 	Size2i get_texture_slice_size(const StringName &p_context, const StringName &p_texture_name, const uint32_t p_mipmap);
			
 
				 
			
 
				 	void clear_context(const StringName &p_context);
			
@@ -230,6 +240,14 @@ public:
 
				 	_FORCE_INLINE_ RID get_internal_texture(const uint32_t p_layer) {
			
 
				 		return get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_COLOR, p_layer, 0);
			
 
				 	}
			
 
				+	_FORCE_INLINE_ RID get_internal_texture_reactive(const uint32_t p_layer) { // Returns a view of layer p_layer of RB_TEX_COLOR in which every channel reads the alpha channel.
			
 
				+		RD::TextureView alpha_only_view; // Starts from the default view; only the swizzles are changed below.
			
 
				+		alpha_only_view.swizzle_r = RD::TEXTURE_SWIZZLE_A;
			
 
				+		alpha_only_view.swizzle_g = RD::TEXTURE_SWIZZLE_A;
			
 
				+		alpha_only_view.swizzle_b = RD::TEXTURE_SWIZZLE_A;
			
 
				+		alpha_only_view.swizzle_a = RD::TEXTURE_SWIZZLE_A;
			
 
				+		return get_texture_slice_view(RB_SCOPE_BUFFERS, RB_TEX_COLOR, p_layer, 0, 1, 1, alpha_only_view); // Mipmap 0, one layer, one mipmap. NOTE(review): presumably consumed as the FSR2 reactive mask - confirm against the caller.
			
 
				+	}
			
 
				 	_FORCE_INLINE_ RID get_color_msaa() const {
			
 
				 		return get_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_MSAA);
			
 
				 	}
			
@@ -251,6 +269,19 @@ public:
 
				 	// back buffer (color)
			
 
				 	RID get_back_buffer_texture() const { return has_texture(RB_SCOPE_BUFFERS, RB_TEX_BLUR_0) ? get_texture(RB_SCOPE_BUFFERS, RB_TEX_BLUR_0) : RID(); } // We (re)use our blur texture here.
			
 
				 
			
 
				+	// Upscaled.
			
 
				+	void ensure_upscaled();
			
 
				+
			
 
				+	_FORCE_INLINE_ bool has_upscaled_texture() const { // Reports whether RB_TEX_COLOR_UPSCALED exists in the RB_SCOPE_BUFFERS context.
			
 
				+		return has_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_UPSCALED);
			
 
				+	}
			
 
				+	_FORCE_INLINE_ RID get_upscaled_texture() const { // Returns the whole RB_TEX_COLOR_UPSCALED texture from the RB_SCOPE_BUFFERS context.
			
 
				+		return get_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_UPSCALED);
			
 
				+	}
			
 
				+	_FORCE_INLINE_ RID get_upscaled_texture(const uint32_t p_layer) { // Returns the slice of RB_TEX_COLOR_UPSCALED for layer p_layer at mipmap 0.
			
 
				+		return get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_COLOR_UPSCALED, p_layer, 0);
			
 
				+	}
			
 
				+
			
 
				 	// Velocity, currently only used by TAA (Clustered) but we'll be using this in other places soon too.
			
 
				 
			
 
				 	void ensure_velocity();
			
@@ -271,6 +302,7 @@ private:
 
				 	RID _create_texture_from_format(const StringName &p_context, const StringName &p_texture_name, const Ref<RDTextureFormat> &p_texture_format, const Ref<RDTextureView> &p_view = Ref<RDTextureView>(), bool p_unique = true);
			
 
				 	RID _create_texture_view(const StringName &p_context, const StringName &p_texture_name, const StringName p_view_name, const Ref<RDTextureView> p_view = Ref<RDTextureView>());
			
 
				 	Ref<RDTextureFormat> _get_texture_format(const StringName &p_context, const StringName &p_texture_name) const;
			
 
				+	RID _get_texture_slice_view(const StringName &p_context, const StringName &p_texture_name, const uint32_t p_layer, const uint32_t p_mipmap, const uint32_t p_layers = 1, const uint32_t p_mipmaps = 1, const Ref<RDTextureView> p_view = Ref<RDTextureView>());
			
 
				 
			
 
				 	// For color and depth as exposed to extensions, we return the buffer that we're rendering into.
			
 
				 	// Resolving happens after effects etc. are run.
			
--- a/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp
+++ b/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.cpp
@@ -38,7 +38,7 @@ RID RenderSceneDataRD::create_uniform_buffer() {
 
				 	return RD::get_singleton()->uniform_buffer_create(sizeof(UBODATA));
			
 
				 }
			
 
				 
			
 
				-void RenderSceneDataRD::update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p_debug_mode, RID p_env, RID p_reflection_probe_instance, RID p_camera_attributes, bool p_flip_y, bool p_pancake_shadows, const Size2i &p_screen_size, const Color &p_default_bg_color, float p_luminance_multiplier, bool p_opaque_render_buffers) {
			
 
				+void RenderSceneDataRD::update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p_debug_mode, RID p_env, RID p_reflection_probe_instance, RID p_camera_attributes, bool p_flip_y, bool p_pancake_shadows, const Size2i &p_screen_size, const Color &p_default_bg_color, float p_luminance_multiplier, bool p_opaque_render_buffers, bool p_apply_alpha_multiplier) {
			
 
				 	RendererSceneRenderRD *render_scene_render = RendererSceneRenderRD::get_singleton();
			
 
				 
			
 
				 	UBODATA ubo_data;
			
@@ -89,6 +89,7 @@ void RenderSceneDataRD::update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p
 
				 	RendererRD::MaterialStorage::store_soft_shadow_kernel(render_scene_render->penumbra_shadow_kernel_get(), ubo.penumbra_shadow_kernel);
			
 
				 	RendererRD::MaterialStorage::store_soft_shadow_kernel(render_scene_render->soft_shadow_kernel_get(), ubo.soft_shadow_kernel);
			
 
				 	ubo.camera_visible_layers = camera_visible_layers;
			
 
				+	ubo.pass_alpha_multiplier = p_opaque_render_buffers && p_apply_alpha_multiplier ? 0.0f : 1.0f;
			
 
				 
			
 
				 	ubo.viewport_size[0] = p_screen_size.x;
			
 
				 	ubo.viewport_size[1] = p_screen_size.y;
			
--- a/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.h
+++ b/servers/rendering/renderer_rd/storage_rd/render_scene_data_rd.h
@@ -77,7 +77,7 @@ public:
 
				 	float time_step;
			
 
				 
			
 
				 	RID create_uniform_buffer();
			
 
				-	void update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p_debug_mode, RID p_env, RID p_reflection_probe_instance, RID p_camera_attributes, bool p_flip_y, bool p_pancake_shadows, const Size2i &p_screen_size, const Color &p_default_bg_color, float p_luminance_multiplier, bool p_opaque_render_buffers);
			
 
				+	void update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p_debug_mode, RID p_env, RID p_reflection_probe_instance, RID p_camera_attributes, bool p_flip_y, bool p_pancake_shadows, const Size2i &p_screen_size, const Color &p_default_bg_color, float p_luminance_multiplier, bool p_opaque_render_buffers, bool p_apply_alpha_multiplier);
			
 
				 	RID get_uniform_buffer();
			
 
				 
			
 
				 private:
			
@@ -144,7 +144,7 @@ private:
 
				 
			
 
				 		uint32_t pancake_shadows;
			
 
				 		uint32_t camera_visible_layers;
			
 
				-		uint32_t pad2;
			
 
				+		float pass_alpha_multiplier;
			
 
				 		uint32_t pad3;
			
 
				 	};
			
 
				 
			
--- a/servers/rendering/renderer_scene_cull.cpp
+++ b/servers/rendering/renderer_scene_cull.cpp
@@ -37,6 +37,21 @@
 
				 
			
 
				 #include <new>
			
 
				 
			
 
				+/* HALTON SEQUENCE */
			
 
				+
			
 
				+#ifndef _3D_DISABLED
			
 
				+static float get_halton_value(int p_index, int p_base) { // Returns element p_index of the base-p_base Halton sequence, remapped from [0, 1) to [-1, 1).
			
 
				+	float f = 1; // Weight of the current digit; becomes 1 / p_base^k on the k-th iteration.
			
 
				+	float r = 0; // Accumulated radical inverse of p_index.
			
 
				+	while (p_index > 0) { // Skipped entirely for p_index <= 0, which therefore returns -1.0f.
			
 
				+		f = f / static_cast<float>(p_base);
			
 
				+		r = r + f * (p_index % p_base); // Add the lowest remaining base-p_base digit, scaled by its weight.
			
 
				+		p_index = p_index / p_base; // Drop the digit that was just consumed.
			
 
				+	}
			
 
				+	return r * 2.0f - 1.0f; // Remap to a signed range.
			
 
				+}
			
 
				+#endif // _3D_DISABLED
			
 
				+
			
 
				 /* CAMERA API */
			
 
				 
			
 
				 RID RendererSceneCull::camera_allocate() {
			
@@ -2498,15 +2513,26 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons
 
				 	return animated_material_found;
			
 
				 }
			
 
				 
			
 
				-void RendererSceneCull::render_camera(const Ref<RenderSceneBuffers> &p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, bool p_use_taa, float p_screen_mesh_lod_threshold, RID p_shadow_atlas, Ref<XRInterface> &p_xr_interface, RenderInfo *r_render_info) {
			
 
				+void RendererSceneCull::render_camera(const Ref<RenderSceneBuffers> &p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, uint32_t p_jitter_phase_count, float p_screen_mesh_lod_threshold, RID p_shadow_atlas, Ref<XRInterface> &p_xr_interface, RenderInfo *r_render_info) {
			
 
				 #ifndef _3D_DISABLED
			
 
				 
			
 
				 	Camera *camera = camera_owner.get_or_null(p_camera);
			
 
				 	ERR_FAIL_COND(!camera);
			
 
				 
			
 
				 	Vector2 jitter;
			
 
				-	if (p_use_taa) {
			
 
				-		jitter = taa_jitter_array[RSG::rasterizer->get_frame_number() % TAA_JITTER_COUNT] / p_viewport_size;
			
 
				+	if (p_jitter_phase_count > 0) {
			
 
				+		uint32_t current_jitter_count = camera_jitter_array.size();
			
 
				+		if (p_jitter_phase_count != current_jitter_count) {
			
 
				+			// Resize the jitter array and fill it with the pre-computed Halton sequence.
			
 
				+			camera_jitter_array.resize(p_jitter_phase_count);
			
 
				+
			
 
				+			for (uint32_t i = current_jitter_count; i < p_jitter_phase_count; i++) {
			
 
				+				camera_jitter_array[i].x = get_halton_value(i, 2);
			
 
				+				camera_jitter_array[i].y = get_halton_value(i, 3);
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		jitter = camera_jitter_array[RSG::rasterizer->get_frame_number() % p_jitter_phase_count] / p_viewport_size;
			
 
				 	}
			
 
				 
			
 
				 	RendererSceneRender::CameraData camera_data;
			
@@ -4113,17 +4139,6 @@ void RendererSceneCull::set_scene_render(RendererSceneRender *p_scene_render) {
 
				 	geometry_instance_pair_mask = scene_render->geometry_instance_get_pair_mask();
			
 
				 }
			
 
				 
			
 
				-float get_halton_value(int index, int base) {
			
 
				-	float f = 1;
			
 
				-	float r = 0;
			
 
				-	while (index > 0) {
			
 
				-		f = f / static_cast<float>(base);
			
 
				-		r = r + f * (index % base);
			
 
				-		index = index / base;
			
 
				-	}
			
 
				-	return r * 2.0f - 1.0f;
			
 
				-};
			
 
				-
			
 
				 RendererSceneCull::RendererSceneCull() {
			
 
				 	render_pass = 1;
			
 
				 	singleton = this;
			
@@ -4148,12 +4163,6 @@ RendererSceneCull::RendererSceneCull() {
 
				 	thread_cull_threshold = GLOBAL_GET("rendering/limits/spatial_indexer/threaded_cull_minimum_instances");
			
 
				 	thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)WorkerThreadPool::get_singleton()->get_thread_count()); //make sure there is at least one thread per CPU
			
 
				 
			
 
				-	taa_jitter_array.resize(TAA_JITTER_COUNT);
			
 
				-	for (int i = 0; i < TAA_JITTER_COUNT; i++) {
			
 
				-		taa_jitter_array[i].x = get_halton_value(i, 2);
			
 
				-		taa_jitter_array[i].y = get_halton_value(i, 3);
			
 
				-	}
			
 
				-
			
 
				 	dummy_occlusion_culling = memnew(RendererSceneOcclusionCull);
			
 
				 }
			
 
				 
			
--- a/servers/rendering/renderer_scene_cull.h
+++ b/servers/rendering/renderer_scene_cull.h
@@ -954,8 +954,7 @@ public:
 
				 
			
 
				 	uint32_t geometry_instance_pair_mask = 0; // used in traditional forward, unnecessary on clustered
			
 
				 
			
 
				-	const int TAA_JITTER_COUNT = 16;
			
 
				-	LocalVector<Vector2> taa_jitter_array;
			
 
				+	LocalVector<Vector2> camera_jitter_array;
			
 
				 
			
 
				 	virtual RID instance_allocate();
			
 
				 	virtual void instance_initialize(RID p_rid);
			
@@ -1089,7 +1088,7 @@ public:
 
				 	void _render_scene(const RendererSceneRender::CameraData *p_camera_data, const Ref<RenderSceneBuffers> &p_render_buffers, RID p_environment, RID p_force_camera_attributes, uint32_t p_visible_layers, RID p_scenario, RID p_viewport, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, bool p_using_shadows = true, RenderInfo *r_render_info = nullptr);
			
 
				 	void render_empty_scene(const Ref<RenderSceneBuffers> &p_render_buffers, RID p_scenario, RID p_shadow_atlas);
			
 
				 
			
 
				-	void render_camera(const Ref<RenderSceneBuffers> &p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, bool p_use_taa, float p_screen_mesh_lod_threshold, RID p_shadow_atlas, Ref<XRInterface> &p_xr_interface, RenderingMethod::RenderInfo *r_render_info = nullptr);
			
 
				+	void render_camera(const Ref<RenderSceneBuffers> &p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, uint32_t p_jitter_phase_count, float p_screen_mesh_lod_threshold, RID p_shadow_atlas, Ref<XRInterface> &p_xr_interface, RenderingMethod::RenderInfo *r_render_info = nullptr);
			
 
				 	void update_dirty_instances();
			
 
				 
			
 
				 	void render_particle_colliders();
			
--- a/servers/rendering/renderer_viewport.cpp
+++ b/servers/rendering/renderer_viewport.cpp
@@ -118,22 +118,29 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) {
 
				 		} else {
			
 
				 			float scaling_3d_scale = p_viewport->scaling_3d_scale;
			
 
				 			RS::ViewportScaling3DMode scaling_3d_mode = p_viewport->scaling_3d_mode;
			
 
				+			bool scaling_3d_is_fsr = (scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR) || (scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2);
			
 
				+			bool use_taa = p_viewport->use_taa;
			
 
				 
			
 
				-			if ((scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR) && (scaling_3d_scale > 1.0)) {
			
 
				+			if (scaling_3d_is_fsr && (scaling_3d_scale > 1.0)) {
			
 
				 				// FSR is not designed for downsampling.
			
 
				 				// Fall back to bilinear scaling.
			
 
				+				WARN_PRINT_ONCE("FSR 3D resolution scaling is not designed for downsampling. Falling back to bilinear 3D resolution scaling.");
			
 
				 				scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_BILINEAR;
			
 
				 			}
			
 
				 
			
 
				-			if ((scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR) && !p_viewport->fsr_enabled) {
			
 
				+			bool upscaler_available = p_viewport->fsr_enabled;
			
 
				+			if (scaling_3d_is_fsr && !upscaler_available) {
			
 
				 				// FSR is not actually available.
			
 
				 				// Fall back to bilinear scaling.
			
 
				-				WARN_PRINT_ONCE("FSR 1.0 3D resolution scaling is not available. Falling back to bilinear 3D resolution scaling.");
			
 
				+				WARN_PRINT_ONCE("FSR 3D resolution scaling is not available. Falling back to bilinear 3D resolution scaling.");
			
 
				 				scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_BILINEAR;
			
 
				 			}
			
 
				 
			
 
				-			if (scaling_3d_scale == 1.0) {
			
 
				-				scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_OFF;
			
 
				+			if (use_taa && scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2) {
			
 
				+				// FSR2 can't be used with TAA.
			
 
				+				// Turn TAA off and prefer using FSR2.
			
 
				+				WARN_PRINT_ONCE("FSR 2 is not compatible with TAA. Disabling TAA internally.");
			
 
				+				use_taa = false;
			
 
				 			}
			
 
				 
			
 
				 			int width;
			
@@ -151,6 +158,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) {
 
				 					render_height = height;
			
 
				 					break;
			
 
				 				case RS::VIEWPORT_SCALING_3D_MODE_FSR:
			
 
				+				case RS::VIEWPORT_SCALING_3D_MODE_FSR2:
			
 
				 					width = p_viewport->size.width;
			
 
				 					height = p_viewport->size.height;
			
 
				 					render_width = MAX(width * scaling_3d_scale, 1.0); // width / (width * scaling)
			
@@ -174,7 +182,17 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) {
 
				 					break;
			
 
				 			}
			
 
				 
			
 
				+			uint32_t jitter_phase_count = 0;
			
 
				+			if (scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2) {
			
 
				+				// Implementation has been copied from ffxFsr2GetJitterPhaseCount.
			
 
				+				jitter_phase_count = uint32_t(8.0f * pow(float(width) / render_width, 2.0f));
			
 
				+			} else if (use_taa) {
			
 
				+				// Default jitter count for TAA.
			
 
				+				jitter_phase_count = 16;
			
 
				+			}
			
 
				+
			
 
				 			p_viewport->internal_size = Size2(render_width, render_height);
			
 
				+			p_viewport->jitter_phase_count = jitter_phase_count;
			
 
				 
			
 
				 			// At resolution scales lower than 1.0, use negative texture mipmap bias
			
 
				 			// to compensate for the loss of sharpness.
			
@@ -190,7 +208,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) {
 
				 			rb_config.set_screen_space_aa(p_viewport->screen_space_aa);
			
 
				 			rb_config.set_fsr_sharpness(p_viewport->fsr_sharpness);
			
 
				 			rb_config.set_texture_mipmap_bias(texture_mipmap_bias);
			
 
				-			rb_config.set_use_taa(p_viewport->use_taa);
			
 
				+			rb_config.set_use_taa(use_taa);
			
 
				 
			
 
				 			p_viewport->render_buffers->configure(&rb_config);
			
 
				 		}
			
@@ -221,7 +239,7 @@ void RendererViewport::_draw_3d(Viewport *p_viewport) {
 
				 	}
			
 
				 
			
 
				 	float screen_mesh_lod_threshold = p_viewport->mesh_lod_threshold / float(p_viewport->size.width);
			
 
				-	RSG::scene->render_camera(p_viewport->render_buffers, p_viewport->camera, p_viewport->scenario, p_viewport->self, p_viewport->internal_size, p_viewport->use_taa, screen_mesh_lod_threshold, p_viewport->shadow_atlas, xr_interface, &p_viewport->render_info);
			
 
				+	RSG::scene->render_camera(p_viewport->render_buffers, p_viewport->camera, p_viewport->scenario, p_viewport->self, p_viewport->internal_size, p_viewport->jitter_phase_count, screen_mesh_lod_threshold, p_viewport->shadow_atlas, xr_interface, &p_viewport->render_info);
			
 
				 
			
 
				 	RENDER_TIMESTAMP("< Render 3D Scene");
			
 
				 }
			
@@ -825,8 +843,20 @@ void RendererViewport::viewport_set_use_xr(RID p_viewport, bool p_use_xr) {
 
				 void RendererViewport::viewport_set_scaling_3d_mode(RID p_viewport, RS::ViewportScaling3DMode p_mode) {
			
 
				 	Viewport *viewport = viewport_owner.get_or_null(p_viewport);
			
 
				 	ERR_FAIL_COND(!viewport);
			
 
				+	ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2 && OS::get_singleton()->get_current_rendering_method() != "forward_plus", "FSR2 is only available when using the Forward+ renderer."); // Rejects FSR2 outside Forward+; the viewport's mode is left unchanged in that case.
			
 
				 
			
 
				+	if (viewport->scaling_3d_mode == p_mode) { // No change requested: skip the bookkeeping and buffer reconfiguration below.
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	bool motion_vectors_before = _viewport_requires_motion_vectors(viewport); // Sampled before the mode changes.
			
 
				 	viewport->scaling_3d_mode = p_mode;
			
 
				+
			
 
				+	bool motion_vectors_after = _viewport_requires_motion_vectors(viewport); // Sampled again with the new mode applied.
			
 
				+	if (motion_vectors_before != motion_vectors_after) { // Only adjust the counter when the requirement actually flipped.
			
 
				+		num_viewports_with_motion_vectors += motion_vectors_after ? 1 : -1;
			
 
				+	}
			
 
				+
			
 
				 	_configure_3d_render_buffers(viewport);
			
 
				 }
			
 
				 
			
@@ -888,6 +918,10 @@ void RendererViewport::_viewport_set_size(Viewport *p_viewport, int p_width, int
 
				 	}
			
 
				 }
			
 
				 
			
 
				+bool RendererViewport::_viewport_requires_motion_vectors(Viewport *p_viewport) { // Returns true when the viewport has TAA enabled or uses the FSR2 scaling mode.
			
 
				+	return p_viewport->use_taa || p_viewport->scaling_3d_mode == RenderingServer::VIEWPORT_SCALING_3D_MODE_FSR2; // Reads the viewport's stored settings; p_viewport is dereferenced without a null check.
			
 
				+}
			
 
				+
			
 
				 void RendererViewport::viewport_set_active(RID p_viewport, bool p_active) {
			
 
				 	Viewport *viewport = viewport_owner.get_or_null(p_viewport);
			
 
				 	ERR_FAIL_COND(!viewport);
			
@@ -1193,8 +1227,15 @@ void RendererViewport::viewport_set_use_taa(RID p_viewport, bool p_use_taa) {
 
				 	if (viewport->use_taa == p_use_taa) {
			
 
				 		return;
			
 
				 	}
			
 
				+
			
 
				+	bool motion_vectors_before = _viewport_requires_motion_vectors(viewport);
			
 
				 	viewport->use_taa = p_use_taa;
			
 
				-	num_viewports_with_motion_vectors += p_use_taa ? 1 : -1;
			
 
				+
			
 
				+	bool motion_vectors_after = _viewport_requires_motion_vectors(viewport);
			
 
				+	if (motion_vectors_before != motion_vectors_after) {
			
 
				+		num_viewports_with_motion_vectors += motion_vectors_after ? 1 : -1;
			
 
				+	}
			
 
				+
			
 
				 	_configure_3d_render_buffers(viewport);
			
 
				 }
			
 
				 
			
@@ -1379,7 +1420,7 @@ bool RendererViewport::free(RID p_rid) {
 
				 			RendererSceneOcclusionCull::get_singleton()->remove_buffer(p_rid);
			
 
				 		}
			
 
				 
			
 
				-		if (viewport->use_taa) {
			
 
				+		if (_viewport_requires_motion_vectors(viewport)) {
			
 
				 			num_viewports_with_motion_vectors--;
			
 
				 		}
			
 
				 
			
--- a/servers/rendering/renderer_viewport.h
+++ b/servers/rendering/renderer_viewport.h
@@ -63,6 +63,7 @@ public:
 
				 		float fsr_sharpness = 0.2f;
			
 
				 		float texture_mipmap_bias = 0.0f;
			
 
				 		bool fsr_enabled = false;
			
 
				+		uint32_t jitter_phase_count = 0;
			
 
				 		RS::ViewportUpdateMode update_mode = RenderingServer::VIEWPORT_UPDATE_WHEN_VISIBLE;
			
 
				 		RID render_target;
			
 
				 		RID render_target_texture;
			
@@ -203,6 +204,7 @@ public:
 
				 private:
			
 
				 	Vector<Viewport *> _sort_active_viewports();
			
 
				 	void _viewport_set_size(Viewport *p_viewport, int p_width, int p_height, uint32_t p_view_count);
			
 
				+	bool _viewport_requires_motion_vectors(Viewport *p_viewport);
			
 
				 	void _configure_3d_render_buffers(Viewport *p_viewport);
			
 
				 	void _draw_3d(Viewport *p_viewport);
			
 
				 	void _draw_viewport(Viewport *p_viewport);
			
--- a/servers/rendering/rendering_device.h
+++ b/servers/rendering/rendering_device.h
@@ -518,6 +518,22 @@ public:
 
				 		TextureSwizzle swizzle_b;
			
 
				 		TextureSwizzle swizzle_a;
			
 
				 
			
 
				+		bool operator==(const TextureView &p_view) const { // Field-by-field equality: two views are equal when the format override and all four swizzles match.
			
 
				+			if (format_override != p_view.format_override) {
			
 
				+				return false;
			
 
				+			} else if (swizzle_r != p_view.swizzle_r) {
			
 
				+				return false;
			
 
				+			} else if (swizzle_g != p_view.swizzle_g) {
			
 
				+				return false;
			
 
				+			} else if (swizzle_b != p_view.swizzle_b) {
			
 
				+				return false;
			
 
				+			} else if (swizzle_a != p_view.swizzle_a) {
			
 
				+				return false;
			
 
				+			} else {
			
 
				+				return true; // Every compared field matched.
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				 		TextureView() {
			
 
				 			format_override = DATA_FORMAT_MAX; //means, use same as format
			
 
				 			swizzle_r = TEXTURE_SWIZZLE_R;
			
@@ -1270,6 +1286,8 @@ public:
 
				 		LIMIT_MAX_VIEWPORT_DIMENSIONS_X,
			
 
				 		LIMIT_MAX_VIEWPORT_DIMENSIONS_Y,
			
 
				 		LIMIT_SUBGROUP_SIZE,
			
 
				+		LIMIT_SUBGROUP_MIN_SIZE,
			
 
				+		LIMIT_SUBGROUP_MAX_SIZE,
			
 
				 		LIMIT_SUBGROUP_IN_SHADERS, // Set flags using SHADER_STAGE_VERTEX_BIT, SHADER_STAGE_FRAGMENT_BIT, etc.
			
 
				 		LIMIT_SUBGROUP_OPERATIONS,
			
 
				 		LIMIT_VRS_TEXEL_WIDTH,
			
--- a/servers/rendering/rendering_method.h
+++ b/servers/rendering/rendering_method.h
@@ -301,7 +301,7 @@ public:
 
				 		int info[RS::VIEWPORT_RENDER_INFO_TYPE_MAX][RS::VIEWPORT_RENDER_INFO_MAX] = {};
			
 
				 	};
			
 
				 
			
 
				-	virtual void render_camera(const Ref<RenderSceneBuffers> &p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, bool p_use_taa, float p_mesh_lod_threshold, RID p_shadow_atlas, Ref<XRInterface> &p_xr_interface, RenderInfo *r_render_info = nullptr) = 0;
			
 
				+	virtual void render_camera(const Ref<RenderSceneBuffers> &p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, uint32_t p_jitter_phase_count, float p_mesh_lod_threshold, RID p_shadow_atlas, Ref<XRInterface> &p_xr_interface, RenderInfo *r_render_info = nullptr) = 0;
			
 
				 
			
 
				 	virtual void update() = 0;
			
 
				 	virtual void render_probes() = 0;
			
--- a/servers/rendering/storage/render_scene_buffers.cpp
+++ b/servers/rendering/storage/render_scene_buffers.cpp
@@ -49,7 +49,7 @@ void RenderSceneBuffersConfiguration::_bind_methods() {
 
				 
			
 
				 	ClassDB::bind_method(D_METHOD("get_scaling_3d_mode"), &RenderSceneBuffersConfiguration::get_scaling_3d_mode);
			
 
				 	ClassDB::bind_method(D_METHOD("set_scaling_3d_mode", "scaling_3d_mode"), &RenderSceneBuffersConfiguration::set_scaling_3d_mode);
			
 
				-	ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast)"), "set_scaling_3d_mode", "get_scaling_3d_mode"); // TODO VIEWPORT_SCALING_3D_MODE_OFF is possible here too, but we can't specify an enum string for it.
			
 
				+	ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast),FSR 2.2 (Slow)"), "set_scaling_3d_mode", "get_scaling_3d_mode"); // TODO VIEWPORT_SCALING_3D_MODE_OFF is possible here too, but we can't specify an enum string for it.
			
 
				 
			
 
				 	ClassDB::bind_method(D_METHOD("get_msaa_3d"), &RenderSceneBuffersConfiguration::get_msaa_3d);
			
 
				 	ClassDB::bind_method(D_METHOD("set_msaa_3d", "msaa_3d"), &RenderSceneBuffersConfiguration::set_msaa_3d);
			
--- a/servers/rendering_server.cpp
+++ b/servers/rendering_server.cpp
@@ -2252,6 +2252,7 @@ void RenderingServer::_bind_methods() {
 
				 
			
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_BILINEAR);
			
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR);
			
 
				+	BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR2);
			
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_MAX);
			
 
				 
			
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_UPDATE_DISABLED);
			
@@ -2329,6 +2330,7 @@ void RenderingServer::_bind_methods() {
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES);
			
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_OCCLUDERS);
			
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_MOTION_VECTORS);
			
 
				+	BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_INTERNAL_BUFFER);
			
 
				 
			
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_VRS_DISABLED);
			
 
				 	BIND_ENUM_CONSTANT(VIEWPORT_VRS_TEXTURE);
			
@@ -2959,7 +2961,7 @@ void RenderingServer::init() {
 
				 	GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/anti_aliasing/screen_space_roughness_limiter/amount", PROPERTY_HINT_RANGE, "0.01,4.0,0.01"), 0.25);
			
 
				 	GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/anti_aliasing/screen_space_roughness_limiter/limit", PROPERTY_HINT_RANGE, "0.01,1.0,0.01"), 0.18);
			
 
				 
			
 
				-	GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/scaling_3d/mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast)"), 0);
			
 
				+	GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/scaling_3d/mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast),FSR 2.2 (Slow)"), 0);
			
 
				 	GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/scaling_3d/scale", PROPERTY_HINT_RANGE, "0.25,2.0,0.01"), 1.0);
			
 
				 	GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/scaling_3d/fsr_sharpness", PROPERTY_HINT_RANGE, "0,2,0.1"), 0.2f);
			
 
				 	GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/textures/default_filters/texture_mipmap_bias", PROPERTY_HINT_RANGE, "-2,2,0.001"), 0.0f);
			
--- a/servers/rendering_server.h
+++ b/servers/rendering_server.h
@@ -807,6 +807,7 @@ public:
 
				 	enum ViewportScaling3DMode {
			
 
				 		VIEWPORT_SCALING_3D_MODE_BILINEAR,
			
 
				 		VIEWPORT_SCALING_3D_MODE_FSR,
			
 
				+		VIEWPORT_SCALING_3D_MODE_FSR2,
			
 
				 		VIEWPORT_SCALING_3D_MODE_MAX,
			
 
				 		VIEWPORT_SCALING_3D_MODE_OFF = 255, // for internal use only
			
 
				 	};
			
@@ -971,6 +972,7 @@ public:
 
				 		VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES,
			
 
				 		VIEWPORT_DEBUG_DRAW_OCCLUDERS,
			
 
				 		VIEWPORT_DEBUG_DRAW_MOTION_VECTORS,
			
 
				+		VIEWPORT_DEBUG_DRAW_INTERNAL_BUFFER,
			
 
				 	};
			
 
				 
			
 
				 	virtual void viewport_set_debug_draw(RID p_viewport, ViewportDebugDraw p_draw) = 0;
			
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -17,6 +17,21 @@ Files extracted from upstream source:
 
				 - `license.txt`
			
 
				 
			
 
				 
			
 
				+## amd-fsr2
			
 
				+
			
 
				+- Upstream: https://github.com/GPUOpen-Effects/FidelityFX-FSR2
			
 
				+- Version: 2.2.1 (1680d1edd5c034f88ebbbb793d8b88f8842cf804, 2023)
			
 
				+- License: MIT
			
 
				+
			
 
				+Files extracted from upstream source:
			
 
				+
			
 
				+- `ffx_*.cpp` and `ffx_*.h` from `src/ffx-fsr2-api`
			
 
				+- `shaders` folder from `src/ffx-fsr2-api` with `ffx_*.hlsl` files excluded
			
 
				+- `LICENSE.txt`
			
 
				+
			
 
				+Apply `patches` to add the new options required by Godot and general compilation fixes.
			
 
				+
			
 
				+
			
 
				 ## angle
			
 
				 
			
 
				 - Upstream: https://chromium.googlesource.com/angle/angle/
			
--- a/thirdparty/amd-fsr2/LICENSE.txt
+++ b/thirdparty/amd-fsr2/LICENSE.txt
@@ -0,0 +1,21 @@
 
				+FidelityFX Super Resolution 2.2
			
 
				+=================================
			
 
				+Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+
			
 
				+Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+of this software and associated documentation files (the "Software"), to deal
			
 
				+in the Software without restriction, including without limitation the rights
			
 
				+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+copies of the Software, and to permit persons to whom the Software is
			
 
				+furnished to do so, subject to the following conditions:
			
 
				+
			
 
				+The above copyright notice and this permission notice shall be included in
			
 
				+all copies or substantial portions of the Software.
			
 
				+
			
 
				+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
			
 
				+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+THE SOFTWARE.
			
--- a/thirdparty/amd-fsr2/ffx_assert.cpp
+++ b/thirdparty/amd-fsr2/ffx_assert.cpp
@@ -0,0 +1,81 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#include "ffx_assert.h"
			
 
				+#include <stdlib.h>  // for malloc()
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+#ifndef WIN32_LEAN_AND_MEAN
			
 
				+#define WIN32_LEAN_AND_MEAN
			
 
				+#endif
			
 
				+#include <windows.h>  // required for OutputDebugString()
			
 
				+#include <stdio.h>    // required for sprintf_s
			
 
				+#endif                // #ifdef _WIN32
			
 
				+
			
 
				+static FfxAssertCallback s_assertCallback;
			
 
				+
			
 
				+// Stores the function pointer that ffxAssertReport() forwards formatted
				+// assert text to on Windows builds. Passing a null pointer clears it, in
				+// which case ffxAssertReport() uses OutputDebugStringA() instead.
				+void ffxAssertSetPrintingCallback(FfxAssertCallback callback)
				+{
				+    s_assertCallback = callback;
				+}
			
 
				+
			
 
				+// Implementation of assert reporting.
				+//
				+// On Windows this builds "<file>(<line>): ASSERTION FAILED. <text>\n", where
				+// <text> is `message` when it is non-null and `condition` otherwise, and hands
				+// it to the callback set through ffxAssertSetPrintingCallback(), or to
				+// OutputDebugStringA() when no callback is set.
				+// On every other platform nothing is formatted or printed and the callback is
				+// not invoked.
				+//
				+// Always returns true, including when `file` is null or the temporary buffer
				+// cannot be produced.
				+bool ffxAssertReport(const char* file, int32_t line, const char* condition, const char* message)
				+{
				+    if (!file) {
				+        return true;
				+    }
				+
				+#ifdef _WIN32
				+    // -- GODOT start --
				+    // Prefer the explicit message and fall back to the stringified condition.
				+    // Both may be null (e.g. FFX_ASSERT_FAIL(NULL)); a null pointer for "%s" is
				+    // undefined behaviour, so print an empty string in that case.
				+    const char* text = message ? message : condition;
				+    if (!text) {
				+        text = "";
				+    }
				+
				+    // Measuring pass: snprintf returns a negative value on failure, which must
				+    // not be converted to size_t.
				+    const int length = snprintf(nullptr, 0, "%s(%d): ASSERTION FAILED. %s\n", file, line, text);
				+    if (length < 0) {
				+        return true;
				+    }
				+
				+    // form the final assertion string and output to the TTY.
				+    const size_t bufferSize = static_cast<size_t>(length) + 1;
				+    char*        tempBuf    = static_cast<char*>(malloc(bufferSize));
				+    if (!tempBuf) {
				+        return true;
				+    }
				+
				+    sprintf_s(tempBuf, bufferSize, "%s(%d): ASSERTION FAILED. %s\n", file, line, text);
				+    // -- GODOT end --
				+
				+    if (!s_assertCallback) {
				+        OutputDebugStringA(tempBuf);
				+    } else {
				+        s_assertCallback(tempBuf);
				+    }
				+
				+    // free the buffer.
				+    free(tempBuf);
				+
				+#else
				+    FFX_UNUSED(line);
				+    FFX_UNUSED(condition);
				+    FFX_UNUSED(message);
				+#endif
				+
				+    return true;
				+}
			
--- a/thirdparty/amd-fsr2/ffx_assert.h
+++ b/thirdparty/amd-fsr2/ffx_assert.h
@@ -0,0 +1,132 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include "ffx_types.h"
			
 
				+#include "ffx_util.h"
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif  // #ifdef __cplusplus
			
 
				+
			
 
				+#ifdef _DEBUG
			
 
				+#ifdef _WIN32
			
 
				+
			
 
				+#ifdef DISABLE_FFX_DEBUG_BREAK
			
 
				+#define FFX_DEBUG_BREAK \
			
 
				+    {                   \
			
 
				+    }
			
 
				+#else
			
 
				+/// Macro to force the debugger to break at this point in the code.
			
 
				+#define FFX_DEBUG_BREAK __debugbreak();
			
 
				+#endif
			
 
				+#else
			
 
				+#define FFX_DEBUG_BREAK \
			
 
				+    {                   \
			
 
				+    }
			
 
				+#endif
			
 
				+#else
			
 
				+// don't allow debug break in release builds.
			
 
				+#define FFX_DEBUG_BREAK
			
 
				+#endif
			
 
				+
			
 
				+/// A typedef for the callback function for assert printing.
			
 
				+///
			
 
				+/// This can be used to re-route printing of assert messages from the FFX backend
			
 
				+/// to another destination. For example instead of the default behaviour of printing
			
 
				+/// the assert messages to the debugger's TTY the message can be re-routed to a
			
 
				+/// MessageBox in a GUI application.
			
 
				+///
			
 
				+/// @param [in] message                 The message generated by the assert.
			
 
				+///
			
 
				+typedef void (*FfxAssertCallback)(const char* message);
			
 
				+
			
 
				+/// Function to report an assert.
			
 
				+///
			
 
				+/// @param [in] file                    The name of the file as a string.
			
 
				+/// @param [in] line                    The index of the line in the file.
			
 
				+/// @param [in] condition               The boolean condition that was tested.
			
 
				+/// @param [in] msg                     The optional message to print.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Always returns true.
			
 
				+///
			
 
				+FFX_API bool ffxAssertReport(const char* file, int32_t line, const char* condition, const char* msg);
			
 
				+
			
 
				+/// Provides the ability to set a callback for assert messages.
			
 
				+///
			
 
				+/// @param [in] callback                The callback function that will receive assert messages.
			
 
				+///
			
 
				+FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback);
			
 
				+
			
 
				+#ifdef _DEBUG
			
 
				+/// Standard assert macro.
			
 
				+#define FFX_ASSERT(condition)                                                      \
			
 
				+    do                                                                             \
			
 
				+    {                                                                              \
			
 
				+        if (!(condition) && ffxAssertReport(__FILE__, __LINE__, #condition, NULL)) \
			
 
				+            FFX_DEBUG_BREAK                                                        \
			
 
				+    } while (0)
			
 
				+
			
 
				+/// Assert macro with message.
			
 
				+#define FFX_ASSERT_MESSAGE(condition, msg)                                        \
			
 
				+    do                                                                            \
			
 
				+    {                                                                             \
			
 
				+        if (!(condition) && ffxAssertReport(__FILE__, __LINE__, #condition, msg)) \
			
 
				+            FFX_DEBUG_BREAK                                                       \
			
 
				+    } while (0)
			
 
				+
			
 
				+/// Assert macro that always fails.
			
 
				+#define FFX_ASSERT_FAIL(message)                            \
			
 
				+    do                                                      \
			
 
				+    {                                                       \
			
 
				+        ffxAssertReport(__FILE__, __LINE__, NULL, message); \
			
 
				+        FFX_DEBUG_BREAK                                     \
			
 
				+    } while (0)
			
 
				+#else
			
 
				+// asserts disabled
			
 
				+#define FFX_ASSERT(condition)  \
			
 
				+    do                         \
			
 
				+    {                          \
			
 
				+        FFX_UNUSED(condition); \
			
 
				+    } while (0)
			
 
				+
			
 
				+#define FFX_ASSERT_MESSAGE(condition, message) \
			
 
				+    do                                         \
			
 
				+    {                                          \
			
 
				+        FFX_UNUSED(condition);                 \
			
 
				+        FFX_UNUSED(message);                   \
			
 
				+    } while (0)
			
 
				+
			
 
				+#define FFX_ASSERT_FAIL(message) \
			
 
				+    do                           \
			
 
				+    {                            \
			
 
				+        FFX_UNUSED(message);     \
			
 
				+    } while (0)
			
 
				+#endif  // #ifdef _DEBUG
			
 
				+
			
 
				+/// Simple static assert.
			
 
				+#define FFX_STATIC_ASSERT(condition) static_assert(condition, #condition)
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif  // #ifdef __cplusplus
			
--- a/thirdparty/amd-fsr2/ffx_error.h
+++ b/thirdparty/amd-fsr2/ffx_error.h
@@ -0,0 +1,59 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include "ffx_types.h"
			
 
				+
			
 
				+/// Typedef for error codes returned from functions in the FidelityFX SDK.
			
 
				+typedef int32_t FfxErrorCode;
			
 
				+
			
 
				+static const FfxErrorCode FFX_OK                            = 0;           ///< The operation completed successfully.
			
 
				+static const FfxErrorCode FFX_ERROR_INVALID_POINTER         = 0x80000000;  ///< The operation failed due to an invalid pointer.
			
 
				+static const FfxErrorCode FFX_ERROR_INVALID_ALIGNMENT       = 0x80000001;  ///< The operation failed due to an invalid alignment.
			
 
				+static const FfxErrorCode FFX_ERROR_INVALID_SIZE            = 0x80000002;  ///< The operation failed due to an invalid size.
			
 
				+static const FfxErrorCode FFX_EOF                           = 0x80000003;  ///< The end of the file was encountered.
			
 
				+static const FfxErrorCode FFX_ERROR_INVALID_PATH            = 0x80000004;  ///< The operation failed because the specified path was invalid.
			
 
				+static const FfxErrorCode FFX_ERROR_EOF                     = 0x80000005;  ///< The operation failed because end of file was reached.
			
 
				+static const FfxErrorCode FFX_ERROR_MALFORMED_DATA          = 0x80000006;  ///< The operation failed because of some malformed data.
			
 
				+static const FfxErrorCode FFX_ERROR_OUT_OF_MEMORY           = 0x80000007;  ///< The operation failed because it ran out memory.
			
 
				+static const FfxErrorCode FFX_ERROR_INCOMPLETE_INTERFACE    = 0x80000008;  ///< The operation failed because the interface was not fully configured.
			
 
				+static const FfxErrorCode FFX_ERROR_INVALID_ENUM            = 0x80000009;  ///< The operation failed because of an invalid enumeration value.
			
 
				+static const FfxErrorCode FFX_ERROR_INVALID_ARGUMENT        = 0x8000000a;  ///< The operation failed because an argument was invalid.
			
 
				+static const FfxErrorCode FFX_ERROR_OUT_OF_RANGE            = 0x8000000b;  ///< The operation failed because a value was out of range.
			
 
				+static const FfxErrorCode FFX_ERROR_NULL_DEVICE             = 0x8000000c;  ///< The operation failed because a device was null.
			
 
				+static const FfxErrorCode FFX_ERROR_BACKEND_API_ERROR       = 0x8000000d;  ///< The operation failed because the backend API returned an error code.
			
 
				+static const FfxErrorCode FFX_ERROR_INSUFFICIENT_MEMORY     = 0x8000000e;  ///< The operation failed because there was not enough memory.
			
 
				+
			
 
				+/// Helper macro to return error code y from a function when a specific condition, x, is not met.
			
 
				+#define FFX_RETURN_ON_ERROR(x, y)                   \
			
 
				+    if (!(x))                                       \
			
 
				+    {                                               \
			
 
				+        return (y);                                 \
			
 
				+    }
			
 
				+
			
 
				+/// Helper macro to return error code x from a function when it is not FFX_OK.
			
 
				+#define FFX_VALIDATE(x)                             \
			
 
				+    {                                               \
			
 
				+        FfxErrorCode ret = x;                       \
			
 
				+        FFX_RETURN_ON_ERROR(ret == FFX_OK, ret);    \
			
 
				+    }
			
 
				+
			
--- a/thirdparty/amd-fsr2/ffx_fsr2.cpp
+++ b/thirdparty/amd-fsr2/ffx_fsr2.cpp
@@ -0,0 +1,1373 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#include <algorithm>    // for max used inside SPD CPU code.
			
 
				+#include <cmath>        // for fabs, abs, sinf, sqrt, etc.
			
 
				+#include <string.h>     // for memset
			
 
				+#include <cfloat>       // for FLT_EPSILON
			
 
				+#include "ffx_fsr2.h"
			
 
				+#define FFX_CPU
			
 
				+#include "shaders/ffx_core.h"
			
 
				+#include "shaders/ffx_fsr1.h"
			
 
				+#include "shaders/ffx_spd.h"
			
 
				+#include "shaders/ffx_fsr2_callbacks_hlsl.h"
			
 
				+
			
 
				+#include "ffx_fsr2_maximum_bias.h"
			
 
				+
			
 
				+#ifdef __clang__
			
 
				+#pragma clang diagnostic ignored "-Wunused-variable"
			
 
				+#endif
			
 
				+
			
 
				+// -- GODOT start --
			
 
				+#ifndef _countof
			
 
				+#define _countof(array) (sizeof(array) / sizeof(array[0]))
			
 
				+#endif
			
 
				+
			
 
				+#ifndef _MSC_VER
			
 
				+#include <wchar.h>
			
 
				+#define wcscpy_s wcscpy
			
 
				+#endif
			
 
				+// -- GODOT end --
			
 
				+
			
 
				+// max queued frames for descriptor management
			
 
				+static const uint32_t FSR2_MAX_QUEUED_FRAMES = 16;
			
 
				+
			
 
				+#include "ffx_fsr2_private.h"
			
 
				+
			
 
				+// lists to map shader resource bindpoint name to resource identifier
			
 
				+typedef struct ResourceBinding
			
 
				+{
			
 
				+    uint32_t    index;
			
 
				+    wchar_t     name[64];
			
 
				+}ResourceBinding;
			
 
				+
			
 
				+static const ResourceBinding srvResourceBindingTable[] =
			
 
				+{
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR,                              L"r_input_color_jittered"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY,                        L"r_input_opaque_only"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS,                     L"r_input_motion_vectors"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH,                              L"r_input_depth" },
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE,                           L"r_input_exposure"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE,                            L"r_auto_exposure"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK,                      L"r_reactive_mask"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK,  L"r_transparency_and_composition_mask"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH,     L"r_reconstructed_previous_nearest_depth"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS,                   L"r_dilated_motion_vectors"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS,          L"r_previous_dilated_motion_vectors"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH,                            L"r_dilatedDepth"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR,                  L"r_internal_upscaled_color"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS,                              L"r_lock_status"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR,                     L"r_prepared_input_color"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY,                             L"r_luma_history" },
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT,                               L"r_rcas_input"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT,                              L"r_lanczos_lut"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE,                          L"r_imgMips"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE,    L"r_img_mip_shading_change"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5,                 L"r_img_mip_5"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT,                L"r_upsample_maximum_bias_lut"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS,                   L"r_dilated_reactive_masks"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS,                                L"r_new_locks"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA,                          L"r_lock_input_luma"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR,                     L"r_input_prev_color_pre_alpha"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR,                    L"r_input_prev_color_post_alpha"},
			
 
				+};
			
 
				+
			
 
				+static const ResourceBinding uavResourceBindingTable[] =
			
 
				+{
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH,    L"rw_reconstructed_previous_nearest_depth"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS,                  L"rw_dilated_motion_vectors"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH,                           L"rw_dilatedDepth"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR,                 L"rw_internal_upscaled_color"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS,                             L"rw_lock_status"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR,                    L"rw_prepared_input_color"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY,                            L"rw_luma_history"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT,                         L"rw_upscaled_output"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE,   L"rw_img_mip_shading_change"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5,                L"rw_img_mip_5"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS,                  L"rw_dilated_reactive_masks"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE,                           L"rw_auto_exposure"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT,                        L"rw_spd_global_atomic"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS,                               L"rw_new_locks"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA,                         L"rw_lock_input_luma"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE,                            L"rw_output_autoreactive"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION,                         L"rw_output_autocomposition"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR,                    L"rw_output_prev_color_pre_alpha"},
			
 
				+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR,                   L"rw_output_prev_color_post_alpha"},
			
 
				+};
			
 
				+
			
 
				+static const ResourceBinding cbResourceBindingTable[] =
			
 
				+{
			
 
				+    {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2,           L"cbFSR2"},
			
 
				+    {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD,            L"cbSPD"},
			
 
				+    {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS,           L"cbRCAS"},
			
 
				+    {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE,    L"cbGenerateReactive"},
			
 
				+};
			
 
				+
			
 
				+// Broad structure of the root signature.
			
 
				+typedef enum Fsr2RootSignatureLayout {
			
 
				+
			
 
				+    FSR2_ROOT_SIGNATURE_LAYOUT_UAVS,
			
 
				+    FSR2_ROOT_SIGNATURE_LAYOUT_SRVS,
			
 
				+    FSR2_ROOT_SIGNATURE_LAYOUT_CONSTANTS,
			
 
				+    FSR2_ROOT_SIGNATURE_LAYOUT_CONSTANTS_REGISTER_1,
			
 
				+    FSR2_ROOT_SIGNATURE_LAYOUT_PARAMETER_COUNT
			
 
				+} Fsr2RootSignatureLayout;
			
 
				+
			
 
				+typedef struct Fsr2RcasConstants {
			
 
				+
			
 
				+    uint32_t                    rcasConfig[4];
			
 
				+} FfxRcasConstants;
			
 
				+
			
 
				+typedef struct Fsr2SpdConstants {
			
 
				+
			
 
				+    uint32_t                    mips;
			
 
				+    uint32_t                    numworkGroups;
			
 
				+    uint32_t                    workGroupOffset[2];
			
 
				+    uint32_t                    renderSize[2];
			
 
				+} Fsr2SpdConstants;
			
 
				+
			
 
				+typedef struct Fsr2GenerateReactiveConstants
			
 
				+{
			
 
				+    float       scale;
			
 
				+    float       threshold;
			
 
				+    float       binaryValue;
			
 
				+    uint32_t    flags;
			
 
				+
			
 
				+} Fsr2GenerateReactiveConstants;
			
 
				+
			
 
				+typedef struct Fsr2GenerateReactiveConstants2
			
 
				+{
			
 
				+    float       autoTcThreshold;
			
 
				+    float       autoTcScale;
			
 
				+    float       autoReactiveScale;
			
 
				+    float       autoReactiveMax;
			
 
				+
			
 
				+} Fsr2GenerateReactiveConstants2;
			
 
				+
			
 
				+typedef union Fsr2SecondaryUnion {
			
 
				+
			
 
				+    Fsr2RcasConstants               rcas;
			
 
				+    Fsr2SpdConstants                spd;
			
 
				+    Fsr2GenerateReactiveConstants2  autogenReactive;
			
 
				+} Fsr2SecondaryUnion;
			
 
				+
			
 
				+typedef struct Fsr2ResourceDescription {
			
 
				+
			
 
				+    uint32_t                    id;
			
 
				+    const wchar_t*              name;
			
 
				+    FfxResourceUsage            usage;
			
 
				+    FfxSurfaceFormat            format;
			
 
				+    uint32_t                    width;
			
 
				+    uint32_t                    height;
			
 
				+    uint32_t                    mipCount;
			
 
				+    FfxResourceFlags            flags;
			
 
				+    uint32_t                    initDataSize;
			
 
				+    void*                       initData;
			
 
				+} Fsr2ResourceDescription;
			
 
				+
			
 
				+FfxConstantBuffer globalFsr2ConstantBuffers[4] = {
			
 
				+    { sizeof(Fsr2Constants) / sizeof(uint32_t) },
			
 
				+    { sizeof(Fsr2SpdConstants) / sizeof(uint32_t) },
			
 
				+    { sizeof(Fsr2RcasConstants) / sizeof(uint32_t) },
			
 
				+    { sizeof(Fsr2GenerateReactiveConstants) / sizeof(uint32_t) }
			
 
				+};
			
 
				+
			
 
				+// Lanczos
				+//
				+// Evaluates sin(pi*v)/(pi*v) * sin(pi*v/2)/(pi*v/2) for v = value, returning
				+// 1 when |value| is below FFX_EPSILON so the division by zero is avoided.
				+static float lanczos2(float value)
				+{
				+    // -- GODOT start --
				+    // fabsf() instead of unqualified abs(): depending on which overloads the
				+    // standard headers expose in the global namespace, abs() can bind to the
				+    // C int version, truncating every |value| < 1 to 0 and always taking the
				+    // 1.f branch.
				+    return fabsf(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
				+    // -- GODOT end --
				+}
			
 
				+
			
 
				+// Calculate halton number for index and base.
				+//
				+// Peels off the base-`base` digits of `index` from least significant upward
				+// and accumulates digit / base^k for k = 1, 2, ... An index <= 0 yields 0.
				+// `base` is used as a divisor and is not validated here.
				+static float halton(int32_t index, int32_t base)
				+{
				+    float f = 1.0f, result = 0.0f;
				+
				+    for (int32_t currentIndex = index; currentIndex > 0;) {
				+
				+        f /= (float)base;
				+        result = result + f * (float)(currentIndex % base);
				+        // -- GODOT start --
				+        // Exact integer division. The float divide + floorf + uint32_t cast it
				+        // replaces gives the same result for small indices but loses
				+        // precision once the index no longer fits in a float mantissa.
				+        currentIndex /= base;
				+        // -- GODOT end --
				+    }
				+
				+    return result;
				+}
			
 
				+
			
 
				+static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params)
			
 
				+{
			
 
				+    if (params->commandList == nullptr)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"commandList is null");
			
 
				+    }
			
 
				+
			
 
				+    if (params->color.resource == nullptr)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"color resource is null");
			
 
				+    }
			
 
				+
			
 
				+    if (params->depth.resource == nullptr)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"depth resource is null");
			
 
				+    }
			
 
				+
			
 
				+    if (params->motionVectors.resource == nullptr)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"motionVectors resource is null");
			
 
				+    }
			
 
				+
			
 
				+    if (params->exposure.resource != nullptr)
			
 
				+    {
			
 
				+        if ((context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) == FFX_FSR2_ENABLE_AUTO_EXPOSURE)
			
 
				+        {
			
 
				+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present");
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    if (params->output.resource == nullptr)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"output resource is null");
			
 
				+    }
			
 
				+
			
 
				+    if (fabs(params->jitterOffset.x) > 1.0f || fabs(params->jitterOffset.y) > 1.0f)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside of expected range [-1.0, 1.0]");
			
 
				+    }
			
 
				+
			
 
				+    if ((params->motionVectorScale.x > (float)context->contextDescription.maxRenderSize.width) ||
			
 
				+        (params->motionVectorScale.y > (float)context->contextDescription.maxRenderSize.height))
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize");
			
 
				+    }
			
 
				+    if ((params->motionVectorScale.x == 0.0f) ||
			
 
				+        (params->motionVectorScale.y == 0.0f))
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value");
			
 
				+    }
			
 
				+
			
 
				+    if ((params->renderSize.width > context->contextDescription.maxRenderSize.width) ||
			
 
				+        (params->renderSize.height > context->contextDescription.maxRenderSize.height))
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize");
			
 
				+    }
			
 
				+    if ((params->renderSize.width == 0) ||
			
 
				+        (params->renderSize.height == 0))
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension");
			
 
				+    }
			
 
				+
			
 
				+    if (params->sharpness < 0.0f || params->sharpness > 1.0f)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]");
			
 
				+    }
			
 
				+
			
 
				+    if (params->frameTimeDelta < 1.0f)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps)");
			
 
				+    }
			
 
				+
			
 
				+    if (params->preExposure == 0.0f)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid");
			
 
				+    }
			
 
				+
			
 
				+    bool infiniteDepth = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INFINITE) == FFX_FSR2_ENABLE_DEPTH_INFINITE;
			
 
				+    bool inverseDepth = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INVERTED) == FFX_FSR2_ENABLE_DEPTH_INVERTED;
			
 
				+
			
 
				+    if (inverseDepth)
			
 
				+    {
			
 
				+        if (params->cameraNear < params->cameraFar)
			
 
				+        {
			
 
				+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
			
 
				+                L"FFX_FSR2_ENABLE_DEPTH_INVERTED flag is present yet cameraNear is less than cameraFar");
			
 
				+        }
			
 
				+        if (infiniteDepth)
			
 
				+        {
			
 
				+            if (params->cameraNear != FLT_MAX)
			
 
				+            {
			
 
				+                context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
			
 
				+                    L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraNear != FLT_MAX");
			
 
				+            }
			
 
				+        }
			
 
				+        if (params->cameraFar < 0.075f)
			
 
				+        {
			
 
				+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
			
 
				+                L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraFar value is very low which may result in depth separation artefacting");
			
 
				+        }
			
 
				+    }
			
 
				+    else
			
 
				+    {
			
 
				+        if (params->cameraNear > params->cameraFar)
			
 
				+        {
			
 
				+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
			
 
				+                L"cameraNear is greater than cameraFar in non-inverted-depth context");
			
 
				+        }
			
 
				+        if (infiniteDepth)
			
 
				+        {
			
 
				+            if (params->cameraFar != FLT_MAX)
			
 
				+            {
			
 
				+                context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
			
 
				+                    L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraFar != FLT_MAX");
			
 
				+            }
			
 
				+        }
			
 
				+        if (params->cameraNear < 0.075f)
			
 
				+        {
			
 
				+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
			
 
				+                L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraNear value is very low which may result in depth separation artefacting");
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    if (params->cameraFovAngleVertical <= 0.0f)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is 0.0f - this value should be > 0.0f");
			
 
				+    }
			
 
				+    if (params->cameraFovAngleVertical > FFX_PI)
			
 
				+    {
			
 
				+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI");
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
			
 
				+{
			
 
				+    for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex)
			
 
				+    {
			
 
				+        int32_t mapIndex = 0;
			
 
				+        for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex)
			
 
				+        {
			
 
				+            if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name))
			
 
				+                break;
			
 
				+        }
			
 
				+        if (mapIndex == _countof(srvResourceBindingTable))
			
 
				+            return FFX_ERROR_INVALID_ARGUMENT;
			
 
				+
			
 
				+        inoutPipeline->srvResourceBindings[srvIndex].resourceIdentifier = srvResourceBindingTable[mapIndex].index;
			
 
				+    }
			
 
				+
			
 
				+    for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex)
			
 
				+    {
			
 
				+        int32_t mapIndex = 0;
			
 
				+        for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex)
			
 
				+        {
			
 
				+            if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name))
			
 
				+                break;
			
 
				+        }
			
 
				+        if (mapIndex == _countof(uavResourceBindingTable))
			
 
				+            return FFX_ERROR_INVALID_ARGUMENT;
			
 
				+
			
 
				+        inoutPipeline->uavResourceBindings[uavIndex].resourceIdentifier = uavResourceBindingTable[mapIndex].index;
			
 
				+    }
			
 
				+
			
 
				+    for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
			
 
				+    {
			
 
				+        int32_t mapIndex = 0;
			
 
				+        for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex)
			
 
				+        {
			
 
				+            if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name))
			
 
				+                break;
			
 
				+        }
			
 
				+        if (mapIndex == _countof(cbResourceBindingTable))
			
 
				+            return FFX_ERROR_INVALID_ARGUMENT;
			
 
				+
			
 
				+        inoutPipeline->cbResourceBindings[cbIndex].resourceIdentifier = cbResourceBindingTable[mapIndex].index;
			
 
				+    }
			
 
				+
			
 
				+    return FFX_OK;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+static FfxErrorCode createPipelineStates(FfxFsr2Context_Private* context)
			
 
				+{
			
 
				+    FFX_ASSERT(context);
			
 
				+
			
 
				+    const size_t samplerCount = 2;
			
 
				+    FfxFilterType samplers[samplerCount];
			
 
				+    samplers[0] = FFX_FILTER_TYPE_POINT;
			
 
				+    samplers[1] = FFX_FILTER_TYPE_LINEAR;
			
 
				+
			
 
				+    const size_t rootConstantCount = 2;
			
 
				+    uint32_t rootConstants[rootConstantCount];
			
 
				+    rootConstants[0] = sizeof(Fsr2Constants) / sizeof(uint32_t);
			
 
				+    rootConstants[1] = sizeof(Fsr2SecondaryUnion) / sizeof(uint32_t);
			
 
				+
			
 
				+    FfxPipelineDescription pipelineDescription;
			
 
				+    pipelineDescription.contextFlags = context->contextDescription.flags;
			
 
				+    pipelineDescription.samplerCount = samplerCount;
			
 
				+    pipelineDescription.samplers = samplers;
			
 
				+    pipelineDescription.rootConstantBufferCount = rootConstantCount;
			
 
				+    pipelineDescription.rootConstantBufferSizes = rootConstants;
			
 
				+
			
 
				+    // New interface: will handle RootSignature in backend
			
 
				+    // set up pipeline descriptor (basically RootSignature and binding)
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID, &pipelineDescription, &context->pipelineComputeLuminancePyramid));
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RCAS, &pipelineDescription, &context->pipelineRCAS));
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_GENERATE_REACTIVE, &pipelineDescription, &context->pipelineGenerateReactive));
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_TCR_AUTOGENERATE, &pipelineDescription, &context->pipelineTcrAutogenerate));
			
 
				+
			
 
				+    pipelineDescription.rootConstantBufferCount = 1;
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_DEPTH_CLIP, &pipelineDescription, &context->pipelineDepthClip));
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH, &pipelineDescription, &context->pipelineReconstructPreviousDepth));
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_LOCK, &pipelineDescription, &context->pipelineLock));
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE, &pipelineDescription, &context->pipelineAccumulate));
			
 
				+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE_SHARPEN, &pipelineDescription, &context->pipelineAccumulateSharpen));
			
 
				+    
			
 
				+    // for each pipeline: re-route/fix-up IDs based on names
			
 
				+    patchResourceBindings(&context->pipelineDepthClip);
			
 
				+    patchResourceBindings(&context->pipelineReconstructPreviousDepth);
			
 
				+    patchResourceBindings(&context->pipelineLock);
			
 
				+    patchResourceBindings(&context->pipelineAccumulate);
			
 
				+    patchResourceBindings(&context->pipelineComputeLuminancePyramid);
			
 
				+    patchResourceBindings(&context->pipelineAccumulateSharpen);
			
 
				+    patchResourceBindings(&context->pipelineRCAS);
			
 
				+    patchResourceBindings(&context->pipelineGenerateReactive);
			
 
				+    patchResourceBindings(&context->pipelineTcrAutogenerate);
			
 
				+
			
 
				+    return FFX_OK;
			
 
				+}
			
 
				+
			
 
				+static FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* contextPrivate, const FfxFsr2DispatchDescription* params);
			
 
				+
			
 
				// Initializes a private FSR2 context from the application's description: copies the
				// description, creates the backend context, queries device capabilities, creates all
				// internal resources and finally the pipelines.
				//   context:            storage to initialize; it is zeroed first.
				//   contextDescription: creation parameters; copied into the context.
				// Returns FFX_OK on success, otherwise the first error code reported by a backend
				// callback or by createPipelineStates().
				static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2ContextDescription* contextDescription)
				{
				    FFX_ASSERT(context);
				    FFX_ASSERT(contextDescription);

				    // Setup the data for implementation.
				    memset(context, 0, sizeof(FfxFsr2Context_Private));
				    context->device = contextDescription->device;

				    memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr2ContextDescription));

				    // Debug checking needs a message callback; without one the flag is asserted on
				    // and then stripped from the context's copy of the flags.
				    if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEBUG_CHECKING) == FFX_FSR2_ENABLE_DEBUG_CHECKING)
				    {
				        if (context->contextDescription.fpMessage == nullptr)
				        {
				            FFX_ASSERT(context->contextDescription.fpMessage != nullptr);
				            // remove the debug checking flag - we have no message function
				            context->contextDescription.flags &= ~FFX_FSR2_ENABLE_DEBUG_CHECKING;
				        }
				    }

				    // Create the device.
				    FfxErrorCode errorCode = context->contextDescription.callbacks.fpCreateBackendContext(&context->contextDescription.callbacks, context->device);
				    FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);

				    // call out for device caps.
				    errorCode = context->contextDescription.callbacks.fpGetDeviceCapabilities(&context->contextDescription.callbacks, &context->deviceCapabilities, context->device);
				    FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);

				    // set defaults
				    context->firstExecution = true;
				    context->resourceFrameIndex = 0;

				    context->constants.displaySize[0] = contextDescription->displaySize.width;
				    context->constants.displaySize[1] = contextDescription->displaySize.height;

				    // generate the data for the LUT.
				    // 128 samples of lanczos2() over x in [0, 2], scaled by 32767 into int16_t.
				    const uint32_t lanczos2LutWidth = 128;
				    int16_t lanczos2Weights[lanczos2LutWidth] = { };

				    for (uint32_t currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; currentLanczosWidthIndex++) {

				        const float x = 2.0f * currentLanczosWidthIndex / float(lanczos2LutWidth - 1);
				        const float y = lanczos2(x);
				        lanczos2Weights[currentLanczosWidthIndex] = int16_t(roundf(y * 32767.0f));
				    }

				    // upload path only supports R16_SNORM, let's go and convert
				    // Each source value is halved before scaling by 32767.
				    int16_t maximumBias[FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH * FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT];
				    for (uint32_t i = 0; i < FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH * FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT; ++i) {

				        maximumBias[i] = int16_t(roundf(ffxFsr2MaximumBias[i] / 2.0f * 32767.0f));
				    }

				    // Initial contents for the small resources that are created with data.
				    uint8_t defaultReactiveMaskData = 0U;
				    uint32_t atomicInitData = 0U;
				    float defaultExposure[] = { 0.0f, 0.0f };
				    // Resource type used for surfaces whose height is 1 (see the creation loop below).
				    const FfxResourceType texture1dResourceType = (context->contextDescription.flags & FFX_FSR2_ENABLE_TEXTURE1D_USAGE) ? FFX_RESOURCE_TYPE_TEXTURE1D : FFX_RESOURCE_TYPE_TEXTURE2D;

				    // declare internal resources needed
				    // Rows that omit the trailing initDataSize/initData members leave them zero-initialized.
				    const Fsr2ResourceDescription internalSurfaceDesc[] = {

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1, L"FSR2_InternalDilatedVelocity1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2, L"FSR2_InternalDilatedVelocity2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"FSR2_DilatedDepth", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R32_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1, L"FSR2_LockStatus1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2, L"FSR2_LockStatus2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA, L"FSR2_LockInputLuma", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS, L"FSR2_NewLocks", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, L"FSR2_InternalUpscaled1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, L"FSR2_InternalUpscaled2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				        // NOTE(review): mipCount is 0 for this entry only - presumably "full mip chain"; confirm against the backend.
				        {   FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE, L"FSR2_ExposureMips", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width / 2, contextDescription->maxRenderSize.height / 2, 0, FFX_RESOURCE_FLAGS_ALIASABLE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1, L"FSR2_LumaHistory1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2, L"FSR2_LumaHistory2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"FSR2_SpdAtomicCounter", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
				            FFX_SURFACE_FORMAT_R32_UINT, 1, 1, 1, FFX_RESOURCE_FLAGS_ALIASABLE, sizeof(atomicInitData), &atomicInitData },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"FSR2_DilatedReactiveMasks", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R8G8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"FSR2_LanczosLutData", FFX_RESOURCE_USAGE_READ_ONLY,
				            FFX_SURFACE_FORMAT_R16_SNORM, lanczos2LutWidth, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(lanczos2Weights), lanczos2Weights },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY, L"FSR2_DefaultReactiviyMask", FFX_RESOURCE_USAGE_READ_ONLY,
				            FFX_SURFACE_FORMAT_R8_UNORM, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultReactiveMaskData), &defaultReactiveMaskData },

				        {   FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT, L"FSR2_MaximumUpsampleBias", FFX_RESOURCE_USAGE_READ_ONLY,
				            FFX_SURFACE_FORMAT_R16_SNORM, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(maximumBias), maximumBias },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, L"FSR2_DefaultExposure", FFX_RESOURCE_USAGE_READ_ONLY,
				            FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultExposure), defaultExposure },

				        {   FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"FSR2_AutoExposure", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE },


				        // only one for now, will need ping-pong to respect the motion vectors
				        {   FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE, L"FSR2_AutoReactive", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
				        {   FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION, L"FSR2_AutoComposition", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
				        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1, L"FSR2_PrevPreAlpha0", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
				        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1, L"FSR2_PrevPostAlpha0", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
				        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2, L"FSR2_PrevPreAlpha1", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
				        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2, L"FSR2_PrevPostAlpha1", FFX_RESOURCE_USAGE_UAV,
				            FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },

				    };

				    // clear the SRV resources to NULL.
				    memset(context->srvResources, 0, sizeof(context->srvResources));

				    // Create one backend resource per table row and store it in the slot named by the row's id.
				    for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) {

				        const Fsr2ResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex];
				        // Surfaces with height 1 use the 1D-capable type chosen above; everything else is 2D.
				        const FfxResourceType resourceType = currentSurfaceDescription->height > 1 ? FFX_RESOURCE_TYPE_TEXTURE2D : texture1dResourceType;
				        const FfxResourceDescription resourceDescription = { resourceType, currentSurfaceDescription->format, currentSurfaceDescription->width, currentSurfaceDescription->height, 1, currentSurfaceDescription->mipCount };
				        // Read-only resources start in the compute-read state, all others in unordered access.
				        const FfxResourceStates initialState = (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) ? FFX_RESOURCE_STATE_COMPUTE_READ : FFX_RESOURCE_STATE_UNORDERED_ACCESS;
				        const FfxCreateResourceDescription createResourceDescription = { FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->initDataSize, currentSurfaceDescription->initData, currentSurfaceDescription->name, currentSurfaceDescription->usage, currentSurfaceDescription->id };

				        FFX_VALIDATE(context->contextDescription.callbacks.fpCreateResource(&context->contextDescription.callbacks, &createResourceDescription, &context->srvResources[currentSurfaceDescription->id]));
				    }

				    // copy resources to uavResources list
				    memcpy(context->uavResources, context->srvResources, sizeof(context->srvResources));

				    // avoid compiling pipelines on first render
				    {
				        context->refreshPipelineStates = false;
				        errorCode = createPipelineStates(context);
				        FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
				    }
				    return FFX_OK;
				}
			
 
				+
			
 
				+static void fsr2SafeReleasePipeline(FfxFsr2Context_Private* context, FfxPipelineState* pipeline)
			
 
				+{
			
 
				+    FFX_ASSERT(pipeline);
			
 
				+
			
 
				+    context->contextDescription.callbacks.fpDestroyPipeline(&context->contextDescription.callbacks, pipeline);
			
 
				+}
			
 
				+
			
 
				+static void fsr2SafeReleaseResource(FfxFsr2Context_Private* context, FfxResourceInternal resource)
			
 
				+{
			
 
				+    context->contextDescription.callbacks.fpDestroyResource(&context->contextDescription.callbacks, resource);
			
 
				+}
			
 
				+
			
 
				+static void fsr2SafeReleaseDevice(FfxFsr2Context_Private* context, FfxDevice* device)
			
 
				+{
			
 
				+    if (*device == nullptr) {
			
 
				+        return;
			
 
				+    }
			
 
				+
			
 
				+    context->contextDescription.callbacks.fpDestroyBackendContext(&context->contextDescription.callbacks);
			
 
				+    *device = nullptr;
			
 
				+}
			
 
				+
			
 
				+static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context)
			
 
				+{
			
 
				+    FFX_ASSERT(context);
			
 
				+
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineDepthClip);
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineReconstructPreviousDepth);
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineLock);
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineAccumulate);
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineAccumulateSharpen);
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineRCAS);
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineComputeLuminancePyramid);
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineGenerateReactive);
			
 
				+    fsr2SafeReleasePipeline(context, &context->pipelineTcrAutogenerate);
			
 
				+
			
 
				+    // unregister resources not created internally
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
			
 
				+
			
 
				+    // release internal resources
			
 
				+    for (int32_t currentResourceIndex = 0; currentResourceIndex < FFX_FSR2_RESOURCE_IDENTIFIER_COUNT; ++currentResourceIndex) {
			
 
				+
			
 
				+        fsr2SafeReleaseResource(context, context->srvResources[currentResourceIndex]);
			
 
				+    }
			
 
				+
			
 
				+    fsr2SafeReleaseDevice(context, &context->device);
			
 
				+
			
 
				+    return FFX_OK;
			
 
				+}
			
 
				+
			
 
				+// Fills context->constants.deviceToViewDepth[0..3] from the camera description
			
 
				+// in `params` and the depth flags stored in the context description.
			
 
				+// With the projection matrix laid out as
			
 
				+//
			
 
				+//   a 0 0 0   x
			
 
				+//   0 b 0 0   y
			
 
				+//   0 0 c d   z
			
 
				+//   0 0 e 0   1
			
 
				+//
			
 
				+// the four values written are: [0] = d * c, [1] = e, [2] = 1 / a, [3] = 1 / b.
			
 
				+static void setupDeviceDepthToViewSpaceDepthParams(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params)
			
 
				+{
			
 
				+    const bool depthIsInverted = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INVERTED) == FFX_FSR2_ENABLE_DEPTH_INVERTED;
			
 
				+    const bool depthIsInfinite = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INFINITE) == FFX_FSR2_ENABLE_DEPTH_INFINITE;
			
 
				+
			
 
				+    // Order the two camera planes here so that swapped near/far values in the
			
 
				+    // dispatch params make no difference; only the "inverted" flag decides which
			
 
				+    // plane plays which role in the transform.
			
 
				+    const float nearerPlane = FFX_MINIMUM(params->cameraNear, params->cameraFar);
			
 
				+    const float fartherPlane = FFX_MAXIMUM(params->cameraNear, params->cameraFar);
			
 
				+    const float planeMin = depthIsInverted ? fartherPlane : nearerPlane;
			
 
				+    const float planeMax = depthIsInverted ? nearerPlane : fartherPlane;
			
 
				+
			
 
				+    const float q = planeMax / (planeMin - planeMax);
			
 
				+
			
 
				+    // Select matrix elements c and e for the active (inverted, infinite) combination.
			
 
				+    float elemC;
			
 
				+    float elemE;
			
 
				+    if (!depthIsInfinite) {
			
 
				+        // non infinite: same expressions for reversed and non reversed depth
			
 
				+        elemC = q;
			
 
				+        elemE = q * planeMin;
			
 
				+    } else if (depthIsInverted) {
			
 
				+        // reversed, infinite
			
 
				+        elemC = 0.0f + FLT_EPSILON;
			
 
				+        elemE = planeMax;
			
 
				+    } else {
			
 
				+        // non reversed, infinite
			
 
				+        elemC = -1.0f - FLT_EPSILON;
			
 
				+        elemE = -planeMin - FLT_EPSILON;
			
 
				+    }
			
 
				+
			
 
				+    const float elemD = -1.0f; // matrix element d, spelled out for clarity
			
 
				+    context->constants.deviceToViewDepth[0] = elemD * elemC;
			
 
				+    context->constants.deviceToViewDepth[1] = elemE;
			
 
				+
			
 
				+    // revert x and y coords: store the reciprocals of a = cot(fovY / 2) / aspect
			
 
				+    // and b = cot(fovY / 2).
			
 
				+    const float halfFovY = 0.5f * params->cameraFovAngleVertical;
			
 
				+    const float cotHalfFovY = cosf(halfFovY) / sinf(halfFovY);
			
 
				+    const float viewportAspect = params->renderSize.width / float(params->renderSize.height);
			
 
				+
			
 
				+    context->constants.deviceToViewDepth[2] = (1.0f / (cotHalfFovY / viewportAspect));
			
 
				+    context->constants.deviceToViewDepth[3] = (1.0f / cotHalfFovY);
			
 
				+}
			
 
				+
			
 
				+// Builds one compute job for `pipeline` from the context's current SRV/UAV
			
 
				+// tables and the global constant buffers, then queues it through the backend's
			
 
				+// fpScheduleGpuJob callback.
			
 
				+//   context             - provides srvResources/uavResources and the callbacks
			
 
				+//   params              - not read by this function
			
 
				+//   pipeline            - provides the resource bindings; copied into the job
			
 
				+//   dispatchX/dispatchY - first two dispatch dimensions (the third is always 1)
			
 
				+static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
			
 
				+{
			
 
				+    FfxComputeJobDescription job = {};
			
 
				+
			
 
				+    job.pipeline = *pipeline;
			
 
				+    job.dimensions[0] = dispatchX;
			
 
				+    job.dimensions[1] = dispatchY;
			
 
				+    job.dimensions[2] = 1;
			
 
				+
			
 
				+    // shader resource views: resolve every binding through the context's SRV table
			
 
				+    for (uint32_t srvIndex = 0; srvIndex < pipeline->srvCount; ++srvIndex) {
			
 
				+        const auto& binding = pipeline->srvResourceBindings[srvIndex];
			
 
				+        const uint32_t boundId = binding.resourceIdentifier;
			
 
				+        job.srvs[srvIndex] = context->srvResources[boundId];
			
 
				+        wcscpy_s(job.srvNames[srvIndex], binding.name);
			
 
				+    }
			
 
				+
			
 
				+    // unordered access views
			
 
				+    for (uint32_t uavIndex = 0; uavIndex < pipeline->uavCount; ++uavIndex) {
			
 
				+        const auto& binding = pipeline->uavResourceBindings[uavIndex];
			
 
				+        const uint32_t boundId = binding.resourceIdentifier;
			
 
				+        wcscpy_s(job.uavNames[uavIndex], binding.name);
			
 
				+
			
 
				+        // Identifiers in the SCENE_LUMINANCE_MIPMAP_0..12 range all resolve to the
			
 
				+        // single SCENE_LUMINANCE entry of the UAV table; the identifier's offset
			
 
				+        // from MIPMAP_0 is passed along as the mip index. Everything else uses
			
 
				+        // its own table entry with mip 0.
			
 
				+        const bool targetsLuminanceMip = boundId >= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 && boundId <= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12;
			
 
				+        const uint32_t tableSlot = targetsLuminanceMip ? FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE : boundId;
			
 
				+        job.uavs[uavIndex] = context->uavResources[tableSlot];
			
 
				+        job.uavMip[uavIndex] = targetsLuminanceMip ? boundId - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 : 0u;
			
 
				+    }
			
 
				+
			
 
				+    // constant buffers: copied by value from the global constant buffer table
			
 
				+    for (uint32_t cbIndex = 0; cbIndex < pipeline->constCount; ++cbIndex) {
			
 
				+        const auto& binding = pipeline->cbResourceBindings[cbIndex];
			
 
				+        wcscpy_s(job.cbNames[cbIndex], binding.name);
			
 
				+        job.cbs[cbIndex] = globalFsr2ConstantBuffers[binding.resourceIdentifier];
			
 
				+        job.cbSlotIndex[cbIndex] = binding.slotIndex;
			
 
				+    }
			
 
				+
			
 
				+    // wrap the compute description in a GPU job and hand it to the backend
			
 
				+    FfxGpuJobDescription gpuJob = { FFX_GPU_JOB_COMPUTE };
			
 
				+    gpuJob.computeJobDescriptor = job;
			
 
				+
			
 
				+    context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &gpuJob);
			
 
				+}
			
 
				+
			
 
				+// Schedules and executes all GPU jobs for one FSR2 dispatch described by `params`.
			
 
				+// In order, the function:
			
 
				+//   1. optionally runs fsr2DebugCheckDispatch() (FFX_FSR2_ENABLE_DEBUG_CHECKING),
			
 
				+//   2. (re)creates the pipeline states if context->refreshPipelineStates is set,
			
 
				+//   3. schedules clears of a few internal resources on the first execution,
			
 
				+//   4. selects the _1/_2 variants of the double-buffered resources by frame parity,
			
 
				+//   5. registers the application resources from `params` with the backend,
			
 
				+//   6. updates context->constants and the global constant buffers,
			
 
				+//   7. schedules the compute passes and executes them on params->commandList,
			
 
				+//   8. unregisters the dynamically registered resources.
			
 
				+// Returns FFX_OK, or exits early (via FFX_RETURN_ON_ERROR) with the error code of
			
 
				+// createPipelineStates() when the pipeline refresh fails.
			
 
				+static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params)
			
 
				+{
			
 
				+    if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEBUG_CHECKING) == FFX_FSR2_ENABLE_DEBUG_CHECKING)
			
 
				+    {
			
 
				+        fsr2DebugCheckDispatch(context, params);
			
 
				+    }
			
 
				+    // take a short cut to the command list
			
 
				+    FfxCommandList commandList = params->commandList;
			
 
				+
			
 
				+    // try and refresh shaders first. Early exit in case of error.
			
 
				+    // NOTE(review): the flag is cleared before the attempt, so a failed
			
 
				+    // createPipelineStates() is not retried by the next dispatch - confirm this is intended.
			
 
				+    if (context->refreshPipelineStates) {
			
 
				+
			
 
				+        context->refreshPipelineStates = false;
			
 
				+
			
 
				+        const FfxErrorCode errorCode = createPipelineStates(context);
			
 
				+        FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
			
 
				+    }
			
 
				+
			
 
				+    // First dispatch on this context: schedule zero-clears of both lock status
			
 
				+    // buffers and of the prepared input color resource.
			
 
				+    if (context->firstExecution)
			
 
				+    {
			
 
				+        FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
			
 
				+
			
 
				+        const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
			
 
				+        memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
			
 
				+
			
 
				+        clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1];
			
 
				+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
			
 
				+        clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2];
			
 
				+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
			
 
				+        clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR];
			
 
				+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
			
 
				+    }
			
 
				+
			
 
				+    // Prepare per frame descriptor tables
			
 
				+    // Each double-buffered resource has a _1 and a _2 variant. On even frames the
			
 
				+    // SRV (read) index is _1 and the UAV (write) index is _2; odd frames swap them.
			
 
				+    // NOTE(review): currentCpuOnlyTableBase and currentGpuTableBase are not
			
 
				+    // referenced again in this function.
			
 
				+    const bool isOddFrame = !!(context->resourceFrameIndex & 1);
			
 
				+    const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0;
			
 
				+    const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
			
 
				+    const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1;
			
 
				+    const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2;
			
 
				+    const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1;
			
 
				+    const uint32_t upscaledColorUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2;
			
 
				+    const uint32_t dilatedMotionVectorsResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1;
			
 
				+    const uint32_t previousDilatedMotionVectorsResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2;
			
 
				+    const uint32_t lumaHistorySrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1;
			
 
				+    const uint32_t lumaHistoryUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2;
			
 
				+
			
 
				+    const uint32_t prevPreAlphaColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1;
			
 
				+    const uint32_t prevPreAlphaColorUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2;
			
 
				+    const uint32_t prevPostAlphaColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1;
			
 
				+    const uint32_t prevPostAlphaColorUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2;
			
 
				+
			
 
				+    // The first execution is treated as an accumulation reset as well; the
			
 
				+    // first-execution flag is consumed here.
			
 
				+    const bool resetAccumulation = params->reset || context->firstExecution;
			
 
				+    context->firstExecution = false;
			
 
				+
			
 
				+    // Register the application-provided color, depth and motion vector resources
			
 
				+    // into their SRV slots.
			
 
				+    context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->color, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
			
 
				+    context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->depth, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH]);
			
 
				+    context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->motionVectors, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS]);
			
 
				+
			
 
				+    // if auto exposure is enabled use the auto exposure SRV, otherwise what the app sends.
			
 
				+    // (when the app sends no exposure resource, the internal default exposure is used.)
			
 
				+    if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) {
			
 
				+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE];
			
 
				+    } else {
			
 
				+        if (ffxFsr2ResourceIsNull(params->exposure)) {
			
 
				+            context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE];
			
 
				+        } else {
			
 
				+            context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->exposure, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE]);
			
 
				+        }
			
 
				+    }
			
 
				+ 
			
 
				+    // NOTE(review): the PREV_PRE_ALPHA_COLOR SRV slot written here is overwritten
			
 
				+    // further down by the per-frame PREV_PRE_ALPHA_COLOR_1/_2 selection, so this
			
 
				+    // registration does not survive to the passes - confirm whether
			
 
				+    // FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY was the intended slot.
			
 
				+    if (params->enableAutoReactive)
			
 
				+    {
			
 
				+        context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->colorOpaqueOnly, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]);
			
 
				+    }
			
 
				+    
			
 
				+    // Reactive mask: fall back to the internal default reactivity resource when
			
 
				+    // the app provides none.
			
 
				+    if (ffxFsr2ResourceIsNull(params->reactive)) {
			
 
				+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY];
			
 
				+    }
			
 
				+    else {
			
 
				+        context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->reactive, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
			
 
				+    }
			
 
				+    
			
 
				+    // Transparency and composition mask: same fallback as the reactive mask.
			
 
				+    if (ffxFsr2ResourceIsNull(params->transparencyAndComposition)) {
			
 
				+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY];
			
 
				+    } else {
			
 
				+        context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->transparencyAndComposition, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK]);
			
 
				+    }
			
 
				+
			
 
				+    // Register the output as a UAV, then point the generic (un-suffixed) SRV/UAV
			
 
				+    // slots at this frame's _1/_2 variants chosen above. RCAS reads the upscaled
			
 
				+    // color that is written through the UAV this frame.
			
 
				+    context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->output, &context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]);
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = context->srvResources[lockStatusSrvResourceIndex];
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->srvResources[upscaledColorSrvResourceIndex];
			
 
				+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = context->uavResources[lockStatusUavResourceIndex];
			
 
				+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->uavResources[upscaledColorUavResourceIndex];
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT] = context->uavResources[upscaledColorUavResourceIndex];
			
 
				+
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = context->srvResources[dilatedMotionVectorsResourceIndex];
			
 
				+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = context->uavResources[dilatedMotionVectorsResourceIndex];
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS] = context->srvResources[previousDilatedMotionVectorsResourceIndex];
			
 
				+
			
 
				+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->uavResources[lumaHistoryUavResourceIndex];
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->srvResources[lumaHistorySrvResourceIndex];
			
 
				+
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]  = context->srvResources[prevPreAlphaColorSrvResourceIndex];
			
 
				+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]  = context->uavResources[prevPreAlphaColorUavResourceIndex];
			
 
				+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR] = context->srvResources[prevPostAlphaColorSrvResourceIndex];
			
 
				+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR] = context->uavResources[prevPostAlphaColorUavResourceIndex];
			
 
				+
			
 
				+    // actual resource size may differ from render/display resolution (e.g. due to Hw/API restrictions), so query the descriptor for UVs adjustment
			
 
				+    // NOTE(review): resourceDescReactiveMask is queried but not read again in this function.
			
 
				+    const FfxResourceDescription resourceDescInputColor = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
			
 
				+    const FfxResourceDescription resourceDescLockStatus = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[lockStatusSrvResourceIndex]);
			
 
				+    const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
			
 
				+    FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D);
			
 
				+    FFX_ASSERT(resourceDescLockStatus.type == FFX_RESOURCE_TYPE_TEXTURE2D);
			
 
				+
			
 
				+    // Per-frame constants. A render size of 0 in `params` falls back to the
			
 
				+    // dimensions of the input color resource.
			
 
				+    context->constants.jitterOffset[0] = params->jitterOffset.x;
			
 
				+    context->constants.jitterOffset[1] = params->jitterOffset.y;
			
 
				+    context->constants.renderSize[0] = int32_t(params->renderSize.width ? params->renderSize.width   : resourceDescInputColor.width);
			
 
				+    context->constants.renderSize[1] = int32_t(params->renderSize.height ? params->renderSize.height : resourceDescInputColor.height);
			
 
				+    context->constants.maxRenderSize[0] = int32_t(context->contextDescription.maxRenderSize.width);
			
 
				+    context->constants.maxRenderSize[1] = int32_t(context->contextDescription.maxRenderSize.height);
			
 
				+    context->constants.inputColorResourceDimensions[0] = resourceDescInputColor.width;
			
 
				+    context->constants.inputColorResourceDimensions[1] = resourceDescInputColor.height;
			
 
				+
			
 
				+    // compute the horizontal FOV for the shader from the vertical one.
			
 
				+    // NOTE(review): this uses params->renderSize directly, without the
			
 
				+    // resource-size fallback applied to constants.renderSize above; a zero
			
 
				+    // height divides by zero here - confirm callers always pass a non-zero size.
			
 
				+    const float aspectRatio = (float)params->renderSize.width / (float)params->renderSize.height;
			
 
				+    const float cameraAngleHorizontal = atan(tan(params->cameraFovAngleVertical / 2) * aspectRatio) * 2;
			
 
				+    context->constants.tanHalfFOV = tanf(cameraAngleHorizontal * 0.5f);
			
 
				+    // values <= 0 (including an unset 0) fall back to a factor of 1.0
			
 
				+    context->constants.viewSpaceToMetersFactor = (params->viewSpaceToMetersFactor > 0.0f) ? params->viewSpaceToMetersFactor : 1.0f;
			
 
				+
			
 
				+    // compute params to enable device depth to view space depth computation in shader
			
 
				+    setupDeviceDepthToViewSpaceDepthParams(context, params);
			
 
				+
			
 
				+    // To be updated if resource is larger than the actual image size
			
 
				+    context->constants.downscaleFactor[0] = float(context->constants.renderSize[0]) / context->contextDescription.displaySize.width;
			
 
				+    context->constants.downscaleFactor[1] = float(context->constants.renderSize[1]) / context->contextDescription.displaySize.height;
			
 
				+    // keep last frame's pre-exposure before overwriting it; a pre-exposure of 0 is treated as 1.0
			
 
				+    context->constants.previousFramePreExposure = context->constants.preExposure;
			
 
				+    context->constants.preExposure = (params->preExposure != 0) ? params->preExposure : 1.0f;
			
 
				+
			
 
				+    // motion vector data
			
 
				+    // (constants.displaySize is not written in this function; presumably it is set
			
 
				+    // at context creation - TODO confirm)
			
 
				+    const int32_t* motionVectorsTargetSize = (context->contextDescription.flags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? context->constants.displaySize : context->constants.renderSize;
			
 
				+
			
 
				+    context->constants.motionVectorScale[0] = (params->motionVectorScale.x / motionVectorsTargetSize[0]);
			
 
				+    context->constants.motionVectorScale[1] = (params->motionVectorScale.y / motionVectorsTargetSize[1]);
			
 
				+
			
 
				+    // compute jitter cancellation
			
 
				+    // (previousJitterOffset is only updated while this flag is enabled)
			
 
				+    if (context->contextDescription.flags & FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) {
			
 
				+
			
 
				+        context->constants.motionVectorJitterCancellation[0] = (context->previousJitterOffset[0] - context->constants.jitterOffset[0]) / motionVectorsTargetSize[0];
			
 
				+        context->constants.motionVectorJitterCancellation[1] = (context->previousJitterOffset[1] - context->constants.jitterOffset[1]) / motionVectorsTargetSize[1];
			
 
				+
			
 
				+        context->previousJitterOffset[0] = context->constants.jitterOffset[0];
			
 
				+        context->previousJitterOffset[1] = context->constants.jitterOffset[1];
			
 
				+    }
			
 
				+
			
 
				+    // lock data, assuming jitter sequence length computation for now
			
 
				+    const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(params->renderSize.width, context->contextDescription.displaySize.width);
			
 
				+
			
 
				+    // init on first frame
			
 
				+    // otherwise move the stored phase count towards the new target by at most one per dispatch
			
 
				+    if (resetAccumulation || context->constants.jitterPhaseCount == 0) {
			
 
				+        context->constants.jitterPhaseCount = (float)jitterPhaseCount;
			
 
				+    } else {
			
 
				+        const int32_t jitterPhaseCountDelta = (int32_t)(jitterPhaseCount - context->constants.jitterPhaseCount);
			
 
				+        if (jitterPhaseCountDelta > 0) {
			
 
				+            context->constants.jitterPhaseCount++;
			
 
				+        } else if (jitterPhaseCountDelta < 0) {
			
 
				+            context->constants.jitterPhaseCount--;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // convert delta time to seconds and clamp to [0, 1].
			
 
				+    context->constants.deltaTime = FFX_MAXIMUM(0.0f, FFX_MINIMUM(1.0f, params->frameTimeDelta / 1000.0f));
			
 
				+
			
 
				+    // frame index restarts at 0 whenever accumulation is reset
			
 
				+    if (resetAccumulation) {
			
 
				+        context->constants.frameIndex = 0;
			
 
				+    } else {
			
 
				+        context->constants.frameIndex++;
			
 
				+    }
			
 
				+
			
 
				+    // shading change usage of the SPD mip levels.
			
 
				+    context->constants.lumaMipLevelToUse = uint32_t(FFX_FSR2_SHADING_CHANGE_MIP_LEVEL);
			
 
				+
			
 
				+    // 2 << n equals 2^(n + 1): the divisor applied to the max render size
			
 
				+    const float mipDiv = float(2 << context->constants.lumaMipLevelToUse);
			
 
				+    context->constants.lumaMipDimensions[0] = uint32_t(context->constants.maxRenderSize[0] / mipDiv);
			
 
				+    context->constants.lumaMipDimensions[1] = uint32_t(context->constants.maxRenderSize[1] / mipDiv);
			
 
				+
			
 
				+	// -- GODOT start --
			
 
				+    memcpy(context->constants.reprojectionMatrix, params->reprojectionMatrix, sizeof(context->constants.reprojectionMatrix));
			
 
				+	// -- GODOT end --
			
 
				+
			
 
				+    // Dispatch dimensions: ceil(size / 8) in x and y, computed from the render size
			
 
				+    // (Src) and from the display size (Dst).
			
 
				+    const int32_t threadGroupWorkRegionDim = 8;
			
 
				+    const int32_t dispatchSrcX = (context->constants.renderSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
			
 
				+    const int32_t dispatchSrcY = (context->constants.renderSize[1] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
			
 
				+    const int32_t dispatchDstX = (context->contextDescription.displaySize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
			
 
				+    const int32_t dispatchDstY = (context->contextDescription.displaySize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
			
 
				+
			
 
				+    // On reset, schedule clears of this frame's lock status and upscaled color
			
 
				+    // history (the SRV-side variants), the scene luminance and the auto exposure resource.
			
 
				+    if (resetAccumulation) {
			
 
				+
			
 
				+        FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
			
 
				+
			
 
				+        // LockStatus resource has no sign bit, callback functions are compensating for this.
			
 
				+        // Clearing the resource must follow the same logic.
			
 
				+        float clearValuesLockStatus[4]{};
			
 
				+        clearValuesLockStatus[LOCK_LIFETIME_REMAINING] = 0.0f;
			
 
				+        clearValuesLockStatus[LOCK_TEMPORAL_LUMA] = 0.0f;
			
 
				+
			
 
				+        memcpy(clearJob.clearJobDescriptor.color, clearValuesLockStatus, 4 * sizeof(float));
			
 
				+        clearJob.clearJobDescriptor.target = context->srvResources[lockStatusSrvResourceIndex];
			
 
				+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
			
 
				+
			
 
				+        const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
			
 
				+        memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
			
 
				+        clearJob.clearJobDescriptor.target = context->srvResources[upscaledColorSrvResourceIndex];
			
 
				+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
			
 
				+
			
 
				+        clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE];
			
 
				+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
			
 
				+
			
 
				+        //if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE)
			
 
				+        // Auto exposure always used to track luma changes in locking logic
			
 
				+        {
			
 
				+            const float clearValuesExposure[]{ -1.f, 1e8f, 0.f, 0.f };
			
 
				+            memcpy(clearJob.clearJobDescriptor.color, clearValuesExposure, 4 * sizeof(float));
			
 
				+            clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE];
			
 
				+            context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // Luminance pyramid setup: SpdSetup() fills the dispatch thread group counts,
			
 
				+    // the work group offset and the work-group/mip counts from the render-size rectangle.
			
 
				+    uint32_t dispatchThreadGroupCountXY[2];
			
 
				+    uint32_t workGroupOffset[2];
			
 
				+    uint32_t numWorkGroupsAndMips[2];
			
 
				+    uint32_t rectInfo[4] = { 0, 0, params->renderSize.width, params->renderSize.height };
			
 
				+    SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo);
			
 
				+
			
 
				+    // downsample
			
 
				+    Fsr2SpdConstants luminancePyramidConstants;
			
 
				+    luminancePyramidConstants.numworkGroups = numWorkGroupsAndMips[0];
			
 
				+    luminancePyramidConstants.mips = numWorkGroupsAndMips[1];
			
 
				+    luminancePyramidConstants.workGroupOffset[0] = workGroupOffset[0];
			
 
				+    luminancePyramidConstants.workGroupOffset[1] = workGroupOffset[1];
			
 
				+    luminancePyramidConstants.renderSize[0] = params->renderSize.width;
			
 
				+    luminancePyramidConstants.renderSize[1] = params->renderSize.height;
			
 
				+
			
 
				+    // compute the RCAS constants. The sharpness is remapped linearly first:
			
 
				+    // sharpness 0 -> 2.0, sharpness 1 -> 0.0.
			
 
				+    Fsr2RcasConstants rcasConsts = {};
			
 
				+    const float sharpenessRemapped = (-2.0f * params->sharpness) + 2.0f;
			
 
				+    FsrRcasCon(rcasConsts.rcasConfig, sharpenessRemapped);
			
 
				+
			
 
				+    // constants for the auto reactive / transparency-and-composition generation
			
 
				+    Fsr2GenerateReactiveConstants2 genReactiveConsts = {};
			
 
				+    genReactiveConsts.autoTcThreshold = params->autoTcThreshold;
			
 
				+    genReactiveConsts.autoTcScale = params->autoTcScale;
			
 
				+    genReactiveConsts.autoReactiveScale = params->autoReactiveScale;
			
 
				+    genReactiveConsts.autoReactiveMax = params->autoReactiveMax;
			
 
				+
			
 
				+    // initialize constantBuffers data
			
 
				+    // (each copy is uint32Size 32-bit words, as recorded in the global buffer entry)
			
 
				+    memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].data,        &context->constants,        globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].uint32Size * sizeof(uint32_t));
			
 
				+    memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].data,         &luminancePyramidConstants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].uint32Size  * sizeof(uint32_t));
			
 
				+    memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].data,        &rcasConsts,                globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].uint32Size * sizeof(uint32_t));
			
 
				+    memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].data, &genReactiveConsts,         globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].uint32Size * sizeof(uint32_t));
			
 
				+
			
 
				+    // Auto reactive
			
 
				+    // When enabled, the reactive and transparency/composition SRV slots are
			
 
				+    // repointed to the AUTOREACTIVE / AUTOCOMPOSITION resources, replacing
			
 
				+    // whatever was bound to those slots earlier in this function.
			
 
				+    if (params->enableAutoReactive)
			
 
				+    {
			
 
				+        generateReactiveMaskInternal(context, params);
			
 
				+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
			
 
				+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION];
			
 
				+    }
			
 
				+    // Pass order: luminance pyramid, reconstruct previous depth, depth clip,
			
 
				+    // lock, accumulate (sharpen variant when sharpening is enabled), then RCAS.
			
 
				+    scheduleDispatch(context, params, &context->pipelineComputeLuminancePyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]);
			
 
				+    scheduleDispatch(context, params, &context->pipelineReconstructPreviousDepth, dispatchSrcX, dispatchSrcY);
			
 
				+    scheduleDispatch(context, params, &context->pipelineDepthClip, dispatchSrcX, dispatchSrcY);
			
 
				+
			
 
				+    const bool sharpenEnabled = params->enableSharpening;
			
 
				+
			
 
				+    scheduleDispatch(context, params, &context->pipelineLock, dispatchSrcX, dispatchSrcY);
			
 
				+    scheduleDispatch(context, params, sharpenEnabled ? &context->pipelineAccumulateSharpen : &context->pipelineAccumulate, dispatchDstX, dispatchDstY);
			
 
				+
			
 
				+    // RCAS
			
 
				+    if (sharpenEnabled) {
			
 
				+
			
 
				+        // dispatch RCAS
			
 
				+        // (ceil(display size / 16) groups in x and y)
			
 
				+        const int32_t threadGroupWorkRegionDimRCAS = 16;
			
 
				+        const int32_t dispatchX = (context->contextDescription.displaySize.width + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS;
			
 
				+        const int32_t dispatchY = (context->contextDescription.displaySize.height + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS;
			
 
				+        scheduleDispatch(context, params, &context->pipelineRCAS, dispatchX, dispatchY);
			
 
				+    }
			
 
				+
			
 
				+    // advance the frame index used for the _1/_2 selection on the next dispatch
			
 
				+    context->resourceFrameIndex = (context->resourceFrameIndex + 1) % FSR2_MAX_QUEUED_FRAMES;
			
 
				+
			
 
				+    // Fsr2MaxQueuedFrames must be an even number.
			
 
				+    FFX_STATIC_ASSERT((FSR2_MAX_QUEUED_FRAMES & 1) == 0);
			
 
				+
			
 
				+    // hand every job scheduled above to the backend for execution on the command list
			
 
				+    context->contextDescription.callbacks.fpExecuteGpuJobs(&context->contextDescription.callbacks, commandList);
			
 
				+
			
 
				+    // release dynamic resources
			
 
				+    context->contextDescription.callbacks.fpUnregisterResources(&context->contextDescription.callbacks);
			
 
				+
			
 
				+    return FFX_OK;
			
 
				+}
			
 
				+
			
 
				+// Creates an FSR2 context inside the caller-provided `context` storage.
			
 
				+//   context            - storage to initialise; it is zeroed before use
			
 
				+//   contextDescription - creation parameters, including the backend callbacks
			
 
				+// Returns (via FFX_RETURN_ON_ERROR) FFX_ERROR_INVALID_POINTER when either pointer
			
 
				+// is null, FFX_ERROR_INCOMPLETE_INTERFACE when a required callback is missing or
			
 
				+// a scratch buffer is given without a size, and otherwise the result of fsr2Create().
			
 
				+FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextDescription* contextDescription)
			
 
				+{
			
 
				+    // check the context pointer BEFORE writing through it: zeroing first would
			
 
				+    // dereference a null pointer before it could be rejected.
			
 
				+    FFX_RETURN_ON_ERROR(
			
 
				+        context,
			
 
				+        FFX_ERROR_INVALID_POINTER);
			
 
				+
			
 
				+    // zero context memory (still done before the remaining checks, so a failed
			
 
				+    // creation leaves the context zeroed exactly as before)
			
 
				+    memset(context, 0, sizeof(FfxFsr2Context));
			
 
				+
			
 
				+    FFX_RETURN_ON_ERROR(
			
 
				+        contextDescription,
			
 
				+        FFX_ERROR_INVALID_POINTER);
			
 
				+
			
 
				+    // validate that all callbacks are set for the interface
			
 
				+    FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
			
 
				+    FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
			
 
				+    FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
			
 
				+
			
 
				+    // if a scratch buffer is declared, then we must have a size
			
 
				+    if (contextDescription->callbacks.scratchBuffer) {
			
 
				+
			
 
				+        FFX_RETURN_ON_ERROR(contextDescription->callbacks.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
			
 
				+    }
			
 
				+
			
 
				+    // ensure the context is large enough for the internal context.
			
 
				+    FFX_STATIC_ASSERT(sizeof(FfxFsr2Context) >= sizeof(FfxFsr2Context_Private));
			
 
				+
			
 
				+    // create the context.
			
 
				+    FfxFsr2Context_Private* contextPrivate = (FfxFsr2Context_Private*)(context);
			
 
				+    const FfxErrorCode errorCode = fsr2Create(contextPrivate, contextDescription);
			
 
				+
			
 
				+    return errorCode;
			
 
				+}
			
 
				+
			
 
				+// Releases an FSR2 context.
				+//
				+// Returns FFX_ERROR_INVALID_POINTER when context is NULL, otherwise whatever
				+// fsr2Release() reports.
				+FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context)
				+{
				+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
				+
				+    // The public context is reinterpreted as the private context it stores.
				+    return fsr2Release((FfxFsr2Context_Private*)(context));
				+}
			
 
				+
			
 
				+// Validates the dispatch parameters and records the FSR2 passes.
				+//
				+// Returns FFX_ERROR_INVALID_POINTER for a NULL context or dispatchParams,
				+// FFX_ERROR_OUT_OF_RANGE when renderSize exceeds the maxRenderSize the context
				+// was created with (width is checked before height), FFX_ERROR_NULL_DEVICE
				+// when the context has no device, and otherwise the result of fsr2Dispatch().
				+FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* context, const FfxFsr2DispatchDescription* dispatchParams)
				+{
				+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
				+    FFX_RETURN_ON_ERROR(dispatchParams, FFX_ERROR_INVALID_POINTER);
				+
				+    FfxFsr2Context_Private* const fsr2 = (FfxFsr2Context_Private*)(context);
				+
				+    // The render size may never exceed the maximum the context was created with.
				+    FFX_RETURN_ON_ERROR(dispatchParams->renderSize.width <= fsr2->contextDescription.maxRenderSize.width, FFX_ERROR_OUT_OF_RANGE);
				+    FFX_RETURN_ON_ERROR(dispatchParams->renderSize.height <= fsr2->contextDescription.maxRenderSize.height, FFX_ERROR_OUT_OF_RANGE);
				+    FFX_RETURN_ON_ERROR(fsr2->device, FFX_ERROR_NULL_DEVICE);
				+
				+    // Hand over to the internal implementation that dispatches the passes.
				+    return fsr2Dispatch(fsr2, dispatchParams);
				+}
			
 
				+
			
 
				+// Maps a quality preset to its per-dimension upscaling ratio.
				+//
				+// Returns 1.5, 1.7, 2.0 or 3.0 for the quality, balanced, performance and
				+// ultra-performance presets respectively, and 0.0 for any other value.
				+float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMode)
				+{
				+    float upscaleRatio = 0.0f;
				+
				+    if (qualityMode == FFX_FSR2_QUALITY_MODE_QUALITY) {
				+        upscaleRatio = 1.5f;
				+    } else if (qualityMode == FFX_FSR2_QUALITY_MODE_BALANCED) {
				+        upscaleRatio = 1.7f;
				+    } else if (qualityMode == FFX_FSR2_QUALITY_MODE_PERFORMANCE) {
				+        upscaleRatio = 2.0f;
				+    } else if (qualityMode == FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE) {
				+        upscaleRatio = 3.0f;
				+    }
				+
				+    return upscaleRatio;
				+}
			
 
				+
			
 
				+// Computes the render resolution for a display resolution and quality preset.
				+//
				+// Each display dimension is divided by the preset's upscaling ratio and
				+// truncated to an unsigned integer.
				+//
				+// Returns FFX_ERROR_INVALID_POINTER when an output pointer is NULL,
				+// FFX_ERROR_INVALID_ENUM when qualityMode lies outside the
				+// [QUALITY, ULTRA_PERFORMANCE] range, and FFX_OK otherwise.
				+FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
				+    uint32_t* renderWidth,
				+    uint32_t* renderHeight,
				+    uint32_t displayWidth,
				+    uint32_t displayHeight,
				+    FfxFsr2QualityMode qualityMode)
				+{
				+    FFX_RETURN_ON_ERROR(renderWidth, FFX_ERROR_INVALID_POINTER);
				+    FFX_RETURN_ON_ERROR(renderHeight, FFX_ERROR_INVALID_POINTER);
				+    FFX_RETURN_ON_ERROR(
				+        qualityMode >= FFX_FSR2_QUALITY_MODE_QUALITY && FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE >= qualityMode,
				+        FFX_ERROR_INVALID_ENUM);
				+
				+    // Apply the preset's ratio to both dimensions.
				+    const float upscaleRatio = ffxFsr2GetUpscaleRatioFromQualityMode(qualityMode);
				+    *renderWidth = (uint32_t)((float)displayWidth / upscaleRatio);
				+    *renderHeight = (uint32_t)((float)displayHeight / upscaleRatio);
				+
				+    return FFX_OK;
				+}
			
 
				+
			
 
				+// Flags the context so that its pipeline states are recreated the next time
				+// a function that checks refreshPipelineStates runs.
				+//
				+// Returns FFX_ERROR_INVALID_POINTER when context is NULL, FFX_OK otherwise.
				+FfxErrorCode ffxFsr2ContextEnqueueRefreshPipelineRequest(FfxFsr2Context* context)
				+{
				+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
				+
				+    ((FfxFsr2Context_Private*)context)->refreshPipelineStates = true;
				+
				+    return FFX_OK;
				+}
			
 
				+
			
 
				+// Computes the number of jitter phases for a render/display width pair.
				+//
				+// The result is 8 * (displayWidth / renderWidth)^2, truncated to an integer.
				+//
				+// renderWidth  - width the scene is rendered at.
				+// displayWidth - width of the upscaled output.
				+//
				+// Returns the phase count, or 0 when renderWidth is 0 (the ratio is undefined
				+// in that case).
				+int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth)
				+{
				+    // A zero render width would divide by zero, and converting the resulting
				+    // infinity/NaN to int32_t is undefined behaviour.
				+    if (renderWidth == 0) {
				+        return 0;
				+    }
				+
				+    const float basePhaseCount = 8.0f;
				+    const int32_t jitterPhaseCount = int32_t(basePhaseCount * pow((float(displayWidth) / renderWidth), 2.0f));
				+    return jitterPhaseCount;
				+}
			
 
				+
			
 
				+// Produces the sub-pixel jitter offset for one phase of the jitter sequence.
				+//
				+// The offset is taken from halton() in base 2 (x) and base 3 (y) at position
				+// (index % phaseCount) + 1, shifted down by 0.5.
				+//
				+// Returns FFX_ERROR_INVALID_POINTER when outX or outY is NULL,
				+// FFX_ERROR_INVALID_ARGUMENT when phaseCount is not positive, FFX_OK otherwise.
				+FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount)
				+{
				+    FFX_RETURN_ON_ERROR(outX, FFX_ERROR_INVALID_POINTER);
				+    FFX_RETURN_ON_ERROR(outY, FFX_ERROR_INVALID_POINTER);
				+    FFX_RETURN_ON_ERROR(phaseCount > 0, FFX_ERROR_INVALID_ARGUMENT);
				+
				+    // Both axes sample the same position of their respective sequences.
				+    const int32_t sequencePosition = (index % phaseCount) + 1;
				+    const float offsetX = halton(sequencePosition, 2) - 0.5f;
				+    const float offsetY = halton(sequencePosition, 3) - 0.5f;
				+
				+    *outX = offsetX;
				+    *outY = offsetY;
				+    return FFX_OK;
				+}
			
 
				+
			
 
				+// Reports whether the given FfxResource has a NULL `resource` member.
				+FFX_API bool ffxFsr2ResourceIsNull(FfxResource resource)
				+{
				+    const bool hasNoHandle = (NULL == resource.resource);
				+    return hasNoHandle;
				+}
			
 
				+
			
 
				+// Schedules and executes the standalone reactive-mask generation pass.
				+//
				+// After validating its arguments the function recreates the pipeline states
				+// if a refresh was requested, registers the caller's opaque-only colour,
				+// pre-upscale colour and output resources with the backend, schedules a single
				+// compute job covering params->renderSize in 8x8 tiles and executes it on
				+// params->commandList. The context's AUTOREACTIVE UAV entry is saved before
				+// the output is registered over it and restored before returning.
				+//
				+// Returns FFX_ERROR_INVALID_POINTER for a NULL context, params or command
				+// list, FFX_ERROR_NULL_DEVICE when the context has no device, and FFX_OK
				+// otherwise (the results of the backend callbacks are not inspected).
				+FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const FfxFsr2GenerateReactiveDescription* params)
				+{
				+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
				+    FFX_RETURN_ON_ERROR(params, FFX_ERROR_INVALID_POINTER);
				+    FFX_RETURN_ON_ERROR(params->commandList, FFX_ERROR_INVALID_POINTER);
				+
				+    FfxFsr2Context_Private* const fsr2 = (FfxFsr2Context_Private*)(context);
				+    FFX_RETURN_ON_ERROR(fsr2->device, FFX_ERROR_NULL_DEVICE);
				+
				+    // Honour a pending pipeline refresh request first.
				+    if (fsr2->refreshPipelineStates) {
				+        createPipelineStates(fsr2);
				+        fsr2->refreshPipelineStates = false;
				+    }
				+
				+    FfxFsr2Interface* const backend = &fsr2->contextDescription.callbacks;
				+    FfxPipelineState* const reactivePipeline = &fsr2->pipelineGenerateReactive;
				+
				+    // Number of 8x8 thread groups needed to cover the render area (rounded up).
				+    const int32_t tileSize = 8;
				+    const int32_t groupCountX = (params->renderSize.width + (tileSize - 1)) / tileSize;
				+    const int32_t groupCountY = (params->renderSize.height + (tileSize - 1)) / tileSize;
				+
				+    // Keep the internal reactive UAV; registering params->outReactive below
				+    // overwrites that slot for the duration of this call.
				+    const FfxResourceInternal savedReactive = fsr2->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
				+
				+    backend->fpRegisterResource(backend, &params->colorOpaqueOnly, &fsr2->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
				+    backend->fpRegisterResource(backend, &params->colorPreUpscale, &fsr2->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
				+    backend->fpRegisterResource(backend, &params->outReactive, &fsr2->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]);
				+
				+    FfxComputeJobDescription reactiveJob = {};
				+    reactiveJob.pipeline = *reactivePipeline;
				+    reactiveJob.dimensions[0] = groupCountX;
				+    reactiveJob.dimensions[1] = groupCountY;
				+    reactiveJob.dimensions[2] = 1;
				+
				+    // Output: the caller's reactive mask, now occupying the AUTOREACTIVE slot.
				+    reactiveJob.uavs[0] = fsr2->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
				+    wcscpy_s(reactiveJob.uavNames[0], reactivePipeline->uavResourceBindings[0].name);
				+
				+    // The first two SRV names are always copied, independent of srvCount.
				+    wcscpy_s(reactiveJob.srvNames[0], reactivePipeline->srvResourceBindings[0].name);
				+    wcscpy_s(reactiveJob.srvNames[1], reactivePipeline->srvResourceBindings[1].name);
				+
				+    // Inputs: resolve every SRV binding of the pipeline to its registered resource.
				+    for (uint32_t srvIndex = 0; srvIndex < reactivePipeline->srvCount; ++srvIndex) {
				+        const uint32_t boundResourceId = reactivePipeline->srvResourceBindings[srvIndex].resourceIdentifier;
				+        reactiveJob.srvs[srvIndex] = fsr2->srvResources[boundResourceId];
				+        wcscpy_s(reactiveJob.srvNames[srvIndex], reactivePipeline->srvResourceBindings[srvIndex].name);
				+    }
				+
				+    // Constants consumed by the pass, taken verbatim from the caller.
				+    Fsr2GenerateReactiveConstants passConstants = {};
				+    passConstants.scale = params->scale;
				+    passConstants.threshold = params->cutoffThreshold;
				+    passConstants.binaryValue = params->binaryValue;
				+    passConstants.flags = params->flags;
				+
				+    // NOTE(review): uint32Size receives a byte count here although the field
				+    // name suggests a count of 32-bit values - confirm against the backend.
				+    reactiveJob.cbs[0].uint32Size = sizeof(passConstants);
				+    memcpy(&reactiveJob.cbs[0].data, &passConstants, sizeof(passConstants));
				+    wcscpy_s(reactiveJob.cbNames[0], reactivePipeline->cbResourceBindings[0].name);
				+
				+    FfxGpuJobDescription gpuJob = { FFX_GPU_JOB_COMPUTE };
				+    gpuJob.computeJobDescriptor = reactiveJob;
				+
				+    backend->fpScheduleGpuJob(backend, &gpuJob);
				+    backend->fpExecuteGpuJobs(backend, params->commandList);
				+
				+    // Put the internal reactive UAV back in place.
				+    fsr2->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE] = savedReactive;
				+
				+    return FFX_OK;
				+}
			
 
				+
			
 
				+// Schedules the automatic transparency/composition and reactive mask
				+// generation pass (pipelineTcrAutogenerate) for a dispatch.
				+//
				+// Recreates the pipeline states if a refresh was requested, registers the
				+// opaque-only and full colour inputs from params, binds the four UAVs written
				+// by the pass and schedules one compute job covering params->renderSize in
				+// 8x8 tiles. The job is only scheduled here, not executed.
				+//
				+// Always returns FFX_OK; the results of the backend callbacks are not inspected.
				+static FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* contextPrivate, const FfxFsr2DispatchDescription* params)
				+{
				+    if (contextPrivate->refreshPipelineStates) {
				+        createPipelineStates(contextPrivate);
				+        contextPrivate->refreshPipelineStates = false;
				+    }
				+
				+    FfxFsr2Interface* const backend = &contextPrivate->contextDescription.callbacks;
				+    FfxPipelineState* const tcrPipeline = &contextPrivate->pipelineTcrAutogenerate;
				+
				+    // Number of 8x8 thread groups needed to cover the render area (rounded up).
				+    const int32_t tileSize = 8;
				+    const int32_t groupCountX = (params->renderSize.width + (tileSize - 1)) / tileSize;
				+    const int32_t groupCountY = (params->renderSize.height + (tileSize - 1)) / tileSize;
				+
				+    FfxComputeJobDescription tcrJob = {};
				+    backend->fpRegisterResource(backend, &params->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
				+    backend->fpRegisterResource(backend, &params->color, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
				+
				+    // UAVs written by the pass, listed in binding order.
				+    const uint32_t outputResourceIds[] = {
				+        FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE,
				+        FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION,
				+        FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR,
				+        FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR,
				+    };
				+    const uint32_t outputCount = sizeof(outputResourceIds) / sizeof(outputResourceIds[0]);
				+    for (uint32_t uavIndex = 0; uavIndex < outputCount; ++uavIndex) {
				+        tcrJob.uavs[uavIndex] = contextPrivate->uavResources[outputResourceIds[uavIndex]];
				+        wcscpy_s(tcrJob.uavNames[uavIndex], tcrPipeline->uavResourceBindings[uavIndex].name);
				+    }
				+
				+    tcrJob.dimensions[0] = groupCountX;
				+    tcrJob.dimensions[1] = groupCountY;
				+    tcrJob.dimensions[2] = 1;
				+    tcrJob.pipeline = *tcrPipeline;
				+
				+    // Inputs: resolve every SRV binding of the pipeline to its registered resource.
				+    for (uint32_t srvIndex = 0; srvIndex < tcrPipeline->srvCount; ++srvIndex) {
				+        const uint32_t boundResourceId = tcrPipeline->srvResourceBindings[srvIndex].resourceIdentifier;
				+        tcrJob.srvs[srvIndex] = contextPrivate->srvResources[boundResourceId];
				+        wcscpy_s(tcrJob.srvNames[srvIndex], tcrPipeline->srvResourceBindings[srvIndex].name);
				+    }
				+
				+    // Constant buffers: copied from the global table, keyed by each binding's identifier.
				+    for (uint32_t cbIndex = 0; cbIndex < tcrPipeline->constCount; ++cbIndex) {
				+        wcscpy_s(tcrJob.cbNames[cbIndex], tcrPipeline->cbResourceBindings[cbIndex].name);
				+        tcrJob.cbs[cbIndex] = globalFsr2ConstantBuffers[tcrPipeline->cbResourceBindings[cbIndex].resourceIdentifier];
				+        tcrJob.cbSlotIndex[cbIndex] = tcrPipeline->cbResourceBindings[cbIndex].slotIndex;
				+    }
				+
				+    FfxGpuJobDescription gpuJob = { FFX_GPU_JOB_COMPUTE };
				+    gpuJob.computeJobDescriptor = tcrJob;
				+
				+    backend->fpScheduleGpuJob(backend, &gpuJob);
				+
				+    return FFX_OK;
				+}
			
--- a/thirdparty/amd-fsr2/ffx_fsr2.h
+++ b/thirdparty/amd-fsr2/ffx_fsr2.h
@@ -0,0 +1,458 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+// @defgroup FSR2
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+// Include the interface for the backend of the FSR2 API.
			
 
				+#include "ffx_fsr2_interface.h"
			
 
				+
			
 
				+/// FidelityFX Super Resolution 2 major version.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+#define FFX_FSR2_VERSION_MAJOR      (2)
			
 
				+
			
 
				+/// FidelityFX Super Resolution 2 minor version.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+#define FFX_FSR2_VERSION_MINOR      (2)
			
 
				+
			
 
				+/// FidelityFX Super Resolution 2 patch version.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+#define FFX_FSR2_VERSION_PATCH      (1)
			
 
				+
			
 
				+/// The size of the context specified in 32bit values.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+#define FFX_FSR2_CONTEXT_SIZE       (16536)
			
 
				+
			
 
				+#if defined(__cplusplus)
			
 
				+extern "C" {
			
 
				+#endif // #if defined(__cplusplus)
			
 
				+
			
 
				+/// An enumeration of all the quality modes supported by FidelityFX Super
			
 
				+/// Resolution 2 upscaling.
			
 
				+///
			
 
				+/// In order to provide a consistent user experience across multiple
			
 
				+/// applications which implement FSR2, it is strongly recommended that the
			
 
				+/// following preset scaling factors are made available through your
			
 
				+/// application's user interface.
			
 
				+///
			
 
				+/// If your application does not expose the notion of preset scaling factors
			
 
				+/// for upscaling algorithms (perhaps instead implementing a fixed ratio which
			
 
				+/// is immutable) or implementing a more dynamic scaling scheme (such as
			
 
				+/// dynamic resolution scaling), then there is no need to use these presets.
			
 
				+///
			
 
				+/// Please note that <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> is
			
 
				+/// an optional mode which may introduce significant quality degradation in the
			
 
				+/// final image. As such it is recommended that you evaluate the final results
			
 
				+/// of using this scaling mode before deciding if you should include it in your
			
 
				+/// application.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef enum FfxFsr2QualityMode {
			
 
				+
			
 
				+    FFX_FSR2_QUALITY_MODE_QUALITY                       = 1,        ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x.
			
 
				+    FFX_FSR2_QUALITY_MODE_BALANCED                      = 2,        ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x.
			
 
				+    FFX_FSR2_QUALITY_MODE_PERFORMANCE                   = 3,        ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x.
			
 
				+    FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE             = 4         ///< Perform upscaling with a per-dimension upscaling ratio of 3.0x.
			
 
				+} FfxFsr2QualityMode;
			
 
				+
			
 
				+/// An enumeration of bit flags used when creating a
			
 
				+/// <c><i>FfxFsr2Context</i></c>. See <c><i>FfxFsr2ContextDescription</i></c>.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef enum FfxFsr2InitializationFlagBits {
			
 
				+
			
 
				+    FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE                  = (1<<0),   ///< A bit indicating if the input color data provided is using a high-dynamic range.
			
 
				+    FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS   = (1<<1),   ///< A bit indicating if the motion vectors are rendered at display resolution.
			
 
				+    FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION  = (1<<2),   ///< A bit indicating that the motion vectors have the jittering pattern applied to them.
			
 
				+    FFX_FSR2_ENABLE_DEPTH_INVERTED                      = (1<<3),   ///< A bit indicating that the input depth buffer data provided is inverted [1..0].
			
 
				+    FFX_FSR2_ENABLE_DEPTH_INFINITE                      = (1<<4),   ///< A bit indicating that the input depth buffer data provided is using an infinite far plane.
			
 
				+    FFX_FSR2_ENABLE_AUTO_EXPOSURE                       = (1<<5),   ///< A bit indicating if automatic exposure should be applied to input color data.
			
 
				+    FFX_FSR2_ENABLE_DYNAMIC_RESOLUTION                  = (1<<6),   ///< A bit indicating that the application uses dynamic resolution scaling.
			
 
				+    FFX_FSR2_ENABLE_TEXTURE1D_USAGE                     = (1<<7),   ///< A bit indicating that the backend should use 1D textures.
			
 
				+    FFX_FSR2_ENABLE_DEBUG_CHECKING                      = (1<<8),   ///< A bit indicating that the runtime should check some API values and report issues.
			
 
				+} FfxFsr2InitializationFlagBits;
			
 
				+
			
 
				+/// A structure encapsulating the parameters required to initialize FidelityFX
			
 
				+/// Super Resolution 2 upscaling.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef struct FfxFsr2ContextDescription {
			
 
				+
			
 
				+    uint32_t                    flags;                              ///< A collection of <c><i>FfxFsr2InitializationFlagBits</i></c>.
			
 
				+    FfxDimensions2D             maxRenderSize;                      ///< The maximum size that rendering will be performed at.
			
 
				+    FfxDimensions2D             displaySize;                        ///< The size of the presentation resolution targeted by the upscaling process.
			
 
				+    FfxFsr2Interface            callbacks;                          ///< A set of pointers to the backend implementation for FSR 2.0.
			
 
				+    FfxDevice                   device;                             ///< The abstracted device which is passed to some callback functions.
			
 
				+
			
 
				+    FfxFsr2Message              fpMessage;                          ///< A pointer to a function that can receive messages from the runtime.
			
 
				+} FfxFsr2ContextDescription;
			
 
				+
			
 
				+/// A structure encapsulating the parameters for dispatching the various passes
			
 
				+/// of FidelityFX Super Resolution 2.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef struct FfxFsr2DispatchDescription {
			
 
				+
			
 
				+    FfxCommandList              commandList;                        ///< The <c><i>FfxCommandList</i></c> to record FSR2 rendering commands into.
			
 
				+    FfxResource                 color;                              ///< A <c><i>FfxResource</i></c> containing the color buffer for the current frame (at render resolution).
			
 
				+    FfxResource                 depth;                              ///< A <c><i>FfxResource</i></c> containing 32bit depth values for the current frame (at render resolution).
			
 
				+    FfxResource                 motionVectors;                      ///< A <c><i>FfxResource</i></c> containing 2-dimensional motion vectors (at render resolution if <c><i>FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS</i></c> is not set).
			
 
				+    FfxResource                 exposure;                           ///< An optional <c><i>FfxResource</i></c> containing a 1x1 exposure value.
			
 
				+    FfxResource                 reactive;                           ///< An optional <c><i>FfxResource</i></c> containing alpha value of reactive objects in the scene.
			
 
				+    FfxResource                 transparencyAndComposition;         ///< An optional <c><i>FfxResource</i></c> containing alpha value of special objects in the scene.
			
 
				+    FfxResource                 output;                             ///< A <c><i>FfxResource</i></c> containing the output color buffer for the current frame (at presentation resolution).
			
 
				+    FfxFloatCoords2D            jitterOffset;                       ///< The subpixel jitter offset applied to the camera.
			
 
				+    FfxFloatCoords2D            motionVectorScale;                  ///< The scale factor to apply to motion vectors.
			
 
				+    FfxDimensions2D             renderSize;                         ///< The resolution that was used for rendering the input resources.
			
 
				+    bool                        enableSharpening;                   ///< Enable an additional sharpening pass.
			
 
				+    float                       sharpness;                          ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness.
			
 
				+    float                       frameTimeDelta;                     ///< The time elapsed since the last frame (expressed in milliseconds).
			
 
				+    float                       preExposure;                        ///< The pre exposure value (must be > 0.0f)
			
 
				+    bool                        reset;                              ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
			
 
				+    float                       cameraNear;                         ///< The distance to the near plane of the camera.
			
 
				+    float                       cameraFar;                          ///< The distance to the far plane of the camera.
			
 
				+    float                       cameraFovAngleVertical;             ///< The camera angle field of view in the vertical direction (expressed in radians).
			
 
				+    float                       viewSpaceToMetersFactor;            ///< The scale factor to convert view space units to meters
			
 
				+
			
 
				+    // EXPERIMENTAL reactive mask generation parameters
			
 
				+    bool                        enableAutoReactive;                 ///< A boolean value to indicate internal reactive autogeneration should be used
			
 
				+    FfxResource                 colorOpaqueOnly;                    ///< A <c><i>FfxResource</i></c> containing the opaque only color buffer for the current frame (at render resolution).
			
 
				+    float                       autoTcThreshold;                    ///< Cutoff value for TC
			
 
				+    float                       autoTcScale;                        ///< A value to scale the transparency and composition mask
			
 
				+    float                       autoReactiveScale;                  ///< A value to scale the reactive mask
			
 
				+    float                       autoReactiveMax;                    ///< A value to clamp the reactive mask
			
 
				+
			
 
				+    // -- GODOT start --
			
 
				+    float                       reprojectionMatrix[16];             ///< The matrix used for reprojecting pixels with invalid motion vectors by using the depth.
			
 
				+	// -- GODOT end --
			
 
				+
			
 
				+} FfxFsr2DispatchDescription;
			
 
				+
			
 
				+/// A structure encapsulating the parameters for automatic generation of a reactive mask
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef struct FfxFsr2GenerateReactiveDescription {
			
 
				+
			
 
				+    FfxCommandList              commandList;                        ///< The <c><i>FfxCommandList</i></c> to record FSR2 rendering commands into.
			
 
				+    FfxResource                 colorOpaqueOnly;                    ///< A <c><i>FfxResource</i></c> containing the opaque only color buffer for the current frame (at render resolution).
			
 
				+    FfxResource                 colorPreUpscale;                    ///< A <c><i>FfxResource</i></c> containing the opaque+translucent color buffer for the current frame (at render resolution).
			
 
				+    FfxResource                 outReactive;                        ///< A <c><i>FfxResource</i></c> containing the surface to generate the reactive mask into.
			
 
				+    FfxDimensions2D             renderSize;                         ///< The resolution that was used for rendering the input resources.
			
 
				+    float                       scale;                              ///< A value to scale the output
			
 
				+    float                       cutoffThreshold;                    ///< A threshold value to generate a binary reactive mask
			
 
				+    float                       binaryValue;                        ///< A value to set for the binary reactive mask
			
 
				+    uint32_t                    flags;                              ///< Flags to determine how to generate the reactive mask
			
 
				+} FfxFsr2GenerateReactiveDescription;
			
 
				+
			
 
				+/// A structure encapsulating the FidelityFX Super Resolution 2 context.
			
 
				+///
			
 
				+/// This sets up an object which contains all persistent internal data and
			
 
				+/// resources that are required by FSR2.
			
 
				+///
			
 
				+/// The <c><i>FfxFsr2Context</i></c> object should have a lifetime matching
			
 
				+/// your use of FSR2. Before destroying the FSR2 context care should be taken
			
 
				+/// to ensure the GPU is not accessing the resources created or used by FSR2.
			
 
				+/// It is therefore recommended that the GPU is idle before destroying the
			
 
				+/// FSR2 context.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef struct FfxFsr2Context {
			
 
				+
			
 
				+    uint32_t                    data[FFX_FSR2_CONTEXT_SIZE];        ///< An opaque set of <c>uint32_t</c> which contain the data for the context.
			
 
				+} FfxFsr2Context;
			
 
				+
			
 
				+/// Create a FidelityFX Super Resolution 2 context from the parameters
			
 
				+/// programmed to the <c><i>FfxFsr2ContextDescription</i></c> structure.
			
 
				+///
			
 
				+/// The context structure is the main object used to interact with the FSR2
			
 
				+/// API, and is responsible for the management of the internal resources used
			
 
				+/// by the FSR2 algorithm. When this API is called, multiple calls will be
			
 
				+/// made via the pointers contained in the <c><i>callbacks</i></c> structure.
			
 
				+/// These callbacks will attempt to retrieve the device capabilities, and
			
 
				+/// create the internal resources, and pipelines required by FSR2's
			
 
				+/// frame-to-frame function. Depending on the precise configuration used when
			
 
				+/// creating the <c><i>FfxFsr2Context</i></c> a different set of resources and
			
 
				+/// pipelines might be requested via the callback functions.
			
 
				+///
			
 
				+/// The flags included in the <c><i>flags</i></c> field of
			
 
				+/// <c><i>FfxFsr2ContextDescription</i></c> must match the configuration of your
			
 
				+/// application as well as the intended use of FSR2. It is important that these
			
 
				+/// flags are set correctly (as well as a correctly programmed
			
 
				+/// <c><i>FfxFsr2DispatchDescription</i></c>) to ensure correct operation. It is
			
 
				+/// recommended to consult the overview documentation for further details on
			
 
				+/// how FSR2 should be integrated into an application.
			
 
				+///
			
 
				+/// When the <c><i>FfxFsr2Context</i></c> is created, you should use the
			
 
				+/// <c><i>ffxFsr2ContextDispatch</i></c> function each frame where FSR2
			
 
				+/// upscaling should be applied. See the documentation of
			
 
				+/// <c><i>ffxFsr2ContextDispatch</i></c> for more details.
			
 
				+///
			
 
				+/// The <c><i>FfxFsr2Context</i></c> should be destroyed when use of it is
			
 
				+/// completed, typically when an application is unloaded or FSR2 upscaling is
			
 
				+/// disabled by a user. To destroy the FSR2 context you should call
			
 
				+/// <c><i>ffxFsr2ContextDestroy</i></c>.
			
 
				+///
			
 
				+/// @param [out] context                A pointer to a <c><i>FfxFsr2Context</i></c> structure to populate.
			
 
				+/// @param [in]  contextDescription     A pointer to a <c><i>FfxFsr2ContextDescription</i></c> structure.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                              The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_CODE_NULL_POINTER         The operation failed because either <c><i>context</i></c> or <c><i>contextDescription</i></c> was <c><i>NULL</i></c>.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_INCOMPLETE_INTERFACE      The operation failed because the <c><i>FfxFsr2ContextDescription.callbacks</i></c>  was not fully specified.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_BACKEND_API_ERROR         The operation failed because of an error returned from the backend.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextDescription* contextDescription);
			
 
				+
			
 
				+/// Dispatch the various passes that constitute FidelityFX Super Resolution 2.
			
 
				+///
			
 
				+/// FSR2 is a composite effect, meaning that it is comprised of multiple
			
 
				+/// constituent passes (implemented as one or more clears, copies and compute
			
 
				+/// dispatches). The <c><i>ffxFsr2ContextDispatch</i></c> function is the
			
 
				+/// function which (via the use of the functions contained in the
			
 
				+/// <c><i>callbacks</i></c> field of the <c><i>FfxFsr2Context</i></c>
			
 
				+/// structure) ultimately generates the sequence of graphics API calls required
			
 
				+/// each frame.
			
 
				+///
			
 
				+/// As with the creation of the <c><i>FfxFsr2Context</i></c> correctly
			
 
				+/// programming the <c><i>FfxFsr2DispatchDescription</i></c> is key to ensuring
			
 
				+/// the correct operation of FSR2. It is particularly important to ensure that
			
 
				+/// camera jitter is correctly applied to your application's projection matrix
			
 
				+/// (or camera origin for raytraced applications). FSR2 provides the
			
 
				+/// <c><i>ffxFsr2GetJitterPhaseCount</i></c> and
			
 
				+/// <c><i>ffxFsr2GetJitterOffset</i></c> entry points to help applications
			
 
				+/// correctly compute the camera jitter. Whatever jitter pattern is used by the
			
 
				+/// application it should be correctly programmed to the
			
 
				+/// <c><i>jitterOffset</i></c> field of the <c><i>dispatchDescription</i></c>
			
 
				+/// structure. For more guidance on camera jitter please consult the
			
 
				+/// documentation for <c><i>ffxFsr2GetJitterOffset</i></c> as well as the
			
 
				+/// accompanying overview documentation for FSR2.
			
 
				+///
			
 
				+/// @param [in] context                 A pointer to a <c><i>FfxFsr2Context</i></c> structure.
			
 
				+/// @param [in] dispatchDescription     A pointer to a <c><i>FfxFsr2DispatchDescription</i></c> structure.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                              The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_CODE_NULL_POINTER         The operation failed because either <c><i>context</i></c> or <c><i>dispatchDescription</i></c> was <c><i>NULL</i></c>.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_OUT_OF_RANGE              The operation failed because <c><i>dispatchDescription.renderSize</i></c> was larger than the maximum render resolution.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_NULL_DEVICE               The operation failed because the device inside the context was <c><i>NULL</i></c>.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_BACKEND_API_ERROR         The operation failed because of an error returned from the backend.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* context, const FfxFsr2DispatchDescription* dispatchDescription);
			
 
				+
			
 
				+/// A helper function to generate a reactive mask from an opaque-only texture and one containing translucent objects.
			
 
				+///
			
 
				+/// @param [in] context                 A pointer to a <c><i>FfxFsr2Context</i></c> structure.
			
 
				+/// @param [in] params                  A pointer to a <c><i>FfxFsr2GenerateReactiveDescription</i></c> structure
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                              The operation completed successfully.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const FfxFsr2GenerateReactiveDescription* params);
			
 
				+
			
 
				+/// Destroy the FidelityFX Super Resolution context.
			
 
				+///
			
 
				+/// @param [out] context                A pointer to a <c><i>FfxFsr2Context</i></c> structure to destroy.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                              The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_CODE_NULL_POINTER         The operation failed because <c><i>context</i></c> was <c><i>NULL</i></c>.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context);
			
 
				+
			
 
				+/// Get the upscale ratio from the quality mode.
			
 
				+///
			
 
				+/// The following table enumerates the mapping of the quality modes to
			
 
				+/// per-dimension scaling ratios.
			
 
				+///
			
 
				+/// Quality preset                                        | Scale factor
			
 
				+/// ----------------------------------------------------- | -------------
			
 
				+/// <c><i>FFX_FSR2_QUALITY_MODE_QUALITY</i></c>           | 1.5x
			
 
				+/// <c><i>FFX_FSR2_QUALITY_MODE_BALANCED</i></c>          | 1.7x
			
 
				+/// <c><i>FFX_FSR2_QUALITY_MODE_PERFORMANCE</i></c>       | 2.0x
			
 
				+/// <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> | 3.0x
			
 
				+///
			
 
				+/// Passing an invalid <c><i>qualityMode</i></c> will return 0.0f.
			
 
				+///
			
 
				+/// @param [in] qualityMode             The quality mode preset.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The per-dimension upscaling ratio for
			
 
				+/// <c><i>qualityMode</i></c> according to the table above.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMode);
			
 
				+
			
 
				+/// A helper function to calculate the rendering resolution from a target
			
 
				+/// resolution and desired quality level.
			
 
				+///
			
 
				+/// This function applies the scaling factor returned by
			
 
				+/// <c><i>ffxFsr2GetUpscaleRatioFromQualityMode</i></c> to each dimension.
			
 
				+///
			
 
				+/// @param [out] renderWidth            A pointer to a <c>uint32_t</c> which will hold the calculated render resolution width.
			
 
				+/// @param [out] renderHeight           A pointer to a <c>uint32_t</c> which will hold the calculated render resolution height.
			
 
				+/// @param [in] displayWidth            The target display resolution width.
			
 
				+/// @param [in] displayHeight           The target display resolution height.
			
 
				+/// @param [in] qualityMode             The desired quality mode for FSR 2 upscaling.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                              The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_INVALID_POINTER           Either <c><i>renderWidth</i></c> or <c><i>renderHeight</i></c> was <c>NULL</c>.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_INVALID_ENUM              An invalid quality mode was specified.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
			
 
				+    uint32_t* renderWidth,
			
 
				+    uint32_t* renderHeight,
			
 
				+    uint32_t displayWidth,
			
 
				+    uint32_t displayHeight,
			
 
				+    FfxFsr2QualityMode qualityMode);
			
 
				+
			
 
				+/// A helper function to calculate the jitter phase count from display
			
 
				+/// resolution.
			
 
				+///
			
 
				+/// For more detailed information about the application of camera jitter to
			
 
				+/// your application's rendering please refer to the
			
 
				+/// <c><i>ffxFsr2GetJitterOffset</i></c> function.
			
 
				+/// 
			
 
				+/// The table below shows the jitter phase count which this function
			
 
				+/// would return for each of the quality presets.
			
 
				+///
			
 
				+/// Quality preset                                        | Scale factor  | Phase count
			
 
				+/// ----------------------------------------------------- | ------------- | ---------------
			
 
				+/// <c><i>FFX_FSR2_QUALITY_MODE_QUALITY</i></c>           | 1.5x          | 18
			
 
				+/// <c><i>FFX_FSR2_QUALITY_MODE_BALANCED</i></c>          | 1.7x          | 23
			
 
				+/// <c><i>FFX_FSR2_QUALITY_MODE_PERFORMANCE</i></c>       | 2.0x          | 32
			
 
				+/// <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> | 3.0x          | 72
			
 
				+/// Custom                                                | [1..n]x       | ceil(8*n^2)
			
 
				+///
			
 
				+/// @param [in] renderWidth             The render resolution width.
			
 
				+/// @param [in] displayWidth            The display resolution width.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The jitter phase count for the scaling factor between <c><i>renderWidth</i></c> and <c><i>displayWidth</i></c>.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth);
			
 
				+
			
 
				+/// A helper function to calculate the subpixel jitter offset.
			
 
				+///
			
 
				+/// FSR2 relies on the application to apply sub-pixel jittering while rendering.
			
 
				+/// This is typically included in the projection matrix of the camera. To make
			
 
				+/// the application of camera jitter simple, the FSR2 API provides a small set
			
 
				+/// of utility functions which compute the sub-pixel jitter offset for a
			
 
				+/// particular frame within a sequence of separate jitter offsets. To begin, the
			
 
				+/// index within the jitter phase must be computed. To calculate the
			
 
				+/// sequence's length, you can call the <c><i>ffxFsr2GetJitterPhaseCount</i></c>
			
 
				+/// function. The index should be a value which is incremented each frame modulo
			
 
				+/// the length of the sequence computed by <c><i>ffxFsr2GetJitterPhaseCount</i></c>.
			
 
				+/// The index within the jitter phase is passed to
			
 
				+/// <c><i>ffxFsr2GetJitterOffset</i></c> via the <c><i>index</i></c> parameter.
			
 
				+///
			
 
				+/// This function uses a Halton(2,3) sequence to compute the jitter offset.
			
 
				+/// The ultimate index used for the sequence is <c><i>index</i></c> %
			
 
				+/// <c><i>phaseCount</i></c>.
			
 
				+///
			
 
				+/// It is important to understand that the values returned from the
			
 
				+/// <c><i>ffxFsr2GetJitterOffset</i></c> function are in unit pixel space, and
			
 
				+/// in order to composite this correctly into a projection matrix we must
			
 
				+/// convert them into projection offsets. This is done as per the pseudo code
			
 
				+/// listing which is shown below.
			
 
				+///
			
 
				+///     const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(renderWidth, displayWidth);
			
 
				+///
			
 
				+///     float jitterX = 0;
			
 
				+///     float jitterY = 0;
			
 
				+///     ffxFsr2GetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount);
			
 
				+/// 
			
 
				+///     const float jitterX = 2.0f * jitterX / (float)renderWidth;
			
 
				+///     const float jitterY = -2.0f * jitterY / (float)renderHeight;
			
 
				+///     const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(jitterX, jitterY, 0));
			
 
				+///     const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix;
			
 
				+/// 
			
 
				+/// Jitter should be applied to all rendering. This includes opaque, alpha
			
 
				+/// transparent, and raytraced objects. For rasterized objects, the sub-pixel
			
 
				+/// jittering values calculated by the <c><i>ffxFsr2GetJitterOffset</i></c>
			
 
				+/// function can be applied to the camera projection matrix which is ultimately
			
 
				+/// used to perform transformations during vertex shading. For raytraced
			
 
				+/// rendering, the sub-pixel jitter should be applied to the ray's origin,
			
 
				+/// often the camera's position.
			
 
				+/// 
			
 
				+/// Whether you elect to use the <c><i>ffxFsr2GetJitterOffset</i></c> function
			
 
				+/// or your own sequence generator, you must program the
			
 
				+/// <c><i>jitterOffset</i></c> field of the
			
 
				+/// <c><i>FfxFsr2DispatchDescription</i></c> structure in order to inform FSR2
			
 
				+/// of the jitter offset that has been applied in order to render each frame.
			
 
				+/// 
			
 
				+/// If not using the recommended <c><i>ffxFsr2GetJitterOffset</i></c> function,
			
 
				+/// care should be taken that your jitter sequence never generates a null vector;
			
 
				+/// that is, a value of 0 in both the X and Y dimensions.
			
 
				+///
			
 
				+/// @param [out] outX                   A pointer to a <c>float</c> which will contain the subpixel jitter offset for the x dimension.
			
 
				+/// @param [out] outY                   A pointer to a <c>float</c> which will contain the subpixel jitter offset for the y dimension.
			
 
				+/// @param [in] index                   The index within the jitter sequence.
			
 
				+/// @param [in] phaseCount              The length of jitter phase. See <c><i>ffxFsr2GetJitterPhaseCount</i></c>.
			
 
				+/// 
			
 
				+/// @retval
			
 
				+/// FFX_OK                              The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_INVALID_POINTER           Either <c><i>outX</i></c> or <c><i>outY</i></c> was <c>NULL</c>.
			
 
				+/// @retval
			
 
				+/// FFX_ERROR_INVALID_ARGUMENT          Argument <c><i>phaseCount</i></c> must be greater than 0.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount);
			
 
				+
			
 
				+/// A helper function to check if a resource is
			
 
				+/// <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
			
 
				+///
			
 
				+/// @param [in] resource                A <c><i>FfxResource</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// true                                The <c><i>resource</i></c> was not <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
			
 
				+/// @returns
			
 
				+/// false                               The <c><i>resource</i></c> was <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+FFX_API bool ffxFsr2ResourceIsNull(FfxResource resource);
			
 
				+
			
 
				+#if defined(__cplusplus)
			
 
				+}
			
 
				+#endif // #if defined(__cplusplus)
			
--- a/thirdparty/amd-fsr2/ffx_fsr2_interface.h
+++ b/thirdparty/amd-fsr2/ffx_fsr2_interface.h
@@ -0,0 +1,395 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include "ffx_assert.h"
			
 
				+#include "ffx_types.h"
			
 
				+#include "ffx_error.h"
			
 
				+
			
 
				+// Include the FSR2 resources defined in the HLSL code. This is shared here to avoid getting out of sync.
			
 
				+#define FFX_CPU
			
 
				+#include "shaders/ffx_fsr2_resources.h"
			
 
				+#include "shaders/ffx_fsr2_common.h"
			
 
				+
			
 
				+#if defined(__cplusplus)
			
 
				+extern "C" {
			
 
				+#endif // #if defined(__cplusplus)
			
 
				+
			
 
				+FFX_FORWARD_DECLARE(FfxFsr2Interface);
			
 
				+
			
 
				+/// An enumeration of all the passes which constitute the FSR2 algorithm.
			
 
				+///
			
 
				+/// FSR2 is implemented as a composite of several compute passes each
			
 
				+/// computing a key part of the final result. Each call to the 
			
 
				+/// <c><i>FfxFsr2ScheduleGpuJobFunc</i></c> callback function will
			
 
				+/// correspond to a single pass included in <c><i>FfxFsr2Pass</i></c>. For a
			
 
				+/// more comprehensive description of each pass, please refer to the FSR2
			
 
				+/// reference documentation.
			
 
				+///
			
 
				+/// Please note in some cases e.g.: <c><i>FFX_FSR2_PASS_ACCUMULATE</i></c>
			
 
				+/// and <c><i>FFX_FSR2_PASS_ACCUMULATE_SHARPEN</i></c> either one pass or the
			
 
				+/// other will be used (they are mutually exclusive). The choice of which will
			
 
				+/// depend on the way the <c><i>FfxFsr2Context</i></c> is created and the
			
 
				+/// precise contents of <c><i>FfxFsr2DispatchDescription</i></c> each time a call
			
 
				+/// is made to <c><i>ffxFsr2ContextDispatch</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef enum FfxFsr2Pass {
			
 
				+
			
 
				+    FFX_FSR2_PASS_DEPTH_CLIP = 0,                                       ///< A pass which performs depth clipping.
			
 
				+    FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH = 1,                       ///< A pass which performs reconstruction of previous frame's depth.
			
 
				+    FFX_FSR2_PASS_LOCK = 2,                                             ///< A pass which calculates pixel locks.
			
 
				+    FFX_FSR2_PASS_ACCUMULATE = 3,                                       ///< A pass which performs upscaling.
			
 
				+    FFX_FSR2_PASS_ACCUMULATE_SHARPEN = 4,                               ///< A pass which performs upscaling when sharpening is used.
			
 
				+    FFX_FSR2_PASS_RCAS = 5,                                             ///< A pass which performs sharpening.
			
 
				+    FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID = 6,                        ///< A pass which generates the luminance mipmap chain for the current frame.
			
 
				+    FFX_FSR2_PASS_GENERATE_REACTIVE = 7,                                ///< An optional pass to generate a reactive mask
			
 
				+    FFX_FSR2_PASS_TCR_AUTOGENERATE = 8,                                 ///< An optional pass to generate a texture-and-composition and reactive masks
			
 
				+
			
 
				+    FFX_FSR2_PASS_COUNT                                                 ///< The number of passes performed by FSR2.
			
 
				+} FfxFsr2Pass;
			
 
				+
			
 
				+typedef enum FfxFsr2MsgType {
			
 
				+    FFX_FSR2_MESSAGE_TYPE_ERROR = 0,
			
 
				+    FFX_FSR2_MESSAGE_TYPE_WARNING = 1,
			
 
				+    FFX_FSR2_MESSAGE_TYPE_COUNT
			
 
				+} FfxFsr2MsgType;
			
 
				+
			
 
				+/// Create and initialize the backend context.
			
 
				+///
			
 
				+/// The callback function sets up the backend context for rendering.
			
 
				+/// It will create or reference the device and create required internal data structures.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [in] device                              The FfxDevice obtained by ffxGetDevice(DX12/VK/...).
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode (*FfxFsr2CreateBackendContextFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    FfxDevice device);
			
 
				+
			
 
				+/// Get a list of capabilities of the device.
			
 
				+///
			
 
				+/// When creating an <c><i>FfxFsr2Context</i></c> it is desirable for the FSR2
			
 
				+/// core implementation to be aware of certain characteristics of the platform
			
 
				+/// that is being targeted. This is because some optimizations which FSR2
			
 
				+/// attempts to perform are more effective on certain classes of hardware than
			
 
				+/// others, or are not supported by older hardware. In order to avoid cases
			
 
				+/// where optimizations actually have the effect of decreasing performance, or
			
 
				+/// reduce the breadth of support provided by FSR2, FSR2 queries the
			
 
				+/// capabilities of the device to make such decisions.
			
 
				+///
			
 
				+/// For target platforms with fixed hardware support you need not implement
			
 
				+/// this callback function by querying the device, but instead may hardcode
			
 
				+/// what features are available on the platform.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [out] outDeviceCapabilities              The device capabilities structure to fill out.
			
 
				+/// @param [in] device                              The device to query for capabilities.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode(*FfxFsr2GetDeviceCapabilitiesFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    FfxDeviceCapabilities* outDeviceCapabilities,
			
 
				+    FfxDevice device);
			
 
				+
			
 
				+/// Destroy the backend context and dereference the device.
			
 
				+///
			
 
				+/// This function is called when the <c><i>FfxFsr2Context</i></c> is destroyed.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode(*FfxFsr2DestroyBackendContextFunc)(
			
 
				+    FfxFsr2Interface* backendInterface);
			
 
				+
			
 
				+/// Create a resource.
			
 
				+///
			
 
				+/// This callback is intended for the backend to create internal resources.
			
 
				+///
			
 
				+/// Please note: It is also possible that the creation of resources might
			
 
				+/// itself cause additional resources to be created by simply calling the
			
 
				+/// <c><i>FfxFsr2CreateResourceFunc</i></c> function pointer again. This is
			
 
				+/// useful when handling the initial creation of resources which must be
			
 
				+/// initialized. The flow in such a case would be an initial call to create the
			
 
				+/// CPU-side resource, another to create the GPU-side resource, and then a call
			
 
				+/// to schedule a copy render job to move the data between the two. Typically
			
 
				+/// this type of function call flow is only seen during the creation of an
			
 
				+/// <c><i>FfxFsr2Context</i></c>.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [in] createResourceDescription           A pointer to a <c><i>FfxCreateResourceDescription</i></c>.
			
 
				+/// @param [out] outResource                        A pointer to a <c><i>FfxResource</i></c> object.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode (*FfxFsr2CreateResourceFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    const FfxCreateResourceDescription* createResourceDescription,
			
 
				+    FfxResourceInternal* outResource);
			
 
				+
			
 
				+/// Register a resource in the backend for the current frame.
			
 
				+///
			
 
				+/// Since FSR2 and the backend are not aware how many different
			
 
				+/// resources will get passed to FSR2 over time, it's not safe 
			
 
				+/// to register all resources simultaneously in the backend.
			
 
				+/// Also passed resources may not be valid after the dispatch call.
			
 
				+/// As a result it's safest to register them as FfxResourceInternal 
			
 
				+/// and clear them at the end of the dispatch call.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [in] inResource                          A pointer to a <c><i>FfxResource</i></c>.
			
 
				+/// @param [out] outResource                        A pointer to a <c><i>FfxResourceInternal</i></c> object.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode(*FfxFsr2RegisterResourceFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    const FfxResource* inResource,
			
 
				+    FfxResourceInternal* outResource);
			
 
				+
			
 
				+/// Unregister all temporary FfxResourceInternal from the backend.
			
 
				+///
			
 
				+/// Unregister FfxResourceInternal referencing resources passed to 
			
 
				+/// a function as a parameter.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+///
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode(*FfxFsr2UnregisterResourcesFunc)(
			
 
				+    FfxFsr2Interface* backendInterface);
			
 
				+
			
 
				+/// Retrieve a <c><i>FfxResourceDescription</i></c> matching a
			
 
				+/// <c><i>FfxResource</i></c> structure. 
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [in] resource                            The <c><i>FfxResourceInternal</i></c> object to describe.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A description of the resource.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxResourceDescription (*FfxFsr2GetResourceDescriptionFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    FfxResourceInternal resource);
			
 
				+
			
 
				+/// Destroy a resource
			
 
				+///
			
 
				+/// This callback is intended for the backend to release an internal resource.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [in] resource                            The <c><i>FfxResourceInternal</i></c> object to destroy.
			
 
				+/// 
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode (*FfxFsr2DestroyResourceFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    FfxResourceInternal resource);
			
 
				+
			
 
				+/// Create a render pipeline.
			
 
				+///
			
 
				+/// A rendering pipeline contains the shader as well as resource bindpoints
			
 
				+/// and samplers.
			
 
				+/// 
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [in] pass                                The identifier for the pass.
			
 
				+/// @param [in] pipelineDescription                 A pointer to a <c><i>FfxPipelineDescription</i></c> describing the pipeline to be created.
			
 
				+/// @param [out] outPipeline                        A pointer to a <c><i>FfxPipelineState</i></c> structure which should be populated.
			
 
				+/// 
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode (*FfxFsr2CreatePipelineFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    FfxFsr2Pass pass,
			
 
				+    const FfxPipelineDescription* pipelineDescription,
			
 
				+    FfxPipelineState* outPipeline);
			
 
				+
			
 
				+/// Destroy a render pipeline.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [out] pipeline                           A pointer to a <c><i>FfxPipelineState</i></c> structure which should be released.
			
 
				+/// 
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode (*FfxFsr2DestroyPipelineFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    FfxPipelineState* pipeline);
			
 
				+
			
 
				+/// Schedule a render job to be executed on the next call of
			
 
				+/// <c><i>FfxFsr2ExecuteGpuJobsFunc</i></c>.
			
 
				+///
			
 
				+/// Render jobs can perform one of three different tasks: clear, copy or
			
 
				+/// compute dispatches.
			
 
				+///
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [in] job                                 A pointer to a <c><i>FfxGpuJobDescription</i></c> structure.
			
 
				+/// 
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode (*FfxFsr2ScheduleGpuJobFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    const FfxGpuJobDescription* job);
			
 
				+
			
 
				+/// Execute scheduled render jobs on the <c><i>commandList</i></c> provided.
			
 
				+/// 
			
 
				+/// The recording of the graphics API commands should take place in this
			
 
				+/// callback function, the render jobs which were previously enqueued (via
			
 
				+/// callbacks made to <c><i>FfxFsr2ScheduleGpuJobFunc</i></c>) should be
			
 
				+/// processed in the order they were received. Advanced users might choose to
			
 
				+/// reorder the rendering jobs, but should do so with care to respect the
			
 
				+/// resource dependencies.
			
 
				+/// 
			
 
				+/// Depending on the precise contents of <c><i>FfxFsr2DispatchDescription</i></c> a
			
 
				+/// different number of render jobs might have previously been enqueued (for
			
 
				+/// example if sharpening is toggled on and off).
			
 
				+/// 
			
 
				+/// @param [in] backendInterface                    A pointer to the backend interface.
			
 
				+/// @param [in] commandList                         A pointer to a <c><i>FfxCommandList</i></c> structure.
			
 
				+/// 
			
 
				+/// @retval
			
 
				+/// FFX_OK                                          The operation completed successfully.
			
 
				+/// @retval
			
 
				+/// Anything else                                   The operation failed.
			
 
				+/// 
			
 
				+/// @ingroup FSR2
			
 
				+typedef FfxErrorCode (*FfxFsr2ExecuteGpuJobsFunc)(
			
 
				+    FfxFsr2Interface* backendInterface,
			
 
				+    FfxCommandList commandList);
			
 
				+
			
 
				+/// Pass a string message
			
 
				+///
			
 
				+/// Used for debug messages.
			
 
				+///
			
 
				+/// @param [in] type                       The type of message.
			
 
				+/// @param [in] message                    A string message to pass.
			
 
				+///
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef void(*FfxFsr2Message)(
			
 
				+    FfxFsr2MsgType type,
			
 
				+    const wchar_t* message);
			
 
				+
			
 
				+/// A structure encapsulating the interface between the core implementation of
			
 
				+/// the FSR2 algorithm and any graphics API that it should ultimately call.
			
 
				+/// 
			
 
				+/// This set of functions serves as an abstraction layer between FSR2 and the
			
 
				+/// API used to implement it. While FSR2 ships with backends for DirectX12 and
			
 
				+/// Vulkan, it is possible to implement your own backend for other platforms or
			
 
				+/// which sits on top of your engine's own abstraction layer. For details on the
			
 
				+/// expectations of what each function should do you should refer to the
			
 
				+/// description of the following function pointer types:
			
 
				+/// 
			
 
				+///     <c><i>FfxFsr2CreateBackendContextFunc</i></c>
			
 
				+///     <c><i>FfxFsr2GetDeviceCapabilitiesFunc</i></c>
			
 
				+///     <c><i>FfxFsr2DestroyBackendContextFunc</i></c>
			
 
				+///     <c><i>FfxFsr2CreateResourceFunc</i></c>
			
 
				+///     <c><i>FfxFsr2GetResourceDescriptionFunc</i></c>
			
 
				+///     <c><i>FfxFsr2DestroyResourceFunc</i></c>
			
 
				+///     <c><i>FfxFsr2CreatePipelineFunc</i></c>
			
 
				+///     <c><i>FfxFsr2DestroyPipelineFunc</i></c>
			
 
				+///     <c><i>FfxFsr2ScheduleGpuJobFunc</i></c>
			
 
				+///     <c><i>FfxFsr2ExecuteGpuJobsFunc</i></c>
			
 
				+///
			
 
				+/// Depending on the graphics API that is abstracted by the backend, it may be
			
 
				+/// required that the backend is to some extent stateful. To ensure that
			
 
				+/// applications retain full control to manage the memory used by FSR2, the
			
 
				+/// <c><i>scratchBuffer</i></c> and <c><i>scratchBufferSize</i></c> fields are
			
 
				+/// provided. A backend should provide a means of specifying how much scratch
			
 
				+/// memory is required for its internal implementation (e.g: via a function
			
 
				+/// or constant value). The application is then responsible for allocating that
			
 
				+/// memory and providing it when setting up the FSR2 backend. Backends provided
			
 
				+/// with FSR2 do not perform dynamic memory allocations, and instead
			
 
				+/// suballocate all memory from the scratch buffers provided.
			
 
				+///
			
 
				+/// The <c><i>scratchBuffer</i></c> and <c><i>scratchBufferSize</i></c> fields
			
 
				+/// should be populated according to the requirements of each backend. For
			
 
				+/// example, if using the DirectX 12 backend you should call the 
			
 
				+/// <c><i>ffxFsr2GetScratchMemorySizeDX12</i></c> function. It is not required
			
 
				+/// that custom backend implementations use a scratch buffer.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef struct FfxFsr2Interface {
			
 
				+
			
 
				+    FfxFsr2CreateBackendContextFunc         fpCreateBackendContext;         ///< A callback function to create and initialize the backend context.
			
 
				+    FfxFsr2GetDeviceCapabilitiesFunc        fpGetDeviceCapabilities;        ///< A callback function to query device capabilities.
			
 
				+    FfxFsr2DestroyBackendContextFunc        fpDestroyBackendContext;        ///< A callback function to destroy the backend context. This also dereferences the device.
			
 
				+    FfxFsr2CreateResourceFunc               fpCreateResource;               ///< A callback function to create a resource.
			
 
				+    FfxFsr2RegisterResourceFunc             fpRegisterResource;             ///< A callback function to register an external resource.
			
 
				+    FfxFsr2UnregisterResourcesFunc          fpUnregisterResources;          ///< A callback function to unregister external resources.
			
 
				+    FfxFsr2GetResourceDescriptionFunc       fpGetResourceDescription;       ///< A callback function to retrieve a resource description.
			
 
				+    FfxFsr2DestroyResourceFunc              fpDestroyResource;              ///< A callback function to destroy a resource.
			
 
				+    FfxFsr2CreatePipelineFunc               fpCreatePipeline;               ///< A callback function to create a render or compute pipeline.
			
 
				+    FfxFsr2DestroyPipelineFunc              fpDestroyPipeline;              ///< A callback function to destroy a render or compute pipeline.
			
 
				+    FfxFsr2ScheduleGpuJobFunc               fpScheduleGpuJob;               ///< A callback function to schedule a render job.
			
 
				+    FfxFsr2ExecuteGpuJobsFunc               fpExecuteGpuJobs;               ///< A callback function to execute all queued render jobs.
			
 
				+
			
 
				+    void*                                   scratchBuffer;                  ///< A preallocated buffer for memory utilized internally by the backend.
			
 
				+    size_t                                  scratchBufferSize;              ///< Size of the buffer pointed to by <c><i>scratchBuffer</i></c>.
			
 
				+} FfxFsr2Interface;
			
 
				+
			
 
				+#if defined(__cplusplus)
			
 
				+}
			
 
				+#endif // #if defined(__cplusplus)
			
--- a/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h
+++ b/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h
@@ -0,0 +1,46 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+// @internal
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH = 16;
			
 
				+static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT = 16;
			
 
				+static const float ffxFsr2MaximumBias[] = {
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.876f,	1.809f,	1.772f,	1.753f,	1.748f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.869f,	1.801f,	1.764f,	1.745f,	1.739f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.976f,	1.841f,	1.774f,	1.737f,	1.716f,	1.71f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.914f,	1.784f,	1.716f,	1.673f,	1.649f,	1.641f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.793f,	1.676f,	1.604f,	1.562f,	1.54f,	1.533f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.802f,	1.619f,	1.536f,	1.492f,	1.467f,	1.454f,	1.449f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.812f,	1.575f,	1.496f,	1.456f,	1.432f,	1.416f,	1.408f,	1.405f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.555f,	1.479f,	1.438f,	1.413f,	1.398f,	1.387f,	1.381f,	1.379f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.812f,	1.555f,	1.474f,	1.43f,	1.404f,	1.387f,	1.376f,	1.368f,	1.363f,	1.362f,
			
 
				+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.802f,	1.575f,	1.479f,	1.43f,	1.401f,	1.382f,	1.369f,	1.36f,	1.354f,	1.351f,	1.35f,
			
 
				+	2.0f,	2.0f,	1.976f,	1.914f,	1.793f,	1.619f,	1.496f,	1.438f,	1.404f,	1.382f,	1.367f,	1.357f,	1.349f,	1.344f,	1.341f,	1.34f,
			
 
				+	1.876f,	1.869f,	1.841f,	1.784f,	1.676f,	1.536f,	1.456f,	1.413f,	1.387f,	1.369f,	1.357f,	1.347f,	1.341f,	1.336f,	1.333f,	1.332f,
			
 
				+	1.809f,	1.801f,	1.774f,	1.716f,	1.604f,	1.492f,	1.432f,	1.398f,	1.376f,	1.36f,	1.349f,	1.341f,	1.335f,	1.33f,	1.328f,	1.327f,
			
 
				+	1.772f,	1.764f,	1.737f,	1.673f,	1.562f,	1.467f,	1.416f,	1.387f,	1.368f,	1.354f,	1.344f,	1.336f,	1.33f,	1.326f,	1.323f,	1.323f,
			
 
				+	1.753f,	1.745f,	1.716f,	1.649f,	1.54f,	1.454f,	1.408f,	1.381f,	1.363f,	1.351f,	1.341f,	1.333f,	1.328f,	1.323f,	1.321f,	1.32f,
			
 
				+	1.748f,	1.739f,	1.71f,	1.641f,	1.533f,	1.449f,	1.405f,	1.379f,	1.362f,	1.35f,	1.34f,	1.332f,	1.327f,	1.323f,	1.32f,	1.319f,
			
 
				+
			
 
				+};
			
--- a/thirdparty/amd-fsr2/ffx_fsr2_private.h
+++ b/thirdparty/amd-fsr2/ffx_fsr2_private.h
@@ -0,0 +1,86 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+// Constants for FSR2 DX12 dispatches. Must be kept in sync with cbFSR2 in ffx_fsr2_callbacks_hlsl.h
			
 
				+typedef struct Fsr2Constants {
			
 
				+
			
 
				+    int32_t                     renderSize[2];
			
 
				+    int32_t                     maxRenderSize[2];
			
 
				+    int32_t                     displaySize[2];
			
 
				+    int32_t                     inputColorResourceDimensions[2];
			
 
				+    int32_t                     lumaMipDimensions[2];
			
 
				+    int32_t                     lumaMipLevelToUse;
			
 
				+    int32_t                     frameIndex;
			
 
				+    
			
 
				+    float                       deviceToViewDepth[4];
			
 
				+    float                       jitterOffset[2];
			
 
				+    float                       motionVectorScale[2];
			
 
				+    float                       downscaleFactor[2];
			
 
				+    float                       motionVectorJitterCancellation[2];
			
 
				+    float                       preExposure;
			
 
				+    float                       previousFramePreExposure;
			
 
				+    float                       tanHalfFOV;
			
 
				+    float                       jitterPhaseCount;
			
 
				+    float                       deltaTime;
			
 
				+    float                       dynamicResChangeFactor;
			
 
				+    float                       viewSpaceToMetersFactor;
			
 
				+
			
 
				+	// -- GODOT start --
			
 
				+    float                       pad;
			
 
				+    float                       reprojectionMatrix[16];
			
 
				+	// -- GODOT end --
			
 
				+} Fsr2Constants;
			
 
				+
			
 
				+struct FfxFsr2ContextDescription;
			
 
				+struct FfxDeviceCapabilities;
			
 
				+struct FfxPipelineState;
			
 
				+struct FfxResource;
			
 
				+
			
 
				+// FfxFsr2Context_Private
			
 
				+// The private implementation of the FSR2 context.
			
 
				+typedef struct FfxFsr2Context_Private {
			
 
				+
			
 
				+    FfxFsr2ContextDescription   contextDescription;
			
 
				+    Fsr2Constants               constants;
			
 
				+    FfxDevice                   device;
			
 
				+    FfxDeviceCapabilities       deviceCapabilities;
			
 
				+    FfxPipelineState            pipelineDepthClip;
			
 
				+    FfxPipelineState            pipelineReconstructPreviousDepth;
			
 
				+    FfxPipelineState            pipelineLock;
			
 
				+    FfxPipelineState            pipelineAccumulate;
			
 
				+    FfxPipelineState            pipelineAccumulateSharpen;
			
 
				+    FfxPipelineState            pipelineRCAS;
			
 
				+    FfxPipelineState            pipelineComputeLuminancePyramid;
			
 
				+    FfxPipelineState            pipelineGenerateReactive;
			
 
				+    FfxPipelineState            pipelineTcrAutogenerate;
			
 
				+
			
 
				+    // 2 arrays of resources, as e.g. FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs when bound as UAV
			
 
				+    FfxResourceInternal         srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT];
			
 
				+    FfxResourceInternal         uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT];
			
 
				+
			
 
				+    bool                        firstExecution;
			
 
				+    bool                        refreshPipelineStates;
			
 
				+    uint32_t                    resourceFrameIndex;
			
 
				+    float                       previousJitterOffset[2];
			
 
				+    int32_t                     jitterPhaseCountRemaining;
			
 
				+} FfxFsr2Context_Private;
			
--- a/thirdparty/amd-fsr2/ffx_types.h
+++ b/thirdparty/amd-fsr2/ffx_types.h
@@ -0,0 +1,367 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include <stdint.h>
			
 
				+// -- GODOT start --
			
 
				+#include <stdlib.h>
			
 
				+// -- GODOT end --
			
 
				+
			
 
				+#if defined (FFX_GCC)
			
 
				+/// FidelityFX exported functions
			
 
				+#define FFX_API
			
 
				+#else
			
 
				+/// FidelityFX exported functions
			
 
				+#define FFX_API __declspec(dllexport)
			
 
				+#endif // #if defined (FFX_GCC)
			
 
				+
			
 
				+/// Maximum supported number of simultaneously bound SRVs.
			
 
				+#define FFX_MAX_NUM_SRVS            16
			
 
				+
			
 
				+/// Maximum supported number of simultaneously bound UAVs.
			
 
				+#define FFX_MAX_NUM_UAVS            8
			
 
				+
			
 
				+/// Maximum number of constant buffers bound.
			
 
				+#define FFX_MAX_NUM_CONST_BUFFERS   2
			
 
				+
			
 
				+/// Maximum size of bound constant buffers.
			
 
				+#define FFX_MAX_CONST_SIZE          64
			
 
				+
			
 
				+/// Off by default warnings
			
 
				+#if defined(_MSC_VER)
			
 
				+#pragma warning(disable : 4365 4710 4820 5039)
			
 
				+#elif defined(__clang__)
			
 
				+#pragma clang diagnostic ignored "-Wunused-parameter"
			
 
				+#pragma clang diagnostic ignored "-Wmissing-field-initializers"
			
 
				+#pragma clang diagnostic ignored "-Wsign-compare"
			
 
				+#pragma clang diagnostic ignored "-Wunused-function"
			
 
				+#pragma clang diagnostic ignored "-Wignored-qualifiers"
			
 
				+#elif defined(__GNUC__)
			
 
				+#pragma GCC diagnostic ignored "-Wunused-function"
			
 
				+#endif
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C" {
			
 
				+#endif  // #ifdef __cplusplus
			
 
				+
			
 
				+/// An enumeration of surface formats.
			
 
				+typedef enum FfxSurfaceFormat {
			
 
				+
			
 
				+    FFX_SURFACE_FORMAT_UNKNOWN,                     ///< Unknown format
			
 
				+    FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS,       ///< 32 bit per channel, 4 channel typeless format
			
 
				+    FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT,          ///< 32 bit per channel, 4 channel float format
			
 
				+    FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT,          ///< 16 bit per channel, 4 channel float format
			
 
				+    FFX_SURFACE_FORMAT_R16G16B16A16_UNORM,          ///< 16 bit per channel, 4 channel unsigned normalized format
			
 
				+    FFX_SURFACE_FORMAT_R32G32_FLOAT,                ///< 32 bit per channel, 2 channel float format
			
 
				+    FFX_SURFACE_FORMAT_R32_UINT,                    ///< 32 bit per channel, 1 channel unsigned int format
			
 
				+    FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS,           ///<  8 bit per channel, 4 channel typeless format
			
 
				+    FFX_SURFACE_FORMAT_R8G8B8A8_UNORM,              ///<  8 bit per channel, 4 channel unsigned normalized format
			
 
				+    FFX_SURFACE_FORMAT_R11G11B10_FLOAT,             ///< 32 bit 3 channel float format
			
 
				+    FFX_SURFACE_FORMAT_R16G16_FLOAT,                ///< 16 bit per channel, 2 channel float format
			
 
				+    FFX_SURFACE_FORMAT_R16G16_UINT,                 ///< 16 bit per channel, 2 channel unsigned int format
			
 
				+    FFX_SURFACE_FORMAT_R16_FLOAT,                   ///< 16 bit per channel, 1 channel float format
			
 
				+    FFX_SURFACE_FORMAT_R16_UINT,                    ///< 16 bit per channel, 1 channel unsigned int format
			
 
				+    FFX_SURFACE_FORMAT_R16_UNORM,                   ///< 16 bit per channel, 1 channel unsigned normalized format
			
 
				+    FFX_SURFACE_FORMAT_R16_SNORM,                   ///< 16 bit per channel, 1 channel signed normalized format
			
 
				+    FFX_SURFACE_FORMAT_R8_UNORM,                    ///<  8 bit per channel, 1 channel unsigned normalized format
			
 
				+    FFX_SURFACE_FORMAT_R8_UINT,                     ///<  8 bit per channel, 1 channel unsigned int format
			
 
				+    FFX_SURFACE_FORMAT_R8G8_UNORM,                  ///<  8 bit per channel, 2 channel unsigned normalized format
			
 
				+    FFX_SURFACE_FORMAT_R32_FLOAT                    ///< 32 bit per channel, 1 channel float format
			
 
				+} FfxSurfaceFormat;
			
 
				+
			
 
				+/// An enumeration of resource usage.
			
 
				+typedef enum FfxResourceUsage {
			
 
				+
			
 
				+    FFX_RESOURCE_USAGE_READ_ONLY = 0,               ///< No usage flags indicate a resource is read only.
			
 
				+    FFX_RESOURCE_USAGE_RENDERTARGET = (1<<0),       ///< Indicates a resource will be used as render target.
			
 
				+    FFX_RESOURCE_USAGE_UAV = (1<<1),                ///< Indicates a resource will be used as UAV.
			
 
				+} FfxResourceUsage;
			
 
				+
			
 
				+/// An enumeration of resource states.
			
 
				+typedef enum FfxResourceStates {
			
 
				+
			
 
				+    FFX_RESOURCE_STATE_UNORDERED_ACCESS = (1<<0),   ///< Indicates a resource is in the state to be used as UAV.
			
 
				+    FFX_RESOURCE_STATE_COMPUTE_READ = (1 << 1),     ///< Indicates a resource is in the state to be read by compute shaders.
			
 
				+    FFX_RESOURCE_STATE_COPY_SRC = (1 << 2),         ///< Indicates a resource is in the state to be used as source in a copy command.
			
 
				+    FFX_RESOURCE_STATE_COPY_DEST = (1 << 3),        ///< Indicates a resource is in the state to be used as destination in a copy command.
			
 
				+    FFX_RESOURCE_STATE_GENERIC_READ = (FFX_RESOURCE_STATE_COPY_SRC | FFX_RESOURCE_STATE_COMPUTE_READ),  ///< Indicates a resource is in generic (slow) read state.
			
 
				+} FfxResourceStates;
			
 
				+
			
 
				+/// An enumeration of surface dimensions.
			
 
				+typedef enum FfxResourceDimension {
			
 
				+
			
 
				+    FFX_RESOURCE_DIMENSION_TEXTURE_1D,              ///< A resource with a single dimension.
			
 
				+    FFX_RESOURCE_DIMENSION_TEXTURE_2D,              ///< A resource with two dimensions.
			
 
				+} FfxResourceDimension;
			
 
				+
			
 
				+/// An enumeration of resource flags.
			
 
				+typedef enum FfxResourceFlags {
			
 
				+
			
 
				+    FFX_RESOURCE_FLAGS_NONE         = 0,            ///< No flags.
			
 
				+    FFX_RESOURCE_FLAGS_ALIASABLE    = (1<<0),       ///< A bit indicating a resource does not need to persist across frames.
			
 
				+} FfxResourceFlags;
			
 
				+
			
 
				+/// An enumeration of all resource view types.
			
 
				+typedef enum FfxResourceViewType {
			
 
				+
			
 
				+    FFX_RESOURCE_VIEW_UNORDERED_ACCESS,             ///< The resource view is an unordered access view (UAV).
			
 
				+    FFX_RESOURCE_VIEW_SHADER_READ,                  ///< The resource view is a shader resource view (SRV).
			
 
				+} FfxResourceViewType;
			
 
				+
			
 
				+/// The type of filtering to perform when reading a texture.
			
 
				+typedef enum FfxFilterType {
			
 
				+
			
 
				+    FFX_FILTER_TYPE_POINT,                          ///< Point sampling.
			
 
				+    FFX_FILTER_TYPE_LINEAR                          ///< Sampling with interpolation.
			
 
				+} FfxFilterType;
			
 
				+
			
 
				+/// An enumeration of all supported shader models.
			
 
				+typedef enum FfxShaderModel {
			
 
				+
			
 
				+    FFX_SHADER_MODEL_5_1,                           ///< Shader model 5.1.
			
 
				+    FFX_SHADER_MODEL_6_0,                           ///< Shader model 6.0.
			
 
				+    FFX_SHADER_MODEL_6_1,                           ///< Shader model 6.1.
			
 
				+    FFX_SHADER_MODEL_6_2,                           ///< Shader model 6.2.
			
 
				+    FFX_SHADER_MODEL_6_3,                           ///< Shader model 6.3.
			
 
				+    FFX_SHADER_MODEL_6_4,                           ///< Shader model 6.4.
			
 
				+    FFX_SHADER_MODEL_6_5,                           ///< Shader model 6.5.
			
 
				+    FFX_SHADER_MODEL_6_6,                           ///< Shader model 6.6.
			
 
				+    FFX_SHADER_MODEL_6_7,                           ///< Shader model 6.7.
			
 
				+} FfxShaderModel;
			
 
				+
			
 
				+/// An enumeration for different resource types.
			
 
				+typedef enum FfxResourceType {
			
 
				+
			
 
				+    FFX_RESOURCE_TYPE_BUFFER,                       ///< The resource is a buffer.
			
 
				+    FFX_RESOURCE_TYPE_TEXTURE1D,                    ///< The resource is a 1-dimensional texture.
			
 
				+    FFX_RESOURCE_TYPE_TEXTURE2D,                    ///< The resource is a 2-dimensional texture.
			
 
				+    FFX_RESOURCE_TYPE_TEXTURE3D,                    ///< The resource is a 3-dimensional texture.
			
 
				+} FfxResourceType;
			
 
				+
			
 
				+/// An enumeration for different heap types
			
 
				+typedef enum FfxHeapType {
			
 
				+
			
 
				+    FFX_HEAP_TYPE_DEFAULT = 0,                      ///< Local memory.
			
 
				+    FFX_HEAP_TYPE_UPLOAD                            ///< Heap used for uploading resources.
			
 
				+} FfxHeapType;
			
 
				+
			
 
				+/// An enumeration for different render job types
			
 
				+typedef enum FfxGpuJobType {
			
 
				+
			
 
				+    FFX_GPU_JOB_CLEAR_FLOAT = 0,                 ///< The GPU job is performing a floating-point clear.
			
 
				+    FFX_GPU_JOB_COPY = 1,                        ///< The GPU job is performing a copy.
			
 
				+    FFX_GPU_JOB_COMPUTE = 2,                     ///< The GPU job is performing a compute dispatch.
			
 
				+} FfxGpuJobType;
			
 
				+
			
 
				+/// A typedef representing the graphics device.
			
 
				+typedef void* FfxDevice;
			
 
				+
			
 
				+/// A typedef representing a command list or command buffer.
			
 
				+typedef void* FfxCommandList;
			
 
				+
			
 
				+/// A typedef for a root signature.
			
 
				+typedef void* FfxRootSignature;
			
 
				+
			
 
				+/// A typedef for a pipeline state object.
			
 
				+typedef void* FfxPipeline;
			
 
				+
			
 
				+/// A structure encapsulating a collection of device capabilities.
			
 
				+typedef struct FfxDeviceCapabilities {
			
 
				+
			
 
				+    FfxShaderModel                  minimumSupportedShaderModel;            ///< The minimum shader model supported by the device.
			
 
				+    uint32_t                        waveLaneCountMin;                       ///< The minimum supported wavefront width.
			
 
				+    uint32_t                        waveLaneCountMax;                       ///< The maximum supported wavefront width.
			
 
				+    bool                            fp16Supported;                          ///< The device supports FP16 in hardware.
			
 
				+    bool                            raytracingSupported;                    ///< The device supports raytracing.
			
 
				+} FfxDeviceCapabilities;
			
 
				+
			
 
				+/// A structure encapsulating a 2-dimensional point, using 32bit unsigned integers.
			
 
				+typedef struct FfxDimensions2D {
			
 
				+
			
 
				+    uint32_t                        width;                                  ///< The width of a 2-dimensional range.
			
 
				+    uint32_t                        height;                                 ///< The height of a 2-dimensional range.
			
 
				+} FfxDimensions2D;
			
 
				+
			
 
				+/// A structure encapsulating a 2-dimensional point, using 32bit signed integers.
			
 
				+typedef struct FfxIntCoords2D {
			
 
				+
			
 
				+    int32_t                         x;                                      ///< The x coordinate of a 2-dimensional point.
			
 
				+    int32_t                         y;                                      ///< The y coordinate of a 2-dimensional point.
			
 
				+} FfxIntCoords2D;
			
 
				+
			
 
				+/// A structure encapsulating a 2-dimensional set of floating point coordinates.
			
 
				+typedef struct FfxFloatCoords2D {
			
 
				+
			
 
				+    float                           x;                                      ///< The x coordinate of a 2-dimensional point.
			
 
				+    float                           y;                                      ///< The y coordinate of a 2-dimensional point.
			
 
				+} FfxFloatCoords2D;
			
 
				+
			
 
				+/// A structure describing a resource.
			
 
				+typedef struct FfxResourceDescription {
			
 
				+
			
 
				+    FfxResourceType                 type;                                   ///< The type of the resource.
			
 
				+    FfxSurfaceFormat                format;                                 ///< The surface format.
			
 
				+    uint32_t                        width;                                  ///< The width of the resource.
			
 
				+    uint32_t                        height;                                 ///< The height of the resource.
			
 
				+    uint32_t                        depth;                                  ///< The depth of the resource.
			
 
				+    uint32_t                        mipCount;                               ///< Number of mips (or 0 for full mipchain).
			
 
				+    FfxResourceFlags                flags;                                  ///< A set of <c><i>FfxResourceFlags</i></c> flags.
			
 
				+} FfxResourceDescription;
			
 
				+
			
 
				+/// An outward facing structure containing a resource
			
 
				+typedef struct FfxResource {
			
 
				+    void*                           resource;                               ///< pointer to the resource.
			
 
				+    wchar_t                         name[64];                               ///< Wide-character name buffer for the resource (64 elements).
			
 
				+    FfxResourceDescription          description;                            ///< An <c><i>FfxResourceDescription</i></c> for this resource.
			
 
				+    FfxResourceStates               state;                                  ///< An <c><i>FfxResourceStates</i></c> value (presumably the state the resource is currently in; confirm against the backend).
			
 
				+    bool                            isDepth;                                ///< Presumably set when the resource is a depth buffer; confirm against callers.
			
 
				+    uint64_t                        descriptorData;                         ///< 64-bit descriptor payload; its meaning is not visible in this header.
			
 
				+} FfxResource;
			
 
				+
			
 
				+/// An internal structure containing a handle to a resource and resource views
			
 
				+typedef struct FfxResourceInternal {
			
 
				+    int32_t                         internalIndex;                          ///< The index of the resource.
			
 
				+} FfxResourceInternal;
			
 
				+
			
 
				+
			
 
				+/// A structure defining a resource bind point
			
 
				+typedef struct FfxResourceBinding
			
 
				+{
			
 
				+    uint32_t    slotIndex;              ///< Slot index of the bind point (presumably the shader binding slot; confirm against the backend).
			
 
				+    uint32_t    resourceIdentifier;     ///< Identifier of the bound resource (presumably an FFX_FSR2_RESOURCE_IDENTIFIER_* value; confirm).
			
 
				+    wchar_t     name[64];               ///< Wide-character name buffer for the binding (64 elements).
			
 
				+}FfxResourceBinding;
			
 
				+
			
 
				+/// A structure encapsulating a single pass of an algorithm.
			
 
				+typedef struct FfxPipelineState {
			
 
				+
			
 
				+    FfxRootSignature                rootSignature;                                  ///< The pipelines rootSignature
			
 
				+    FfxPipeline                     pipeline;                                       ///< The pipeline object
			
 
				+    uint32_t                        uavCount;                                       ///< Count of UAVs used in this pipeline
			
 
				+    uint32_t                        srvCount;                                       ///< Count of SRVs used in this pipeline
			
 
				+    uint32_t                        constCount;                                     ///< Count of constant buffers used in this pipeline
			
 
				+
			
 
				+    FfxResourceBinding              uavResourceBindings[FFX_MAX_NUM_UAVS];          ///< Array of ResourceIdentifiers bound as UAVs
			
 
				+    FfxResourceBinding              srvResourceBindings[FFX_MAX_NUM_SRVS];          ///< Array of ResourceIdentifiers bound as SRVs
			
 
				+    FfxResourceBinding              cbResourceBindings[FFX_MAX_NUM_CONST_BUFFERS];  ///< Array of ResourceIdentifiers bound as CBs
			
 
				+} FfxPipelineState;
			
 
				+
			
 
				+/// A structure containing the data required to create a resource.
			
 
				+typedef struct FfxCreateResourceDescription {
			
 
				+    
			
 
				+    FfxHeapType                     heapType;                               ///< The heap type to hold the resource, typically <c><i>FFX_HEAP_TYPE_DEFAULT</i></c>.
			
 
				+    FfxResourceDescription          resourceDescription;                    ///< A resource description.
			
 
				+    FfxResourceStates               initalState;                            ///< The initial resource state.
			
 
				+    uint32_t                        initDataSize;                           ///< Size of initial data buffer.
			
 
				+    void*                           initData;                               ///< Buffer containing data to fill the resource.
			
 
				+    const wchar_t*                  name;                                   ///< Name of the resource.
			
 
				+    FfxResourceUsage                usage;                                  ///< Resource usage flags.
			
 
				+    uint32_t                        id;                                     ///< Internal resource ID.
			
 
				+} FfxCreateResourceDescription;
			
 
				+
			
 
				+/// A structure containing the description used to create a
			
 
				+/// <c><i>FfxPipeline</i></c> structure.
			
 
				+///
			
 
				+/// A pipeline is the name given to a shader and the collection of state that
			
 
				+/// is required to dispatch it. In the context of FSR2 and its architecture
			
 
				+/// this means that a <c><i>FfxPipelineDescription</i></c> will map to either a
			
 
				+/// monolithic object in an explicit API (such as a
			
 
				+/// <c><i>PipelineStateObject</i></c> in DirectX 12). Or a shader and some
			
 
				+/// ancillary API objects (in something like DirectX 11).
			
 
				+///
			
 
				+/// The <c><i>contextFlags</i></c> field contains a copy of the flags passed
			
 
				+/// to <c><i>ffxFsr2ContextCreate</i></c> via the <c><i>flags</i></c> field of
			
 
				+/// the <c><i>FfxFsr2InitializationParams</i></c> structure. These flags are
			
 
				+/// used to determine which permutation of a pipeline for a specific
			
 
				+/// <c><i>FfxFsr2Pass</i></c> should be used to implement the features required
			
 
				+/// by each application, as well as to achieve the best performance on specific
			
 
				+/// target hardware configurations.
			
 
				+/// 
			
 
				+/// When using one of the provided backends for FSR2 (such as DirectX 12 or
			
 
				+/// Vulkan) the data required to create a pipeline is compiled offline and
			
 
				+/// included into the backend library that you are using. For cases where the
			
 
				+/// backend interface is overridden by providing custom callback function
			
 
				+/// implementations care should be taken to respect the contents of the
			
 
				+/// <c><i>contextFlags</i></c> field in order to correctly support the options
			
 
				+/// provided by FSR2, and achieve best performance.
			
 
				+///
			
 
				+/// @ingroup FSR2
			
 
				+typedef struct FfxPipelineDescription {
			
 
				+
			
 
				+    uint32_t                            contextFlags;                   ///< A collection of <c><i>FfxFsr2InitializationFlagBits</i></c> which were passed to the context.
			
 
				+    FfxFilterType*                      samplers;                       ///< Array of static samplers.
			
 
				+    size_t                              samplerCount;                   ///< The number of samplers contained inside <c><i>samplers</i></c>.
			
 
				+    const uint32_t*                     rootConstantBufferSizes;        ///< Array containing the sizes of the root constant buffers (count of 32 bit elements).
			
 
				+    uint32_t                            rootConstantBufferCount;        ///< The number of entries contained within <c><i>rootConstantBufferSizes</i></c>.
			
 
				+} FfxPipelineDescription;
			
 
				+
			
 
				+/// A structure containing a constant buffer.
			
 
				+typedef struct FfxConstantBuffer {
			
 
				+
			
 
				+    uint32_t                        uint32Size;                             ///< Size of 32 bit chunks used in the constant buffer
			
 
				+    uint32_t                        data[FFX_MAX_CONST_SIZE];               ///< Constant buffer data
			
 
				+}FfxConstantBuffer;
			
 
				+
			
 
				+/// A structure describing a clear render job.
			
 
				+typedef struct FfxClearFloatJobDescription {
			
 
				+
			
 
				+    float                           color[4];                               ///< The clear color of the resource.
			
 
				+    FfxResourceInternal             target;                                 ///< The resource to be cleared.
			
 
				+} FfxClearFloatJobDescription;
			
 
				+
			
 
				+/// A structure describing a compute render job.
			
 
				+typedef struct FfxComputeJobDescription {
			
 
				+
			
 
				+    FfxPipelineState                pipeline;                               ///< Compute pipeline for the render job.
			
 
				+    uint32_t                        dimensions[3];                          ///< Dispatch dimensions.
			
 
				+    FfxResourceInternal             srvs[FFX_MAX_NUM_SRVS];                 ///< SRV resources to be bound in the compute job.
			
 
				+    wchar_t                         srvNames[FFX_MAX_NUM_SRVS][64];
			
 
				+    FfxResourceInternal             uavs[FFX_MAX_NUM_UAVS];                 ///< UAV resources to be bound in the compute job.
			
 
				+    uint32_t                        uavMip[FFX_MAX_NUM_UAVS];               ///< Mip level of UAV resources to be bound in the compute job.
			
 
				+    wchar_t                         uavNames[FFX_MAX_NUM_UAVS][64];
			
 
				+    FfxConstantBuffer               cbs[FFX_MAX_NUM_CONST_BUFFERS];         ///< Constant buffers to be bound in the compute job.
			
 
				+    wchar_t                         cbNames[FFX_MAX_NUM_CONST_BUFFERS][64];
			
 
				+    uint32_t                        cbSlotIndex[FFX_MAX_NUM_CONST_BUFFERS]; ///< Slot index in the descriptor table
			
 
				+} FfxComputeJobDescription;
			
 
				+
			
 
				+/// A structure describing a copy render job.
			
 
				+typedef struct FfxCopyJobDescription
			
 
				+{
			
 
				+    FfxResourceInternal                     src;                                    ///< Source resource for the copy.
			
 
				+    FfxResourceInternal                     dst;                                    ///< Destination resource for the copy.
			
 
				+} FfxCopyJobDescription;
			
 
				+
			
 
				+/// A structure describing a single render job.
			
 
				+typedef struct FfxGpuJobDescription{
			
 
				+
			
 
				+    FfxGpuJobType                jobType;                                    ///< Type of the job.
			
 
				+
			
 
				+    union {
			
 
				+        FfxClearFloatJobDescription clearJobDescriptor;                     ///< Clear job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_RENDER_JOB_CLEAR_FLOAT</i></c>.
			
 
				+        FfxCopyJobDescription       copyJobDescriptor;                      ///< Copy job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_RENDER_JOB_COPY</i></c>.
			
 
				+        FfxComputeJobDescription    computeJobDescriptor;                   ///< Compute job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_RENDER_JOB_COMPUTE</i></c>.
			
 
				+    };
			
 
				+} FfxGpuJobDescription;
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}
			
 
				+#endif  // #ifdef __cplusplus
			
--- a/thirdparty/amd-fsr2/ffx_util.h
+++ b/thirdparty/amd-fsr2/ffx_util.h
@@ -0,0 +1,78 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include "ffx_types.h"
			
 
				+
			
 
				+/// The value of Pi.
			
 
				+const float FFX_PI = 3.141592653589793f;
			
 
				+
			
 
				+/// An epsilon value for floating point numbers.
			
 
				+const float FFX_EPSILON = 1e-06f;
			
 
				+
			
 
				+/// Helper macro to pack a version number: major shifted left by 22, minor shifted left by 12, patch in the low bits. Arguments are parenthesized so compound expressions expand correctly.
			
 
				+#define FFX_MAKE_VERSION(major, minor, patch) (((major) << 22) | ((minor) << 12) | (patch))
			
 
				+
			
 
				+/// Use this to specify no version.
			
 
				+#define FFX_UNSPECIFIED_VERSION     0xFFFFAD00
			
 
				+
			
 
				+/// Helper macro to avoid warnings about unused variables.
			
 
				+#define FFX_UNUSED(x)               ((void)(x))
			
 
				+
			
 
				+/// Helper macro to align an integer to the specified power of 2 boundary
			
 
				+#define FFX_ALIGN_UP(x, y)          (((x) + ((y)-1)) & ~((y)-1))
			
 
				+
			
 
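				+/// Helper macro that evaluates true when x is non-zero and NOT a power of 2. NOTE(review): despite the name it takes no alignment argument; confirm intent before use.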
			
 
				+#define FFX_IS_ALIGNED(x)           (((x) != 0) && ((x) & ((x)-1)))
			
 
				+
			
 
				+/// Helper macro to stringify a value.
			
 
				+#define FFX_STR(s)                  FFX_XSTR(s)
			
 
				+#define FFX_XSTR(s)                 #s
			
 
				+
			
 
				+/// Helper macro to forward declare a structure.
			
 
				+#define FFX_FORWARD_DECLARE(x)      typedef struct x x
			
 
				+
			
 
				+/// Helper macro to return the maximum of two values.
			
 
				+#define FFX_MAXIMUM(x, y)           (((x) > (y)) ? (x) : (y))
			
 
				+
			
 
				+/// Helper macro to return the minimum of two values.
			
 
				+#define FFX_MINIMUM(x, y)           (((x) < (y)) ? (x) : (y))
			
 
				+
			
 
				+/// Helper macro to free a pointer only when it is non-null. Expands to a single statement (do/while(0)) so it cannot capture a following else.
			
 
				+#define FFX_SAFE_FREE(x) \
			
 
				+    do { if (x) free(x); } \
			
 
				+    while (0)
			
 
				+
			
 
				+/// Helper macro to return the abs of an integer value.
			
 
				+#define FFX_ABSOLUTE(x)                 (((x) < 0) ? (-(x)) : (x))
			
 
				+
			
 
				+/// Helper macro to return sign of a value.
			
 
				+#define FFX_SIGN(x)                     (((x) < 0) ? -1 : 1)
			
 
				+
			
 
				+/// Helper macro to work out the number of elements in an array.
			
 
				+#define FFX_ARRAY_ELEMENTS(x)           (int32_t)((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
			
 
				+
			
 
				+/// The maximum length of a path that can be specified to the FidelityFX API.
			
 
				+#define FFX_MAXIMUM_PATH                (260)
			
 
				+
			
 
				+/// Helper macro to check if every bit of key is set in the bitfield options. Arguments are parenthesized so compound keys (e.g. A | B) are tested as a whole.
			
 
				+#define FFX_CONTAINS_FLAG(options, key) (((options) & (key)) == (key))
			
--- a/thirdparty/amd-fsr2/patches/godot-changes.patch
+++ b/thirdparty/amd-fsr2/patches/godot-changes.patch
--- a/thirdparty/amd-fsr2/shaders/ffx_common_types.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_common_types.h
@@ -0,0 +1,429 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+#ifndef FFX_COMMON_TYPES_H
			
 
				+#define FFX_COMMON_TYPES_H
			
 
				+
			
 
				+#if defined(FFX_CPU)
			
 
				+#define FFX_PARAMETER_IN
			
 
				+#define FFX_PARAMETER_OUT
			
 
				+#define FFX_PARAMETER_INOUT
			
 
				+#elif defined(FFX_HLSL)
			
 
				+#define FFX_PARAMETER_IN        in
			
 
				+#define FFX_PARAMETER_OUT       out
			
 
				+#define FFX_PARAMETER_INOUT     inout
			
 
				+#elif defined(FFX_GLSL)
			
 
				+#define FFX_PARAMETER_IN        in
			
 
				+#define FFX_PARAMETER_OUT       out
			
 
				+#define FFX_PARAMETER_INOUT     inout
			
 
				+#endif // #if defined(FFX_CPU)
			
 
				+
			
 
				+#if defined(FFX_CPU)
			
 
				+/// A typedef for a boolean value.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef bool FfxBoolean;
			
 
				+
			
 
				+/// A typedef for a unsigned 8bit integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef uint8_t FfxUInt8;
			
 
				+
			
 
				+/// A typedef for a unsigned 16bit integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef uint16_t FfxUInt16;
			
 
				+
			
 
				+/// A typedef for a unsigned 32bit integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef uint32_t FfxUInt32;
			
 
				+
			
 
				+/// A typedef for a unsigned 64bit integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef uint64_t FfxUInt64;
			
 
				+
			
 
				+/// A typedef for a signed 8bit integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef int8_t FfxInt8;
			
 
				+
			
 
				+/// A typedef for a signed 16bit integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef int16_t FfxInt16;
			
 
				+
			
 
				+/// A typedef for a signed 32bit integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef int32_t FfxInt32;
			
 
				+
			
 
				+/// A typedef for a signed 64bit integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef int64_t FfxInt64;
			
 
				+
			
 
				+/// A typedef for a floating point value.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef float FfxFloat32;
			
 
				+
			
 
				+/// A typedef for a 2-dimensional floating point value.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef float FfxFloat32x2[2];
			
 
				+
			
 
				+/// A typedef for a 3-dimensional floating point value.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef float FfxFloat32x3[3];
			
 
				+
			
 
				+/// A typedef for a 4-dimensional floating point value.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef float FfxFloat32x4[4];
			
 
				+
			
 
				+/// A typedef for a 2-dimensional 32bit unsigned integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef uint32_t FfxUInt32x2[2];
			
 
				+
			
 
				+/// A typedef for a 3-dimensional 32bit unsigned integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef uint32_t FfxUInt32x3[3];
			
 
				+
			
 
				+/// A typedef for a 4-dimensional 32bit unsigned integer.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+typedef uint32_t FfxUInt32x4[4];
			
 
				+#endif // #if defined(FFX_CPU)
			
 
				+
			
 
				+#if defined(FFX_HLSL)
			
 
				+/// A typedef for a boolean value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+typedef bool FfxBoolean;
			
 
				+
			
 
				+#if FFX_HLSL_6_2
			
 
				+typedef float32_t   FfxFloat32;
			
 
				+typedef float32_t2  FfxFloat32x2;
			
 
				+typedef float32_t3  FfxFloat32x3;
			
 
				+typedef float32_t4  FfxFloat32x4;
			
 
				+
			
 
				+/// A typedef for a unsigned 32bit integer.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+typedef uint32_t    FfxUInt32;
			
 
				+typedef uint32_t2   FfxUInt32x2;
			
 
				+typedef uint32_t3   FfxUInt32x3;
			
 
				+typedef uint32_t4   FfxUInt32x4;
			
 
				+typedef int32_t     FfxInt32;
			
 
				+typedef int32_t2    FfxInt32x2;
			
 
				+typedef int32_t3    FfxInt32x3;
			
 
				+typedef int32_t4    FfxInt32x4;
			
 
				+#else
			
 
				+#define FfxFloat32   float
			
 
				+#define FfxFloat32x2 float2
			
 
				+#define FfxFloat32x3 float3
			
 
				+#define FfxFloat32x4 float4
			
 
				+
			
 
				+/// A typedef for a unsigned 32bit integer.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+typedef uint        FfxUInt32;
			
 
				+typedef uint2       FfxUInt32x2;
			
 
				+typedef uint3       FfxUInt32x3;
			
 
				+typedef uint4       FfxUInt32x4;
			
 
				+typedef int         FfxInt32;
			
 
				+typedef int2        FfxInt32x2;
			
 
				+typedef int3        FfxInt32x3;
			
 
				+typedef int4        FfxInt32x4;
			
 
				+#endif // #if defined(FFX_HLSL_6_2)
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+#if FFX_HLSL_6_2
			
 
				+typedef float16_t   FfxFloat16;
			
 
				+typedef float16_t2  FfxFloat16x2;
			
 
				+typedef float16_t3  FfxFloat16x3;
			
 
				+typedef float16_t4  FfxFloat16x4;
			
 
				+
			
 
				+/// A typedef for an unsigned 16bit integer.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+typedef uint16_t    FfxUInt16;
			
 
				+typedef uint16_t2   FfxUInt16x2;
			
 
				+typedef uint16_t3   FfxUInt16x3;
			
 
				+typedef uint16_t4   FfxUInt16x4;
			
 
				+
			
 
				+/// A typedef for a signed 16bit integer.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+typedef int16_t     FfxInt16;
			
 
				+typedef int16_t2    FfxInt16x2;
			
 
				+typedef int16_t3    FfxInt16x3;
			
 
				+typedef int16_t4    FfxInt16x4;
			
 
				+#else
			
 
				+typedef min16float  FfxFloat16;
			
 
				+typedef min16float2 FfxFloat16x2;
			
 
				+typedef min16float3 FfxFloat16x3;
			
 
				+typedef min16float4 FfxFloat16x4;
			
 
				+
			
 
				+/// A typedef for an unsigned 16bit integer.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+typedef min16uint   FfxUInt16;
			
 
				+typedef min16uint2  FfxUInt16x2;
			
 
				+typedef min16uint3  FfxUInt16x3;
			
 
				+typedef min16uint4  FfxUInt16x4;
			
 
				+
			
 
				+/// A typedef for a signed 16bit integer.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+typedef min16int    FfxInt16;
			
 
				+typedef min16int2   FfxInt16x2;
			
 
				+typedef min16int3   FfxInt16x3;
			
 
				+typedef min16int4   FfxInt16x4;
			
 
				+#endif  // FFX_HLSL_6_2
			
 
				+#endif // FFX_HALF
			
 
				+#endif // #if defined(FFX_HLSL)
			
 
				+
			
 
				+#if defined(FFX_GLSL)
			
 
				+/// A typedef for a boolean value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FfxBoolean   bool
			
 
				+#define FfxFloat32   float
			
 
				+#define FfxFloat32x2 vec2
			
 
				+#define FfxFloat32x3 vec3
			
 
				+#define FfxFloat32x4 vec4
			
 
				+#define FfxUInt32    uint
			
 
				+#define FfxUInt32x2  uvec2
			
 
				+#define FfxUInt32x3  uvec3
			
 
				+#define FfxUInt32x4  uvec4
			
 
				+#define FfxInt32     int
			
 
				+#define FfxInt32x2   ivec2
			
 
				+#define FfxInt32x3   ivec3
			
 
				+#define FfxInt32x4   ivec4
			
 
				+#if FFX_HALF
			
 
				+#define FfxFloat16   float16_t
			
 
				+#define FfxFloat16x2 f16vec2
			
 
				+#define FfxFloat16x3 f16vec3
			
 
				+#define FfxFloat16x4 f16vec4
			
 
				+#define FfxUInt16    uint16_t
			
 
				+#define FfxUInt16x2  u16vec2
			
 
				+#define FfxUInt16x3  u16vec3
			
 
				+#define FfxUInt16x4  u16vec4
			
 
				+#define FfxInt16     int16_t
			
 
				+#define FfxInt16x2   i16vec2
			
 
				+#define FfxInt16x3   i16vec3
			
 
				+#define FfxInt16x4   i16vec4
			
 
				+#endif // FFX_HALF
			
 
				+#endif // #if defined(FFX_GLSL)
			
 
				+
			
 
				+// Global toggles:
			
 
				+// #define FFX_HALF            (1)
			
 
				+// #define FFX_HLSL_6_2        (1)
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+
			
 
				+#if FFX_HLSL_6_2
			
 
				+
			
 
				+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType##16_t TypeName;
			
 
				+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType##16_t, COL> TypeName;
			
 
				+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
			
 
				+
			
 
				+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType##16_t TypeName;
			
 
				+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType##16_t, COL> TypeName;
			
 
				+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
			
 
				+
			
 
				+#else //FFX_HLSL_6_2
			
 
				+
			
 
				+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef min16##BaseComponentType TypeName;
			
 
				+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<min16##BaseComponentType, COL> TypeName;
			
 
				+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<min16##BaseComponentType, ROW, COL> TypeName;
			
 
				+
			
 
				+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           FFX_MIN16_SCALAR( TypeName, BaseComponentType );
			
 
				+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL );
			
 
				+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL );
			
 
				+
			
 
				+#endif //FFX_HLSL_6_2
			
 
				+
			
 
				+#else //FFX_HALF
			
 
				+
			
 
				+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType TypeName;
			
 
				+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType, COL> TypeName;
			
 
				+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
			
 
				+
			
 
				+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType TypeName;
			
 
				+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType, COL> TypeName;
			
 
				+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
			
 
				+
			
 
				+#endif //FFX_HALF
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+// Common typedefs:
			
 
				+#if defined(FFX_HLSL)
			
 
				+FFX_MIN16_SCALAR( FFX_MIN16_F , float );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 );
			
 
				+
			
 
				+FFX_MIN16_SCALAR( FFX_MIN16_I,  int );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 );
			
 
				+
			
 
				+FFX_MIN16_SCALAR( FFX_MIN16_U,  uint );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 );
			
 
				+FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 );
			
 
				+
			
 
				+FFX_16BIT_SCALAR( FFX_F16_t , float );
			
 
				+FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 );
			
 
				+FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 );
			
 
				+FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 );
			
 
				+
			
 
				+FFX_16BIT_SCALAR( FFX_I16_t,  int );
			
 
				+FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 );
			
 
				+FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 );
			
 
				+FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 );
			
 
				+
			
 
				+FFX_16BIT_SCALAR( FFX_U16_t,  uint );
			
 
				+FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 );
			
 
				+FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 );
			
 
				+FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 );
			
 
				+
			
 
				+#define TYPEDEF_MIN16_TYPES(Prefix)           \
			
 
				+typedef FFX_MIN16_F     Prefix##_F;           \
			
 
				+typedef FFX_MIN16_F2    Prefix##_F2;          \
			
 
				+typedef FFX_MIN16_F3    Prefix##_F3;          \
			
 
				+typedef FFX_MIN16_F4    Prefix##_F4;          \
			
 
				+typedef FFX_MIN16_I     Prefix##_I;           \
			
 
				+typedef FFX_MIN16_I2    Prefix##_I2;          \
			
 
				+typedef FFX_MIN16_I3    Prefix##_I3;          \
			
 
				+typedef FFX_MIN16_I4    Prefix##_I4;          \
			
 
				+typedef FFX_MIN16_U     Prefix##_U;           \
			
 
				+typedef FFX_MIN16_U2    Prefix##_U2;          \
			
 
				+typedef FFX_MIN16_U3    Prefix##_U3;          \
			
 
				+typedef FFX_MIN16_U4    Prefix##_U4;
			
 
				+// Declares Prefix_F .. Prefix_U4 as aliases of the FFX_F16_t / FFX_I16_t / FFX_U16_t families declared above with FFX_16BIT_SCALAR / FFX_16BIT_VECTOR.
			
 
				+#define TYPEDEF_16BIT_TYPES(Prefix)           \
			
 
				+typedef FFX_F16_t       Prefix##_F;           \
			
 
				+typedef FFX_F16_t2      Prefix##_F2;          \
			
 
				+typedef FFX_F16_t3      Prefix##_F3;          \
			
 
				+typedef FFX_F16_t4      Prefix##_F4;          \
			
 
				+typedef FFX_I16_t       Prefix##_I;           \
			
 
				+typedef FFX_I16_t2      Prefix##_I2;          \
			
 
				+typedef FFX_I16_t3      Prefix##_I3;          \
			
 
				+typedef FFX_I16_t4      Prefix##_I4;          \
			
 
				+typedef FFX_U16_t       Prefix##_U;           \
			
 
				+typedef FFX_U16_t2      Prefix##_U2;          \
			
 
				+typedef FFX_U16_t3      Prefix##_U3;          \
			
 
				+typedef FFX_U16_t4      Prefix##_U4;
			
 
				+
			
 
				+#define TYPEDEF_FULL_PRECISION_TYPES(Prefix)  \
			
 
				+typedef FfxFloat32      Prefix##_F;           \
			
 
				+typedef FfxFloat32x2    Prefix##_F2;          \
			
 
				+typedef FfxFloat32x3    Prefix##_F3;          \
			
 
				+typedef FfxFloat32x4    Prefix##_F4;          \
			
 
				+typedef FfxInt32        Prefix##_I;           \
			
 
				+typedef FfxInt32x2      Prefix##_I2;          \
			
 
				+typedef FfxInt32x3      Prefix##_I3;          \
			
 
				+typedef FfxInt32x4      Prefix##_I4;          \
			
 
				+typedef FfxUInt32       Prefix##_U;           \
			
 
				+typedef FfxUInt32x2     Prefix##_U2;          \
			
 
				+typedef FfxUInt32x3     Prefix##_U3;          \
			
 
				+typedef FfxUInt32x4     Prefix##_U4;
			
 
				+#endif // #if defined(FFX_HLSL)
			
 
				+
			
 
				+#if defined(FFX_GLSL)
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+
			
 
				+#define  FFX_MIN16_F  float16_t
			
 
				+#define  FFX_MIN16_F2 f16vec2
			
 
				+#define  FFX_MIN16_F3 f16vec3
			
 
				+#define  FFX_MIN16_F4 f16vec4
			
 
				+
			
 
				+#define  FFX_MIN16_I  int16_t
			
 
				+#define  FFX_MIN16_I2 i16vec2
			
 
				+#define  FFX_MIN16_I3 i16vec3
			
 
				+#define  FFX_MIN16_I4 i16vec4
			
 
				+
			
 
				+#define  FFX_MIN16_U  uint16_t
			
 
				+#define  FFX_MIN16_U2 u16vec2
			
 
				+#define  FFX_MIN16_U3 u16vec3
			
 
				+#define  FFX_MIN16_U4 u16vec4
			
 
				+
			
 
				+#define FFX_16BIT_F  float16_t
			
 
				+#define FFX_16BIT_F2 f16vec2
			
 
				+#define FFX_16BIT_F3 f16vec3
			
 
				+#define FFX_16BIT_F4 f16vec4
			
 
				+
			
 
				+#define FFX_16BIT_I  int16_t
			
 
				+#define FFX_16BIT_I2 i16vec2
			
 
				+#define FFX_16BIT_I3 i16vec3
			
 
				+#define FFX_16BIT_I4 i16vec4
			
 
				+
			
 
				+#define FFX_16BIT_U  uint16_t
			
 
				+#define FFX_16BIT_U2 u16vec2
			
 
				+#define FFX_16BIT_U3 u16vec3
			
 
				+#define FFX_16BIT_U4 u16vec4
			
 
				+
			
 
				+#else // FFX_HALF
			
 
				+
			
 
				+#define  FFX_MIN16_F  float
			
 
				+#define  FFX_MIN16_F2 vec2
			
 
				+#define  FFX_MIN16_F3 vec3
			
 
				+#define  FFX_MIN16_F4 vec4
			
 
				+
			
 
				+#define  FFX_MIN16_I  int
			
 
				+#define  FFX_MIN16_I2 ivec2
			
 
				+#define  FFX_MIN16_I3 ivec3
			
 
				+#define  FFX_MIN16_I4 ivec4
			
 
				+
			
 
				+#define  FFX_MIN16_U  uint
			
 
				+#define  FFX_MIN16_U2 uvec2
			
 
				+#define  FFX_MIN16_U3 uvec3
			
 
				+#define  FFX_MIN16_U4 uvec4
			
 
				+
			
 
				+#define FFX_16BIT_F  float
			
 
				+#define FFX_16BIT_F2 vec2
			
 
				+#define FFX_16BIT_F3 vec3
			
 
				+#define FFX_16BIT_F4 vec4
			
 
				+
			
 
				+#define FFX_16BIT_I  int
			
 
				+#define FFX_16BIT_I2 ivec2
			
 
				+#define FFX_16BIT_I3 ivec3
			
 
				+#define FFX_16BIT_I4 ivec4
			
 
				+
			
 
				+#define FFX_16BIT_U  uint
			
 
				+#define FFX_16BIT_U2 uvec2
			
 
				+#define FFX_16BIT_U3 uvec3
			
 
				+#define FFX_16BIT_U4 uvec4
			
 
				+
			
 
				+#endif // FFX_HALF
			
 
				+
			
 
				+#endif // #if defined(FFX_GLSL)
			
 
				+
			
 
				+#endif // #if defined(FFX_GPU)
			
 
				+#endif // #ifndef FFX_COMMON_TYPES_H
			
--- a/thirdparty/amd-fsr2/shaders/ffx_core.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_core.h
@@ -0,0 +1,52 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+/// @defgroup Core
			
 
				+/// @defgroup HLSL
			
 
				+/// @defgroup GLSL
			
 
				+/// @defgroup GPU
			
 
				+/// @defgroup CPU
			
 
				+/// @defgroup CAS
			
 
				+/// @defgroup FSR1
			
 
				+
			
 
				+#if !defined(FFX_CORE_H)
			
 
				+#define FFX_CORE_H
			
 
				+
			
 
				+#include "ffx_common_types.h"
			
 
				+
			
 
				+#if defined(FFX_CPU)
			
 
				+    #include "ffx_core_cpu.h"
			
 
				+#endif // #if defined(FFX_CPU)
			
 
				+
			
 
				+#if defined(FFX_GLSL) && defined(FFX_GPU)
			
 
				+    #include "ffx_core_glsl.h"
			
 
				+#endif // #if defined(FFX_GLSL) && defined(FFX_GPU)
			
 
				+
			
 
				+#if defined(FFX_HLSL) && defined(FFX_GPU)
			
 
				+    #include "ffx_core_hlsl.h"
			
 
				+#endif // #if defined(FFX_HLSL) && defined(FFX_GPU)
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+    #include "ffx_core_gpu_common.h"
			
 
				+    #include "ffx_core_gpu_common_half.h"
			
 
				+    #include "ffx_core_portability.h"
			
 
				+#endif // #if defined(FFX_GPU)
			
 
				+#endif // #if !defined(FFX_CORE_H)
			
--- a/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
@@ -0,0 +1,332 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+/// A define for a true value in a boolean expression.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+#define FFX_TRUE (1)
			
 
				+
			
 
				+/// A define for a false value in a boolean expression.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+#define FFX_FALSE (0)
			
 
				+ 
			
 
				+#if !defined(FFX_STATIC)
			
 
				+/// A define to abstract declaration of static variables and functions.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+#define FFX_STATIC static
			
 
				+#endif // #if !defined(FFX_STATIC)
			
 
				+
			
 
				+#ifdef __clang__
			
 
				+#pragma clang diagnostic ignored "-Wunused-variable"
			
 
				+#endif
			
 
				+
			
 
				+/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x               A 32bit floating value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An unsigned 32bit integer value containing the bit pattern of <c><i>x</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
			
 
				+{
			
 
				+    union
			
 
				+    {
			
 
				+        FfxFloat32 f;
			
 
				+        FfxUInt32  u;
			
 
				+    } bits;
			
 
				+
			
 
				+    bits.f = x;
			
 
				+    return bits.u;
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
			
 
				+{
			
 
				+    return a[0] * b[0] + a[1] * b[1];
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
			
 
				+{
			
 
				+    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
			
 
				+{
			
 
				+    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Equivalent to the GLSL <c><i>mix</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return y * t + (-x * t + x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal of a value.
			
 
				+///
			
 
				+/// @param [in] a               The value to compute the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal value of <c><i>a</i></c>.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a)
			
 
				+{
			
 
				+    return 1.0f / a;
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
			
 
				+{
			
 
				+    return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>a - floor(a)</i></c>.
			
 
				+///
			
 
				+/// @param [in] a               The value to compute the fractional part from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>a</i></c>.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a)
			
 
				+{
			
 
				+    return a - floor(a);
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal square root of a value.
			
 
				+///
			
 
				+/// @param [in] a               The value to compute the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal square root value of <c><i>a</i></c>.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxReciprocal(ffxSqrt(a));
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
			
 
				+{
			
 
				+    return x < y ? x : y;
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
			
 
				+{
			
 
				+    return x < y ? x : y;
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
			
 
				+{
			
 
				+    return x > y ? x : y;
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
			
 
				+{
			
 
				+    return x > y ? x : y;
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] a               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>a</i></c>.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxMin(1.0f, ffxMax(0.0f, a));
			
 
				+}
			
 
				+
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+
			
 
				+FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
			
 
				+{
			
 
				+    d[0] = a[0] + b;
			
 
				+    d[1] = a[1] + b;
			
 
				+    d[2] = a[2] + b;
			
 
				+    return;
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
			
 
				+{
			
 
				+    d[0] = a[0];
			
 
				+    d[1] = a[1];
			
 
				+    d[2] = a[2];
			
 
				+    return;
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
			
 
				+{
			
 
				+    d[0] = a[0] * b[0];
			
 
				+    d[1] = a[1] * b[1];
			
 
				+    d[2] = a[2] * b[2];
			
 
				+    return;
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
			
 
				+{
			
 
				+    d[0] = a[0] * b;
			
 
				+    d[1] = a[1] * b;
			
 
				+    d[2] = a[2] * b;
			
 
				+    return;
			
 
				+}
			
 
				+
			
 
				+FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
			
 
				+{
			
 
				+    d[0] = ffxReciprocal(a[0]);
			
 
				+    d[1] = ffxReciprocal(a[1]);
			
 
				+    d[2] = ffxReciprocal(a[2]);
			
 
				+    return;
			
 
				+}
			
 
				+
			
 
				+/// Convert FfxFloat32 to half (in lower 16-bits of output).
			
 
				+/// 
			
 
				+/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
			
 
				+/// 
			
 
				+/// The function supports denormals.
			
 
				+/// 
			
 
				+/// Some conversion rules are to make computations possibly "safer" on the GPU,
			
 
				+///  -INF & -NaN -> -65504
			
 
				+///  +INF & +NaN -> +65504
			
 
				+///
			
 
				+/// @param [in] f               The 32bit floating point value to convert.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The closest 16bit floating point value to <c><i>f</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
			
 
				+{
			
 
				+    static FfxUInt16 base[512] = {
			
 
				+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
			
 
				+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
			
 
				+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
			
 
				+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
			
 
				+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
			
 
				+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400,
			
 
				+        0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000,
			
 
				+        0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
			
 
				+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
			
 
				+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
			
 
				+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
			
 
				+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
			
 
				+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
			
 
				+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
			
 
				+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
			
 
				+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
			
 
				+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
			
 
				+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
			
 
				+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002,
			
 
				+        0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
			
 
				+        0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800,
			
 
				+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
			
 
				+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
			
 
				+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
			
 
				+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
			
 
				+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
			
 
				+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff
			
 
				+    };
			
 
				+    
			
 
				+    static FfxUInt8 shift[512] = {
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
			
 
				+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
			
 
				+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
			
 
				+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
			
 
				+    };
			
 
				+
			
 
				+    union
			
 
				+    {
			
 
				+        FfxFloat32      f;
			
 
				+        FfxUInt32 u;
			
 
				+    } bits;
			
 
				+
			
 
				+    bits.f       = f;
			
 
				+    FfxUInt32 u = bits.u;
			
 
				+    FfxUInt32 i = u >> 23;
			
 
				+    return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]);
			
 
				+}
			
 
				+
			
 
				+/// Pack 2x32-bit floating point values in a single 32bit value.
			
 
				+///
			
 
				+/// This function first converts each component of <c><i>a</i></c> into its nearest 16-bit floating
			
 
				+/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
			
 
				+/// 32bit unsigned integer respectively.
			
 
				+///
			
 
				+/// @param [in] a                   A 2-dimensional floating point value to convert and pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A packed 32bit value containing 2 16bit floating point values.
			
 
				+///
			
 
				+/// @ingroup CPU
			
 
				+FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return f32tof16(a[0]) + (f32tof16(a[1]) << 16);
			
 
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
@@ -0,0 +1,1669 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+/// A define for abstracting shared memory between shading languages.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_GROUPSHARED shared
			
 
				+
			
 
				+/// A define for abstracting compute memory barriers between shading languages.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_GROUP_MEMORY_BARRIER() barrier()
			
 
				+
			
 
				+/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_STATIC
			
 
				+
			
 
				+/// A define for abstracting loop unrolling between shading languages.
			
 
				+///
			
 
				+/// @ingroup GPU 
			
 
				+#define FFX_UNROLL
			
 
				+
			
 
				+/// A define for abstracting a 'greater than' comparison operator between two types.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_GREATER_THAN(x, y) greaterThan(x, y)
			
 
				+
			
 
				+/// A define for abstracting a 'greater than or equal' comparison operator between two types.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_GREATER_THAN_EQUAL(x, y) greaterThanEqual(x, y)
			
 
				+
			
 
				+/// A define for abstracting a 'less than' comparison operator between two types.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_LESS_THAN(x, y) lessThan(x, y)
			
 
				+
			
 
				+/// A define for abstracting a 'less than or equal' comparison operator between two types.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_LESS_THAN_EQUAL(x, y) lessThanEqual(x, y)
			
 
				+
			
 
				+/// A define for abstracting an 'equal' comparison operator between two types.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_EQUAL(x, y) equal(x, y)
			
 
				+
			
 
				+/// A define for abstracting a 'not equal' comparison operator between two types.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_NOT_EQUAL(x, y) notEqual(x, y)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_FLOAT32(x)   FfxFloat32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32x2(FfxFloat32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32x3(FfxFloat32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32x4(FfxFloat32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_UINT32(x)   FfxUInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_UINT32X2(x) FfxUInt32x2(FfxUInt32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_UINT32X3(x) FfxUInt32x3(FfxUInt32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_UINT32X4(x) FfxUInt32x4(FfxUInt32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_INT32(x)   FfxInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_INT32X2(x) FfxInt32x2(FfxInt32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_INT32X3(x) FfxInt32x3(FfxInt32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_INT32X4(x) FfxInt32x4(FfxInt32(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_FLOAT16(x)   FFX_MIN16_F(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_FLOAT16X2(x) FFX_MIN16_F2(FFX_MIN16_F(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_FLOAT16X3(x) FFX_MIN16_F3(FFX_MIN16_F(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_FLOAT16X4(x) FFX_MIN16_F4(FFX_MIN16_F(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_UINT16(x)   FFX_MIN16_U(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_UINT16X2(x) FFX_MIN16_U2(FFX_MIN16_U(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_UINT16X3(x) FFX_MIN16_U3(FFX_MIN16_U(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_UINT16X4(x) FFX_MIN16_U4(FFX_MIN16_U(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_INT16(x)   FFX_MIN16_I(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_INT16X2(x) FFX_MIN16_I2(FFX_MIN16_I(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_INT16X3(x) FFX_MIN16_I3(FFX_MIN16_I(x))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_INT16X4(x) FFX_MIN16_I4(FFX_MIN16_I(x))
			
 
				+
			
 
				+#if !defined(FFX_SKIP_EXT)
			
 
				+#if FFX_HALF
			
 
				+    #extension GL_EXT_shader_16bit_storage : require
			
 
				+    #extension GL_EXT_shader_explicit_arithmetic_types : require
			
 
				+#endif // FFX_HALF
			
 
				+
			
 
				+#if defined(FFX_LONG)
			
 
				+    #extension GL_ARB_gpu_shader_int64 : require
			
 
				+    #extension GL_NV_shader_atomic_int64 : require
			
 
				+#endif // #if defined(FFX_LONG)
			
 
				+
			
 
				+#if defined(FFX_WAVE)
			
 
				+    #extension GL_KHR_shader_subgroup_arithmetic : require
			
 
				+    #extension GL_KHR_shader_subgroup_ballot : require
			
 
				+    #extension GL_KHR_shader_subgroup_quad : require
			
 
				+    #extension GL_KHR_shader_subgroup_shuffle : require
			
 
				+#endif // #if defined(FFX_WAVE)
			
 
				+#endif // #if !defined(FFX_SKIP_EXT)
			
 
				+
			
 
				+// Forward declarations
			
 
				+FfxFloat32   ffxSqrt(FfxFloat32 x);
			
 
				+FfxFloat32x2 ffxSqrt(FfxFloat32x2 x);
			
 
				+FfxFloat32x3 ffxSqrt(FfxFloat32x3 x);
			
 
				+FfxFloat32x4 ffxSqrt(FfxFloat32x4 x);
			
 
				+
			
 
				+/// Interprets the bit pattern of x as a floating-point number.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as a floating-point number.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 ffxAsFloat(FfxUInt32 x)
			
 
				+{
			
 
				+    return uintBitsToFloat(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as a floating-point number.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as a floating-point number.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
			
 
				+{
			
 
				+    return uintBitsToFloat(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as a floating-point number.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as a floating-point number.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
			
 
				+{
			
 
				+    return uintBitsToFloat(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as a floating-point number.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as a floating-point number.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
			
 
				+{
			
 
				+    return uintBitsToFloat(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as an unsigned integer.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32 ffxAsUInt32(FfxFloat32 x)
			
 
				+{
			
 
				+    return floatBitsToUint(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as an unsigned integer.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return floatBitsToUint(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as an unsigned integer.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return floatBitsToUint(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as an unsigned integer.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return floatBitsToUint(x);
			
 
				+}
			
 
				+
			
 
				+/// Convert a 32bit IEEE 754 floating point value to its nearest 16bit equivalent.
			
 
				+///
			
 
				+/// @param [in] value               The value to convert.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The nearest 16bit equivalent of <c><i>value</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32 f32tof16(FfxFloat32 value)
			
 
				+{
			
 
				+    return packHalf2x16(FfxFloat32x2(value, 0.0));
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional floating point vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
			
 
				+{
			
 
				+    return FfxFloat32x2(value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional floating point vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
			
 
				+{
			
 
				+    return FfxFloat32x3(value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional floating point vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
			
 
				+{
			
 
				+    return FfxFloat32x4(value, value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxInt32x2 ffxBroadcast2(FfxInt32 value)
			
 
				+{
			
 
				+    return FfxInt32x2(value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxInt32x3 ffxBroadcast3(FfxInt32 value)
			
 
				+{
			
 
				+    return FfxInt32x3(value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxInt32x4 ffxBroadcast4(FfxInt32 value)
			
 
				+{
			
 
				+    return FfxInt32x4(value, value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
			
 
				+{
			
 
				+    return FfxUInt32x2(value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
			
 
				+{
			
 
				+    return FfxUInt32x3(value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
			
 
				+{
			
 
				+    return FfxUInt32x4(value, value, value, value);
			
 
				+}
			
 
				+
			
 
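				+/// Extract <c><i>bits</i></c> bits from <c><i>src</i></c>, starting at bit offset <c><i>off</i></c>, via the GLSL built-in.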
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
			
 
				+{
			
 
				+    return bitfieldExtract(src, FfxInt32(off), FfxInt32(bits));
			
 
				+}
			
 
				+
			
 
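				+/// Combine two values bitwise: bits set in <c><i>mask</i></c> come from <c><i>ins</i></c>, all other bits from <c><i>src</i></c>.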
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
			
 
				+{
			
 
				+    return (ins & mask) | (src & (~mask));
			
 
				+}
			
 
				+
			
 
				+// Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate.
			
 
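				+/// Insert the low <c><i>bits</i></c> bits of <c><i>ins</i></c> into <c><i>src</i></c> at bit offset 0 via the GLSL built-in.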
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
			
 
				+{
			
 
				+    return bitfieldInsert(src, ins, 0, FfxInt32(bits));
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return mix(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return mix(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
			
 
				+{
			
 
				+    return mix(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return mix(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
			
 
				+{
			
 
				+    return mix(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return mix(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
			
 
				+{
			
 
				+    return mix(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single V_MAX3_F32 operation on
			
 
				+/// GCN or RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
			
 
				+/// GCN or RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
			
 
				+/// GCN or RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
			
 
				+/// GCN or RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
			
 
				+/// GCN or RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
			
 
				+/// GCN or RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on
			
 
				+/// GCN and RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
			
 
				+{
			
 
				+    // Reduce y and z first, then fold x into the partial result.
			
 
				+    FfxUInt32 minOfYZ = min(y, z);
			
 
				+    return min(x, minOfYZ);
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
			
 
				+{
			
 
				+    // Reduce y and z first, then fold x into the partial result.
			
 
				+    FfxUInt32x2 minOfYZ = min(y, z);
			
 
				+    return min(x, minOfYZ);
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
			
 
				+{
			
 
				+    // Reduce y and z first, then fold x into the partial result.
			
 
				+    FfxUInt32x3 minOfYZ = min(y, z);
			
 
				+    return min(x, minOfYZ);
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on
			
 
				+/// GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
			
 
				+{
			
 
				+    // Reduce y and z first, then fold x into the partial result.
			
 
				+    FfxUInt32x4 minOfYZ = min(y, z);
			
 
				+    return min(x, minOfYZ);
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal of a value.
			
 
				+///
			
 
				+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal value of <c><i>x</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 rcp(FfxFloat32 x)
			
 
				+{
			
 
				+    // The numerator is built as an FfxFloat32 before the division.
			
 
				+    FfxFloat32 one = FfxFloat32(1.0);
			
 
				+    return one / x;
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal of a value.
			
 
				+///
			
 
				+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal value of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 rcp(FfxFloat32x2 x)
			
 
				+{
			
 
				+    // Divide the value produced by ffxBroadcast2(1.0) by x.
			
 
				+    FfxFloat32x2 reciprocal = ffxBroadcast2(1.0) / x;
			
 
				+    return reciprocal;
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal of a value.
			
 
				+///
			
 
				+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal value of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 rcp(FfxFloat32x3 x)
			
 
				+{
			
 
				+    // Divide the value produced by ffxBroadcast3(1.0) by x.
			
 
				+    FfxFloat32x3 reciprocal = ffxBroadcast3(1.0) / x;
			
 
				+    return reciprocal;
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal of a value.
			
 
				+///
			
 
				+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal value of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 rcp(FfxFloat32x4 x)
			
 
				+{
			
 
				+    // Divide the value produced by ffxBroadcast4(1.0) by x.
			
 
				+    FfxFloat32x4 reciprocal = ffxBroadcast4(1.0) / x;
			
 
				+    return reciprocal;
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal square root of a value.
			
 
				+///
			
 
				+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal square root value of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 rsqrt(FfxFloat32 x)
			
 
				+{
			
 
				+    // 1.0 divided by ffxSqrt(x), kept as a single division.
			
 
				+    FfxFloat32 inverseRoot = FfxFloat32(1.0) / ffxSqrt(x);
			
 
				+    return inverseRoot;
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal square root of a value.
			
 
				+///
			
 
				+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal square root value of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 rsqrt(FfxFloat32x2 x)
			
 
				+{
			
 
				+    // ffxBroadcast2(1.0) divided by ffxSqrt(x), kept as a single division.
			
 
				+    FfxFloat32x2 inverseRoot = ffxBroadcast2(1.0) / ffxSqrt(x);
			
 
				+    return inverseRoot;
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal square root of a value.
			
 
				+///
			
 
				+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal square root value of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 rsqrt(FfxFloat32x3 x)
			
 
				+{
			
 
				+    // ffxBroadcast3(1.0) divided by ffxSqrt(x), kept as a single division.
			
 
				+    FfxFloat32x3 inverseRoot = ffxBroadcast3(1.0) / ffxSqrt(x);
			
 
				+    return inverseRoot;
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal square root of a value.
			
 
				+///
			
 
				+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The reciprocal square root value of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 rsqrt(FfxFloat32x4 x)
			
 
				+{
			
 
				+    // ffxBroadcast4(1.0) divided by ffxSqrt(x), kept as a single division.
			
 
				+    FfxFloat32x4 inverseRoot = ffxBroadcast4(1.0) / ffxSqrt(x);
			
 
				+    return inverseRoot;
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] x               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 ffxSaturate(FfxFloat32 x)
			
 
				+{
			
 
				+    // Bounds are named so the call reads as clamp(value, low, high).
			
 
				+    FfxFloat32 lowerBound = FfxFloat32(0.0);
			
 
				+    FfxFloat32 upperBound = FfxFloat32(1.0);
			
 
				+    return clamp(x, lowerBound, upperBound);
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] x               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
			
 
				+{
			
 
				+    // Clamp between the values ffxBroadcast2 yields for 0.0 and 1.0.
			
 
				+    FfxFloat32x2 saturated = clamp(x, ffxBroadcast2(0.0), ffxBroadcast2(1.0));
			
 
				+    return saturated;
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] x               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
			
 
				+{
			
 
				+    // Clamp between the values ffxBroadcast3 yields for 0.0 and 1.0.
			
 
				+    FfxFloat32x3 saturated = clamp(x, ffxBroadcast3(0.0), ffxBroadcast3(1.0));
			
 
				+    return saturated;
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] x               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
			
 
				+{
			
 
				+    // Clamp between the values ffxBroadcast4 yields for 0.0 and 1.0.
			
 
				+    FfxFloat32x4 saturated = clamp(x, ffxBroadcast4(0.0), ffxBroadcast4(1.0));
			
 
				+    return saturated;
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the built-in GLSL function.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
			
 
				+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
			
 
				+/// function.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the fractional part from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32 ffxFract(FfxFloat32 x)
			
 
				+{
			
 
				+    // Thin wrapper over the GLSL fract() built-in.
			
 
				+    FfxFloat32 fractionalPart = fract(x);
			
 
				+    return fractionalPart;
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the built-in GLSL function.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
			
 
				+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
			
 
				+/// function.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the fractional part from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x2 ffxFract(FfxFloat32x2 x)
			
 
				+{
			
 
				+    // Thin wrapper over the GLSL fract() built-in.
			
 
				+    FfxFloat32x2 fractionalPart = fract(x);
			
 
				+    return fractionalPart;
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the built-in GLSL function.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
			
 
				+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
			
 
				+/// function.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the fractional part from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x3 ffxFract(FfxFloat32x3 x)
			
 
				+{
			
 
				+    // Thin wrapper over the GLSL fract() built-in.
			
 
				+    FfxFloat32x3 fractionalPart = fract(x);
			
 
				+    return fractionalPart;
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the built-in GLSL function.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
			
 
				+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
			
 
				+/// function.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the fractional part from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GLSL
			
 
				+FfxFloat32x4 ffxFract(FfxFloat32x4 x)
			
 
				+{
			
 
				+    // Thin wrapper over the GLSL fract() built-in.
			
 
				+    FfxFloat32x4 fractionalPart = fract(x);
			
 
				+    return fractionalPart;
			
 
				+}
			
 
				+
			
 
				+FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
			
 
				+{
			
 
				+    // Both operands are converted to FfxInt32 so the shift happens on that type
			
 
				+    // (presumably signed, giving an arithmetic shift - confirm), then converted back.
			
 
				+    FfxInt32 signedValue = FfxInt32(a);
			
 
				+    FfxInt32 shiftAmount = FfxInt32(b);
			
 
				+    return FfxUInt32(signedValue >> shiftAmount);
			
 
				+}
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+
			
 
				+// Expands to unpackFloat2x16 applied to x converted to FfxUInt32.
			
 
				+#define FFX_UINT32_TO_FLOAT16X2(x) unpackFloat2x16(FfxUInt32(x))
			
 
				+
			
 
				+FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
			
 
				+{
			
 
				+    // x.x supplies the first two components, x.y the last two.
			
 
				+    FfxFloat16x2 lowPair = unpackFloat2x16(x.x);
			
 
				+    FfxFloat16x2 highPair = unpackFloat2x16(x.y);
			
 
				+    return FfxFloat16x4(lowPair, highPair);
			
 
				+}
			
 
				+#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
			
 
				+// 32-bit to 16-bit unsigned unpack macros: a single value goes through unpackUint2x16,
			
 
				+// a pair goes through pack64 followed by unpackUint4x16.
			
 
				+#define FFX_UINT32_TO_UINT16X2(x) unpackUint2x16(FfxUInt32(x))
			
 
				+#define FFX_UINT32X2_TO_UINT16X4(x) unpackUint4x16(pack64(FfxUInt32x2(x)))
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// Expands to packFloat2x16 applied to x converted to FfxFloat16x2.
			
 
				+#define FFX_FLOAT16X2_TO_UINT32(x) packFloat2x16(FfxFloat16x2(x))
			
 
				+FfxUInt32x2 ffxFloat16x4ToUint32x2(FfxFloat16x4 x)
			
 
				+{
			
 
				+    // x.xy is packed into the first component, x.zw into the second.
			
 
				+    FfxUInt32 lowBits = packFloat2x16(x.xy);
			
 
				+    FfxUInt32 highBits = packFloat2x16(x.zw);
			
 
				+    return FfxUInt32x2(lowBits, highBits);
			
 
				+}
			
 
				+#define FFX_FLOAT16X4_TO_UINT32X2(x) ffxFloat16x4ToUint32x2(FfxFloat16x4(x))
			
 
				+// 16-bit to 32-bit unsigned pack macros: a pair goes through packUint2x16,
			
 
				+// four values go through packUint4x16 followed by unpack32.
			
 
				+#define FFX_UINT16X2_TO_UINT32(x) packUint2x16(FfxUInt16x2(x))
			
 
				+#define FFX_UINT16X4_TO_UINT32X2(x) unpack32(packUint4x16(FfxUInt16x4(x)))
			
 
				+//==============================================================================================================================
			
 
				+// FFX_TO_UINT16*: pass a half-float scalar or vector through halfBitsToUint16
			
 
				+// (the name suggests a bit-level reinterpretation - confirm against the enabled extension).
			
 
				+#define FFX_TO_UINT16(x) halfBitsToUint16(FfxFloat16(x))
			
 
				+#define FFX_TO_UINT16X2(x) halfBitsToUint16(FfxFloat16x2(x))
			
 
				+#define FFX_TO_UINT16X3(x) halfBitsToUint16(FfxFloat16x3(x))
			
 
				+#define FFX_TO_UINT16X4(x) halfBitsToUint16(FfxFloat16x4(x))
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// FFX_TO_FLOAT16*: pass a 16-bit unsigned scalar or vector through uint16BitsToHalf.
			
 
				+#define FFX_TO_FLOAT16(x) uint16BitsToHalf(FfxUInt16(x))
			
 
				+#define FFX_TO_FLOAT16X2(x) uint16BitsToHalf(FfxUInt16x2(x))
			
 
				+#define FFX_TO_FLOAT16X3(x) uint16BitsToHalf(FfxUInt16x3(x))
			
 
				+#define FFX_TO_FLOAT16X4(x) uint16BitsToHalf(FfxUInt16x4(x))
			
 
				+//==============================================================================================================================
			
 
				+FfxFloat16 ffxBroadcastFloat16(FfxFloat16 a)
			
 
				+{
			
 
				+    // Scalar case: the argument is passed through the FfxFloat16 constructor.
			
 
				+    FfxFloat16 scalarCopy = FfxFloat16(a);
			
 
				+    return scalarCopy;
			
 
				+}
			
 
				+FfxFloat16x2 ffxBroadcastFloat16x2(FfxFloat16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxFloat16x2(a);
			
 
				+}
			
 
				+FfxFloat16x3 ffxBroadcastFloat16x3(FfxFloat16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxFloat16x3(a);
			
 
				+}
			
 
				+FfxFloat16x4 ffxBroadcastFloat16x4(FfxFloat16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxFloat16x4(a);
			
 
				+}
			
 
				+// Macro forms of the half-float broadcast: the argument is converted to FfxFloat16 and,
			
 
				+// for the X2/X3/X4 variants, handed as the only argument to the vector constructor.
			
 
				+#define FFX_BROADCAST_FLOAT16(a)   FfxFloat16(a)
			
 
				+#define FFX_BROADCAST_FLOAT16X2(a) FfxFloat16x2(FfxFloat16(a))
			
 
				+#define FFX_BROADCAST_FLOAT16X3(a) FfxFloat16x3(FfxFloat16(a))
			
 
				+#define FFX_BROADCAST_FLOAT16X4(a) FfxFloat16x4(FfxFloat16(a))
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FfxInt16 ffxBroadcastInt16(FfxInt16 a)
			
 
				+{
			
 
				+    // Scalar case: the argument is passed through the FfxInt16 constructor.
			
 
				+    FfxInt16 scalarCopy = FfxInt16(a);
			
 
				+    return scalarCopy;
			
 
				+}
			
 
				+FfxInt16x2 ffxBroadcastInt16x2(FfxInt16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxInt16x2(a);
			
 
				+}
			
 
				+FfxInt16x3 ffxBroadcastInt16x3(FfxInt16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxInt16x3(a);
			
 
				+}
			
 
				+FfxInt16x4 ffxBroadcastInt16x4(FfxInt16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxInt16x4(a);
			
 
				+}
			
 
				+// Macro forms of the signed 16-bit broadcast: the argument is converted to FfxInt16 and,
			
 
				+// for the X2/X3/X4 variants, handed as the only argument to the vector constructor.
			
 
				+#define FFX_BROADCAST_INT16(a)   FfxInt16(a)
			
 
				+#define FFX_BROADCAST_INT16X2(a) FfxInt16x2(FfxInt16(a))
			
 
				+#define FFX_BROADCAST_INT16X3(a) FfxInt16x3(FfxInt16(a))
			
 
				+#define FFX_BROADCAST_INT16X4(a) FfxInt16x4(FfxInt16(a))
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FfxUInt16 ffxBroadcastUInt16(FfxUInt16 a)
			
 
				+{
			
 
				+    // Scalar case: the argument is passed through the FfxUInt16 constructor.
			
 
				+    FfxUInt16 scalarCopy = FfxUInt16(a);
			
 
				+    return scalarCopy;
			
 
				+}
			
 
				+FfxUInt16x2 ffxBroadcastUInt16x2(FfxUInt16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxUInt16x2(a);
			
 
				+}
			
 
				+FfxUInt16x3 ffxBroadcastUInt16x3(FfxUInt16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxUInt16x3(a);
			
 
				+}
			
 
				+FfxUInt16x4 ffxBroadcastUInt16x4(FfxUInt16 a)
			
 
				+{
			
 
				+    // A single-scalar vector constructor replicates a into every component.
			
 
				+    return FfxUInt16x4(a);
			
 
				+}
			
 
				+// Macro forms of the unsigned 16-bit broadcast: the argument is converted to FfxUInt16 and,
			
 
				+// for the X2/X3/X4 variants, handed as the only argument to the vector constructor.
			
 
				+#define FFX_BROADCAST_UINT16(a)   FfxUInt16(a)
			
 
				+#define FFX_BROADCAST_UINT16X2(a) FfxUInt16x2(FfxUInt16(a))
			
 
				+#define FFX_BROADCAST_UINT16X3(a) FfxUInt16x3(FfxUInt16(a))
			
 
				+#define FFX_BROADCAST_UINT16X4(a) FfxUInt16x4(FfxUInt16(a))
			
 
				+//==============================================================================================================================
			
 
				+// ffxAbsHalf: abs() is applied after converting the operand to the matching
			
 
				+// FfxInt16 type; the result is converted back to the unsigned type.
			
 
				+FfxUInt16 ffxAbsHalf(FfxUInt16 a)
			
 
				+{
			
 
				+    FfxInt16 signedValue = FfxInt16(a);
			
 
				+    return FfxUInt16(abs(signedValue));
			
 
				+}
			
 
				+FfxUInt16x2 ffxAbsHalf(FfxUInt16x2 a)
			
 
				+{
			
 
				+    FfxInt16x2 signedValue = FfxInt16x2(a);
			
 
				+    return FfxUInt16x2(abs(signedValue));
			
 
				+}
			
 
				+FfxUInt16x3 ffxAbsHalf(FfxUInt16x3 a)
			
 
				+{
			
 
				+    FfxInt16x3 signedValue = FfxInt16x3(a);
			
 
				+    return FfxUInt16x3(abs(signedValue));
			
 
				+}
			
 
				+FfxUInt16x4 ffxAbsHalf(FfxUInt16x4 a)
			
 
				+{
			
 
				+    FfxInt16x4 signedValue = FfxInt16x4(a);
			
 
				+    return FfxUInt16x4(abs(signedValue));
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// ffxClampHalf: forwards to clamp() with n as the lower and m as the upper argument.
			
 
				+FfxFloat16 ffxClampHalf(FfxFloat16 x, FfxFloat16 n, FfxFloat16 m)
			
 
				+{
			
 
				+    FfxFloat16 clamped = clamp(x, n, m);
			
 
				+    return clamped;
			
 
				+}
			
 
				+FfxFloat16x2 ffxClampHalf(FfxFloat16x2 x, FfxFloat16x2 n, FfxFloat16x2 m)
			
 
				+{
			
 
				+    FfxFloat16x2 clamped = clamp(x, n, m);
			
 
				+    return clamped;
			
 
				+}
			
 
				+FfxFloat16x3 ffxClampHalf(FfxFloat16x3 x, FfxFloat16x3 n, FfxFloat16x3 m)
			
 
				+{
			
 
				+    FfxFloat16x3 clamped = clamp(x, n, m);
			
 
				+    return clamped;
			
 
				+}
			
 
				+FfxFloat16x4 ffxClampHalf(FfxFloat16x4 x, FfxFloat16x4 n, FfxFloat16x4 m)
			
 
				+{
			
 
				+    FfxFloat16x4 clamped = clamp(x, n, m);
			
 
				+    return clamped;
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// ffxFract (half precision): thin wrappers over the fract() built-in.
			
 
				+FfxFloat16 ffxFract(FfxFloat16 x)
			
 
				+{
			
 
				+    FfxFloat16 fractionalPart = fract(x);
			
 
				+    return fractionalPart;
			
 
				+}
			
 
				+FfxFloat16x2 ffxFract(FfxFloat16x2 x)
			
 
				+{
			
 
				+    FfxFloat16x2 fractionalPart = fract(x);
			
 
				+    return fractionalPart;
			
 
				+}
			
 
				+FfxFloat16x3 ffxFract(FfxFloat16x3 x)
			
 
				+{
			
 
				+    FfxFloat16x3 fractionalPart = fract(x);
			
 
				+    return fractionalPart;
			
 
				+}
			
 
				+FfxFloat16x4 ffxFract(FfxFloat16x4 x)
			
 
				+{
			
 
				+    FfxFloat16x4 fractionalPart = fract(x);
			
 
				+    return fractionalPart;
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// ffxLerp (half precision): forwards to mix(x, y, a). Vector overloads exist with
			
 
				+// either a scalar or a per-component weight a.
			
 
				+FfxFloat16 ffxLerp(FfxFloat16 x, FfxFloat16 y, FfxFloat16 a)
			
 
				+{
			
 
				+    FfxFloat16 blended = mix(x, y, a);
			
 
				+    return blended;
			
 
				+}
			
 
				+FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16 a)
			
 
				+{
			
 
				+    FfxFloat16x2 blended = mix(x, y, a);
			
 
				+    return blended;
			
 
				+}
			
 
				+FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 a)
			
 
				+{
			
 
				+    FfxFloat16x2 blended = mix(x, y, a);
			
 
				+    return blended;
			
 
				+}
			
 
				+FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 a)
			
 
				+{
			
 
				+    FfxFloat16x3 blended = mix(x, y, a);
			
 
				+    return blended;
			
 
				+}
			
 
				+FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16 a)
			
 
				+{
			
 
				+    FfxFloat16x3 blended = mix(x, y, a);
			
 
				+    return blended;
			
 
				+}
			
 
				+FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16 a)
			
 
				+{
			
 
				+    FfxFloat16x4 blended = mix(x, y, a);
			
 
				+    return blended;
			
 
				+}
			
 
				+FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a)
			
 
				+{
			
 
				+    FfxFloat16x4 blended = mix(x, y, a);
			
 
				+    return blended;
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// No packed version of ffxMed3.
			
 
				+// ffxMed3Half: median of three, computed as max(min(x, y), min(max(x, y), z)).
			
 
				+FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
			
 
				+{
			
 
				+    FfxFloat16 lowerOfXY = min(x, y);
			
 
				+    FfxFloat16 upperOfXY = max(x, y);
			
 
				+    return max(lowerOfXY, min(upperOfXY, z));
			
 
				+}
			
 
				+FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
			
 
				+{
			
 
				+    FfxFloat16x2 lowerOfXY = min(x, y);
			
 
				+    FfxFloat16x2 upperOfXY = max(x, y);
			
 
				+    return max(lowerOfXY, min(upperOfXY, z));
			
 
				+}
			
 
				+FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
			
 
				+{
			
 
				+    FfxFloat16x3 lowerOfXY = min(x, y);
			
 
				+    FfxFloat16x3 upperOfXY = max(x, y);
			
 
				+    return max(lowerOfXY, min(upperOfXY, z));
			
 
				+}
			
 
				+FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
			
 
				+{
			
 
				+    FfxFloat16x4 lowerOfXY = min(x, y);
			
 
				+    FfxFloat16x4 upperOfXY = max(x, y);
			
 
				+    return max(lowerOfXY, min(upperOfXY, z));
			
 
				+}
			
 
				+FfxInt16 ffxMed3Half(FfxInt16 x, FfxInt16 y, FfxInt16 z)
			
 
				+{
			
 
				+    FfxInt16 lowerOfXY = min(x, y);
			
 
				+    FfxInt16 upperOfXY = max(x, y);
			
 
				+    return max(lowerOfXY, min(upperOfXY, z));
			
 
				+}
			
 
				+FfxInt16x2 ffxMed3Half(FfxInt16x2 x, FfxInt16x2 y, FfxInt16x2 z)
			
 
				+{
			
 
				+    FfxInt16x2 lowerOfXY = min(x, y);
			
 
				+    FfxInt16x2 upperOfXY = max(x, y);
			
 
				+    return max(lowerOfXY, min(upperOfXY, z));
			
 
				+}
			
 
				+FfxInt16x3 ffxMed3Half(FfxInt16x3 x, FfxInt16x3 y, FfxInt16x3 z)
			
 
				+{
			
 
				+    FfxInt16x3 lowerOfXY = min(x, y);
			
 
				+    FfxInt16x3 upperOfXY = max(x, y);
			
 
				+    return max(lowerOfXY, min(upperOfXY, z));
			
 
				+}
			
 
				+FfxInt16x4 ffxMed3Half(FfxInt16x4 x, FfxInt16x4 y, FfxInt16x4 z)
			
 
				+{
			
 
				+    FfxInt16x4 lowerOfXY = min(x, y);
			
 
				+    FfxInt16x4 upperOfXY = max(x, y);
			
 
				+    return max(lowerOfXY, min(upperOfXY, z));
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// No packed version of ffxMax3.
			
 
				+// ffxMax3Half: maximum of three values; y and z are reduced first, then x.
			
 
				+FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
			
 
				+{
			
 
				+    FfxFloat16 maxOfYZ = max(y, z);
			
 
				+    return max(x, maxOfYZ);
			
 
				+}
			
 
				+FfxFloat16x2 ffxMax3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
			
 
				+{
			
 
				+    FfxFloat16x2 maxOfYZ = max(y, z);
			
 
				+    return max(x, maxOfYZ);
			
 
				+}
			
 
				+FfxFloat16x3 ffxMax3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
			
 
				+{
			
 
				+    FfxFloat16x3 maxOfYZ = max(y, z);
			
 
				+    return max(x, maxOfYZ);
			
 
				+}
			
 
				+FfxFloat16x4 ffxMax3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
			
 
				+{
			
 
				+    FfxFloat16x4 maxOfYZ = max(y, z);
			
 
				+    return max(x, maxOfYZ);
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// No packed version of ffxMin3.
			
 
				+// ffxMin3Half: minimum of three values; y and z are reduced first, then x.
			
 
				+FfxFloat16 ffxMin3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
			
 
				+{
			
 
				+    FfxFloat16 minOfYZ = min(y, z);
			
 
				+    return min(x, minOfYZ);
			
 
				+}
			
 
				+FfxFloat16x2 ffxMin3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
			
 
				+{
			
 
				+    FfxFloat16x2 minOfYZ = min(y, z);
			
 
				+    return min(x, minOfYZ);
			
 
				+}
			
 
				+FfxFloat16x3 ffxMin3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
			
 
				+{
			
 
				+    FfxFloat16x3 minOfYZ = min(y, z);
			
 
				+    return min(x, minOfYZ);
			
 
				+}
			
 
				+FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
			
 
				+{
			
 
				+    FfxFloat16x4 minOfYZ = min(y, z);
			
 
				+    return min(x, minOfYZ);
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// ffxReciprocalHalf: a half-precision 1.0 (built with the FFX_BROADCAST_FLOAT16* macros)
			
 
				+// divided by x.
			
 
				+FfxFloat16 ffxReciprocalHalf(FfxFloat16 x)
			
 
				+{
			
 
				+    FfxFloat16 one = FFX_BROADCAST_FLOAT16(1.0);
			
 
				+    return one / x;
			
 
				+}
			
 
				+FfxFloat16x2 ffxReciprocalHalf(FfxFloat16x2 x)
			
 
				+{
			
 
				+    FfxFloat16x2 ones = FFX_BROADCAST_FLOAT16X2(1.0);
			
 
				+    return ones / x;
			
 
				+}
			
 
				+FfxFloat16x3 ffxReciprocalHalf(FfxFloat16x3 x)
			
 
				+{
			
 
				+    FfxFloat16x3 ones = FFX_BROADCAST_FLOAT16X3(1.0);
			
 
				+    return ones / x;
			
 
				+}
			
 
				+FfxFloat16x4 ffxReciprocalHalf(FfxFloat16x4 x)
			
 
				+{
			
 
				+    FfxFloat16x4 ones = FFX_BROADCAST_FLOAT16X4(1.0);
			
 
				+    return ones / x;
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// ffxReciprocalSquareRootHalf: a half-precision 1.0 divided by sqrt(x).
			
 
				+FfxFloat16 ffxReciprocalSquareRootHalf(FfxFloat16 x)
			
 
				+{
			
 
				+    FfxFloat16 root = sqrt(x);
			
 
				+    return FFX_BROADCAST_FLOAT16(1.0) / root;
			
 
				+}
			
 
				+FfxFloat16x2 ffxReciprocalSquareRootHalf(FfxFloat16x2 x)
			
 
				+{
			
 
				+    FfxFloat16x2 root = sqrt(x);
			
 
				+    return FFX_BROADCAST_FLOAT16X2(1.0) / root;
			
 
				+}
			
 
				+FfxFloat16x3 ffxReciprocalSquareRootHalf(FfxFloat16x3 x)
			
 
				+{
			
 
				+    FfxFloat16x3 root = sqrt(x);
			
 
				+    return FFX_BROADCAST_FLOAT16X3(1.0) / root;
			
 
				+}
			
 
				+FfxFloat16x4 ffxReciprocalSquareRootHalf(FfxFloat16x4 x)
			
 
				+{
			
 
				+    FfxFloat16x4 root = sqrt(x);
			
 
				+    return FFX_BROADCAST_FLOAT16X4(1.0) / root;
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// ffxSaturate (half precision): clamp x between half-precision 0.0 and 1.0.
			
 
				+FfxFloat16 ffxSaturate(FfxFloat16 x)
			
 
				+{
			
 
				+    FfxFloat16 lowerBound = FFX_BROADCAST_FLOAT16(0.0);
			
 
				+    FfxFloat16 upperBound = FFX_BROADCAST_FLOAT16(1.0);
			
 
				+    return clamp(x, lowerBound, upperBound);
			
 
				+}
			
 
				+FfxFloat16x2 ffxSaturate(FfxFloat16x2 x)
			
 
				+{
			
 
				+    FfxFloat16x2 lowerBound = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    FfxFloat16x2 upperBound = FFX_BROADCAST_FLOAT16X2(1.0);
			
 
				+    return clamp(x, lowerBound, upperBound);
			
 
				+}
			
 
				+FfxFloat16x3 ffxSaturate(FfxFloat16x3 x)
			
 
				+{
			
 
				+    FfxFloat16x3 lowerBound = FFX_BROADCAST_FLOAT16X3(0.0);
			
 
				+    FfxFloat16x3 upperBound = FFX_BROADCAST_FLOAT16X3(1.0);
			
 
				+    return clamp(x, lowerBound, upperBound);
			
 
				+}
			
 
				+FfxFloat16x4 ffxSaturate(FfxFloat16x4 x)
			
 
				+{
			
 
				+    FfxFloat16x4 lowerBound = FFX_BROADCAST_FLOAT16X4(0.0);
			
 
				+    FfxFloat16x4 upperBound = FFX_BROADCAST_FLOAT16X4(1.0);
			
 
				+    return clamp(x, lowerBound, upperBound);
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// ffxBitShiftRightHalf: both operands are converted to the matching FfxInt16 type,
			
 
				+// shifted right there, and the result is converted back to the unsigned type.
			
 
				+FfxUInt16 ffxBitShiftRightHalf(FfxUInt16 a, FfxUInt16 b)
			
 
				+{
			
 
				+    FfxInt16 signedValue = FfxInt16(a);
			
 
				+    FfxInt16 shiftAmount = FfxInt16(b);
			
 
				+    return FfxUInt16(signedValue >> shiftAmount);
			
 
				+}
			
 
				+FfxUInt16x2 ffxBitShiftRightHalf(FfxUInt16x2 a, FfxUInt16x2 b)
			
 
				+{
			
 
				+    FfxInt16x2 signedValue = FfxInt16x2(a);
			
 
				+    FfxInt16x2 shiftAmount = FfxInt16x2(b);
			
 
				+    return FfxUInt16x2(signedValue >> shiftAmount);
			
 
				+}
			
 
				+FfxUInt16x3 ffxBitShiftRightHalf(FfxUInt16x3 a, FfxUInt16x3 b)
			
 
				+{
			
 
				+    FfxInt16x3 signedValue = FfxInt16x3(a);
			
 
				+    FfxInt16x3 shiftAmount = FfxInt16x3(b);
			
 
				+    return FfxUInt16x3(signedValue >> shiftAmount);
			
 
				+}
			
 
				+FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b)
			
 
				+{
			
 
				+    FfxInt16x4 signedValue = FfxInt16x4(a);
			
 
				+    FfxInt16x4 shiftAmount = FfxInt16x4(b);
			
 
				+    return FfxUInt16x4(signedValue >> shiftAmount);
			
 
				+}
			
 
				+#endif // FFX_HALF
			
 
				+
			
 
				+#if defined(FFX_WAVE)
			
 
				+// Where 'x' must be a compile time literal.
			
 
				+FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Forwards v and the mask x to subgroupShuffleXor.
			
 
				+    FfxFloat32 shuffled = subgroupShuffleXor(v, x);
			
 
				+    return shuffled;
			
 
				+}
			
 
				+FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Forwards v and the mask x to subgroupShuffleXor.
			
 
				+    FfxFloat32x2 shuffled = subgroupShuffleXor(v, x);
			
 
				+    return shuffled;
			
 
				+}
			
 
				+FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Forwards v and the mask x to subgroupShuffleXor.
			
 
				+    FfxFloat32x3 shuffled = subgroupShuffleXor(v, x);
			
 
				+    return shuffled;
			
 
				+}
			
 
				+FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Forwards v and the mask x to subgroupShuffleXor.
			
 
				+    FfxFloat32x4 shuffled = subgroupShuffleXor(v, x);
			
 
				+    return shuffled;
			
 
				+}
			
 
				+FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Forwards v and the mask x to subgroupShuffleXor.
			
 
				+    FfxUInt32 shuffled = subgroupShuffleXor(v, x);
			
 
				+    return shuffled;
			
 
				+}
			
 
				+FfxUInt32x2 AWaveXorU2(FfxUInt32x2 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Forwards v and the mask x to subgroupShuffleXor.
			
 
				+    FfxUInt32x2 shuffled = subgroupShuffleXor(v, x);
			
 
				+    return shuffled;
			
 
				+}
			
 
				+FfxUInt32x3 AWaveXorU3(FfxUInt32x3 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Forwards v and the mask x to subgroupShuffleXor.
			
 
				+    FfxUInt32x3 shuffled = subgroupShuffleXor(v, x);
			
 
				+    return shuffled;
			
 
				+}
			
 
				+FfxUInt32x4 AWaveXorU4(FfxUInt32x4 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Forwards v and the mask x to subgroupShuffleXor.
			
 
				+    FfxUInt32x4 shuffled = subgroupShuffleXor(v, x);
			
 
				+    return shuffled;
			
 
				+}
			
 
				+
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+#if FFX_HALF
			
 
				+FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Pack to 32 bits, shuffle across the subgroup, then unpack again.
			
 
				+    FfxUInt32 packedBits = FFX_FLOAT16X2_TO_UINT32(v);
			
 
				+    return FFX_UINT32_TO_FLOAT16X2(subgroupShuffleXor(packedBits, x));
			
 
				+}
			
 
				+FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Pack to a 32-bit pair, shuffle across the subgroup, then unpack again.
			
 
				+    FfxUInt32x2 packedBits = FFX_FLOAT16X4_TO_UINT32X2(v);
			
 
				+    return FFX_UINT32X2_TO_FLOAT16X4(subgroupShuffleXor(packedBits, x));
			
 
				+}
			
 
				+FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Pack to 32 bits, shuffle across the subgroup, then unpack again.
			
 
				+    FfxUInt32 packedBits = FFX_UINT16X2_TO_UINT32(v);
			
 
				+    return FFX_UINT32_TO_UINT16X2(subgroupShuffleXor(packedBits, x));
			
 
				+}
			
 
				+FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
			
 
				+{
			
 
				+    // Pack to a 32-bit pair, shuffle across the subgroup, then unpack again.
			
 
				+    FfxUInt32x2 packedBits = FFX_UINT16X4_TO_UINT32X2(v);
			
 
				+    return FFX_UINT32X2_TO_UINT16X4(subgroupShuffleXor(packedBits, x));
			
 
				+}
			
 
				+#endif // FFX_HALF
			
 
				+#endif // #if defined(FFX_WAVE)
			
--- a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
@@ -0,0 +1,2784 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+/// A define for a true value in a boolean expression.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_TRUE (true)
			
 
				+
			
 
				+/// A define for a false value in a boolean expression.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_FALSE (false)
			
 
				+
			
 
				+/// A define value for positive infinity.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u)
			
 
				+
			
 
				+/// A define value for negative infinity.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u)
			
 
				+
			
 
				+/// A define value for PI.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_PI  (3.14159)
			
 
				+
			
 
				+
			
 
				+/// Compute the reciprocal of <c><i>value</i></c>.
			
 
				+///
			
 
				+/// @param [in] value               The value to compute the reciprocal of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The 1 / <c><i>value</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxReciprocal(FfxFloat32 value)
			
 
				+{
			
 
				+    return rcp(value);
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal of <c><i>value</i></c>.
			
 
				+///
			
 
				+/// @param [in] value               The value to compute the reciprocal of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The 1 / <c><i>value</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value)
			
 
				+{
			
 
				+    return rcp(value);
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal of <c><i>value</i></c>.
			
 
				+///
			
 
				+/// @param [in] value               The value to compute the reciprocal of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The 1 / <c><i>value</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value)
			
 
				+{
			
 
				+    return rcp(value);
			
 
				+}
			
 
				+
			
 
				+/// Compute the reciprocal of <c><i>value</i></c>.
			
 
				+///
			
 
				+/// @param [in] value               The value to compute the reciprocal of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The 1 / <c><i>value</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value)
			
 
				+{
			
 
				+    return rcp(value);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the value of the first parameter raised to the power of the second.
			
 
				+///
			
 
				+/// @param [in] x                   The value to raise to the power y.
			
 
				+/// @param [in] y                   The power to which to raise x.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of the first parameter raised to the power of the second.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y)
			
 
				+{
			
 
				+    return pow(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the value of the first parameter raised to the power of the second.
			
 
				+///
			
 
				+/// @param [in] x                   The value to raise to the power y.
			
 
				+/// @param [in] y                   The power to which to raise x.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of the first parameter raised to the power of the second.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y)
			
 
				+{
			
 
				+    return pow(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the value of the first parameter raised to the power of the second.
			
 
				+///
			
 
				+/// @param [in] x                   The value to raise to the power y.
			
 
				+/// @param [in] y                   The power to which to raise x.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of the first parameter raised to the power of the second.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y)
			
 
				+{
			
 
				+    return pow(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the value of the first parameter raised to the power of the second.
			
 
				+///
			
 
				+/// @param [in] x                   The value to raise to the power y.
			
 
				+/// @param [in] y                   The power to which to raise x.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of the first parameter raised to the power of the second.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y)
			
 
				+{
			
 
				+    return pow(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxSqrt(FfxFloat32 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxSqrt(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxSqrt(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxSqrt(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+/// Copy the sign bit from 's' to positive 'd'.
			
 
				+///
			
 
				+/// @param [in] d                   The value to copy the sign bit into.
			
 
				+/// @param [in] s                   The value to copy the sign bit from.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u)));
			
 
				+}
			
 
				+
			
 
				+/// Copy the sign bit from 's' to positive 'd'.
			
 
				+///
			
 
				+/// @param [in] d                   The value to copy the sign bit into.
			
 
				+/// @param [in] s                   The value to copy the sign bit from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u)));
			
 
				+}
			
 
				+
			
 
				+/// Copy the sign bit from 's' to positive 'd'.
			
 
				+///
			
 
				+/// @param [in] d                   The value to copy the sign bit into.
			
 
				+/// @param [in] s                   The value to copy the sign bit from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u)));
			
 
				+}
			
 
				+
			
 
				+/// Copy the sign bit from 's' to positive 'd'.
			
 
				+///
			
 
				+/// @param [in] d                   The value to copy the sign bit into.
			
 
				+/// @param [in] s                   The value to copy the sign bit from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u)));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m >= 0  := 0
			
 
				+///     m < 0   := 1
			
 
				+///
			
 
				+/// Uses the following useful floating point logic,
			
 
				+///     saturate(+a*(-INF)==-INF) := 0
			
 
				+///     saturate( 0*(-INF)== NaN) := 0
			
 
				+///     saturate(-a*(-INF)==+INF) := 1
			
 
				+/// 
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+/// 
			
 
				+/// @param [in] m                       The value to test against 0.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxIsSigned(FfxFloat32 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m >= 0  := 0
			
 
				+///     m < 0   := 1
			
 
				+///
			
 
				+/// Uses the following useful floating point logic,
			
 
				+///     saturate(+a*(-INF)==-INF) := 0
			
 
				+///     saturate( 0*(-INF)== NaN) := 0
			
 
				+///     saturate(-a*(-INF)==+INF) := 1
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against 0.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m >= 0  := 0
			
 
				+///     m < 0   := 1
			
 
				+///
			
 
				+/// Uses the following useful floating point logic,
			
 
				+///     saturate(+a*(-INF)==-INF) := 0
			
 
				+///     saturate( 0*(-INF)== NaN) := 0
			
 
				+///     saturate(-a*(-INF)==+INF) := 1
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against 0.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m >= 0  := 0
			
 
				+///     m < 0   := 1
			
 
				+///
			
 
				+/// Uses the following useful floating point logic,
			
 
				+///     saturate(+a*(-INF)==-INF) := 0
			
 
				+///     saturate( 0*(-INF)== NaN) := 0
			
 
				+///     saturate(-a*(-INF)==+INF) := 1
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against 0.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m > 0   := 1
			
 
				+///     m <= 0  := 0
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m > 0   := 1
			
 
				+///     m <= 0  := 0
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m > 0   := 1
			
 
				+///     m <= 0  := 0
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m > 0   := 1
			
 
				+///     m <= 0  := 0
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// Convert a 32bit floating point value to sortable integer.
			
 
				+/// 
			
 
				+///  - If sign bit=0, flip the sign bit (positives).
			
 
				+///  - If sign bit=1, flip all bits     (negatives).
			
 
				+/// 
			
 
				+/// The function has the side effects that:
			
 
				+///  - Larger integers are more positive values.
			
 
				+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
			
 
				+/// 
			
 
				+/// @param [in] value                       The floating point value to make sortable.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The sortable integer value.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value)
			
 
				+{
			
 
				+    return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
			
 
				+}
			
 
				+
			
 
				+/// Convert a sortable integer to a 32bit floating point value.
			
 
				+///
			
 
				+/// The function has the side effects that:
			
 
				+///  - If sign bit=1, flip the sign bit (positives).
			
 
				+///  - If sign bit=0, flip all bits     (negatives).
			
 
				+///
			
 
				+/// @param [in] value                       The sortable integer value to convert back.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The bit pattern of the corresponding 32bit floating point value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value)
			
 
				+{
			
 
				+    return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the square root of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent 
			
 
				+/// presentation materials:
			
 
				+/// 
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+/// 
			
 
				+/// @param [in] a               The value to calculate an approximate square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the square root, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateSqrt(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the reciprocal of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateReciprocal(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(a));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a medium-quality approximation for the reciprocal of a value.
			
 
				+/// 
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] value           The value to calculate an approximate to the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal, estimated to medium quality.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value)
			
 
				+{
			
 
				+    FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value)); // Seed estimate: fixed constant minus the input's integer representation.
			
 
				+    return b * (-b * value + FfxFloat32(2.0)); // One Newton-Raphson refinement step for 1/value: b * (2 - value * b).
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the reciprocal square root of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal square root, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(a) >> FfxUInt32(1)));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the square root of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the square root, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the reciprocal of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(a));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a medium-quality approximation for the reciprocal of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal, estimated to medium quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 a)
			
 
				+{
			
 
				+    FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(a)); // Per-component seed estimate: fixed constant minus the input's integer representation.
			
 
				+    return b * (-b * a + ffxBroadcast2(2.0f)); // One Newton-Raphson refinement step for 1/a per component: b * (2 - a * b).
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the reciprocal square root of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal square root, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast2(1u)));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the square root of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the square root, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the reciprocal of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(a));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a medium-quality approximation for the reciprocal of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal, estimated to medium quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 a)
			
 
				+{
			
 
				+    FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(a)); // Per-component seed estimate: fixed constant minus the input's integer representation.
			
 
				+    return b * (-b * a + ffxBroadcast3(2.0f)); // One Newton-Raphson refinement step for 1/a per component: b * (2 - a * b).
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the reciprocal square root of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal square root, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast3(1u)));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the square root of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the square root, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the reciprocal of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(a));
			
 
				+}
			
 
				+
			
 
				+/// Calculate a medium-quality approximation for the reciprocal of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal, estimated to medium quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 a)
			
 
				+{
			
 
				+    FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(a)); // Per-component seed estimate: fixed constant minus the input's integer representation.
			
 
				+    return b * (-b * a + ffxBroadcast4(2.0f)); // One Newton-Raphson refinement step for 1/a per component: b * (2 - a * b).
			
 
				+}
			
 
				+
			
 
				+/// Calculate a low-quality approximation for the reciprocal square root of a value.
			
 
				+///
			
 
				+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
			
 
				+/// presentation materials:
			
 
				+///
			
 
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
			
 
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
			
 
				+///
			
 
				+/// @param [in] a               The value to calculate an approximate to the reciprocal square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// An approximation of the reciprocal square root, estimated to low quality.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast4(1u)));
			
 
				+}
			
 
				+
			
 
				+/// Calculate dot product of 'a' and 'b'.
			
 
				+///
			
 
				+/// @param [in] a                   First vector input.
			
 
				+/// @param [in] b                   Second vector input.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>a</i></c> dot <c><i>b</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
			
 
				+{
			
 
				+    return dot(a, b);
			
 
				+}
			
 
				+
			
 
				+/// Calculate dot product of 'a' and 'b'.
			
 
				+///
			
 
				+/// @param [in] a                   First vector input.
			
 
				+/// @param [in] b                   Second vector input.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>a</i></c> dot <c><i>b</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
			
 
				+{
			
 
				+    return dot(a, b);
			
 
				+}
			
 
				+
			
 
				+/// Calculate dot product of 'a' and 'b'.
			
 
				+///
			
 
				+/// @param [in] a                   First vector input.
			
 
				+/// @param [in] b                   Second vector input.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>a</i></c> dot <c><i>b</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
			
 
				+{
			
 
				+    return dot(a, b);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+/// Compute an approximate conversion from PQ to Gamma2 space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear 
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between PQ and Gamma2.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into Gamma2.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a)
			
 
				+{
			
 
				+    return a * a * a * a;
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from PQ to linear space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between PQ and linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into linear.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a)
			
 
				+{
			
 
				+    return a * a * a * a * a * a * a * a;
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46));
			
 
				+}
			
 
				+
			
 
				+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a)
			
 
				+{
			
 
				+    FfxFloat32 b  = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); // Seed estimate: same expression as ffxApproximateGamma2ToPQ.
			
 
				+    FfxFloat32 b4 = b * b * b * b; // b^4, compared against the input below.
			
 
				+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); // One Newton-Raphson step on f(b) = b^4 - a, i.e. b - (b^4 - a) / (4 * b^3).
			
 
				+}
			
 
				+
			
 
				+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxSqrt(ffxSqrt(a));
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723));
			
 
				+}
			
 
				+
			
 
				+/// Compute a more accurate approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a)
			
 
				+{
			
 
				+    FfxFloat32 b  = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); // Seed estimate: same expression as ffxApproximateLinearToPQ.
			
 
				+    FfxFloat32 b8 = b * b * b * b * b * b * b * b; // b^8, compared against the input below.
			
 
				+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); // One Newton-Raphson step on f(b) = b^8 - a, i.e. b - (b^8 - a) / (8 * b^7).
			
 
				+}
			
 
				+
			
 
				+/// Compute a very accurate approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a)
			
 
				+{
			
 
				+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from PQ to Gamma2 space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between PQ and Gamma2.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into Gamma2.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return a * a * a * a;
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from PQ to linear space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between PQ and linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into linear.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return a * a * a * a * a * a * a * a;
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u));
			
 
				+}
			
 
				+
			
 
				+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a)
			
 
				+{
			
 
				+    FfxFloat32x2 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); // Per-component seed estimate: same expression as ffxApproximateGamma2ToPQ.
			
 
				+    FfxFloat32x2 b4 = b * b * b * b; // b^4 per component, compared against the input below.
			
 
				+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); // One Newton-Raphson step on f(b) = b^4 - a, i.e. b - (b^4 - a) / (4 * b^3).
			
 
				+}
			
 
				+
			
 
				+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return ffxSqrt(ffxSqrt(a));
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u));
			
 
				+}
			
 
				+
			
 
				+/// Compute a more accurate approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a)
			
 
				+{
			
 
				+    FfxFloat32x2 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); // Per-component seed estimate: same expression as ffxApproximateLinearToPQ.
			
 
				+    FfxFloat32x2 b8 = b * b * b * b * b * b * b * b; // b^8 per component, compared against the input below.
			
 
				+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); // One Newton-Raphson step on f(b) = b^8 - a, i.e. b - (b^8 - a) / (8 * b^7).
			
 
				+}
			
 
				+
			
 
				+/// Compute a very accurate approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a)
			
 
				+{
			
 
				+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from PQ to Gamma2 space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between PQ and Gamma2.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into Gamma2.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return a * a * a * a;
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from PQ to linear space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between PQ and linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into linear.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return a * a * a * a * a * a * a * a;
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u));
			
 
				+}
			
 
				+
			
 
				+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a)
			
 
				+{
			
 
				+    FfxFloat32x3 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); // Per-component seed estimate: same expression as ffxApproximateGamma2ToPQ.
			
 
				+    FfxFloat32x3 b4 = b * b * b * b; // b^4 per component, compared against the input below.
			
 
				+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); // One Newton-Raphson step on f(b) = b^4 - a, i.e. b - (b^4 - a) / (4 * b^3).
			
 
				+}
			
 
				+
			
 
				+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return ffxSqrt(ffxSqrt(a)); // Two nested square roots: a^(1/4), assuming ffxSqrt is a plain square root.
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); // Fast 8th root: shift the bits of a right by 3, add a bias constant (ffxAsUInt32/ffxAsFloat presumably reinterpret bits).
			
 
				+}
			
 
				+
			
 
				+/// Compute a more accurate approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a)
			
 
				+{
			
 
				+    FfxFloat32x3 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); // Initial fast 8th-root estimate (same bit trick as ffxApproximateLinearToPQ).
			
 
				+    FfxFloat32x3 b8 = b * b * b * b * b * b * b * b; // b^8, compared against a below.
			
 
				+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); // Equals b - (b^8 - a) / (8 b^7): one Newton-Raphson step for b^8 = a.
			
 
				+}
			
 
				+
			
 
				+/// Compute a very accurate approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a)
			
 
				+{
			
 
				+    return ffxSqrt(ffxSqrt(ffxSqrt(a))); // Three nested square roots: a^(1/8), assuming ffxSqrt is a plain square root.
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from PQ to Gamma2 space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between PQ and Gamma2.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into Gamma2.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return a * a * a * a; // a^4 per component: the "4th power" PQ-to-Gamma2 direction.
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from PQ to linear space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between PQ and linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into linear.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return a * a * a * a * a * a * a * a; // a^8 per component: inverse of the x^(1/8) PQ approximation.
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); // Fast 4th root: shift the bits of a right by 2, add a bias constant (ffxAsUInt32/ffxAsFloat presumably reinterpret bits).
			
 
				+}
			
 
				+
			
 
				+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a)
			
 
				+{
			
 
				+    FfxFloat32x4 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); // Initial fast 4th-root estimate (same bit trick as ffxApproximateGamma2ToPQ).
			
 
				+    FfxFloat32x4 b4 = b * b * b * b; // b^4, matching the FfxFloat32x3 overload; the previous 8-factor product (b^8) did not match the 4.0 divisor below.
			
 
				+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); // Equals b - (b^4 - a) / (4 b^3): one Newton-Raphson step for b^4 = a.
			
 
				+}
			
 
				+
			
 
				+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between gamma2 and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return ffxSqrt(ffxSqrt(a)); // Two nested square roots: a^(1/4), assuming ffxSqrt is a plain square root.
			
 
				+}
			
 
				+
			
 
				+/// Compute an approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); // Fast 8th root: shift the bits of a right by 3, add a bias constant (ffxAsUInt32/ffxAsFloat presumably reinterpret bits).
			
 
				+}
			
 
				+
			
 
				+/// Compute a more accurate approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a)
			
 
				+{
			
 
				+    FfxFloat32x4 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); // Initial fast 8th-root estimate (same bit trick as ffxApproximateLinearToPQ).
			
 
				+    FfxFloat32x4 b8 = b * b * b * b * b * b * b * b; // b^8, compared against a below.
			
 
				+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); // Equals b - (b^8 - a) / (8 b^7): one Newton-Raphson step for b^8 = a.
			
 
				+}
			
 
				+
			
 
				+/// Compute a very accurate approximate conversion from linear to PQ space.
			
 
				+///
			
 
				+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
			
 
				+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
			
 
				+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
			
 
				+///
			
 
				+/// @param a                    The value to convert between linear and PQ.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value <c><i>a</i></c> converted into PQ.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a)
			
 
				+{
			
 
				+    return ffxSqrt(ffxSqrt(ffxSqrt(a))); // Three nested square roots: a^(1/8), assuming ffxSqrt is a plain square root.
			
 
				+}
			
 
				+
			
 
				+// An approximation of sine.
			
 
				+//
			
 
				+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range 
			
 
				+// is {-1/4 to 1/4} representing {-1 to 1}.
			
 
				+//
			
 
				+// @param [in] value            The value to calculate approximate sine for.
			
 
				+//
			
 
				+// @returns
			
 
				+// The approximate sine of <c><i>value</i></c>.
			
 
				+FfxFloat32 ffxParabolicSin(FfxFloat32 value)
			
 
				+{
			
 
				+    return value * abs(value) - value; // Two parabola halves: zero at -1, 0 and 1, with extremes of magnitude 1/4 at -0.5 and 0.5.
			
 
				+}
			
 
				+
			
 
				+// An approximation of sine.
			
 
				+//
			
 
				+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
			
 
				+// is {-1/4 to 1/4} representing {-1 to 1}.
			
 
				+//
			
 
				+// @param [in] x                The value to calculate approximate sine for.
			
 
				+//
			
 
				+// @returns
			
 
				+// The approximate sine of <c><i>x</i></c>.
			
 
				+FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return x * abs(x) - x; // Per component: zero at -1, 0 and 1, with extremes of magnitude 1/4 at -0.5 and 0.5.
			
 
				+}
			
 
				+
			
 
				+// An approximation of cosine.
			
 
				+//
			
 
				+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
			
 
				+// is {-1/4 to 1/4} representing {-1 to 1}.
			
 
				+//
			
 
				+// @param [in] x                The value to calculate approximate cosine for.
			
 
				+//
			
 
				+// @returns
			
 
				+// The approximate cosine of <c><i>x</i></c>.
			
 
				+FfxFloat32 ffxParabolicCos(FfxFloat32 x)
			
 
				+{
			
 
				+    x = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); // Halve, offset by 0.75 and wrap (ffxFract presumably keeps the fractional part): the phase shift from sine to cosine.
			
 
				+    x = x * FfxFloat32(2.0) - FfxFloat32(1.0); // Rescale the wrapped value back to the -1..1 input range of ffxParabolicSin.
			
 
				+    return ffxParabolicSin(x); // Reuse the sine approximation on the shifted argument.
			
 
				+}
			
 
				+
			
 
				+// An approximation of cosine.
			
 
				+//
			
 
				+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
			
 
				+// is {-1/4 to 1/4} representing {-1 to 1}.
			
 
				+//
			
 
				+// @param [in] x                The value to calculate approximate cosine for.
			
 
				+//
			
 
				+// @returns
			
 
				+// The approximate cosine of <c><i>x</i></c>.
			
 
				+FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x)
			
 
				+{
			
 
				+    x = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f)); // Halve, offset by 0.75 and wrap (ffxFract presumably keeps the fractional part): the phase shift from sine to cosine.
			
 
				+    x = x * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f); // Rescale the wrapped values back to the -1..1 input range of ffxParabolicSin.
			
 
				+    return ffxParabolicSin(x); // Reuse the sine approximation on the shifted arguments.
			
 
				+}
			
 
				+
			
 
				+// An approximation of both sine and cosine.
			
 
				+//
			
 
				+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
			
 
				+// is {-1/4 to 1/4} representing {-1 to 1}.
			
 
				+//
			
 
				+// @param [in] x                The value to calculate approximate sine and cosine for.
			
 
				+//
			
 
				+// @returns
			
 
				+// A <c><i>FfxFloat32x2</i></c> containing approximations of both sine and cosine of <c><i>x</i></c>.
			
 
				+FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x)
			
 
				+{
			
 
				+    FfxFloat32 y = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); // Phase-shifted copy of x for the cosine (same remap as ffxParabolicCos).
			
 
				+    y = y * FfxFloat32(2.0) - FfxFloat32(1.0); // Rescale the wrapped value back to the -1..1 input range.
			
 
				+    return ffxParabolicSin(FfxFloat32x2(x, y)); // One two-component evaluation: first component from x (sine), second from y (cosine).
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the AND operator.
			
 
				+/// @param [in] y           The second value to be fed into the AND operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the AND operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y)
			
 
				+{
			
 
				+    return min(x, y); // With operands limited to 0 or 1, min() is their logical AND.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the AND operator.
			
 
				+/// @param [in] y           The second value to be fed into the AND operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the AND operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y)
			
 
				+{
			
 
				+    return min(x, y); // With components limited to 0 or 1, min() is their logical AND.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the AND operator.
			
 
				+/// @param [in] y           The second value to be fed into the AND operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the AND operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y)
			
 
				+{
			
 
				+    return min(x, y); // With components limited to 0 or 1, min() is their logical AND.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the AND operator.
			
 
				+/// @param [in] y           The second value to be fed into the AND operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the AND operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y)
			
 
				+{
			
 
				+    return min(x, y); // With components limited to 0 or 1, min() is their logical AND.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a single value.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x)
			
 
				+{
			
 
				+    return x ^ FfxUInt32(1); // XOR with 1 flips a 0/1 value: a logical NOT, despite the "And" in the name.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a single value.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x)
			
 
				+{
			
 
				+    return x ^ ffxBroadcast2(1u); // Per-component XOR with 1 flips 0/1 values: a logical NOT, despite the "And" in the name.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a single value.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x)
			
 
				+{
			
 
				+    return x ^ ffxBroadcast3(1u); // Per-component XOR with 1 flips 0/1 values: a logical NOT, despite the "And" in the name.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a single value.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x)
			
 
				+{
			
 
				+    return x ^ ffxBroadcast4(1u); // Per-component XOR with 1 flips 0/1 values: a logical NOT, despite the "And" in the name.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y)
			
 
				+{
			
 
				+    return max(x, y); // With operands limited to 0 or 1, max() is their logical OR.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y)
			
 
				+{
			
 
				+    return max(x, y); // With components limited to 0 or 1, max() is their logical OR.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y)
			
 
				+{
			
 
				+    return max(x, y); // With components limited to 0 or 1, max() is their logical OR.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y)
			
 
				+{
			
 
				+    return max(x, y); // With components limited to 0 or 1, max() is their logical OR.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic signed NOT operation on an FfxFloat32 value, returned as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x)
			
 
				+{
			
 
				+    return FfxUInt32(FfxFloat32(1.0) - x); // 1.0 - x inverts a 0.0/1.0 input; the result is then converted to an unsigned integer.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic signed NOT operation on FfxFloat32 values, returned as unsigned integers.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return FfxUInt32x2(ffxBroadcast2(1.0) - x); // 1.0 - x inverts 0.0/1.0 components; the result is then converted to unsigned integers.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic signed NOT operation on FfxFloat32 values, returned as unsigned integers.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return FfxUInt32x3(ffxBroadcast3(1.0) - x); // 1.0 - x inverts 0.0/1.0 components; the result is then converted to unsigned integers.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic signed NOT operation on FfxFloat32 values, returned as unsigned integers.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return FfxUInt32x4(ffxBroadcast4(1.0) - x); // 1.0 - x inverts 0.0/1.0 components; the result is then converted to unsigned integers.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two values followed by an OR operation
			
 
				+/// using the resulting value and a third value.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the AND operator.
			
 
				+/// @param [in] y           The second value to be fed into the AND operator.
			
 
				+/// @param [in] z           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the AND OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
			
 
				+{
			
 
				+    return ffxSaturate(x * y + z); // x * y is AND for 0.0/1.0 inputs; adding z and saturating is OR (assumes ffxSaturate clamps to 0..1 - TODO confirm).
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two values followed by an OR operation
			
 
				+/// using the resulting value and a third value.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the AND operator.
			
 
				+/// @param [in] y           The second value to be fed into the AND operator.
			
 
				+/// @param [in] z           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the AND OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
			
 
				+{
			
 
				+    return ffxSaturate(x * y + z); // x * y is AND for 0.0/1.0 inputs; adding z and saturating is OR (assumes ffxSaturate clamps to 0..1 - TODO confirm).
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two values followed by an OR operation
			
 
				+/// using the resulting value and a third value.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the AND operator.
			
 
				+/// @param [in] y           The second value to be fed into the AND operator.
			
 
				+/// @param [in] z           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the AND OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
			
 
				+{
			
 
				+    return ffxSaturate(x * y + z); // x * y is AND for 0.0/1.0 inputs; adding z and saturating is OR (assumes ffxSaturate clamps to 0..1 - TODO confirm).
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two values followed by an OR operation
			
 
				+/// using the resulting value and a third value.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the AND operator.
			
 
				+/// @param [in] y           The second value to be fed into the AND operator.
			
 
				+/// @param [in] z           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the AND OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
			
 
				+{
			
 
				+    return ffxSaturate(x * y + z); // x * y is AND for 0.0/1.0 inputs; adding z and saturating is OR (assumes ffxSaturate clamps to 0..1 - TODO confirm).
			
 
				+}
			
 
				+
			
 
				+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the greater than zero comparison.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); // Scales x by (presumably) +infinity, then saturates. NOTE(review): x == 0 gives 0 * inf (NaN), so the 0.0 result depends on ffxSaturate's NaN handling - confirm.
			
 
				+}
			
 
				+
			
 
				+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the greater than zero comparison.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); // Scales x by (presumably) +infinity, then saturates. NOTE(review): x == 0 gives 0 * inf (NaN), so the 0.0 result depends on ffxSaturate's NaN handling - confirm.
			
 
				+}
			
 
				+
			
 
				+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the greater than zero comparison.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); // Scales x by (presumably) +infinity, then saturates. NOTE(review): x == 0 gives 0 * inf (NaN), so the 0.0 result depends on ffxSaturate's NaN handling - confirm.
			
 
				+}
			
 
				+
			
 
				+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the greater than zero comparison.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); // Scales x by (presumably) +infinity, then saturates. NOTE(review): x == 0 gives 0 * inf (NaN), so the 0.0 result depends on ffxSaturate's NaN handling - confirm.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic signed NOT operation on an FfxFloat32 value.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxZeroOneAnd(FfxFloat32 x)
			
 
				+{
			
 
				+    return FfxFloat32(1.0) - x; // Maps 0.0 to 1.0 and 1.0 to 0.0: a logical NOT, despite the "And" in the name.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic signed NOT operation on FfxFloat32 values.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return ffxBroadcast2(1.0) - x; // Maps 0.0 to 1.0 and 1.0 to 0.0 per component: a logical NOT, despite the "And" in the name.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic signed NOT operation on FfxFloat32 values.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return ffxBroadcast3(1.0) - x; // Maps 0.0 to 1.0 and 1.0 to 0.0 per component: a logical NOT, despite the "And" in the name.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic signed NOT operation on FfxFloat32 values.
			
 
				+///
			
 
				+/// @param [in] x           The value to be fed into the NOT operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the NOT operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return ffxBroadcast4(1.0) - x; // Maps 0.0 to 1.0 and 1.0 to 0.0 per component: a logical NOT, despite the "And" in the name.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two FfxFloat32 values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y)
			
 
				+{
			
 
				+    return max(x, y); // With operands limited to 0.0 or 1.0, max() is their logical OR.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two FfxFloat32 values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y)
			
 
				+{
			
 
				+    return max(x, y); // With components limited to 0.0 or 1.0, max() is their logical OR.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two FfxFloat32 values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y)
			
 
				+{
			
 
				+    return max(x, y); // With components limited to 0.0 or 1.0, max() is their logical OR.
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two FfxFloat32 values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y)
			
 
				+{
			
 
				+    return max(x, y); // With components limited to 0.0 or 1.0, max() is their logical OR.
			
 
				+}
			
 
				+
			
 
				+/// Choose between two FfxFloat32 values if the first parameter is greater than zero.
			
 
				+///
			
 
				+/// @param [in] x           The value to compare against zero.
			
 
				+/// @param [in] y           The value to return if the comparison is greater than zero.
			
 
				+/// @param [in] z           The value to return if the comparison is less than or equal to zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The selected value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
			
 
				+{
			
 
				+    FfxFloat32 r = (-x) * z + z; // r = z * (1 - x): z when x is 0.0, zero when x is 1.0.
			
 
				+    return x * y + r; // x * y + (1 - x) * z: yields y for x == 1.0 and z for x == 0.0, with no branch.
			
 
				+}
			
 
				+
			
 
				+/// Choose between two FfxFloat32 values if the first parameter is greater than zero.
			
 
				+///
			
 
				+/// @param [in] x           The value to compare against zero.
			
 
				+/// @param [in] y           The value to return if the comparison is greater than zero.
			
 
				+/// @param [in] z           The value to return if the comparison is less than or equal to zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The selected value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
			
 
				+{
			
 
				+    FfxFloat32x2 r = (-x) * z + z;
			
 
				+    return x * y + r;
			
 
				+}
			
 
				+
			
 
				+/// Choose between two FfxFloat32 values if the first parameter is greater than zero.
			
 
				+///
			
 
				+/// @param [in] x           The value to compare against zero.
			
 
				+/// @param [in] y           The value to return if the comparison is greater than zero.
			
 
				+/// @param [in] z           The value to return if the comparison is less than or equal to zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The selected value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
			
 
				+{
			
 
				+    FfxFloat32x3 r = (-x) * z + z;
			
 
				+    return x * y + r;
			
 
				+}
			
 
				+
			
 
				+/// Choose between two FfxFloat32 values if the first parameter is greater than zero.
			
 
				+///
			
 
				+/// @param [in] x           The value to compare against zero.
			
 
				+/// @param [in] y           The value to return if the comparison is greater than zero.
			
 
				+/// @param [in] z           The value to return if the comparison is less than or equal to zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The selected value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
			
 
				+{
			
 
				+    FfxFloat32x4 r = (-x) * z + z;
			
 
				+    return x * y + r;
			
 
				+}
			
 
				+
			
 
				+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the sign value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the sign value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the sign value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the sign value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT));
			
 
				+}
			
 
				+
			
 
				+/// Compute a Rec.709 encoded value from a linear value.
			
 
				+///
			
 
				+/// Rec.709 is used for some HDTVs.
			
 
				+///
			
 
				+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
			
 
				+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
			
 
				+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
			
 
				+///
			
 
				+/// @param [in] color           The linear color to convert to Rec. 709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The <c><i>color</i></c> encoded in Rec.709 space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxRec709FromLinear(FfxFloat32 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
			
 
				+    return max(min(j.x, color * j.y), min(max(j.x, color * j.y), pow(color, j.z) * k.x + k.y)); // Median of threshold, linear segment and curve; clamp() is undefined when minVal > maxVal.
			
 
				+}
			
 
				+
			
 
				+/// Compute a Rec.709 encoded value from a linear value (per component).
			
 
				+///
			
 
				+/// Rec.709 is used for some HDTVs.
			
 
				+///
			
 
				+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
			
 
				+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
			
 
				+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
			
 
				+///
			
 
				+/// @param [in] color           The linear color to convert to Rec. 709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The <c><i>color</i></c> encoded in Rec.709 space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
			
 
				+    return max(min(j.xx, color * j.yy), min(max(j.xx, color * j.yy), pow(color, j.zz) * k.xx + k.yy)); // Median of threshold, linear segment and curve; clamp() is undefined when minVal > maxVal.
			
 
				+}
			
 
				+
			
 
				+/// Compute a Rec.709 encoded value from a linear value (per component).
			
 
				+///
			
 
				+/// Rec.709 is used for some HDTVs.
			
 
				+///
			
 
				+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
			
 
				+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
			
 
				+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
			
 
				+///
			
 
				+/// @param [in] color           The linear color to convert to Rec. 709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The <c><i>color</i></c> encoded in Rec.709 space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
			
 
				+    return max(min(j.xxx, color * j.yyy), min(max(j.xxx, color * j.yyy), pow(color, j.zzz) * k.xxx + k.yyy)); // Median of threshold, linear segment and curve; clamp() is undefined when minVal > maxVal.
			
 
				+}
			
 
				+
			
 
				+/// Compute a gamma value from a linear value.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+/// 
			
 
				+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
			
 
				+/// 
			
 
				+/// @param [in] color           The value to convert to gamma space from linear.
			
 
				+/// @param [in] rcpX            The reciprocal of the power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in gamma space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxGammaFromLinear(FfxFloat32 color, FfxFloat32 rcpX)
			
 
				+{
			
 
				+    return pow(color, FfxFloat32(rcpX));
			
 
				+}
			
 
				+
			
 
				+/// Compute a gamma value from a linear value.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+/// 
			
 
				+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
			
 
				+///
			
 
				+/// @param [in] color           The value to convert to gamma space from linear.
			
 
				+/// @param [in] rcpX            The reciprocal of the power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in gamma space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 color, FfxFloat32 rcpX)
			
 
				+{
			
 
				+    return pow(color, ffxBroadcast2(rcpX));
			
 
				+}
			
 
				+
			
 
				+/// Compute a gamma value from a linear value.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+///
			
 
				+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
			
 
				+///
			
 
				+/// @param [in] color           The value to convert to gamma space from linear.
			
 
				+/// @param [in] rcpX            The reciprocal of the power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in gamma space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 color, FfxFloat32 rcpX)
			
 
				+{
			
 
				+    return pow(color, ffxBroadcast3(rcpX));
			
 
				+}
			
 
				+
			
 
				+/// Compute a PQ value from a linear value (despite the function's name, the conversion is linear to PQ).
			
 
				+///
			
 
				+/// @param [in] x               The value to convert to PQ from linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in PQ space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxPQToLinear(FfxFloat32 x)
			
 
				+{
			
 
				+    FfxFloat32 p = pow(x, FfxFloat32(0.159302));
			
 
				+    return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438));
			
 
				+}
			
 
				+
			
 
				+/// Compute a PQ value from a linear value (despite the function's name, the conversion is linear to PQ).
			
 
				+///
			
 
				+/// @param [in] x               The value to convert to PQ from linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in PQ space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 x)
			
 
				+{
			
 
				+    FfxFloat32x2 p = pow(x, ffxBroadcast2(0.159302));
			
 
				+    return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438));
			
 
				+}
			
 
				+
			
 
				+/// Compute a PQ value from a linear value (despite the function's name, the conversion is linear to PQ).
			
 
				+///
			
 
				+/// @param [in] x               The value to convert to PQ from linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in PQ space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 x)
			
 
				+{
			
 
				+    FfxFloat32x3 p = pow(x, ffxBroadcast3(0.159302));
			
 
				+    return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438));
			
 
				+}
			
 
				+
			
 
				+/// Compute a SRGB value from a linear value (despite the function's name, the conversion is linear to SRGB).
			
 
				+///
			
 
				+/// @param [in] color           The linear value to convert to SRGB.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in SRGB space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxSrgbToLinear(FfxFloat32 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
			
 
				+    return max(min(j.x, color * j.y), min(max(j.x, color * j.y), pow(color, j.z) * k.x + k.y)); // Median of threshold, linear segment and curve; clamp() is undefined when minVal > maxVal.
			
 
				+}
			
 
				+
			
 
				+/// Compute a SRGB value from a linear value (despite the function's name, the conversion is linear to SRGB).
			
 
				+///
			
 
				+/// @param [in] color           The linear value to convert to SRGB.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in SRGB space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
			
 
				+    return max(min(j.xx, color * j.yy), min(max(j.xx, color * j.yy), pow(color, j.zz) * k.xx + k.yy)); // Median of threshold, linear segment and curve; clamp() is undefined when minVal > maxVal.
			
 
				+}
			
 
				+
			
 
				+/// Compute a SRGB value from a linear value (despite the function's name, the conversion is linear to SRGB).
			
 
				+///
			
 
				+/// @param [in] color           The linear value to convert to SRGB.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in SRGB space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
			
 
				+    return max(min(j.xxx, color * j.yyy), min(max(j.xxx, color * j.yyy), pow(color, j.zzz) * k.xxx + k.yyy)); // Median of threshold, linear segment and curve; clamp() is undefined when minVal > maxVal.
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a REC.709 value.
			
 
				+///
			
 
				+/// @param [in] color           The REC.709 encoded value to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxLinearFromRec709(FfxFloat32 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 1.0 / 4.5, 1.0 / 0.45); // j.x is the segment threshold in encoded space (0.081), since it is compared against the encoded input.
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
			
 
				+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a REC.709 value (per component).
			
 
				+///
			
 
				+/// @param [in] color           The REC.709 encoded value to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 1.0 / 4.5, 1.0 / 0.45); // j.x is the segment threshold in encoded space (0.081), since it is compared against the encoded input.
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
			
 
				+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a REC.709 value (per component).
			
 
				+///
			
 
				+/// @param [in] color           The REC.709 encoded value to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 1.0 / 4.5, 1.0 / 0.45); // j.x is the segment threshold in encoded space (0.081), since it is compared against the encoded input.
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
			
 
				+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a gamma space.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+///
			
 
				+/// @param [in] color           The value to convert to linear in gamma space.
			
 
				+/// @param [in] power           The power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power)
			
 
				+{
			
 
				+    return pow(color, FfxFloat32(power));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a gamma space.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+///
			
 
				+/// @param [in] color           The value to convert to linear in gamma space.
			
 
				+/// @param [in] power           The power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power)
			
 
				+{
			
 
				+    return pow(color, ffxBroadcast2(power));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a gamma space.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+///
			
 
				+/// @param [in] color           The value to convert to linear in gamma space.
			
 
				+/// @param [in] power           The power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power)
			
 
				+{
			
 
				+    return pow(color, ffxBroadcast3(power));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a PQ space.
			
 
				+///
			
 
				+/// This is the inverse of <c><i>ffxPQToLinear</i></c>, which converts linear values to PQ.
			
 
				+///
			
 
				+/// @param [in] x               The value in PQ space to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxLinearFromPQ(FfxFloat32 x)
			
 
				+{
			
 
				+    FfxFloat32 p = pow(x, FfxFloat32(0.0126833));
			
 
				+    return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a PQ space.
			
 
				+///
			
 
				+/// This is the inverse of <c><i>ffxPQToLinear</i></c>, which converts linear values to PQ.
			
 
				+///
			
 
				+/// @param [in] x               The value in PQ space to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 x)
			
 
				+{
			
 
				+    FfxFloat32x2 p = pow(x, ffxBroadcast2(0.0126833));
			
 
				+    return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a PQ space.
			
 
				+///
			
 
				+/// This is the inverse of <c><i>ffxPQToLinear</i></c>, which converts linear values to PQ.
			
 
				+///
			
 
				+/// @param [in] x               The value in PQ space to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 x)
			
 
				+{
			
 
				+    FfxFloat32x3 p = pow(x, ffxBroadcast3(0.0126833));
			
 
				+    return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a SRGB space.
			
 
				+///
			
 
				+/// Encoded values below 0.04045 use the linear segment; all other values use the 2.4 power curve.
			
 
				+///
			
 
				+/// @param [in] color           The value in SRGB space to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32 ffxLinearFromSrgb(FfxFloat32 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // j.x is the segment threshold in encoded (SRGB) space, since it is compared against the encoded input.
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
			
 
				+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a SRGB space (per component).
			
 
				+///
			
 
				+/// Encoded values below 0.04045 use the linear segment; all other values use the 2.4 power curve.
			
 
				+///
			
 
				+/// @param [in] color           The value in SRGB space to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // j.x is the segment threshold in encoded (SRGB) space, since it is compared against the encoded input.
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
			
 
				+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a SRGB space (per component).
			
 
				+///
			
 
				+/// Encoded values below 0.04045 use the linear segment; all other values use the 2.4 power curve.
			
 
				+///
			
 
				+/// @param [in] color           The value in SRGB space to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 color)
			
 
				+{
			
 
				+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // j.x is the segment threshold in encoded (SRGB) space, since it is compared against the encoded input.
			
 
				+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
			
 
				+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
			
 
				+}
			
 
				+
			
 
				+/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
			
 
				+/// 
			
 
				+///  543210
			
 
				+///  ======
			
 
				+///  ..xxx.
			
 
				+///  yy...y
			
 
				+/// 
			
 
				+/// @param [in] a       The input 1D coordinates to remap.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The remapped 2D coordinates.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a)
			
 
				+{
			
 
				+    return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u));
			
 
				+}
			
 
				+
			
 
				+/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
			
 
				+///
			
 
				+/// The 64-wide lane indices to 8x8 remapping is performed as follows:
			
 
				+/// 
			
 
				+///     00 01 08 09 10 11 18 19
			
 
				+///     02 03 0a 0b 12 13 1a 1b
			
 
				+///     04 05 0c 0d 14 15 1c 1d
			
 
				+///     06 07 0e 0f 16 17 1e 1f
			
 
				+///     20 21 28 29 30 31 38 39
			
 
				+///     22 23 2a 2b 32 33 3a 3b
			
 
				+///     24 25 2c 2d 34 35 3c 3d
			
 
				+///     26 27 2e 2f 36 37 3e 3f
			
 
				+///
			
 
				+/// @param [in] a       The input 1D coordinate to remap.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The remapped 2D coordinates.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a)
			
 
				+{
			
 
				+    return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u));
			
 
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
@@ -0,0 +1,2978 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+#if FFX_HLSL_6_2
			
 
				+/// A define value for 16bit positive infinity.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u)
			
 
				+
			
 
				+/// A define value for 16bit negative infinity.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u)
			
 
				+#else
			
 
				+/// A define value for 16bit positive infinity.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u)
			
 
				+
			
 
				+/// A define value for 16bit negative infinity.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u)
			
 
				+#endif // FFX_HLSL_6_2
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the min of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the min of.
			
 
				+/// @param [in] y                   The second value to compute the min of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The lowest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y)
			
 
				+{
			
 
				+    return min(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the max of two values.
			
 
				+///
			
 
				+/// @param [in] x                   The first value to compute the max of.
			
 
				+/// @param [in] y                   The second value to compute the max of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The highest of the two values.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute <c><i>x</i></c> raised to the power <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @param [in] x                   The value to raise to the power y.
			
 
				+/// @param [in] y                   The power to which to raise x.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>x</i></c> raised to the power <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y)
			
 
				+{
			
 
				+    return pow(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute <c><i>x</i></c> raised to the power <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @param [in] x                   The value to raise to the power y.
			
 
				+/// @param [in] y                   The power to which to raise x.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>x</i></c> raised to the power <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y)
			
 
				+{
			
 
				+    return pow(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute <c><i>x</i></c> raised to the power <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @param [in] x                   The value to raise to the power y.
			
 
				+/// @param [in] y                   The power to which to raise x.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>x</i></c> raised to the power <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y)
			
 
				+{
			
 
				+    return pow(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute <c><i>x</i></c> raised to the power <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @param [in] x                   The value to raise to the power y.
			
 
				+/// @param [in] y                   The power to which to raise x.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The value of <c><i>x</i></c> raised to the power <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y)
			
 
				+{
			
 
				+    return pow(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxSqrt(FfxFloat16 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxSqrt(FfxFloat16x2 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxSqrt(FfxFloat16x3 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] x                   The value to compute the square root of.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The square root of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxSqrt(FfxFloat16x4 x)
			
 
				+{
			
 
				+    return sqrt(x);
			
 
				+}
			
 
				+
			
 
				+/// Copy the sign bit from 's' to positive 'd'.
			
 
				+///
			
 
				+/// @param [in] d                   The value to copy the sign bit into; expected to be positive, because the sign bit is OR-ed in and never cleared.
			
 
				+/// @param [in] s                   The value to copy the sign bit from.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s)
			
 
				+{
			
 
				+    return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u)));
			
 
				+}
			
 
				+
			
 
				+/// Copy the sign bit from 's' to positive 'd'.
			
 
				+///
			
 
				+/// @param [in] d                   The value to copy the sign bit into; expected to be positive, because the sign bit is OR-ed in and never cleared.
			
 
				+/// @param [in] s                   The value to copy the sign bit from.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s)
			
 
				+{
			
 
				+    return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u)));
			
 
				+}
			
 
				+
			
 
				+/// Copy the sign bit from 's' to positive 'd'.
			
 
				+///
			
 
				+/// @param [in] d                   The value to copy the sign bit into; expected to be positive, because the sign bit is OR-ed in and never cleared.
			
 
				+/// @param [in] s                   The value to copy the sign bit from.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s)
			
 
				+{
			
 
				+    return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u)));
			
 
				+}
			
 
				+
			
 
				+/// Copy the sign bit from 's' to positive 'd'.
			
 
				+///
			
 
				+/// @param [in] d                   The value to copy the sign bit into; expected to be positive, because the sign bit is OR-ed in and never cleared.
			
 
				+/// @param [in] s                   The value to copy the sign bit from.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s)
			
 
				+{
			
 
				+    return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u)));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m >= 0  := 0
			
 
				+///     m < 0   := 1
			
 
				+///
			
 
				+/// Uses the following useful floating point logic,
			
 
				+///     saturate(+a*(-INF)==-INF) := 0
			
 
				+///     saturate( 0*(-INF)== NaN) := 0
			
 
				+///     saturate(-a*(-INF)==+INF) := 1
			
 
				+/// 
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+/// 
			
 
				+/// @param [in] m                       The value to test against 0.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is negative, or 0.0 when the value is 0, positive or NaN.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxIsSignedHalf(FfxFloat16 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m >= 0  := 0
			
 
				+///     m < 0   := 1
			
 
				+///
			
 
				+/// Uses the following useful floating point logic,
			
 
				+///     saturate(+a*(-INF)==-INF) := 0
			
 
				+///     saturate( 0*(-INF)== NaN) := 0
			
 
				+///     saturate(-a*(-INF)==+INF) := 1
			
 
				+/// 
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+/// 
			
 
				+/// @param [in] m                       The value to test against 0.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is negative, or 0.0 when the value is 0, positive or NaN.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m >= 0  := 0
			
 
				+///     m < 0   := 1
			
 
				+///
			
 
				+/// Uses the following useful floating point logic,
			
 
				+///     saturate(+a*(-INF)==-INF) := 0
			
 
				+///     saturate( 0*(-INF)== NaN) := 0
			
 
				+///     saturate(-a*(-INF)==+INF) := 1
			
 
				+/// 
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+/// 
			
 
				+/// @param [in] m                       The value to test against 0.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is negative, or 0.0 when the value is 0, positive or NaN.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m >= 0  := 0
			
 
				+///     m < 0   := 1
			
 
				+///
			
 
				+/// Uses the following useful floating point logic,
			
 
				+///     saturate(+a*(-INF)==-INF) := 0
			
 
				+///     saturate( 0*(-INF)== NaN) := 0
			
 
				+///     saturate(-a*(-INF)==+INF) := 1
			
 
				+/// 
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+/// 
			
 
				+/// @param [in] m                       The value to test against 0.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is negative, or 0.0 when the value is 0, positive or NaN.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m > 0   := 1
			
 
				+///     m <= 0  := 0
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is positive, or 0.0 when the value is 0, negative or NaN.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m > 0   := 1
			
 
				+///     m <= 0  := 0
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is positive, or 0.0 when the value is 0, negative or NaN.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m > 0   := 1
			
 
				+///     m <= 0  := 0
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is positive, or 0.0 when the value is 0, negative or NaN.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// A single operation to return the following:
			
 
				+///     m = NaN := 0
			
 
				+///     m > 0   := 1
			
 
				+///     m <= 0  := 0
			
 
				+///
			
 
				+/// This function is useful when creating masks for branch-free logic.
			
 
				+///
			
 
				+/// @param [in] m                       The value to test against zero.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// 1.0 when the value is positive, or 0.0 when the value is 0, negative or NaN.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m)
			
 
				+{
			
 
				+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// Convert a 16bit floating point value to sortable integer.
			
 
				+/// 
			
 
				+///  - If sign bit=0, flip the sign bit (positives).
			
 
				+///  - If sign bit=1, flip all bits     (negatives).
			
 
				+/// 
			
 
				+/// The function has the side effects that:
			
 
				+///  - Larger integers are more positive values.
			
 
				+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
			
 
				+/// 
			
 
				+/// @param [in] x                       The floating point value to make sortable, passed as its raw 16-bit pattern.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The sortable integer value.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x)
			
 
				+{
			
 
				+    return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000));
			
 
				+}
			
 
				+
			
 
				+/// Convert a sortable integer to a 16bit floating point value.
			
 
				+///
			
 
				+/// Intended as the inverse of ffxFloatToSortableIntegerHalf:
			
 
				+///  - If sign bit=1, flip the sign bit (positives).
			
 
				+///  - If sign bit=0, flip all bits     (negatives).
			
 
				+///
			
 
				+/// @param [in] x                       The sortable integer value to make floating point.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The floating point value, returned as its raw 16-bit pattern.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x)
			
 
				+{
			
 
				+    return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000));
			
 
				+}
			
 
				+
			
 
				+/// Convert a pair of 16bit floating point values to a pair of sortable integers.
			
 
				+/// 
			
 
				+///  - If sign bit=0, flip the sign bit (positives).
			
 
				+///  - If sign bit=1, flip all bits     (negatives).
			
 
				+/// 
			
 
				+/// The function has the side effects that:
			
 
				+///  - Larger integers are more positive values.
			
 
				+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
			
 
				+/// 
			
 
				+/// @param [in] x                       The floating point values to make sortable, passed as their raw 16-bit patterns.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The sortable integer values.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x)
			
 
				+{
			
 
				+    return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000));
			
 
				+}
			
 
				+
			
 
				+/// Convert a pair of sortable integers to a pair of 16bit floating point values.
			
 
				+///
			
 
				+/// Intended as the inverse of ffxFloatToSortableIntegerHalf:
			
 
				+///  - If sign bit=1, flip the sign bit (positives).
			
 
				+///  - If sign bit=0, flip all bits     (negatives).
			
 
				+///
			
 
				+/// @param [in] x                       The sortable integer values to make floating point.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The floating point values, returned as their raw 16-bit patterns.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x)
			
 
				+{
			
 
				+    return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000));
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// [Zero] Y0 [Zero] X0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// [Zero] Y1 [Zero] X1
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// [Zero] Y2 [Zero] X2
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// [Zero] Y3 [Zero] X3
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// Y3 Y2 Y1 X0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// Y3 Y2 Y1 X2
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// Y3 Y2 X0 Y0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// Y3 Y2 X2 Y0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// Y3 X0 Y1 Y0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// Y3 X2 Y1 Y0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// X0 Y2 Y1 Y0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// X2 Y2 Y1 Y0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// Y2 X2 Y0 X0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u);
			
 
				+}
			
 
				+
			
 
				+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
			
 
				+///
			
 
				+/// The resulting integer will contain bytes in the following order, from most to least significant (Xn / Yn is byte n of i.x / i.y, with byte 0 the least significant):
			
 
				+/// Y2 Y0 X2 X0
			
 
				+///
			
 
				+/// @param [in] i                       The integer pair to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed integer value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i)
			
 
				+{
			
 
				+    return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u));
			
 
				+}
			
 
				+
			
 
				+/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}.
			
 
				+///
			
 
				+/// @param [in] x                       The first float16x2 value to pack.
			
 
				+/// @param [in] y                       The second float16x2 value to pack.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed FfxUInt16x2 value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
			
 
				+{
			
 
				+    x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
			
 
				+    y *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
			
 
				+    return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y)))));
			
 
				+}
			
 
				+
			
 
				+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],   
			
 
				+/// d.y[16:23] into r.y[0:7] (ffxPackBytesY3Y2Y1X2 selects byte 2 of d.y; TODO confirm this is intended rather than d.y[0:7]), i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
			
 
				+///
			
 
				+/// r=ffxPermuteUByte0Float16x2ToUint2(d,i)
			
 
				+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
			
 
				+///   Where 'k1' is an SGPR with 0x????
			
 
				+///   Where 'k2' is an SGPR with 0x????
			
 
				+///   V_PK_FMA_F16 i,i,k0.x,0
			
 
				+///   V_PERM_B32 r.x,i,i,k1
			
 
				+///   V_PERM_B32 r.y,i,i,k2
			
 
				+///
			
 
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
			
 
				+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The packed FfxUInt32x2 value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
			
 
				+{
			
 
				+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
			
 
				+    return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b)));
			
 
				+}
			
 
				+
			
 
				+/// Byte-1 unsigned pack: merges the scaled bit pattern of i with d.
				+///
				+/// i is multiplied by 1/32768 and reinterpreted as a single 32-bit word b; the result is then
				+/// r.x = ffxPackBytesY3Y2X0Y0(d.x, b) and r.y = ffxPackBytesY3Y2X2Y0(d.y, b).
				+/// Documented layout: d.x[0:7] -> r.x[8:15], d.y[0:7] -> r.y[8:15], i.x[0:7] -> r.x[0:7], r.y[0:7] and
				+/// i.y[0:15] -> r.x[16:31], r.y[16:31].
				+/// Documented cost is 3 ops: V_PK_FMA_F16 for the scale (constant held in an SGPR), then one V_PERM_B32 per
				+/// output component (the permute selector constants are not specified).
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 scaled = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(scaled));
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-2 unsigned pack: merges the scaled bit pattern of i with d.
				+///
				+/// i is multiplied by 1/32768 and reinterpreted as a single 32-bit word b; the result is then
				+/// r.x = ffxPackBytesY3X0Y1Y0(d.x, b) and r.y = ffxPackBytesY3X2Y1Y0(d.y, b).
				+/// Documented layout: d.x[0:7] -> r.x[16:23], d.y[0:7] -> r.y[16:23], i.x[0:15] -> r.x[0:15], r.y[0:15] and
				+/// i.y[8:15] -> r.x[24:31], r.y[24:31].
				+/// Documented cost is 3 ops: V_PK_FMA_F16 for the scale (constant held in an SGPR), then one V_PERM_B32 per
				+/// output component (the permute selector constants are not specified).
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 scaled = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(scaled));
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-3 unsigned pack: merges the scaled bit pattern of i with d.
				+///
				+/// i is multiplied by 1/32768 and reinterpreted as a single 32-bit word b; the result is then
				+/// r.x = ffxPackBytesX0Y2Y1Y0(d.x, b) and r.y = ffxPackBytesX2Y2Y1Y0(d.y, b).
				+/// Documented layout: d.x[0:7] -> r.x[24:31], d.y[0:7] -> r.y[24:31], i.x[0:15] -> r.x[0:15], r.y[0:15] and
				+/// i.y[0:7] -> r.x[16:23], r.y[16:23].
				+/// Documented cost is 3 ops: V_PK_FMA_F16 for the scale (constant held in an SGPR), then one V_PERM_B32 per
				+/// output component (the permute selector constants are not specified).
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 scaled = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(scaled));
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Unpacks byte 0 of each component of i into a FfxFloat16x2 (unsigned encoding).
				+///
				+/// The word returned by ffxPackBytesZeroY0ZeroX0(i) is split into two 16-bit lanes, reinterpreted as half
				+/// floats and multiplied by 32768. Documented mapping: i.x[0:7] -> r.x[0:7] and i.y[0:7] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY0ZeroX0(i);
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0);
				+}
			
 
				+
			
 
				+/// Unpacks byte 1 of each component of i into a FfxFloat16x2 (unsigned encoding).
				+///
				+/// The word returned by ffxPackBytesZeroY1ZeroX1(i) is split into two 16-bit lanes, reinterpreted as half
				+/// floats and multiplied by 32768. Documented mapping: i.x[8:15] -> r.x[0:7] and i.y[8:15] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY1ZeroX1(i);
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0);
				+}
			
 
				+
			
 
				+/// Unpacks byte 2 of each component of i into a FfxFloat16x2 (unsigned encoding).
				+///
				+/// The word returned by ffxPackBytesZeroY2ZeroX2(i) is split into two 16-bit lanes, reinterpreted as half
				+/// floats and multiplied by 32768. Documented mapping: i.x[16:23] -> r.x[0:7] and i.y[16:23] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY2ZeroX2(i);
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0);
				+}
			
 
				+
			
 
				+/// Unpacks byte 3 of each component of i into a FfxFloat16x2 (unsigned encoding).
				+///
				+/// The word returned by ffxPackBytesZeroY3ZeroX3(i) is split into two 16-bit lanes, reinterpreted as half
				+/// floats and multiplied by 32768. Documented mapping: i.x[24:31] -> r.x[0:7] and i.y[24:31] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY3ZeroX3(i);
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0);
				+}
			
 
				+
			
 
				+/// Signed variant: scales two Float16x2 values by 1/32768, offsets them by 0.25/32768, reinterprets each as a
				+/// 32-bit word and merges the two words with ffxPackBytesY2X2Y0X0, giving the documented layout {{x0,y0},{x1,y1}}.
				+///
				+/// @param [in] x                       The first float16x2 value to pack.
				+/// @param [in] y                       The second float16x2 value to pack.
				+///
				+/// @returns
				+/// The packed FfxUInt16x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
				+{
				+    FfxFloat16x2 xBiased = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxFloat16x2 yBiased = y * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 xBits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(xBiased));
				+    FfxUInt32 yBits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(yBiased));
				+    return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(xBits, yBits)));
				+}
			
 
				+
			
 
				+/// Byte-0 signed pack: merges the scaled and biased bit pattern of i with d. Handles signed byte values.
				+///
				+/// i is multiplied by 1/32768, offset by 0.25/32768 and reinterpreted as a single 32-bit word b; the result is
				+/// then r.x = ffxPackBytesY3Y2Y1X0(d.x, b) and r.y = ffxPackBytesY3Y2Y1X2(d.y, b).
				+/// Documented layout: d.x[0:7] -> r.x[0:7], d.y[0:7] -> r.y[0:7], i.x[8:15] -> r.x[8:15], r.y[8:15] and
				+/// i.y[0:15] -> r.x[16:31], r.y[16:31]. Documented cost is 3 ops.
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-1 signed pack: merges the scaled and biased bit pattern of i with d. Handles signed byte values.
				+///
				+/// i is multiplied by 1/32768, offset by 0.25/32768 and reinterpreted as a single 32-bit word b; the result is
				+/// then r.x = ffxPackBytesY3Y2X0Y0(d.x, b) and r.y = ffxPackBytesY3Y2X2Y0(d.y, b).
				+/// Documented layout: d.x[0:7] -> r.x[8:15], d.y[0:7] -> r.y[8:15], i.x[0:7] -> r.x[0:7], r.y[0:7] and
				+/// i.y[0:15] -> r.x[16:31], r.y[16:31]. Documented cost is 3 ops.
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-2 signed pack: merges the scaled and biased bit pattern of i with d. Handles signed byte values.
				+///
				+/// i is multiplied by 1/32768, offset by 0.25/32768 and reinterpreted as a single 32-bit word b; the result is
				+/// then r.x = ffxPackBytesY3X0Y1Y0(d.x, b) and r.y = ffxPackBytesY3X2Y1Y0(d.y, b).
				+/// Documented layout: d.x[0:7] -> r.x[16:23], d.y[0:7] -> r.y[16:23], i.x[0:15] -> r.x[0:15], r.y[0:15] and
				+/// i.y[8:15] -> r.x[24:31], r.y[24:31]. Documented cost is 3 ops.
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-3 signed pack: merges the scaled and biased bit pattern of i with d. Handles signed byte values.
				+///
				+/// i is multiplied by 1/32768, offset by 0.25/32768 and reinterpreted as a single 32-bit word b; the result is
				+/// then r.x = ffxPackBytesX0Y2Y1Y0(d.x, b) and r.y = ffxPackBytesX2Y2Y1Y0(d.y, b).
				+/// Documented layout: d.x[0:7] -> r.x[24:31], d.y[0:7] -> r.y[24:31], i.x[0:15] -> r.x[0:15], r.y[0:15] and
				+/// i.y[0:7] -> r.x[16:23], r.y[16:23]. Documented cost is 3 ops.
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-0 zero-based signed pack: merges the scaled, biased and MSB-flipped bit pattern of i with d.
				+/// Handles signed byte values.
				+///
				+/// i is multiplied by 1/32768, offset by 0.25/32768, reinterpreted as a single 32-bit word and XORed with
				+/// 0x00800080u to give b; the result is then r.x = ffxPackBytesY3Y2Y1X0(d.x, b) and
				+/// r.y = ffxPackBytesY3Y2Y1X2(d.y, b).
				+/// The XOR flips the MSB of the byte in each 16-bit half, so that 128 ("exact zero") is stored as zero; this is
				+/// useful when cleared values should decode as zero.
				+/// Documented layout: d.x[0:7] -> r.x[0:7], d.y[0:7] -> r.y[0:7], i.x[8:15] -> r.x[8:15], r.y[8:15] and
				+/// i.y[0:15] -> r.x[16:31], r.y[16:31]. Documented cost is 3 ops.
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased)) ^ 0x00800080u;
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-1 zero-based signed pack: merges the scaled, biased and MSB-flipped bit pattern of i with d.
				+/// Handles signed byte values.
				+///
				+/// i is multiplied by 1/32768, offset by 0.25/32768, reinterpreted as a single 32-bit word and XORed with
				+/// 0x00800080u to give b; the result is then r.x = ffxPackBytesY3Y2X0Y0(d.x, b) and
				+/// r.y = ffxPackBytesY3Y2X2Y0(d.y, b).
				+/// The XOR flips the MSB of the byte in each 16-bit half, so that 128 ("exact zero") is stored as zero; this is
				+/// useful when cleared values should decode as zero.
				+/// Documented layout: d.x[0:7] -> r.x[8:15], d.y[0:7] -> r.y[8:15], i.x[0:7] -> r.x[0:7], r.y[0:7] and
				+/// i.y[0:15] -> r.x[16:31], r.y[16:31]. Documented cost is 3 ops.
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased)) ^ 0x00800080u;
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-2 zero-based signed pack: merges the scaled, biased and MSB-flipped bit pattern of i with d.
				+/// Handles signed byte values.
				+///
				+/// i is multiplied by 1/32768, offset by 0.25/32768, reinterpreted as a single 32-bit word and XORed with
				+/// 0x00800080u to give b; the result is then r.x = ffxPackBytesY3X0Y1Y0(d.x, b) and
				+/// r.y = ffxPackBytesY3X2Y1Y0(d.y, b).
				+/// The XOR flips the MSB of the byte in each 16-bit half, so that 128 ("exact zero") is stored as zero; this is
				+/// useful when cleared values should decode as zero.
				+/// Documented layout: d.x[0:7] -> r.x[16:23], d.y[0:7] -> r.y[16:23], i.x[0:15] -> r.x[0:15], r.y[0:15] and
				+/// i.y[8:15] -> r.x[24:31], r.y[24:31]. Documented cost is 3 ops.
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased)) ^ 0x00800080u;
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Byte-3 zero-based signed pack: merges the scaled, biased and MSB-flipped bit pattern of i with d.
				+/// Handles signed byte values.
				+///
				+/// i is multiplied by 1/32768, offset by 0.25/32768, reinterpreted as a single 32-bit word and XORed with
				+/// 0x00800080u to give b; the result is then r.x = ffxPackBytesX0Y2Y1Y0(d.x, b) and
				+/// r.y = ffxPackBytesX2Y2Y1Y0(d.y, b).
				+/// The XOR flips the MSB of the byte in each 16-bit half, so that 128 ("exact zero") is stored as zero; this is
				+/// useful when cleared values should decode as zero.
				+/// Documented layout: d.x[0:7] -> r.x[24:31], d.y[0:7] -> r.y[24:31], i.x[0:15] -> r.x[0:15], r.y[0:15] and
				+/// i.y[0:7] -> r.x[16:23], r.y[16:23]. Documented cost is 3 ops.
				+/// NOTE(review): the X2 in the r.y helper's name hints at byte 2 of d.y rather than d.y[0:7] - confirm against
				+/// the ffxPackBytes* definitions.
				+///
				+/// @param [in] d                       The FfxUInt32x2 value to be packed.
				+/// @param [in] i                       The FfxFloat16x2 value to be packed.
				+///
				+/// @returns
				+/// The packed FfxUInt32x2 value.
				+///
				+/// @ingroup GPU
				+FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
				+{
				+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
				+    FfxUInt32 bits = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased)) ^ 0x00800080u;
				+    FfxUInt32x2 r;
				+    r.x = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, bits));
				+    r.y = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, bits));
				+    return r;
				+}
			
 
				+
			
 
				+/// Unpacks byte 0 of each component of i into a FfxFloat16x2. Handles signed byte values.
				+///
				+/// The word returned by ffxPackBytesZeroY0ZeroX0(i) is split into two 16-bit lanes, reinterpreted as half
				+/// floats, multiplied by 32768 and offset by -0.25.
				+/// Documented mapping: i.x[0:7] -> r.x[0:7] and i.y[0:7] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY0ZeroX0(i);
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
				+}
			
 
				+
			
 
				+/// Unpacks byte 1 of each component of i into a FfxFloat16x2. Handles signed byte values.
				+///
				+/// The word returned by ffxPackBytesZeroY1ZeroX1(i) is split into two 16-bit lanes, reinterpreted as half
				+/// floats, multiplied by 32768 and offset by -0.25.
				+/// Documented mapping: i.x[8:15] -> r.x[0:7] and i.y[8:15] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY1ZeroX1(i);
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
				+}
			
 
				+
			
 
				+/// Unpacks byte 2 of each component of i into a FfxFloat16x2. Handles signed byte values.
				+///
				+/// The word returned by ffxPackBytesZeroY2ZeroX2(i) is split into two 16-bit lanes, reinterpreted as half
				+/// floats, multiplied by 32768 and offset by -0.25.
				+/// Documented mapping: i.x[16:23] -> r.x[0:7] and i.y[16:23] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY2ZeroX2(i);
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
				+}
			
 
				+
			
 
				+/// Unpacks byte 3 of each component of i into a FfxFloat16x2. Handles signed byte values.
				+///
				+/// The word returned by ffxPackBytesZeroY3ZeroX3(i) is split into two 16-bit lanes, reinterpreted as half
				+/// floats, multiplied by 32768 and offset by -0.25.
				+/// Documented mapping: i.x[24:31] -> r.x[0:7] and i.y[24:31] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY3ZeroX3(i);
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
				+}
			
 
				+
			
 
				+/// Unpacks byte 0 of each component of i into a FfxFloat16x2 (zero-based encoding). Handles signed byte values.
				+///
				+/// The word returned by ffxPackBytesZeroY0ZeroX0(i) is XORed with 0x00800080u (flipping bit 7 of each 16-bit
				+/// half), split into two 16-bit lanes, reinterpreted as half floats, multiplied by 32768 and offset by -0.25.
				+/// Documented mapping: i.x[0:7] -> r.x[0:7] and i.y[0:7] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u;
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
				+}
			
 
				+
			
 
				+/// Unpacks byte 1 of each component of i into a FfxFloat16x2 (zero-based encoding). Handles signed byte values.
				+///
				+/// The word returned by ffxPackBytesZeroY1ZeroX1(i) is XORed with 0x00800080u (flipping bit 7 of each 16-bit
				+/// half), split into two 16-bit lanes, reinterpreted as half floats, multiplied by 32768 and offset by -0.25.
				+/// Documented mapping: i.x[8:15] -> r.x[0:7] and i.y[8:15] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u;
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
				+}
			
 
				+
			
 
				+/// Unpacks byte 2 of each component of i into a FfxFloat16x2 (zero-based encoding). Handles signed byte values.
				+///
				+/// The word returned by ffxPackBytesZeroY2ZeroX2(i) is XORed with 0x00800080u (flipping bit 7 of each 16-bit
				+/// half), split into two 16-bit lanes, reinterpreted as half floats, multiplied by 32768 and offset by -0.25.
				+/// Documented mapping: i.x[16:23] -> r.x[0:7] and i.y[16:23] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u;
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
				+}
			
 
				+
			
 
				+/// Unpacks byte 3 of each component of i into a FfxFloat16x2 (zero-based encoding). Handles signed byte values.
				+///
				+/// The word returned by ffxPackBytesZeroY3ZeroX3(i) is XORed with 0x00800080u (flipping bit 7 of each 16-bit
				+/// half), split into two 16-bit lanes, reinterpreted as half floats, multiplied by 32768 and offset by -0.25.
				+/// Documented mapping: i.x[24:31] -> r.x[0:7] and i.y[24:31] -> r.y[0:7], in 2 ops.
				+///
				+/// @param [in] i                       The FfxUInt32x2 value to be unpacked.
				+///
				+/// @returns
				+/// The unpacked FfxFloat16x2.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
				+{
				+    FfxUInt32 lanes = ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u;
				+    FfxFloat16x2 raw = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(lanes));
				+    return raw * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
				+}
			
 
				+
			
 
				+/// Low-quality half-precision square root estimate, computed purely on the bit pattern of the input:
				+/// the bits are shifted right by one and the constant 0x1de2 is added.
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value to approximate the square root of.
				+///
				+/// @returns
				+/// An approximation of the square root, estimated to low quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a)
				+{
				+    FfxFloat16 estimate = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x1de2) + (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)));
				+    return estimate;
				+}
			
 
				+
			
 
				+/// Low-quality half-precision square root estimate for each component of a 2-vector, computed purely on the
				+/// bit patterns of the input: the bits are shifted right by one and the constant 0x1de2 is added.
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The values to approximate the square root of.
				+///
				+/// @returns
				+/// An approximation of the square root, estimated to low quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a)
				+{
				+    FfxFloat16x2 estimate = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x1de2) + (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)));
				+    return estimate;
				+}
			
 
				+
			
 
				+/// Low-quality half-precision square root estimate for each component of a 3-vector, computed purely on the
				+/// bit patterns of the input: the bits are shifted right by one and the constant 0x1de2 is added.
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The values to approximate the square root of.
				+///
				+/// @returns
				+/// An approximation of the square root, estimated to low quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a)
				+{
				+    FfxFloat16x3 estimate = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x1de2) + (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)));
				+    return estimate;
				+}
			
 
				+
			
 
				+/// Low-quality half-precision reciprocal estimate, computed purely on the bit pattern of the input:
				+/// the bits are subtracted from the constant 0x7784.
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value to approximate the reciprocal of.
				+///
				+/// @returns
				+/// An approximation of the reciprocal, estimated to low quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a)
				+{
				+    FfxFloat16 estimate = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a));
				+    return estimate;
				+}
			
 
				+
			
 
				+/// Low-quality half-precision reciprocal estimate for each component of a 2-vector, computed purely on the
				+/// bit patterns of the input: the bits are subtracted from the constant 0x7784.
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The values to approximate the reciprocal of.
				+///
				+/// @returns
				+/// An approximation of the reciprocal, estimated to low quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a)
				+{
				+    FfxFloat16x2 estimate = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a));
				+    return estimate;
				+}
			
 
				+
			
 
				+/// Low-quality half-precision reciprocal estimate for each component of a 3-vector, computed purely on the
				+/// bit patterns of the input: the bits are subtracted from the constant 0x7784.
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The values to approximate the reciprocal of.
				+///
				+/// @returns
				+/// An approximation of the reciprocal, estimated to low quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a)
				+{
				+    FfxFloat16x3 estimate = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a));
				+    return estimate;
				+}
			
 
				+
			
 
				+/// Low-quality half-precision reciprocal estimate for each component of a 4-vector, computed purely on the
				+/// bit patterns of the input: the bits are subtracted from the constant 0x7784.
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The values to approximate the reciprocal of.
				+///
				+/// @returns
				+/// An approximation of the reciprocal, estimated to low quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a)
				+{
				+    FfxFloat16x4 estimate = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a));
				+    return estimate;
				+}
			
 
				+
			
 
				+/// Medium-quality half-precision reciprocal estimate: a bit-pattern seed (0x778d minus the input bits)
				+/// refined by one step of the form b * (2 - a * b).
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value to approximate the reciprocal of.
				+///
				+/// @returns
				+/// An approximation of the reciprocal, estimated to medium quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a)
				+{
				+    FfxFloat16 seed = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a));
				+    FfxFloat16 correction = -seed * a + FFX_BROADCAST_FLOAT16(2.0);
				+    return seed * correction;
				+}
			
 
				+
			
 
				+/// Medium-quality half-precision reciprocal estimate for each component of a 2-vector: a bit-pattern seed
				+/// (0x778d minus the input bits) refined by one step of the form b * (2 - a * b).
				+///
				+/// Background on this family of approximations (Michal Drobot):
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The values to approximate the reciprocal of.
				+///
				+/// @returns
				+/// An approximation of the reciprocal, estimated to medium quality.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a)
				+{
				+    FfxFloat16x2 seed = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a));
				+    FfxFloat16x2 correction = -seed * a + FFX_BROADCAST_FLOAT16X2(2.0);
				+    return seed * correction;
				+}
			
 
				+
			
 
				+/// Approximate the component-wise reciprocal of a three-component half-precision vector at medium quality.
				+///
				+/// The starting estimate is FFX_TO_FLOAT16X3(0x778d - FFX_TO_UINT16X3(a)); it is then refined once
				+/// by multiplying it with (2 - estimate * a).
				+///
				+/// Background on this family of approximations is available in Michal Drobot's presentation
				+/// materials:
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value whose reciprocal is approximated.
				+///
				+/// @returns
				+/// A medium-quality approximation of the reciprocal of <c><i>a</i></c>.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a)
				+{
				+    FfxFloat16x3 estimate = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a));
				+    FfxFloat16x3 refinement = -estimate * a + FFX_BROADCAST_FLOAT16X3(2.0);
				+    return estimate * refinement;
				+}
			
 
				+
			
 
				+/// Approximate the component-wise reciprocal of a four-component half-precision vector at medium quality.
				+///
				+/// The starting estimate is FFX_TO_FLOAT16X4(0x778d - FFX_TO_UINT16X4(a)); it is then refined once
				+/// by multiplying it with (2 - estimate * a).
				+///
				+/// Background on this family of approximations is available in Michal Drobot's presentation
				+/// materials:
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value whose reciprocal is approximated.
				+///
				+/// @returns
				+/// A medium-quality approximation of the reciprocal of <c><i>a</i></c>.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a)
				+{
				+    FfxFloat16x4 estimate = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a));
				+    FfxFloat16x4 refinement = -estimate * a + FFX_BROADCAST_FLOAT16X4(2.0);
				+    return estimate * refinement;
				+}
			
 
				+
			
 
				+/// Approximate the reciprocal square root of a half-precision value at low quality.
				+///
				+/// The result is FFX_TO_FLOAT16(0x59a3 - (FFX_TO_UINT16(a) >> 1)); no refinement step is applied.
				+///
				+/// Background on this family of approximations is available in Michal Drobot's presentation
				+/// materials:
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value whose reciprocal square root is approximated.
				+///
				+/// @returns
				+/// A low-quality approximation of the reciprocal of the square root of <c><i>a</i></c>.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a)
				+{
				+    FfxUInt16 magic = FFX_BROADCAST_UINT16(0x59a3);
				+    return FFX_TO_FLOAT16(magic - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)));
				+}
			
 
				+
			
 
				+/// Approximate the component-wise reciprocal square root of a two-component half-precision vector
				+/// at low quality.
				+///
				+/// The result is FFX_TO_FLOAT16X2(0x59a3 - (FFX_TO_UINT16X2(a) >> 1)); no refinement step is applied.
				+///
				+/// Background on this family of approximations is available in Michal Drobot's presentation
				+/// materials:
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value whose reciprocal square root is approximated.
				+///
				+/// @returns
				+/// A low-quality approximation of the reciprocal of the square root of <c><i>a</i></c>.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a)
				+{
				+    FfxUInt16x2 magic = FFX_BROADCAST_UINT16X2(0x59a3);
				+    return FFX_TO_FLOAT16X2(magic - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)));
				+}
			
 
				+
			
 
				+/// Approximate the component-wise reciprocal square root of a three-component half-precision vector
				+/// at low quality.
				+///
				+/// The result is FFX_TO_FLOAT16X3(0x59a3 - (FFX_TO_UINT16X3(a) >> 1)); no refinement step is applied.
				+///
				+/// Background on this family of approximations is available in Michal Drobot's presentation
				+/// materials:
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value whose reciprocal square root is approximated.
				+///
				+/// @returns
				+/// A low-quality approximation of the reciprocal of the square root of <c><i>a</i></c>.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a)
				+{
				+    FfxUInt16x3 magic = FFX_BROADCAST_UINT16X3(0x59a3);
				+    return FFX_TO_FLOAT16X3(magic - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)));
				+}
			
 
				+
			
 
				+/// Approximate the component-wise reciprocal square root of a four-component half-precision vector
				+/// at low quality.
				+///
				+/// The result is FFX_TO_FLOAT16X4(0x59a3 - (FFX_TO_UINT16X4(a) >> 1)); no refinement step is applied.
				+///
				+/// Background on this family of approximations is available in Michal Drobot's presentation
				+/// materials:
				+///
				+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
				+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
				+///
				+/// @param [in] a           The value whose reciprocal square root is approximated.
				+///
				+/// @returns
				+/// A low-quality approximation of the reciprocal of the square root of <c><i>a</i></c>.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a)
				+{
				+    FfxUInt16x4 magic = FFX_BROADCAST_UINT16X4(0x59a3);
				+    return FFX_TO_FLOAT16X4(magic - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1)));
				+}
			
 
				+
			
 
				+/// Parabolic approximation of sine, evaluated as x * abs(x) - x.
				+///
				+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
				+/// is {-1/4 to 1/4} representing {-1 to 1}.
				+///
				+/// @param [in] x            The value to calculate approximate sine for.
				+///
				+/// @returns
				+/// The approximate sine of <c><i>x</i></c>.
				+FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x)
				+{
				+    FfxFloat16 magnitude = abs(x);
				+    return x * magnitude - x;
				+}
			
 
				+
			
 
				+/// Component-wise parabolic approximation of sine, evaluated as x * abs(x) - x.
				+///
				+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
				+/// is {-1/4 to 1/4} representing {-1 to 1}.
				+///
				+/// @param [in] x            The values to calculate approximate sine for.
				+///
				+/// @returns
				+/// The approximate sine of each component of <c><i>x</i></c>.
				+FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x)
				+{
				+    FfxFloat16x2 magnitude = abs(x);
				+    return x * magnitude - x;
				+}
			
 
				+
			
 
				+/// Parabolic approximation of cosine.
				+///
				+/// The input is remapped as ffxFract(x * 0.5 + 0.75) * 2 - 1 and the remapped value is handed to
				+/// ffxParabolicSinHalf.
				+///
				+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
				+/// is {-1/4 to 1/4} representing {-1 to 1}.
				+///
				+/// @param [in] x            The value to calculate approximate cosine for.
				+///
				+/// @returns
				+/// The approximate cosine of <c><i>x</i></c>.
				+FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x)
				+{
				+    FfxFloat16 wrapped = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75));
				+    FfxFloat16 remapped = wrapped * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0);
				+    return ffxParabolicSinHalf(remapped);
				+}
			
 
				+
			
 
				+/// Component-wise parabolic approximation of cosine.
				+///
				+/// The input is remapped as ffxFract(x * 0.5 + 0.75) * 2 - 1 and the remapped value is handed to
				+/// ffxParabolicSinHalf.
				+///
				+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
				+/// is {-1/4 to 1/4} representing {-1 to 1}.
				+///
				+/// @param [in] x            The values to calculate approximate cosine for.
				+///
				+/// @returns
				+/// The approximate cosine of each component of <c><i>x</i></c>.
				+FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x)
				+{
				+    FfxFloat16x2 wrapped = ffxFract(x * FFX_BROADCAST_FLOAT16X2(0.5) + FFX_BROADCAST_FLOAT16X2(0.75));
				+    FfxFloat16x2 remapped = wrapped * FFX_BROADCAST_FLOAT16X2(2.0) - FFX_BROADCAST_FLOAT16X2(1.0);
				+    return ffxParabolicSinHalf(remapped);
				+}
			
 
				+
			
 
				+/// Parabolic approximation of both sine and cosine in a single call.
				+///
				+/// The cosine input is remapped as ffxFract(x * 0.5 + 0.75) * 2 - 1; the original and remapped
				+/// values are then evaluated together by the two-component ffxParabolicSinHalf.
				+///
				+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
				+/// is {-1/4 to 1/4} representing {-1 to 1}.
				+///
				+/// @param [in] x            The value to calculate approximate sine and cosine for.
				+///
				+/// @returns
				+/// A <c><i>FfxFloat16x2</i></c> whose first component approximates the sine and whose second
				+/// component approximates the cosine of <c><i>x</i></c>.
				+FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x)
				+{
				+    FfxFloat16 wrapped = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75));
				+    FfxFloat16 cosineInput = wrapped * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0);
				+    return ffxParabolicSinHalf(FfxFloat16x2(x, cosineInput));
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation on two FfxUInt16 values, evaluated as min(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+///
				+/// @returns
				+/// Result of the AND operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y)
				+{
				+    FfxUInt16 smaller = min(x, y);
				+    return smaller;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation on two FfxUInt16x2 values, evaluated as min(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+///
				+/// @returns
				+/// Result of the AND operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y)
				+{
				+    FfxUInt16x2 smaller = min(x, y);
				+    return smaller;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation on two FfxUInt16x3 values, evaluated as min(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+///
				+/// @returns
				+/// Result of the AND operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y)
				+{
				+    FfxUInt16x3 smaller = min(x, y);
				+    return smaller;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation on two FfxUInt16x4 values, evaluated as min(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+///
				+/// @returns
				+/// Result of the AND operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y)
				+{
				+    FfxUInt16x4 smaller = min(x, y);
				+    return smaller;
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a FfxUInt16 value, evaluated as x XOR 1.
				+///
				+/// @param [in] x           The value to be fed into the NOT operator.
				+///
				+/// @returns
				+/// Result of the NOT operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x)
				+{
				+    FfxUInt16 toggle = FFX_BROADCAST_UINT16(1);
				+    return x ^ toggle;
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a FfxUInt16x2 value, evaluated as x XOR 1 per component.
				+///
				+/// @param [in] x           The value to be fed into the NOT operator.
				+///
				+/// @returns
				+/// Result of the NOT operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x)
				+{
				+    FfxUInt16x2 toggle = FFX_BROADCAST_UINT16X2(1);
				+    return x ^ toggle;
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a FfxUInt16x3 value, evaluated as x XOR 1 per component.
				+///
				+/// @param [in] x           The value to be fed into the NOT operator.
				+///
				+/// @returns
				+/// Result of the NOT operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x)
				+{
				+    FfxUInt16x3 toggle = FFX_BROADCAST_UINT16X3(1);
				+    return x ^ toggle;
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a FfxUInt16x4 value, evaluated as x XOR 1 per component.
				+///
				+/// @param [in] x           The value to be fed into the NOT operator.
				+///
				+/// @returns
				+/// Result of the NOT operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x)
				+{
				+    FfxUInt16x4 toggle = FFX_BROADCAST_UINT16X4(1);
				+    return x ^ toggle;
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation on two FfxUInt16 values, evaluated as max(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the OR operator.
				+/// @param [in] y           The second value to be fed into the OR operator.
				+///
				+/// @returns
				+/// Result of the OR operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y)
				+{
				+    FfxUInt16 larger = max(x, y);
				+    return larger;
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation on two FfxUInt16x2 values, evaluated as max(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the OR operator.
				+/// @param [in] y           The second value to be fed into the OR operator.
				+///
				+/// @returns
				+/// Result of the OR operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y)
				+{
				+    FfxUInt16x2 larger = max(x, y);
				+    return larger;
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation on two FfxUInt16x3 values, evaluated as max(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the OR operator.
				+/// @param [in] y           The second value to be fed into the OR operator.
				+///
				+/// @returns
				+/// Result of the OR operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y)
				+{
				+    FfxUInt16x3 larger = max(x, y);
				+    return larger;
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation on two FfxUInt16x4 values, evaluated as max(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the OR operator.
				+/// @param [in] y           The second value to be fed into the OR operator.
				+///
				+/// @returns
				+/// Result of the OR operation.
				+///
				+/// @ingroup GPU
				+FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y)
				+{
				+    FfxUInt16x4 larger = max(x, y);
				+    return larger;
				+}
			
 
				+
			
 
				+/// Convert a half-precision floating-point value between 0.0 and 1.0 to a FfxUInt16.
				+///
				+/// The input is multiplied by FFX_TO_FLOAT16 of the integer 1 and the product is passed through
				+/// FFX_TO_UINT16.
				+///
				+/// @param [in] x           The value to be converted to a Uint.
				+///
				+/// @returns
				+/// The converted Uint value.
				+///
				+/// @ingroup GPU
				+FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x)
				+{
				+    // The integer 1 is constructed with FFX_BROADCAST_UINT16, mirroring the FfxUInt16xN(1, ...)
				+    // constructors of the vector overloads. FFX_TO_UINT16 is a conversion applied to half-precision
				+    // values in the surrounding code, so it must not be used to build the integer constant.
				+    FfxFloat16 unit = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(1));
				+    return FFX_TO_UINT16(x * unit);
				+}
			
 
				+
			
 
				+/// Convert a two-component half-precision floating-point value between 0.0 and 1.0 to a FfxUInt16x2.
				+///
				+/// The input is multiplied by FFX_TO_FLOAT16X2 of the integer vector (1, 1) and the product is
				+/// passed through FFX_TO_UINT16X2.
				+///
				+/// @param [in] x           The value to be converted to a Uint.
				+///
				+/// @returns
				+/// The converted Uint value.
				+///
				+/// @ingroup GPU
				+FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x)
				+{
				+    FfxFloat16x2 unit = FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1));
				+    return FFX_TO_UINT16X2(x * unit);
				+}
			
 
				+
			
 
				+/// Convert a three-component half-precision floating-point value between 0.0 and 1.0 to a FfxUInt16x3.
				+///
				+/// The input is multiplied by FFX_TO_FLOAT16X3 of the integer vector (1, 1, 1) and the product is
				+/// passed through FFX_TO_UINT16X3.
				+///
				+/// @param [in] x           The value to be converted to a Uint.
				+///
				+/// @returns
				+/// The converted Uint value.
				+///
				+/// @ingroup GPU
				+FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x)
				+{
				+    FfxFloat16x3 unit = FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1));
				+    return FFX_TO_UINT16X3(x * unit);
				+}
			
 
				+
			
 
				+/// Convert a four-component half-precision floating-point value between 0.0 and 1.0 to a FfxUInt16x4.
				+///
				+/// The input is multiplied by FFX_TO_FLOAT16X4 of the integer vector (1, 1, 1, 1) and the product
				+/// is passed through FFX_TO_UINT16X4.
				+///
				+/// @param [in] x           The value to be converted to a Uint.
				+///
				+/// @returns
				+/// The converted Uint value.
				+///
				+/// @ingroup GPU
				+FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x)
				+{
				+    FfxFloat16x4 unit = FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1));
				+    return FFX_TO_UINT16X4(x * unit);
				+}
			
 
				+
			
 
				+/// Convert a FfxUInt16 value between 0 and 1 to a half-precision floating-point value.
				+///
				+/// The input is multiplied by FFX_TO_UINT16 of the half-precision constant 1.0 and the product is
				+/// passed through FFX_TO_FLOAT16.
				+///
				+/// @param [in] x           The value to be converted to a half-precision floating-point value.
				+///
				+/// @returns
				+/// The converted half-precision floating-point value.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x)
				+{
				+    // 1.0 is constructed with FFX_BROADCAST_FLOAT16 so that FFX_TO_UINT16 receives a genuine
				+    // half-precision value. FFX_TO_FLOAT16 takes an unsigned integer argument in the surrounding
				+    // code and must not be applied to the floating-point literal.
				+    FfxUInt16 unit = FFX_TO_UINT16(FFX_BROADCAST_FLOAT16(1.0));
				+    return FFX_TO_FLOAT16(x * unit);
				+}
			
 
				+
			
 
				+/// Convert a FfxUInt16x2 value with components between 0 and 1 to a half-precision floating-point
				+/// vector.
				+///
				+/// The input is multiplied by FFX_TO_UINT16X2 of the half-precision constant 1.0 and the product
				+/// is passed through FFX_TO_FLOAT16X2.
				+///
				+/// @param [in] x           The value to be converted to a half-precision floating-point value.
				+///
				+/// @returns
				+/// The converted half-precision floating-point value.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x)
				+{
				+    // The multiplier has to come from a half-precision 1.0 vector handed to FFX_TO_UINT16X2.
				+    // Constructing a FfxUInt16x2 from 1.0 first would yield the integer 1 in each component,
				+    // and the scale factor for 1.0 would be lost.
				+    FfxUInt16x2 unit = FFX_TO_UINT16X2(FFX_BROADCAST_FLOAT16X2(1.0));
				+    return FFX_TO_FLOAT16X2(x * unit);
				+}
			
 
				+
			
 
				+/// Convert a FfxUInt16x3 value with components between 0 and 1 to a half-precision floating-point
				+/// vector.
				+///
				+/// The input is multiplied by FFX_TO_UINT16X3 of the half-precision constant 1.0 and the product
				+/// is passed through FFX_TO_FLOAT16X3.
				+///
				+/// @param [in] x           The value to be converted to a half-precision floating-point value.
				+///
				+/// @returns
				+/// The converted half-precision floating-point value.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x)
				+{
				+    // The multiplier has to come from a half-precision 1.0 vector handed to FFX_TO_UINT16X3.
				+    // Constructing a FfxUInt16x3 from 1.0 first would yield the integer 1 in each component,
				+    // and the scale factor for 1.0 would be lost.
				+    FfxUInt16x3 unit = FFX_TO_UINT16X3(FFX_BROADCAST_FLOAT16X3(1.0));
				+    return FFX_TO_FLOAT16X3(x * unit);
				+}
			
 
				+
			
 
				+/// Convert a FfxUInt16x4 value with components between 0 and 1 to a half-precision floating-point
				+/// vector.
				+///
				+/// The input is multiplied by FFX_TO_UINT16X4 of the half-precision constant 1.0 and the product
				+/// is passed through FFX_TO_FLOAT16X4.
				+///
				+/// @param [in] x           The value to be converted to a half-precision floating-point value.
				+///
				+/// @returns
				+/// The converted half-precision floating-point value.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x)
				+{
				+    // The multiplier has to come from a half-precision 1.0 vector handed to FFX_TO_UINT16X4.
				+    // Constructing a FfxUInt16x4 from 1.0 first would yield the integer 1 in each component,
				+    // and the scale factor for 1.0 would be lost.
				+    FfxUInt16x4 unit = FFX_TO_UINT16X4(FFX_BROADCAST_FLOAT16X4(1.0));
				+    return FFX_TO_FLOAT16X4(x * unit);
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation on two half-precision floating-point values, evaluated
				+/// as min(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+///
				+/// @returns
				+/// Result of the AND operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y)
				+{
				+    FfxFloat16 smaller = min(x, y);
				+    return smaller;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation on two FfxFloat16x2 values, evaluated as min(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+///
				+/// @returns
				+/// Result of the AND operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y)
				+{
				+    FfxFloat16x2 smaller = min(x, y);
				+    return smaller;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation on two FfxFloat16x3 values, evaluated as min(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+///
				+/// @returns
				+/// Result of the AND operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y)
				+{
				+    FfxFloat16x3 smaller = min(x, y);
				+    return smaller;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation on two FfxFloat16x4 values, evaluated as min(x, y).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+///
				+/// @returns
				+/// Result of the AND operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y)
				+{
				+    FfxFloat16x4 smaller = min(x, y);
				+    return smaller;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND NOT operation using two half-precision values, evaluated as
				+/// (-x) * y + 1.0.
				+///
				+/// @param [in] x           The first value to be fed into the AND NOT operator.
				+/// @param [in] y           The second value to be fed into the AND NOT operator.
				+///
				+/// @returns
				+/// Result of the AND NOT operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y)
				+{
				+    FfxFloat16 one = FFX_BROADCAST_FLOAT16(1.0);
				+    return (-x) * y + one;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND NOT operation using two FfxFloat16x2 values, evaluated as
				+/// (-x) * y + 1.0.
				+///
				+/// @param [in] x           The first value to be fed into the AND NOT operator.
				+/// @param [in] y           The second value to be fed into the AND NOT operator.
				+///
				+/// @returns
				+/// Result of the AND NOT operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
				+{
				+    FfxFloat16x2 one = FFX_BROADCAST_FLOAT16X2(1.0);
				+    return (-x) * y + one;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND NOT operation using two FfxFloat16x3 values, evaluated as
				+/// (-x) * y + 1.0.
				+///
				+/// @param [in] x           The first value to be fed into the AND NOT operator.
				+/// @param [in] y           The second value to be fed into the AND NOT operator.
				+///
				+/// @returns
				+/// Result of the AND NOT operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
				+{
				+    FfxFloat16x3 one = FFX_BROADCAST_FLOAT16X3(1.0);
				+    return (-x) * y + one;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND NOT operation using two FfxFloat16x4 values, evaluated as
				+/// (-x) * y + 1.0.
				+///
				+/// @param [in] x           The first value to be fed into the AND NOT operator.
				+/// @param [in] y           The second value to be fed into the AND NOT operator.
				+///
				+/// @returns
				+/// Result of the AND NOT operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
				+{
				+    FfxFloat16x4 one = FFX_BROADCAST_FLOAT16X4(1.0);
				+    return (-x) * y + one;
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two half-precision values followed by
				+/// an OR operation using the resulting value and a third half-precision value.
				+/// Evaluated as ffxSaturate(x * y + z).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+/// @param [in] z           The value OR-ed with the result of the AND operator.
				+///
				+/// @returns
				+/// Result of the AND OR operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
				+{
				+    FfxFloat16 combined = x * y + z;
				+    return ffxSaturate(combined);
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two FfxFloat16x2 values followed by
				+/// an OR operation using the resulting value and a third FfxFloat16x2 value.
				+/// Evaluated as ffxSaturate(x * y + z).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+/// @param [in] z           The value OR-ed with the result of the AND operator.
				+///
				+/// @returns
				+/// Result of the AND OR operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
				+{
				+    FfxFloat16x2 combined = x * y + z;
				+    return ffxSaturate(combined);
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two FfxFloat16x3 values followed by
				+/// an OR operation using the resulting value and a third FfxFloat16x3 value.
				+/// Evaluated as ffxSaturate(x * y + z).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+/// @param [in] z           The value OR-ed with the result of the AND operator.
				+///
				+/// @returns
				+/// Result of the AND OR operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
				+{
				+    FfxFloat16x3 combined = x * y + z;
				+    return ffxSaturate(combined);
				+}
			
 
				+
			
 
				+/// Conditional free logic AND operation using two FfxFloat16x4 values followed by
				+/// an OR operation using the resulting value and a third FfxFloat16x4 value.
				+/// Evaluated as ffxSaturate(x * y + z).
				+///
				+/// @param [in] x           The first value to be fed into the AND operator.
				+/// @param [in] y           The second value to be fed into the AND operator.
				+/// @param [in] z           The value OR-ed with the result of the AND operator.
				+///
				+/// @returns
				+/// Result of the AND OR operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
				+{
				+    FfxFloat16x4 combined = x * y + z;
				+    return ffxSaturate(combined);
				+}
			
 
				+
			
 
				+/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not.
				+///
				+/// The value is multiplied by FFX_POSITIVE_INFINITY_HALF and the product is passed to ffxSaturate.
				+///
				+/// @param [in] x           The value to be compared.
				+///
				+/// @returns
				+/// Result of the greater than zero comparison.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x)
				+{
				+    FfxFloat16 scaled = x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF);
				+    return ffxSaturate(scaled);
				+}
			
 
				+
			
 
				+/// Given a FfxFloat16x2 value, returns 1.0 per component if greater than zero and 0.0 if not.
				+///
				+/// The value is multiplied by FFX_POSITIVE_INFINITY_HALF and the product is passed to ffxSaturate.
				+///
				+/// @param [in] x           The value to be compared.
				+///
				+/// @returns
				+/// Result of the greater than zero comparison.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x)
				+{
				+    FfxFloat16x2 scaled = x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF);
				+    return ffxSaturate(scaled);
				+}
			
 
				+
			
 
				+/// Given a FfxFloat16x3 value, returns 1.0 per component if greater than zero and 0.0 if not.
				+///
				+/// The value is multiplied by FFX_POSITIVE_INFINITY_HALF and the product is passed to ffxSaturate.
				+///
				+/// @param [in] x           The value to be compared.
				+///
				+/// @returns
				+/// Result of the greater than zero comparison.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x)
				+{
				+    FfxFloat16x3 scaled = x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF);
				+    return ffxSaturate(scaled);
				+}
			
 
				+
			
 
				+/// Given a FfxFloat16x4 value, returns 1.0 per component if greater than zero and 0.0 if not.
				+///
				+/// The value is multiplied by FFX_POSITIVE_INFINITY_HALF and the product is passed to ffxSaturate.
				+///
				+/// @param [in] x           The value to be compared.
				+///
				+/// @returns
				+/// Result of the greater than zero comparison.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x)
				+{
				+    FfxFloat16x4 scaled = x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF);
				+    return ffxSaturate(scaled);
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a half-precision floating-point value, evaluated
				+/// as 1.0 - x.
				+///
				+/// @param [in] x           The value to be fed into the NOT operator.
				+///
				+/// @returns
				+/// Result of the NOT operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x)
				+{
				+    FfxFloat16 one = FFX_BROADCAST_FLOAT16(1.0);
				+    return one - x;
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a FfxFloat16x2 value, evaluated as 1.0 - x.
				+///
				+/// @param [in] x           The value to be fed into the NOT operator.
				+///
				+/// @returns
				+/// Result of the NOT operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x)
				+{
				+    FfxFloat16x2 one = FFX_BROADCAST_FLOAT16X2(1.0);
				+    return one - x;
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a FfxFloat16x3 value, evaluated as 1.0 - x.
				+///
				+/// @param [in] x           The value to be fed into the NOT operator.
				+///
				+/// @returns
				+/// Result of the NOT operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x)
				+{
				+    FfxFloat16x3 one = FFX_BROADCAST_FLOAT16X3(1.0);
				+    return one - x;
				+}
			
 
				+
			
 
				+/// Conditional free logic NOT operation on a FfxFloat16x4 value, evaluated as 1.0 - x.
				+///
				+/// @param [in] x           The value to be fed into the NOT operator.
				+///
				+/// @returns
				+/// Result of the NOT operation.
				+///
				+/// @ingroup GPU
				+FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x)
				+{
				+    FfxFloat16x4 one = FFX_BROADCAST_FLOAT16X4(1.0);
				+    return one - x;
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two half-precision values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two half-precision values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two half-precision values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Conditional free logic OR operation using two half-precision values.
			
 
				+///
			
 
				+/// @param [in] x           The first value to be fed into the OR operator.
			
 
				+/// @param [in] y           The second value to be fed into the OR operator.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the OR operation.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
			
 
				+{
			
 
				+    return max(x, y);
			
 
				+}
			
 
				+
			
 
				+/// Choose between two half-precision values using a zero-one selector.
			
 
				+///
			
 
				+/// @param [in] x           The selector; the arithmetic picks exactly one input only when this is 0.0 or 1.0.
			
 
				+/// @param [in] y           The value to return when the selector is 1.0.
			
 
				+/// @param [in] z           The value to return when the selector is 0.0.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The selected value, computed without a comparison as <c><i>x * y + (1 - x) * z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
			
 
				+{
			
 
				+    FfxFloat16 r = (-x) * z + z;
			
 
				+    return x * y + r;
			
 
				+}
			
 
				+
			
 
				+/// Choose between two 2-component half-precision values using a zero-one selector.
			
 
				+///
			
 
				+/// @param [in] x           The selector; the arithmetic picks exactly one input only when this is 0.0 or 1.0.
			
 
				+/// @param [in] y           The value to return when the selector is 1.0.
			
 
				+/// @param [in] z           The value to return when the selector is 0.0.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The selected value, computed without a comparison as <c><i>x * y + (1 - x) * z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
			
 
				+{
			
 
				+    FfxFloat16x2 r = (-x) * z + z;
			
 
				+    return x * y + r;
			
 
				+}
			
 
				+
			
 
				+/// Choose between two 3-component half-precision values using a zero-one selector.
			
 
				+///
			
 
				+/// @param [in] x           The selector; the arithmetic picks exactly one input only when this is 0.0 or 1.0.
			
 
				+/// @param [in] y           The value to return when the selector is 1.0.
			
 
				+/// @param [in] z           The value to return when the selector is 0.0.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The selected value, computed without a comparison as <c><i>x * y + (1 - x) * z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
			
 
				+{
			
 
				+    FfxFloat16x3 r = (-x) * z + z;
			
 
				+    return x * y + r;
			
 
				+}
			
 
				+
			
 
				+/// Choose between two 4-component half-precision values using a zero-one selector.
			
 
				+///
			
 
				+/// @param [in] x           The selector; the arithmetic picks exactly one input only when this is 0.0 or 1.0.
			
 
				+/// @param [in] y           The value to return when the selector is 1.0.
			
 
				+/// @param [in] z           The value to return when the selector is 0.0.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The selected value, computed without a comparison as <c><i>x * y + (1 - x) * z</i></c>.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
			
 
				+{
			
 
				+    FfxFloat16x4 r = (-x) * z + z;
			
 
				+    return x * y + r;
			
 
				+}
			
 
				+
			
 
				+/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the sign value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the sign value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the sign value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not.
			
 
				+///
			
 
				+/// @param [in] x           The value to be compared.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// Result of the sign value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x)
			
 
				+{
			
 
				+    return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF));
			
 
				+}
			
 
				+
			
 
				+/// Compute a Rec.709 value from a linear value.
			
 
				+///
			
 
				+/// Rec.709 is used for some HDTVs.
			
 
				+///
			
 
				+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
			
 
				+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
			
 
				+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
			
 
				+///
			
 
				+/// The result is the median of the encoded threshold, the linear segment and the curved segment.
			
 
				+///
			
 
				+/// @param [in] c           The color to convert to Rec. 709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The <c><i>color</i></c> in Rec.709 space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c)
			
 
				+{
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
			
 
				+    FfxFloat16 linearSegment = c * j.y;
			
 
				+    FfxFloat16 curvedSegment = pow(c, j.z) * k.x + k.y;
			
 
				+    // clamp(j.x, linearSegment, curvedSegment) only yields the piecewise curve when clamp() acts as a
			
 
				+    // median of three; GLSL leaves clamp() undefined when minVal > maxVal, so the median is spelled out.
			
 
				+    return max(min(j.x, linearSegment), min(max(j.x, linearSegment), curvedSegment));
			
 
				+}
			
 
				+
			
 
				+/// Compute a Rec.709 value from a linear value (2-component overload).
			
 
				+///
			
 
				+/// Rec.709 is used for some HDTVs.
			
 
				+///
			
 
				+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
			
 
				+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
			
 
				+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
			
 
				+///
			
 
				+/// The result is the median of the encoded threshold, the linear segment and the curved segment.
			
 
				+///
			
 
				+/// @param [in] c           The color to convert to Rec. 709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The <c><i>color</i></c> in Rec.709 space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c)
			
 
				+{
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
			
 
				+    FfxFloat16x2 linearSegment = c * j.yy;
			
 
				+    FfxFloat16x2 curvedSegment = pow(c, j.zz) * k.xx + k.yy;
			
 
				+    // clamp(j.xx, linearSegment, curvedSegment) only yields the piecewise curve when clamp() acts as a
			
 
				+    // median of three; GLSL leaves clamp() undefined when minVal > maxVal, so the median is spelled out.
			
 
				+    return max(min(j.xx, linearSegment), min(max(j.xx, linearSegment), curvedSegment));
			
 
				+}
			
 
				+
			
 
				+/// Compute a Rec.709 value from a linear value (3-component overload).
			
 
				+///
			
 
				+/// Rec.709 is used for some HDTVs.
			
 
				+///
			
 
				+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
			
 
				+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
			
 
				+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
			
 
				+///
			
 
				+/// The result is the median of the encoded threshold, the linear segment and the curved segment.
			
 
				+///
			
 
				+/// @param [in] c           The color to convert to Rec. 709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The <c><i>color</i></c> in Rec.709 space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c)
			
 
				+{
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
			
 
				+    FfxFloat16x3 linearSegment = c * j.yyy;
			
 
				+    FfxFloat16x3 curvedSegment = pow(c, j.zzz) * k.xxx + k.yyy;
			
 
				+    // clamp(j.xxx, linearSegment, curvedSegment) only yields the piecewise curve when clamp() acts as a
			
 
				+    // median of three; GLSL leaves clamp() undefined when minVal > maxVal, so the median is spelled out.
			
 
				+    return max(min(j.xxx, linearSegment), min(max(j.xxx, linearSegment), curvedSegment));
			
 
				+}
			
 
				+
			
 
				+/// Compute a gamma value from a linear value.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+/// 
			
 
				+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
			
 
				+/// 
			
 
				+/// @param [in] c              The value to convert to gamma space from linear.
			
 
				+/// @param [in] rcpX           The reciprocal of power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in gamma space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16(rcpX));
			
 
				+}
			
 
				+
			
 
				+/// Compute a gamma value from a linear value.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+/// 
			
 
				+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
			
 
				+/// 
			
 
				+/// @param [in] c              The value to convert to gamma space from linear.
			
 
				+/// @param [in] rcpX           The reciprocal of power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in gamma space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX));
			
 
				+}
			
 
				+
			
 
				+/// Compute a gamma value from a linear value.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+/// 
			
 
				+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
			
 
				+/// 
			
 
				+/// @param [in] c              The value to convert to gamma space from linear.
			
 
				+/// @param [in] rcpX           The reciprocal of power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in gamma space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX));
			
 
				+}
			
 
				+
			
 
				+/// Compute an SRGB value from a linear value.
			
 
				+///
			
 
				+/// The result is the median of the encoded threshold, the linear segment and the curved segment.
			
 
				+///
			
 
				+/// @param [in] c           The value to convert to SRGB from linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in SRGB space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c)
			
 
				+{
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
			
 
				+    FfxFloat16 linearSegment = c * j.y;
			
 
				+    FfxFloat16 curvedSegment = pow(c, j.z) * k.x + k.y;
			
 
				+    // clamp(j.x, linearSegment, curvedSegment) only yields the piecewise curve when clamp() acts as a
			
 
				+    // median of three; GLSL leaves clamp() undefined when minVal > maxVal, so the median is spelled out.
			
 
				+    return max(min(j.x, linearSegment), min(max(j.x, linearSegment), curvedSegment));
			
 
				+}
			
 
				+
			
 
				+/// Compute an SRGB value from a linear value (2-component overload).
			
 
				+///
			
 
				+/// The result is the median of the encoded threshold, the linear segment and the curved segment.
			
 
				+///
			
 
				+/// @param [in] c           The value to convert to SRGB from linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in SRGB space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c)
			
 
				+{
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
			
 
				+    FfxFloat16x2 linearSegment = c * j.yy;
			
 
				+    FfxFloat16x2 curvedSegment = pow(c, j.zz) * k.xx + k.yy;
			
 
				+    // clamp(j.xx, linearSegment, curvedSegment) only yields the piecewise curve when clamp() acts as a
			
 
				+    // median of three; GLSL leaves clamp() undefined when minVal > maxVal, so the median is spelled out.
			
 
				+    return max(min(j.xx, linearSegment), min(max(j.xx, linearSegment), curvedSegment));
			
 
				+}
			
 
				+
			
 
				+/// Compute an SRGB value from a linear value (3-component overload).
			
 
				+///
			
 
				+/// The result is the median of the encoded threshold, the linear segment and the curved segment.
			
 
				+///
			
 
				+/// @param [in] c           The value to convert to SRGB from linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in SRGB space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c)
			
 
				+{
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
			
 
				+    FfxFloat16x3 linearSegment = c * j.yyy;
			
 
				+    FfxFloat16x3 curvedSegment = pow(c, j.zzz) * k.xxx + k.yyy;
			
 
				+    // clamp(j.xxx, linearSegment, curvedSegment) only yields the piecewise curve when clamp() acts as a
			
 
				+    // median of three; GLSL leaves clamp() undefined when minVal > maxVal, so the median is spelled out.
			
 
				+    return max(min(j.xxx, linearSegment), min(max(j.xxx, linearSegment), curvedSegment));
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] c           The value to compute the square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A square root of the input value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxSquareRootHalf(FfxFloat16 c)
			
 
				+{
			
 
				+    return sqrt(c);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] c           The value to compute the square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A square root of the input value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c)
			
 
				+{
			
 
				+    return sqrt(c);
			
 
				+}
			
 
				+
			
 
				+/// Compute the square root of a value.
			
 
				+///
			
 
				+/// @param [in] c           The value to compute the square root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A square root of the input value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c)
			
 
				+{
			
 
				+    return sqrt(c);
			
 
				+}
			
 
				+
			
 
				+/// Compute the cube root of a value.
			
 
				+///
			
 
				+/// @param [in] c           The value to compute the cube root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A cube root of the input value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxCubeRootHalf(FfxFloat16 c)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0));
			
 
				+}
			
 
				+
			
 
				+/// Compute the cube root of a value.
			
 
				+///
			
 
				+/// @param [in] c           The value to compute the cube root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A cube root of the input value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0));
			
 
				+}
			
 
				+
			
 
				+/// Compute the cube root of a value.
			
 
				+///
			
 
				+/// @param [in] c           The value to compute the cube root for.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A cube root of the input value.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a REC.709 value.
			
 
				+///
			
 
				+/// @param [in] c           The value to convert to linear from REC.709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c)
			
 
				+{
			
 
				+    // j.x is the segment cutoff. The input is REC.709 encoded, so the cutoff is the encoded value 0.081
			
 
				+    // (4.5 * 0.018) rather than the linear value 0.018; the two segments only meet at the encoded cutoff.
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.081, 1.0 / 4.5, 1.0 / 0.45);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);
			
 
				+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a REC.709 value (2-component overload).
			
 
				+///
			
 
				+/// @param [in] c           The value to convert to linear from REC.709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c)
			
 
				+{
			
 
				+    // j.x is the segment cutoff. The input is REC.709 encoded, so the cutoff is the encoded value 0.081
			
 
				+    // (4.5 * 0.018) rather than the linear value 0.018; the two segments only meet at the encoded cutoff.
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.081, 1.0 / 4.5, 1.0 / 0.45);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);
			
 
				+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a REC.709 value (3-component overload).
			
 
				+///
			
 
				+/// @param [in] c           The value to convert to linear from REC.709.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c)
			
 
				+{
			
 
				+    // j.x is the segment cutoff. The input is REC.709 encoded, so the cutoff is the encoded value 0.081
			
 
				+    // (4.5 * 0.018) rather than the linear value 0.018; the two segments only meet at the encoded cutoff.
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.081, 1.0 / 4.5, 1.0 / 0.45);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);
			
 
				+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a gamma space.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+///
			
 
				+/// @param [in] c           The gamma space value to convert to linear.
			
 
				+/// @param [in] x           The power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16(x));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a gamma space.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+///
			
 
				+/// @param [in] c           The gamma space value to convert to linear.
			
 
				+/// @param [in] x           The power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16X2(x));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a gamma space.
			
 
				+///
			
 
				+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
			
 
				+///
			
 
				+/// @param [in] c           The gamma space value to convert to linear.
			
 
				+/// @param [in] x           The power value used for the gamma curve.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
			
 
				+{
			
 
				+    return pow(c, FFX_BROADCAST_FLOAT16X3(x));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a SRGB space.
			
 
				+///
			
 
				+/// Inputs below the encoded cutoff 0.04045 use the linear segment, all other inputs use the 2.4 power segment.
			
 
				+///
			
 
				+/// @param [in] c           The SRGB space value to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
			
 
				+{
			
 
				+    // j.x is the segment cutoff. The input is SRGB encoded, so the cutoff is the encoded value 0.04045
			
 
				+    // (12.92 * 0.0031308) rather than the linear value 0.0031308; the segments only meet at the encoded cutoff.
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
			
 
				+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a SRGB space (2-component overload).
			
 
				+///
			
 
				+/// Inputs below the encoded cutoff 0.04045 use the linear segment, all other inputs use the 2.4 power segment.
			
 
				+///
			
 
				+/// @param [in] c           The SRGB space value to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
			
 
				+{
			
 
				+    // j.x is the segment cutoff. The input is SRGB encoded, so the cutoff is the encoded value 0.04045
			
 
				+    // (12.92 * 0.0031308) rather than the linear value 0.0031308; the segments only meet at the encoded cutoff.
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
			
 
				+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
			
 
				+}
			
 
				+
			
 
				+/// Compute a linear value from a value in a SRGB space (3-component overload).
			
 
				+///
			
 
				+/// Inputs below the encoded cutoff 0.04045 use the linear segment, all other inputs use the 2.4 power segment.
			
 
				+///
			
 
				+/// @param [in] c           The SRGB space value to convert to linear.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A value in linear space.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
			
 
				+{
			
 
				+    // j.x is the segment cutoff. The input is SRGB encoded, so the cutoff is the encoded value 0.04045
			
 
				+    // (12.92 * 0.0031308) rather than the linear value 0.0031308; the segments only meet at the encoded cutoff.
			
 
				+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
			
 
				+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
			
 
				+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
			
 
				+}
			
 
				+
			
 
				+/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
			
 
				+/// 
			
 
				+///  543210
			
 
				+///  ======
			
 
				+///  ..xxx.
			
 
				+///  yy...y
			
 
				+/// 
			
 
				+/// @param [in] a       The input 1D coordinates to remap.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The remapped 2D coordinates.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a)
			
 
				+{
			
 
				+    return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u));
			
 
				+}
			
 
				+
			
 
				+/// A helper function performing a 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
			
 
				+///
			
 
				+/// The 64-wide lane indices to 8x8 remapping is performed as follows:
			
 
				+/// 
			
 
				+///     00 01 08 09 10 11 18 19
			
 
				+///     02 03 0a 0b 12 13 1a 1b
			
 
				+///     04 05 0c 0d 14 15 1c 1d
			
 
				+///     06 07 0e 0f 16 17 1e 1f
			
 
				+///     20 21 28 29 30 31 38 39
			
 
				+///     22 23 2a 2b 32 33 3a 3b
			
 
				+///     24 25 2c 2d 34 35 3c 3d
			
 
				+///     26 27 2e 2f 36 37 3e 3f
			
 
				+///
			
 
				+/// @param [in] a       The input 1D coordinate to remap.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The remapped 2D coordinates.
			
 
				+/// 
			
 
				+/// @ingroup GPU
			
 
				+FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a)
			
 
				+{
			
 
				+    return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u));
			
 
				+}
			
 
				+
			
 
				+#endif  // FFX_HALF
			
--- a/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
@@ -0,0 +1,1502 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+/// A define for abstracting shared memory between shading languages.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_GROUPSHARED groupshared
			
 
				+
			
 
				+/// A define for abstracting compute memory barriers between shading languages.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync
			
 
				+
			
 
				+/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_STATIC static
			
 
				+
			
 
				+/// A define for abstracting loop unrolling between shading languages.
			
 
				+///
			
 
				+/// @ingroup GPU 
			
 
				+#define FFX_UNROLL [unroll]
			
 
				+
			
 
				+/// A define for abstracting a 'greater than' comparison operator between two types.
			
 
				+///
			
 
				+/// The arguments and the whole expansion are parenthesized so the macro composes safely with surrounding operators.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_GREATER_THAN(x, y) ((x) > (y))
			
 
				+
			
 
				+/// A define for abstracting a 'greater than or equal' comparison operator between two types.
			
 
				+///
			
 
				+/// The arguments and the whole expansion are parenthesized so the macro composes safely with surrounding operators.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_GREATER_THAN_EQUAL(x, y) ((x) >= (y))
			
 
				+
			
 
				+/// A define for abstracting a 'less than' comparison operator between two types.
			
 
				+///
			
 
				+/// The arguments and the whole expansion are parenthesized so the macro composes safely with surrounding operators.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_LESS_THAN(x, y) ((x) < (y))
			
 
				+
			
 
				+/// A define for abstracting a 'less than or equal' comparison operator between two types.
			
 
				+///
			
 
				+/// The arguments and the whole expansion are parenthesized so the macro composes safely with surrounding operators.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_LESS_THAN_EQUAL(x, y) ((x) <= (y))
			
 
				+
			
 
				+/// A define for abstracting an 'equal' comparison operator between two types.
			
 
				+///
			
 
				+/// The arguments and the whole expansion are parenthesized so the macro composes safely with surrounding operators.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_EQUAL(x, y) ((x) == (y))
			
 
				+
			
 
				+/// A define for abstracting a 'not equal' comparison operator between two types.
			
 
				+///
			
 
				+/// The arguments and the whole expansion are parenthesized so the macro composes safely with surrounding operators.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_NOT_EQUAL(x, y) ((x) != (y))
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_UINT32(x) FfxUInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_INT32(x) FfxInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_INT32X2(x) FfxInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_INT32X3(x) FfxInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_INT32X4(x) FfxInt32(x)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_FLOAT16(a)   FFX_MIN16_F(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_UINT16(a)   FFX_MIN16_U(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_INT16(a)   FFX_MIN16_I(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a)
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
			
 
				+///
			
 
				+/// @ingroup GPU
			
 
				+#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a)
			
 
				+
			
 
				+/// Pack 2x32-bit floating point values in a single 32bit value.
			
 
				+/// 
			
 
				+/// This function first converts each component of <c><i>value</i></c> into their nearest 16-bit floating
			
 
				+/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the
			
 
				+/// 32bit unsigned integer respectively.
			
 
				+///
			
 
				+/// @param [in] value               A 2-dimensional floating point value to convert and pack.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// A packed 32bit value containing 2 16bit floating point values.
			
 
				+/// 
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32 packHalf2x16(FfxFloat32x2 value)
			
 
				+{
			
 
				+    // Convert each component to its 16-bit floating point encoding.
			
 
				+    FfxUInt32 lowerHalf = f32tof16(value.x);
			
 
				+    FfxUInt32 upperHalf = f32tof16(value.y);
			
 
				+    // X lands in bits [0..15], Y is shifted into bits [16..31].
			
 
				+    return lowerHalf | (upperHalf << 16);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional floating point vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
			
 
				+{
			
 
				+    return FfxFloat32x2(value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional floating point vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
			
 
				+{
			
 
				+    return FfxFloat32x3(value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional floating point vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
			
 
				+{
			
 
				+    return FfxFloat32x4(value, value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxInt32x2 ffxBroadcast2(FfxInt32 value)
			
 
				+{
			
 
				+    return FfxInt32x2(value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxInt32x3 ffxBroadcast3(FfxInt32 value)
			
 
				+{
			
 
				+    // Build a signed vector, matching the documented contract and the 2- and 4-component signed overloads.
			
 
				+    return FfxInt32x3(value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional signed integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxInt32x4 ffxBroadcast4(FfxInt32 value)
			
 
				+{
			
 
				+    return FfxInt32x4(value, value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
			
 
				+{
			
 
				+    return FfxUInt32x2(value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
			
 
				+{
			
 
				+    return FfxUInt32x3(value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
			
 
				+///
			
 
				+/// @param [in] value               The value to broadcast.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
			
 
				+{
			
 
				+    return FfxUInt32x4(value, value, value, value);
			
 
				+}
			
 
				+
			
 
				+/// Extract a contiguous run of bits from a 32bit unsigned integer.
			
 
				+///
			
 
				+/// @param [in] src                 The value to extract bits from.
			
 
				+/// @param [in] off                 The index of the least significant bit to extract.
			
 
				+/// @param [in] bits                The number of bits to extract.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// <c><i>src</i></c> shifted right by <c><i>off</i></c> and masked to its lowest <c><i>bits</i></c> bits.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
			
 
				+{
			
 
				+    // A shift by 32 or more cannot produce a full-width mask, so that width is handled explicitly.
			
 
				+    FfxUInt32 mask = (bits >= 32u) ? 0xffffffffu : ((1u << bits) - 1u);
			
 
				+    return (src >> off) & mask;
			
 
				+}
			
 
				+
			
 
				+/// Merge two 32bit unsigned integers bitwise under a mask.
			
 
				+///
			
 
				+/// @param [in] src                 Supplies the result bits where <c><i>mask</i></c> is 0.
			
 
				+/// @param [in] ins                 Supplies the result bits where <c><i>mask</i></c> is 1.
			
 
				+/// @param [in] mask                Selects, per bit, whether the result bit comes from <c><i>ins</i></c> or <c><i>src</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The bits of <c><i>ins</i></c> selected by <c><i>mask</i></c> combined with the remaining bits of <c><i>src</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
			
 
				+{
			
 
				+    return (ins & mask) | (src & (~mask));
			
 
				+}
			
 
				+
			
 
				+/// Replace the lowest bits of a 32bit unsigned integer with the lowest bits of another.
			
 
				+///
			
 
				+/// @param [in] src                 Supplies the result bits at index <c><i>bits</i></c> and above.
			
 
				+/// @param [in] ins                 Supplies the lowest <c><i>bits</i></c> bits of the result.
			
 
				+/// @param [in] bits                The number of low bits to take from <c><i>ins</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// <c><i>src</i></c> with its lowest <c><i>bits</i></c> bits replaced by those of <c><i>ins</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
			
 
				+{
			
 
				+    // A shift by 32 or more cannot produce a full-width mask, so that width is handled explicitly.
			
 
				+    FfxUInt32 mask = (bits >= 32u) ? 0xffffffffu : ((1u << bits) - 1u);
			
 
				+    return (ins & mask) | (src & (~mask));
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as an unsigned integer.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32 ffxAsUInt32(FfxFloat32 x)
			
 
				+{
			
 
				+    return asuint(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as an unsigned integer.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return asuint(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as an unsigned integer.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return asuint(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as an unsigned integer.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as an unsigned integer.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return asuint(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as a floating-point number.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as a floating-point number.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32 ffxAsFloat(FfxUInt32 x)
			
 
				+{
			
 
				+    return asfloat(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as a floating-point number.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as a floating-point number.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
			
 
				+{
			
 
				+    return asfloat(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as a floating-point number.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as a floating-point number.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
			
 
				+{
			
 
				+    return asfloat(x);
			
 
				+}
			
 
				+
			
 
				+/// Interprets the bit pattern of x as a floating-point number.
			
 
				+///
			
 
				+/// @param [in] x                   The input value.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The input interpreted as a floating-point number.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
			
 
				+{
			
 
				+    return asfloat(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return lerp(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return lerp(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
			
 
				+{
			
 
				+    return lerp(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return lerp(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
			
 
				+{
			
 
				+    return lerp(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
			
 
				+{
			
 
				+    return lerp(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Compute the linear interpolation between two values.
			
 
				+///
			
 
				+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
			
 
				+/// following math:
			
 
				+///
			
 
				+///     (1 - t) * x + t * y
			
 
				+///
			
 
				+/// @param [in] x               The first value to lerp between.
			
 
				+/// @param [in] y               The second value to lerp between.
			
 
				+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
			
 
				+{
			
 
				+    return lerp(x, y, t);
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] x               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32 ffxSaturate(FfxFloat32 x)
			
 
				+{
			
 
				+    return saturate(x);
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] x               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return saturate(x);
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] x               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return saturate(x);
			
 
				+}
			
 
				+
			
 
				+/// Clamp a value to a [0..1] range.
			
 
				+///
			
 
				+/// @param [in] x               The value to clamp to [0..1] range.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The clamped version of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return saturate(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware. It is
			
 
				+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
			
 
				+/// function.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the fractional part from.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32 ffxFract(FfxFloat32 x)
			
 
				+{
			
 
				+    return x - floor(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware. It is
			
 
				+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
			
 
				+/// function.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the fractional part from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxFract(FfxFloat32x2 x)
			
 
				+{
			
 
				+    return x - floor(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware. It is
			
 
				+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
			
 
				+/// function.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the fractional part from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxFract(FfxFloat32x3 x)
			
 
				+{
			
 
				+    return x - floor(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the fractional part of a decimal value.
			
 
				+///
			
 
				+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware. It is
			
 
				+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic 
			
 
				+/// function.
			
 
				+///
			
 
				+/// @param [in] x               The value to compute the fractional part from.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The fractional part of <c><i>x</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxFract(FfxFloat32x4 x)
			
 
				+{
			
 
				+    return x - floor(x);
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+/// 
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+/// 
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+/// 
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+/// 
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the maximum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the max calculation.
			
 
				+/// @param [in] y               The second value to include in the max calculation.
			
 
				+/// @param [in] z               The third value to include in the max calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
			
 
				+{
			
 
				+    return max(x, max(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+    // return min(max(min(y, z), x), max(y, z));
			
 
				+    // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+    // return min(max(min(y, z), x), max(y, z));
			
 
				+    // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the median of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the median calculation.
			
 
				+/// @param [in] y               The second value to include in the median calculation.
			
 
				+/// @param [in] z               The third value to include in the median calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+/// Compute the minimum of three values.
			
 
				+///
			
 
				+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
			
 
				+///
			
 
				+/// @param [in] x               The first value to include in the min calculation.
			
 
				+/// @param [in] y               The second value to include in the min calculation.
			
 
				+/// @param [in] z               The third value to include in the min calculation.
			
 
				+///
			
 
				+/// @returns
			
 
				+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
			
 
				+///
			
 
				+/// @ingroup HLSL
			
 
				+FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
			
 
				+{
			
 
				+    return min(x, min(y, z));
			
 
				+}
			
 
				+
			
 
				+
			
 
				+FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
			
 
				+{
			
 
				+    return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
			
 
				+}
			
 
				+
			
 
				+//==============================================================================================================================
			
 
				+//                                                          HLSL HALF
			
 
				+//==============================================================================================================================
			
 
				+#if FFX_HALF
			
 
				+
			
 
				+//==============================================================================================================================
			
 
				+// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
			
 
				+// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/
			
 
				+FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x)
			
 
				+{
			
 
				+	FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16));
			
 
				+	return FFX_MIN16_F2(t);
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
			
 
				+{
			
 
				+	return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y));
			
 
				+}
			
 
				+FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x)
			
 
				+{
			
 
				+	FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16);
			
 
				+	return FFX_MIN16_U2(t);
			
 
				+}
			
 
				+FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x)
			
 
				+{
			
 
				+	return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y));
			
 
				+}
			
 
				+#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x))
			
 
				+#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
			
 
				+#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x))
			
 
				+#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x))
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x)
			
 
				+{
			
 
				+	return f32tof16(x.x) + (f32tof16(x.y) << 16);
			
 
				+}
			
 
				+FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x)
			
 
				+{
			
 
				+	return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw));
			
 
				+}
			
 
				+FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x)
			
 
				+{
			
 
				+	return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16);
			
 
				+}
			
 
				+FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x)
			
 
				+{
			
 
				+	return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw));
			
 
				+}
			
 
				+#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x))
			
 
				+#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x))
			
 
				+#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x))
			
 
				+#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x))
			
 
				+
			
 
				+#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
			
 
				+#define FFX_TO_UINT16(x) asuint16(x)
			
 
				+#define FFX_TO_UINT16X2(x) asuint16(x)
			
 
				+#define FFX_TO_UINT16X3(x) asuint16(x)
			
 
				+#define FFX_TO_UINT16X4(x) asuint16(x)
			
 
				+#else
			
 
				+#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a)))
			
 
				+#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y))
			
 
				+#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z))
			
 
				+#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w))
			
 
				+#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
			
 
				+
			
 
				+#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
			
 
				+#define FFX_TO_FLOAT16(x) asfloat16(x)
			
 
				+#define FFX_TO_FLOAT16X2(x) asfloat16(x)
			
 
				+#define FFX_TO_FLOAT16X3(x) asfloat16(x)
			
 
				+#define FFX_TO_FLOAT16X4(x) asfloat16(x)
			
 
				+#else
			
 
				+#define FFX_TO_FLOAT16(a) FFX_MIN16_F(f16tof32(FfxUInt32(a)))
			
 
				+#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y))
			
 
				+#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z))
			
 
				+#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w))
			
 
				+#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
			
 
				+
			
 
				+//==============================================================================================================================
			
 
				+#define FFX_BROADCAST_FLOAT16(a)   FFX_MIN16_F(a)
			
 
				+#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a)
			
 
				+#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a)
			
 
				+#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a)
			
 
				+
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+#define FFX_BROADCAST_INT16(a)   FFX_MIN16_I(a)
			
 
				+#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a)
			
 
				+#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a)
			
 
				+#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a)
			
 
				+
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+#define FFX_BROADCAST_UINT16(a)   FFX_MIN16_U(a)
			
 
				+#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a)
			
 
				+#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a)
			
 
				+#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a)
			
 
				+
			
 
				+//==============================================================================================================================
			
 
				+FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a)
			
 
				+{
			
 
				+	return FFX_MIN16_U(abs(FFX_MIN16_I(a)));
			
 
				+}
			
 
				+FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a)
			
 
				+{
			
 
				+	return FFX_MIN16_U2(abs(FFX_MIN16_I2(a)));
			
 
				+}
			
 
				+FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a)
			
 
				+{
			
 
				+	return FFX_MIN16_U3(abs(FFX_MIN16_I3(a)));
			
 
				+}
			
 
				+FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a)
			
 
				+{
			
 
				+	return FFX_MIN16_U4(abs(FFX_MIN16_I4(a)));
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m)
			
 
				+{
			
 
				+	return max(n, min(x, m));
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m)
			
 
				+{
			
 
				+	return max(n, min(x, m));
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m)
			
 
				+{
			
 
				+	return max(n, min(x, m));
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m)
			
 
				+{
			
 
				+	return max(n, min(x, m));
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// V_FRACT_F16 (note DX frac() is different).
			
 
				+FFX_MIN16_F ffxFract(FFX_MIN16_F x)
			
 
				+{
			
 
				+	return x - floor(x);
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x)
			
 
				+{
			
 
				+	return x - floor(x);
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x)
			
 
				+{
			
 
				+	return x - floor(x);
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x)
			
 
				+{
			
 
				+	return x - floor(x);
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a)
			
 
				+{
			
 
				+	return lerp(x, y, a);
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a)
			
 
				+{
			
 
				+	return lerp(x, y, a);
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a)
			
 
				+{
			
 
				+	return lerp(x, y, a);
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a)
			
 
				+{
			
 
				+	return lerp(x, y, a);
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a)
			
 
				+{
			
 
				+	return lerp(x, y, a);
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a)
			
 
				+{
			
 
				+	return lerp(x, y, a);
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a)
			
 
				+{
			
 
				+	return lerp(x, y, a);
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
			
 
				+{
			
 
				+	return max(x, max(y, z));
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
			
 
				+{
			
 
				+	return max(x, max(y, z));
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
			
 
				+{
			
 
				+	return max(x, max(y, z));
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
			
 
				+{
			
 
				+	return max(x, max(y, z));
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
			
 
				+{
			
 
				+	return min(x, min(y, z));
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
			
 
				+{
			
 
				+	return min(x, min(y, z));
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
			
 
				+{
			
 
				+	return min(x, min(y, z));
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
			
 
				+{
			
 
				+	return min(x, min(y, z));
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z)
			
 
				+{
			
 
				+    return max(min(x, y), min(max(x, y), z));
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x)
			
 
				+{
			
 
				+	return rcp(x);
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x)
			
 
				+{
			
 
				+	return rcp(x);
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x)
			
 
				+{
			
 
				+	return rcp(x);
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x)
			
 
				+{
			
 
				+	return rcp(x);
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x)
			
 
				+{
			
 
				+	return rsqrt(x);
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x)
			
 
				+{
			
 
				+	return rsqrt(x);
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x)
			
 
				+{
			
 
				+	return rsqrt(x);
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x)
			
 
				+{
			
 
				+	return rsqrt(x);
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_F ffxSaturate(FFX_MIN16_F x)
			
 
				+{
			
 
				+	return saturate(x);
			
 
				+}
			
 
				+FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x)
			
 
				+{
			
 
				+	return saturate(x);
			
 
				+}
			
 
				+FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x)
			
 
				+{
			
 
				+	return saturate(x);
			
 
				+}
			
 
				+FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x)
			
 
				+{
			
 
				+	return saturate(x);
			
 
				+}
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b)
			
 
				+{
			
 
				+	return FFX_MIN16_U(FFX_MIN16_I(a) >> FFX_MIN16_I(b));
			
 
				+}
			
 
				+FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b)
			
 
				+{
			
 
				+	return FFX_MIN16_U2(FFX_MIN16_I2(a) >> FFX_MIN16_I2(b));
			
 
				+}
			
 
				+FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b)
			
 
				+{
			
 
				+	return FFX_MIN16_U3(FFX_MIN16_I3(a) >> FFX_MIN16_I3(b));
			
 
				+}
			
 
				+FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)
			
 
				+{
			
 
				+	return FFX_MIN16_U4(FFX_MIN16_I4(a) >> FFX_MIN16_I4(b));
			
 
				+}
			
 
				+#endif // FFX_HALF
			
 
				+
			
 
				+//==============================================================================================================================
			
 
				+//                                                         HLSL WAVE
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_WAVE)
			
 
				+// Where 'x' must be a compile time literal.
			
 
				+FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
			
 
				+}
			
 
				+FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
			
 
				+}
			
 
				+FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
			
 
				+}
			
 
				+FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
			
 
				+}
			
 
				+FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
			
 
				+}
			
 
				+FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
			
 
				+}
			
 
				+FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
			
 
				+}
			
 
				+FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
			
 
				+}
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));
			
 
				+}
			
 
				+FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
			
 
				+}
			
 
				+FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));
			
 
				+}
			
 
				+// Returns the value of v held by the lane whose index is (this lane's index XOR x).
			
 
				+// The four 16-bit values are packed into two 32-bit words for the cross-lane read and unpacked afterwards,
			
 
				+// using the same FFX_UINT16X4_TO_UINT32X2 / FFX_UINT32X2_TO_UINT16X4 macro pair defined in the FFX_HALF section.
			
 
				+FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
			
 
				+{
			
 
				+    return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
			
 
				+}
			
 
				+#endif // FFX_HALF
			
 
				+#endif // #if defined(FFX_WAVE)
			
--- a/thirdparty/amd-fsr2/shaders/ffx_core_portability.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_portability.h
@@ -0,0 +1,50 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
			
 
				+{
			
 
				+    d = a + ffxBroadcast3(b);
			
 
				+    return d;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
			
 
				+{
			
 
				+    d = a;
			
 
				+    return d;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
			
 
				+{
			
 
				+    d = a * b;
			
 
				+    return d;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
			
 
				+{
			
 
				+    d = a * ffxBroadcast3(b);
			
 
				+    return d;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
			
 
				+{
			
 
				+    d = rcp(a);
			
 
				+    return d;
			
 
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr1.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr1.h
@@ -0,0 +1,1250 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifdef __clang__
			
 
				+#pragma clang diagnostic ignored "-Wunused-variable"
			
 
				+#endif
			
 
				+
			
 
				+/// Set up the constant values required by EASU (works on CPU or GPU).
			
 
				+///
			
 
				+/// @param [out] con0                                   Output-pixel to input-viewport scale ([0], [1]) and offset ([2], [3]), stored as float bit patterns.
			
 
				+/// @param [out] con1                                   Reciprocal of the input resource size ([0], [1]) and offset from 'F' to gather4 center (0) ([2], [3]), stored as float bit patterns.
			
 
				+/// @param [out] con2                                   Offsets from gather4 center (0) to center (1) ([0], [1]) and to center (2) ([2], [3]), stored as float bit patterns.
			
 
				+/// @param [out] con3                                   Offset from gather4 center (0) to center (3) ([0], [1]); [2] and [3] are set to zero.
			
 
				+/// @param [in] inputViewportInPixelsX                  The rendered image resolution being upscaled in X dimension.
			
 
				+/// @param [in] inputViewportInPixelsY                  The rendered image resolution being upscaled in Y dimension.
			
 
				+/// @param [in] inputSizeInPixelsX                      The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension.
			
 
				+/// @param [in] inputSizeInPixelsY                      The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension.
			
 
				+/// @param [in] outputSizeInPixelsX                     The display resolution which the input image gets upscaled to in X dimension.
			
 
				+/// @param [in] outputSizeInPixelsY                     The display resolution which the input image gets upscaled to in Y dimension.
			
 
				+///
			
 
				+/// @ingroup FSR1
			
 
				+FFX_STATIC void ffxFsrPopulateEasuConstants(
			
 
				+    FFX_PARAMETER_INOUT FfxUInt32x4 con0,
			
 
				+    FFX_PARAMETER_INOUT FfxUInt32x4 con1,
			
 
				+    FFX_PARAMETER_INOUT FfxUInt32x4 con2,
			
 
				+    FFX_PARAMETER_INOUT FfxUInt32x4 con3,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY)
			
 
				+{
			
 
				+    // Map an output integer pixel position to a pixel position in the input viewport (scale, then half-pixel offset).
			
 
				+    con0[0] = ffxAsUInt32(inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX));
			
 
				+    con0[1] = ffxAsUInt32(inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY));
			
 
				+    con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5));
			
 
				+    con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5));
			
 
				+
			
 
				+    // Viewport pixel position to normalized image space.
			
 
				+    // This is used to get the upper-left of the 'F' tap.
			
 
				+    con1[0] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsX));
			
 
				+    con1[1] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsY));
			
 
				+
			
 
				+    // Centers of gather4, first offset from upper-left of 'F'.
			
 
				+    //      +---+---+
			
 
				+    //      |   |   |
			
 
				+    //      +--(0)--+
			
 
				+    //      | b | c |
			
 
				+    //  +---F---+---+---+
			
 
				+    //  | e | f | g | h |
			
 
				+    //  +--(1)--+--(2)--+
			
 
				+    //  | i | j | k | l |
			
 
				+    //  +---+---+---+---+
			
 
				+    //      | n | o |
			
 
				+    //      +--(3)--+
			
 
				+    //      |   |   |
			
 
				+    //      +---+---+
			
 
				+    con1[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX));
			
 
				+    con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsY));
			
 
				+
			
 
				+    // The remaining offsets are relative to center (0) instead of 'F'.
			
 
				+    con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsX));
			
 
				+    con2[1] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY));
			
 
				+    con2[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX));
			
 
				+    con2[3] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY));
			
 
				+    con3[0] = ffxAsUInt32(FfxFloat32(0.0) * ffxReciprocal(inputSizeInPixelsX));
			
 
				+    con3[1] = ffxAsUInt32(FfxFloat32(4.0) * ffxReciprocal(inputSizeInPixelsY));
			
 
				+    con3[2] = con3[3] = 0;
			
 
				+}
			
 
				+
			
 
				+/// Set up the constant values for EASU when the input image sits at an offset inside its resource (works on CPU or GPU).
			
 
				+///
			
 
				+/// @param [out] con0                               Filled by ffxFsrPopulateEasuConstants, then [2] and [3] are recomputed with the input offset added.
			
 
				+/// @param [out] con1                               Filled by ffxFsrPopulateEasuConstants.
			
 
				+/// @param [out] con2                               Filled by ffxFsrPopulateEasuConstants.
			
 
				+/// @param [out] con3                               Filled by ffxFsrPopulateEasuConstants.
			
 
				+/// @param [in] inputViewportInPixelsX              The resolution of the input in the X dimension.
			
 
				+/// @param [in] inputViewportInPixelsY              The resolution of the input in the Y dimension.
			
 
				+/// @param [in] inputSizeInPixelsX                  The input size in pixels in the X dimension.
			
 
				+/// @param [in] inputSizeInPixelsY                  The input size in pixels in the Y dimension.
			
 
				+/// @param [in] outputSizeInPixelsX                 The output size in pixels in the X dimension.
			
 
				+/// @param [in] outputSizeInPixelsY                 The output size in pixels in the Y dimension.
			
 
				+/// @param [in] inputOffsetInPixelsX                The input image offset in the X dimension into the resource containing it (useful for dynamic resolution).
			
 
				+/// @param [in] inputOffsetInPixelsY                The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution).
			
 
				+///
			
 
				+/// @ingroup FSR1
			
 
				+FFX_STATIC void ffxFsrPopulateEasuConstantsOffset(
			
 
				+    FFX_PARAMETER_INOUT FfxUInt32x4 con0,
			
 
				+    FFX_PARAMETER_INOUT FfxUInt32x4 con1,
			
 
				+    FFX_PARAMETER_INOUT FfxUInt32x4 con2,
			
 
				+    FFX_PARAMETER_INOUT FfxUInt32x4 con3,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY)
			
 
				+{
			
 
				+    ffxFsrPopulateEasuConstants(
			
 
				+        con0,
			
 
				+        con1,
			
 
				+        con2,
			
 
				+        con3,
			
 
				+        inputViewportInPixelsX,
			
 
				+        inputViewportInPixelsY,
			
 
				+        inputSizeInPixelsX,
			
 
				+        inputSizeInPixelsY,
			
 
				+        outputSizeInPixelsX,
			
 
				+        outputSizeInPixelsY);
			
 
				+
			
 
				+    // Override con0[2] and con0[3]: same half-pixel offset as above, plus the input image offset.
			
 
				+    con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX);
			
 
				+    con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY);
			
 
				+}
			
 
				+
			
 
				+#if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
			
 
				+// Input callback prototypes; these must be implemented by the calling shader.
			
 
				+FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p);
			
 
				+FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p);
			
 
				+FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p);
			
 
				+
			
 
				+// Filter a single tap (32-bit scalar path): compute its weight and add the weighted color and the weight to the running sums.
			
 
				+void fsrEasuTapFloat(
			
 
				+    FFX_PARAMETER_INOUT FfxFloat32x3 accumulatedColor,   // Accumulated color, with negative lobe.
			
 
				+    FFX_PARAMETER_INOUT FfxFloat32 accumulatedWeight,    // Accumulated weight.
			
 
				+    FFX_PARAMETER_IN FfxFloat32x2 pixelOffset,           // Pixel offset from resolve position to tap.
			
 
				+    FFX_PARAMETER_IN FfxFloat32x2 gradientDirection,     // Gradient direction.
			
 
				+    FFX_PARAMETER_IN FfxFloat32x2 length,                // Per-axis anisotropic scale applied to the rotated offset.
			
 
				+    FFX_PARAMETER_IN FfxFloat32 negativeLobeStrength,    // Negative lobe strength.
			
 
				+    FFX_PARAMETER_IN FfxFloat32 clippingPoint,           // Upper clamp applied to distance^2.
			
 
				+    FFX_PARAMETER_IN FfxFloat32x3 color)                 // Tap color.
			
 
				+{
			
 
				+    // Rotate offset by direction.
			
 
				+    FfxFloat32x2 rotatedOffset;
			
 
				+    rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y);
			
 
				+    rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x);
			
 
				+
			
 
				+    // Anisotropy.
			
 
				+    rotatedOffset *= length;
			
 
				+
			
 
				+    // Compute distance^2.
			
 
				+    FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y;
			
 
				+
			
 
				+    // Limit to the window as at corner, 2 taps can easily be outside.
			
 
				+    distanceSquared = ffxMin(distanceSquared, clippingPoint);
			
 
				+
			
 
				+    // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x.
			
 
				+    //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
			
 
				+    //  |_______________________________________|   |_______________|
			
 
				+    //                   base                             window
			
 
				+    // The general form of the 'base' is,
			
 
				+    //  (a*(b*x^2-1)^2-(a-1))
			
 
				+    // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
			
 
				+    FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0);
			
 
				+    FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0);
			
 
				+    weightB *= weightB;
			
 
				+    weightA *= weightA;
			
 
				+    weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0));
			
 
				+    FfxFloat32 weight = weightB * weightA;
			
 
				+
			
 
				+    // Do weighted average.
			
 
				+    accumulatedColor += color * weight;
			
 
				+    accumulatedWeight += weight;
			
 
				+}
			
 
				+
			
 
				+// Accumulate the bilinear-weighted gradient direction and edge length for one of the four positions (s, t, u or v) selected by the bi* flags.
			
 
				+void fsrEasuSetFloat(
			
 
				+    FFX_PARAMETER_INOUT FfxFloat32x2 direction,
			
 
				+    FFX_PARAMETER_INOUT FfxFloat32 length,
			
 
				+    FFX_PARAMETER_IN FfxFloat32x2 pp,
			
 
				+    FFX_PARAMETER_IN FfxBoolean biS,
			
 
				+    FFX_PARAMETER_IN FfxBoolean biT,
			
 
				+    FFX_PARAMETER_IN FfxBoolean biU,
			
 
				+    FFX_PARAMETER_IN FfxBoolean biV,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 lA,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 lB,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 lC,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 lD,
			
 
				+    FFX_PARAMETER_IN FfxFloat32 lE)
			
 
				+{
			
 
				+    // Compute bilinear weight, branches factor out as predicates are compile-time immediates.
			
 
				+    //  s t
			
 
				+    //  u v
			
 
				+    FfxFloat32 weight = FfxFloat32(0.0);
			
 
				+    if (biS)
			
 
				+        weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y);
			
 
				+    if (biT)
			
 
				+        weight = pp.x * (FfxFloat32(1.0) - pp.y);
			
 
				+    if (biU)
			
 
				+        weight = (FfxFloat32(1.0) - pp.x) * pp.y;
			
 
				+    if (biV)
			
 
				+        weight = pp.x * pp.y;
			
 
				+
			
 
				+    // Direction is the '+' diff.
			
 
				+    //    a
			
 
				+    //  b c d
			
 
				+    //    e
			
 
				+    // Then takes magnitude from abs average of both sides of 'c'.
			
 
				+    // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
			
 
				+    FfxFloat32 dc = lD - lC;
			
 
				+    FfxFloat32 cb = lC - lB;
			
 
				+    FfxFloat32 lengthX = max(abs(dc), abs(cb));
			
 
				+    lengthX = ffxApproximateReciprocal(lengthX);
			
 
				+    FfxFloat32 directionX = lD - lB;
			
 
				+    direction.x += directionX * weight;
			
 
				+    lengthX = ffxSaturate(abs(directionX) * lengthX);
			
 
				+    lengthX *= lengthX;
			
 
				+    length += lengthX * weight;
			
 
				+
			
 
				+    // Repeat for the y axis.
			
 
				+    FfxFloat32 ec = lE - lC;
			
 
				+    FfxFloat32 ca = lC - lA;
			
 
				+    FfxFloat32 lengthY = max(abs(ec), abs(ca));
			
 
				+    lengthY = ffxApproximateReciprocal(lengthY);
			
 
				+    FfxFloat32 directionY = lE - lA;
			
 
				+    direction.y += directionY * weight;
			
 
				+    lengthY = ffxSaturate(abs(directionY) * lengthY);
			
 
				+    lengthY *= lengthY;
			
 
				+    length += lengthY * weight;
			
 
				+}
			
 
				+
			
 
				+/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations.
			
 
				+///
			
 
				+/// @param [out] pix                    The computed color of a pixel.
			
 
				+/// @param [in]  ip                     Integer pixel position within the output.
			
 
				+/// @param [in]  con0                   The first constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
			
 
				+/// @param [in]  con1                   The second constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
			
 
				+/// @param [in]  con2                   The third constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
			
 
				+/// @param [in]  con3                   The fourth constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
			
 
				+///
			
 
				+/// @ingroup FSR1
			
 
				+void ffxFsrEasuFloat(
			
 
				+    FFX_PARAMETER_OUT FfxFloat32x3 pix,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x2 ip,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x4 con0,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x4 con1,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x4 con2,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x4 con3)
			
 
				+{
			
 
				+    // Get position of 'f'.
			
 
				+    FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
			
 
				+    FfxFloat32x2 fp = floor(pp);
			
 
				+    pp -= fp;
			
 
				+
			
 
				+    // 12-tap kernel.
			
 
				+    //    b c
			
 
				+    //  e f g h
			
 
				+    //  i j k l
			
 
				+    //    n o
			
 
				+    // Gather 4 ordering.
			
 
				+    //  a b
			
 
				+    //  r g
			
 
				+    // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions,
			
 
				+    //    a b    <- unused (z)
			
 
				+    //    r g
			
 
				+    //  a b a b
			
 
				+    //  r g r g
			
 
				+    //    a b
			
 
				+    //    r g    <- unused (z)
			
 
				+    // Allowing dead-code removal to remove the 'z's.
			
 
				+    FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
			
 
				+
			
 
				+    // These are from p0 to avoid pulling two constants on pre-Navi hardware.
			
 
				+    FfxFloat32x2 p1    = p0 + ffxAsFloat(con2.xy);
			
 
				+    FfxFloat32x2 p2    = p0 + ffxAsFloat(con2.zw);
			
 
				+    FfxFloat32x2 p3    = p0 + ffxAsFloat(con3.xy);
			
 
				+    FfxFloat32x4 bczzR = FsrEasuRF(p0);
			
 
				+    FfxFloat32x4 bczzG = FsrEasuGF(p0);
			
 
				+    FfxFloat32x4 bczzB = FsrEasuBF(p0);
			
 
				+    FfxFloat32x4 ijfeR = FsrEasuRF(p1);
			
 
				+    FfxFloat32x4 ijfeG = FsrEasuGF(p1);
			
 
				+    FfxFloat32x4 ijfeB = FsrEasuBF(p1);
			
 
				+    FfxFloat32x4 klhgR = FsrEasuRF(p2);
			
 
				+    FfxFloat32x4 klhgG = FsrEasuGF(p2);
			
 
				+    FfxFloat32x4 klhgB = FsrEasuBF(p2);
			
 
				+    FfxFloat32x4 zzonR = FsrEasuRF(p3);
			
 
				+    FfxFloat32x4 zzonG = FsrEasuGF(p3);
			
 
				+    FfxFloat32x4 zzonB = FsrEasuBF(p3);
			
 
				+
			
 
				+    // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
			
 
				+    FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG);
			
 
				+    FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG);
			
 
				+    FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG);
			
 
				+    FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG);
			
 
				+
			
 
				+    // Rename the gathered luma lanes to per-tap names.
			
 
				+    FfxFloat32 bL = bczzL.x;
			
 
				+    FfxFloat32 cL = bczzL.y;
			
 
				+    FfxFloat32 iL = ijfeL.x;
			
 
				+    FfxFloat32 jL = ijfeL.y;
			
 
				+    FfxFloat32 fL = ijfeL.z;
			
 
				+    FfxFloat32 eL = ijfeL.w;
			
 
				+    FfxFloat32 kL = klhgL.x;
			
 
				+    FfxFloat32 lL = klhgL.y;
			
 
				+    FfxFloat32 hL = klhgL.z;
			
 
				+    FfxFloat32 gL = klhgL.w;
			
 
				+    FfxFloat32 oL = zzonL.z;
			
 
				+    FfxFloat32 nL = zzonL.w;
			
 
				+
			
 
				+    // Accumulate for bilinear interpolation.
			
 
				+    FfxFloat32x2 dir = ffxBroadcast2(0.0);
			
 
				+    FfxFloat32  len = FfxFloat32(0.0);
			
 
				+    fsrEasuSetFloat(dir, len, pp, FFX_TRUE,  FFX_FALSE, FFX_FALSE, FFX_FALSE, bL, eL, fL, gL, jL);
			
 
				+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_TRUE,  FFX_FALSE, FFX_FALSE, cL, fL, gL, hL, kL);
			
 
				+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_TRUE,  FFX_FALSE, fL, iL, jL, kL, nL);
			
 
				+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_FALSE, FFX_TRUE,  gL, jL, kL, lL, oL);
			
 
				+
			
 
				+    // Normalize with approximation, and cleanup close to zero.
			
 
				+    FfxFloat32x2 dir2 = dir * dir;
			
 
				+    FfxFloat32 dirR = dir2.x + dir2.y;
			
 
				+    FfxUInt32 zro  = dirR < FfxFloat32(1.0 / 32768.0);
			
 
				+    dirR = ffxApproximateReciprocalSquareRoot(dirR);
			
 
				+    dirR = zro ? FfxFloat32(1.0) : dirR;
			
 
				+    dir.x = zro ? FfxFloat32(1.0) : dir.x;
			
 
				+    dir *= ffxBroadcast2(dirR);
			
 
				+
			
 
				+    // Transform from {0 to 2} to {0 to 1} range, and shape with square.
			
 
				+    len = len * FfxFloat32(0.5);
			
 
				+    len *= len;
			
 
				+
			
 
				+    // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
			
 
				+    FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y)));
			
 
				+
			
 
				+    // Anisotropic length after rotation,
			
 
				+    //  x := 1.0 lerp to 'stretch' on edges
			
 
				+    //  y := 1.0 lerp to 2x on edges
			
 
				+    FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len);
			
 
				+
			
 
				+    // Based on the amount of 'edge',
			
 
				+    // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
			
 
				+    FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len;
			
 
				+
			
 
				+    // Set distance^2 clipping point to the end of the adjustable window.
			
 
				+    FfxFloat32 clp = ffxApproximateReciprocal(lob);
			
 
				+
			
 
				+    // Accumulation mixed with min/max of 4 nearest.
			
 
				+    //    b c
			
 
				+    //  e f g h
			
 
				+    //  i j k l
			
 
				+    //    n o
			
 
				+    FfxFloat32x3 min4 =
			
 
				+        ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)),
			
 
				+               FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
			
 
				+    FfxFloat32x3 max4 =
			
 
				+        max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
			
 
				+
			
 
				+    // Accumulation.
			
 
				+    FfxFloat32x3 aC = ffxBroadcast3(0.0);
			
 
				+    FfxFloat32  aW = FfxFloat32(0.0);
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x));  // b
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y));  // c
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x));  // i
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y));   // j
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z));   // f
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w));  // e
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));   // k
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y));   // l
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z));   // h
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w));   // g
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z));   // o
			
 
				+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w));   // n
			
 
				+
			
 
				+    // Normalize by the accumulated weight, then dering by clamping to the min/max of the 4 nearest taps.
			
 
				+    pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW))));
			
 
				+}
			
 
				+#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
			
 
				+
			
 
				+#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF)
			
 
				+// Input callback prototypes; these must be implemented by the calling shader.
			
 
				+FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p);
			
 
				+FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p);
			
 
				+FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p);
			
 
				+
			
 
				+// Packed FP16 tap filter: processes 2 taps in parallel, one per lane of each FfxFloat16x2 argument, and adds their weighted colors and weights to the running sums.
			
 
				+void FsrEasuTapH(
			
 
				+    FFX_PARAMETER_INOUT FfxFloat16x2 aCR,
			
 
				+    FFX_PARAMETER_INOUT FfxFloat16x2 aCG,
			
 
				+    FFX_PARAMETER_INOUT FfxFloat16x2 aCB,
			
 
				+    FFX_PARAMETER_INOUT FfxFloat16x2 aW,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 offX,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 offY,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 dir,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 len,
			
 
				+    FFX_PARAMETER_IN FfxFloat16 lob,
			
 
				+    FFX_PARAMETER_IN FfxFloat16 clp,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 cR,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 cG,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 cB)
			
 
				+{
			
 
				+    FfxFloat16x2 vX, vY;
			
 
				+    vX = offX * dir.xx + offY * dir.yy;
			
 
				+    vY = offX * (-dir.yy) + offY * dir.xx;
			
 
				+    vX *= len.x;
			
 
				+    vY *= len.y;
			
 
				+    FfxFloat16x2 d2 = vX * vX + vY * vY;
			
 
				+    d2              = min(d2, FFX_BROADCAST_FLOAT16X2(clp));
			
 
				+    FfxFloat16x2 wB = FFX_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0);
			
 
				+    FfxFloat16x2 wA = FFX_BROADCAST_FLOAT16X2(lob) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0);
			
 
				+    wB *= wB;
			
 
				+    wA *= wA;
			
 
				+    wB             = FFX_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFX_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0));
			
 
				+    FfxFloat16x2 w = wB * wA;
			
 
				+    aCR += cR * w;
			
 
				+    aCG += cG * w;
			
 
				+    aCB += cB * w;
			
 
				+    aW += w;
			
 
				+}
			
 
				+
			
 
				+// Packed FP16 direction/length accumulation: processes 2 taps in parallel, weighted for the upper pair when biST is set or the lower pair when biUV is set.
			
 
				+void FsrEasuSetH(
			
 
				+    FFX_PARAMETER_INOUT FfxFloat16x2 dirPX,
			
 
				+    FFX_PARAMETER_INOUT FfxFloat16x2  dirPY,
			
 
				+    FFX_PARAMETER_INOUT FfxFloat16x2 lenP,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 pp,
			
 
				+    FFX_PARAMETER_IN FfxBoolean biST,
			
 
				+    FFX_PARAMETER_IN FfxBoolean biUV,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 lA,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 lB,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 lC,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 lD,
			
 
				+    FFX_PARAMETER_IN FfxFloat16x2 lE)
			
 
				+{
			
 
				+    FfxFloat16x2 w = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    
			
 
				+    if (biST)
			
 
				+        w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(FFX_BROADCAST_FLOAT16(1.0) - pp.y);
			
 
				+
			
 
				+    if (biUV)
			
 
				+        w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(pp.y);
			
 
				+
			
 
				+    // ABS is not free in the packed FP16 path.
			
 
				+    FfxFloat16x2 dc   = lD - lC;
			
 
				+    FfxFloat16x2 cb   = lC - lB;
			
 
				+    FfxFloat16x2 lenX = max(abs(dc), abs(cb));
			
 
				+    lenX              = ffxReciprocalHalf(lenX);
			
 
				+
			
 
				+    FfxFloat16x2 dirX = lD - lB;
			
 
				+    dirPX += dirX * w;
			
 
				+    lenX = ffxSaturate(abs(dirX) * lenX);
			
 
				+    lenX *= lenX;
			
 
				+    lenP += lenX * w;
			
 
				+    FfxFloat16x2 ec   = lE - lC;
			
 
				+    FfxFloat16x2 ca   = lC - lA;
			
 
				+    FfxFloat16x2 lenY = max(abs(ec), abs(ca));
			
 
				+    lenY              = ffxReciprocalHalf(lenY);
			
 
				+    FfxFloat16x2 dirY = lE - lA;
			
 
				+    dirPY += dirY * w;
			
 
				+    lenY = ffxSaturate(abs(dirY) * lenY);
			
 
				+    lenY *= lenY;
			
 
				+    lenP += lenY * w;
			
 
				+}
			
 
				+
			
 
				+void FsrEasuH(
			
 
				+    FFX_PARAMETER_OUT FfxFloat16x3 pix, 
			
 
				+    FFX_PARAMETER_IN FfxUInt32x2 ip,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x4 con0,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x4 con1,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x4 con2,
			
 
				+    FFX_PARAMETER_IN FfxUInt32x4 con3)
			
 
				+{
			
 
				+    FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
			
 
				+    FfxFloat32x2 fp = floor(pp);
			
 
				+    pp -= fp;
			
 
				+    FfxFloat16x2 ppp = FfxFloat16x2(pp);
			
 
				+
			
 
				+    FfxFloat32x2 p0    = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
			
 
				+    FfxFloat32x2 p1    = p0 + ffxAsFloat(con2.xy);
			
 
				+    FfxFloat32x2 p2    = p0 + ffxAsFloat(con2.zw);
			
 
				+    FfxFloat32x2 p3    = p0 + ffxAsFloat(con3.xy);
			
 
				+    FfxFloat16x4 bczzR = FsrEasuRH(p0);
			
 
				+    FfxFloat16x4 bczzG = FsrEasuGH(p0);
			
 
				+    FfxFloat16x4 bczzB = FsrEasuBH(p0);
			
 
				+    FfxFloat16x4 ijfeR = FsrEasuRH(p1);
			
 
				+    FfxFloat16x4 ijfeG = FsrEasuGH(p1);
			
 
				+    FfxFloat16x4 ijfeB = FsrEasuBH(p1);
			
 
				+    FfxFloat16x4 klhgR = FsrEasuRH(p2);
			
 
				+    FfxFloat16x4 klhgG = FsrEasuGH(p2);
			
 
				+    FfxFloat16x4 klhgB = FsrEasuBH(p2);
			
 
				+    FfxFloat16x4 zzonR = FsrEasuRH(p3);
			
 
				+    FfxFloat16x4 zzonG = FsrEasuGH(p3);
			
 
				+    FfxFloat16x4 zzonB = FsrEasuBH(p3);
			
 
				+
			
 
				+    FfxFloat16x4 bczzL = bczzB * FFX_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFX_BROADCAST_FLOAT16X4(0.5) + bczzG);
			
 
				+    FfxFloat16x4 ijfeL = ijfeB * FFX_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFX_BROADCAST_FLOAT16X4(0.5) + ijfeG);
			
 
				+    FfxFloat16x4 klhgL = klhgB * FFX_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFX_BROADCAST_FLOAT16X4(0.5) + klhgG);
			
 
				+    FfxFloat16x4 zzonL = zzonB * FFX_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFX_BROADCAST_FLOAT16X4(0.5) + zzonG);
			
 
				+    FfxFloat16   bL    = bczzL.x;
			
 
				+    FfxFloat16   cL    = bczzL.y;
			
 
				+    FfxFloat16   iL    = ijfeL.x;
			
 
				+    FfxFloat16   jL    = ijfeL.y;
			
 
				+    FfxFloat16   fL    = ijfeL.z;
			
 
				+    FfxFloat16   eL    = ijfeL.w;
			
 
				+    FfxFloat16   kL    = klhgL.x;
			
 
				+    FfxFloat16   lL    = klhgL.y;
			
 
				+    FfxFloat16   hL    = klhgL.z;
			
 
				+    FfxFloat16   gL    = klhgL.w;
			
 
				+    FfxFloat16   oL    = zzonL.z;
			
 
				+    FfxFloat16   nL    = zzonL.w;
			
 
				+
			
 
				+    // This part is different, accumulating 2 taps in parallel.
			
 
				+    FfxFloat16x2 dirPX = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    FfxFloat16x2 dirPY = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    FfxFloat16x2 lenP  = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    FsrEasuSetH(dirPX,
			
 
				+                dirPY,
			
 
				+                lenP,
			
 
				+                ppp,
			
 
				+                FfxUInt32(true),
			
 
				+                FfxUInt32(false),
			
 
				+                FfxFloat16x2(bL, cL),
			
 
				+                FfxFloat16x2(eL, fL),
			
 
				+                FfxFloat16x2(fL, gL),
			
 
				+                FfxFloat16x2(gL, hL),
			
 
				+                FfxFloat16x2(jL, kL));
			
 
				+    FsrEasuSetH(dirPX,
			
 
				+                dirPY,
			
 
				+                lenP,
			
 
				+                ppp,
			
 
				+                FfxUInt32(false),
			
 
				+                FfxUInt32(true),
			
 
				+                FfxFloat16x2(fL, gL),
			
 
				+                FfxFloat16x2(iL, jL),
			
 
				+                FfxFloat16x2(jL, kL),
			
 
				+                FfxFloat16x2(kL, lL),
			
 
				+                FfxFloat16x2(nL, oL));
			
 
				+    FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g);
			
 
				+    FfxFloat16   len = lenP.r + lenP.g;
			
 
				+
			
 
				+    FfxFloat16x2 dir2 = dir * dir;
			
 
				+    FfxFloat16   dirR = dir2.x + dir2.y;
			
 
				+    FfxBoolean   zro  = FfxBoolean(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0));
			
 
				+    dirR              = ffxApproximateReciprocalSquareRootHalf(dirR);
			
 
				+    dirR              = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR;
			
 
				+    dir.x             = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dir.x;
			
 
				+    dir *= FFX_BROADCAST_FLOAT16X2(dirR);
			
 
				+    len = len * FFX_BROADCAST_FLOAT16(0.5);
			
 
				+    len *= len;
			
 
				+    FfxFloat16   stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y)));
			
 
				+    FfxFloat16x2 len2 =
			
 
				+        FfxFloat16x2(FFX_BROADCAST_FLOAT16(1.0) + (stretch - FFX_BROADCAST_FLOAT16(1.0)) * len, FFX_BROADCAST_FLOAT16(1.0) + FFX_BROADCAST_FLOAT16(-0.5) * len);
			
 
				+    FfxFloat16 lob = FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len;
			
 
				+    FfxFloat16 clp = ffxApproximateReciprocalHalf(lob);
			
 
				+
			
 
				+    // FP16 is different, using packed trick to do min and max in same operation.
			
 
				+    FfxFloat16x2 bothR =
			
 
				+        max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x)));
			
 
				+    FfxFloat16x2 bothG =
			
 
				+        max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x)));
			
 
				+    FfxFloat16x2 bothB =
			
 
				+        max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x)));
			
 
				+
			
 
				+    // This part is different for FP16, working pairs of taps at a time.
			
 
				+    FfxFloat16x2 pR = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    FfxFloat16x2 pG = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    FfxFloat16x2 pB = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    FfxFloat16x2 pW = FFX_BROADCAST_FLOAT16X2(0.0);
			
 
				+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy);
			
 
				+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy);
			
 
				+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw);
			
 
				+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy);
			
 
				+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw);
			
 
				+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw);
			
 
				+    FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y);
			
 
				+    FfxFloat16   aW = pW.x + pW.y;
			
 
				+
			
 
				+    // Slightly different for FP16 version due to combined min and max.
			
 
				+    pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW))));
			
 
				+}
			
 
				+#endif // #if defined(FFX_GPU) && defined(FFX_HALF) && defined(FFX_FSR_EASU_HALF)
			
 
				+
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+//_____________________________________________________________/\_______________________________________________________________
			
 
				+//==============================================================================================================================
			
 
				+//
			
 
				+//                                      FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING
			
 
				+//
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness.
			
 
				+// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping.
			
 
				+// RCAS also has a built in process to limit sharpening of what it detects as possible noise.
			
 
				+// RCAS sharpening does not support scaling, as it should be applied after EASU scaling.
			
 
				+// Pass EASU output straight into RCAS, no color conversions necessary.
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// RCAS is based on the following logic.
			
 
				+// RCAS uses a 5 tap filter in a cross pattern (same as CAS),
			
 
				+//    w                n
			
 
				+//  w 1 w  for taps  w m e 
			
 
				+//    w                s
			
 
				+// Where 'w' is the negative lobe weight.
			
 
				+//  output = (w*(n+e+w+s)+m)/(4*w+1)
			
 
				+// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range,
			
 
				+//  0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s)
			
 
				+//  1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1)
			
 
				+// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount.
			
 
				+// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues.
			
 
				+// So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps.
			
 
				+// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation.
			
 
				+// This stabilizes RCAS.
			
 
				+// RCAS does a simple highpass which is normalized against the local contrast then shaped,
			
 
				+//       0.25
			
 
				+//  0.25  -1  0.25
			
 
				+//       0.25
			
 
				+// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges.
			
 
				+//
			
 
				+//  GLSL example for the required callbacks :
			
 
				+// 
			
 
				+//  FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));}
			
 
				+//  void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b)
			
 
				+//  {
			
 
				+//    //do any simple input color conversions here or leave empty if none needed
			
 
				+//  }
			
 
				+//  
			
 
				+//  FsrRcasCon needs to be called from the CPU or GPU to set up constants.
			
 
				+//  Including a GPU example here, the 'con' value would be stored out to a constant buffer.
			
 
				+// 
			
 
				+//  FfxUInt32x4 con;
			
 
				+//  FsrRcasCon(con,
			
 
				+//   0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
			
 
				+// ---------------
			
 
				+// RCAS sharpening supports a CAS-like pass-through alpha via,
			
 
				+//  #define FSR_RCAS_PASSTHROUGH_ALPHA 1
			
 
				+// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
			
 
				+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
			
 
				+//  #define FSR_RCAS_DENOISE 1
			
 
				+//==============================================================================================================================
			
 
				+// This is set at the limit of providing unnatural results for sharpening.
			
 
				+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+//_____________________________________________________________/\_______________________________________________________________
			
 
				+//==============================================================================================================================
			
 
				+//                                                      CONSTANT SETUP
			
 
				+//==============================================================================================================================
			
 
				+// Call to setup required constant values (works on CPU or GPU).
			
 
				+ // Builds the RCAS constant block: converts 'sharpness' from stops to a linear value and stores it in 'con' in both 32-bit and packed 16-bit form.
			
 
				+ // NOTE(review): 'con' carries no out/inout qualifier here; presumably this relies on FfxUInt32x4 being an array type on the CPU side. Confirm the result reaches the caller on GPU paths.
			
 
				+ FFX_STATIC void FsrRcasCon(FfxUInt32x4 con,
			
 
				+                            // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
			
 
				+                            FfxFloat32 sharpness)
			
 
				+ {
			
 
				+     // Transform from stops to linear value.
			
 
				+     sharpness = exp2(-sharpness);
			
 
				+     FfxFloat32x2 hSharp  = {sharpness, sharpness};
			
 
				+     // con[0]: linear sharpness as raw 32-bit float bits (read back with ffxAsFloat(con.x) in FsrRcasF).
			
 
				+     con[0] = ffxAsUInt32(sharpness);
			
 
				+     // con[1]: linear sharpness duplicated into two packed halves (read with FFX_UINT32_TO_FLOAT16X2(con.y) in the 16-bit versions).
			
 
				+     con[1] = packHalf2x16(hSharp);
			
 
				+     // con[2] and con[3] are zeroed here.
			
 
				+     con[2] = 0;
			
 
				+     con[3] = 0;
			
 
				+ }
			
 
				+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+//_____________________________________________________________/\_______________________________________________________________
			
 
				+//==============================================================================================================================
			
 
				+//                                                   NON-PACKED 32-BIT VERSION
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU)&&defined(FSR_RCAS_F)
			
 
				+ // Input callback prototypes that need to be implemented by calling shader
			
 
				+ FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p);
			
 
				+ void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b);
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // 32-bit RCAS pass: sharpens one output pixel from a 5-tap cross (b, d, e, f, h) fetched through FsrRcasLoadF().
			
 
				+ //  pixR/pixG/pixB receive the sharpened color; pixA (only with FSR_RCAS_PASSTHROUGH_ALPHA) receives the center tap's alpha as loaded.
			
 
				+ void FsrRcasF(out FfxFloat32 pixR,  // Output values, non-vector so porting between FsrRcasF() and FsrRcasH() is easy.
			
 
				+               out FfxFloat32 pixG,
			
 
				+               out FfxFloat32 pixB,
			
 
				+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
			
 
				+               out FfxFloat32 pixA,
			
 
				+#endif
			
 
				+               FfxUInt32x2 ip,  // Integer pixel position in output.
			
 
				+               FfxUInt32x4 con)
			
 
				+ {  // 'con' is the constant generated by FsrRcasCon(); only con.x is read here.
			
 
				+     // Algorithm uses minimal 3x3 pixel neighborhood.
			
 
				+     //    b
			
 
				+     //  d e f
			
 
				+     //    h
			
 
				+     FfxInt32x2   sp = FfxInt32x2(ip);
			
 
				+     FfxFloat32x3 b  = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb;
			
 
				+     FfxFloat32x3 d  = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb;
			
 
				+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
			
 
				+     FfxFloat32x4 ee = FsrRcasLoadF(sp);
			
 
				+     FfxFloat32x3 e  = ee.rgb;
			
 
				+     pixA            = ee.a;
			
 
				+#else
			
 
				+     FfxFloat32x3 e = FsrRcasLoadF(sp).rgb;
			
 
				+#endif
			
 
				+     FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb;
			
 
				+     FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb;
			
 
				+     // Rename (32-bit) or regroup (16-bit).
			
 
				+     FfxFloat32 bR = b.r;
			
 
				+     FfxFloat32 bG = b.g;
			
 
				+     FfxFloat32 bB = b.b;
			
 
				+     FfxFloat32 dR = d.r;
			
 
				+     FfxFloat32 dG = d.g;
			
 
				+     FfxFloat32 dB = d.b;
			
 
				+     FfxFloat32 eR = e.r;
			
 
				+     FfxFloat32 eG = e.g;
			
 
				+     FfxFloat32 eB = e.b;
			
 
				+     FfxFloat32 fR = f.r;
			
 
				+     FfxFloat32 fG = f.g;
			
 
				+     FfxFloat32 fB = f.b;
			
 
				+     FfxFloat32 hR = h.r;
			
 
				+     FfxFloat32 hG = h.g;
			
 
				+     FfxFloat32 hB = h.b;
			
 
				+     // Run optional input transform.
			
 
				+     FsrRcasInputF(bR, bG, bB);
			
 
				+     FsrRcasInputF(dR, dG, dB);
			
 
				+     FsrRcasInputF(eR, eG, eB);
			
 
				+     FsrRcasInputF(fR, fG, fB);
			
 
				+     FsrRcasInputF(hR, hG, hB);
			
 
				+     // Luma times 2 (0.5*B + 0.5*R + G).
			
 
				+     FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG);
			
 
				+     FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG);
			
 
				+     FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG);
			
 
				+     FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG);
			
 
				+     FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG);
			
 
				+     // Noise detection: highpass of the luma cross, normalized by the local luma range.
			
 
				+     // The resulting 'nz' only affects the output when FSR_RCAS_DENOISE is defined.
			
 
				+     FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL;
			
 
				+     nz            = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL)));
			
 
				+     nz            = FfxFloat32(-0.5) * nz + FfxFloat32(1.0);
			
 
				+     // Min and max of ring (the four outer taps; the center 'e' is excluded).
			
 
				+     FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR);
			
 
				+     FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG);
			
 
				+     FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB);
			
 
				+     FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR);
			
 
				+     FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG);
			
 
				+     FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB);
			
 
				+     // Immediate constants for peak range.
			
 
				+     FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
			
 
				+     // Limiters, these need to be high precision RCPs.
			
 
				+     FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R);
			
 
				+     FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G);
			
 
				+     FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B);
			
 
				+     FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y);
			
 
				+     FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y);
			
 
				+     FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y);
			
 
				+     FfxFloat32 lobeR   = max(-hitMinR, hitMaxR);
			
 
				+     FfxFloat32 lobeG   = max(-hitMinG, hitMaxG);
			
 
				+     FfxFloat32 lobeB   = max(-hitMinB, hitMaxB);
			
 
				+     // Take the largest per-channel lobe, clamp it to [-FSR_RCAS_LIMIT, 0], then scale by the sharpness stored as float bits in con.x.
			
 
				+     FfxFloat32 lobe    = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat
			
 
				+     (con.x);
			
 
				+ // Apply noise removal.
			
 
				+#ifdef FSR_RCAS_DENOISE
			
 
				+     lobe *= nz;
			
 
				+#endif
			
 
				+     // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
			
 
				+     // Each output channel is (lobe*(b+d+h+f)+e)/(4*lobe+1).
			
 
				+     FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0));
			
 
				+     pixR            = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL;
			
 
				+     pixG            = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL;
			
 
				+     pixB            = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL;
			
 
				+     return;
			
 
				+ }
			
 
				+#endif
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+//_____________________________________________________________/\_______________________________________________________________
			
 
				+//==============================================================================================================================
			
 
				+//                                                  NON-PACKED 16-BIT VERSION
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FSR_RCAS_H)
			
 
				+ // Input callback prototypes that need to be implemented by calling shader
			
 
				+ FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p);
			
 
				+ void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b);
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Non-packed 16-bit RCAS pass: sharpens one output pixel from a 5-tap cross (b, d, e, f, h) fetched through FsrRcasLoadH().
			
 
				+ //  pixR/pixG/pixB receive the sharpened color; pixA (only with FSR_RCAS_PASSTHROUGH_ALPHA) receives the center tap's alpha as loaded.
			
 
				+ void FsrRcasH(
			
 
				+ out FfxFloat16 pixR, // Output values, non-vector so porting between FsrRcasF() and FsrRcasH() is easy.
			
 
				+ out FfxFloat16 pixG,
			
 
				+ out FfxFloat16 pixB,
			
 
				+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
			
 
				+  out FfxFloat16 pixA,
			
 
				+ #endif
			
 
				+ FfxUInt32x2 ip, // Integer pixel position in output.
			
 
				+ FfxUInt32x4 con){ // 'con' is the constant generated by FsrRcasCon(); only con.y is read here.
			
 
				+  // Sharpening algorithm uses minimal 3x3 pixel neighborhood.
			
 
				+  //    b
			
 
				+  //  d e f
			
 
				+  //    h
			
 
				+  FfxInt16x2 sp=FfxInt16x2(ip);
			
 
				+  FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb;
			
 
				+  FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb;
			
 
				+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
			
 
				+   FfxFloat16x4 ee=FsrRcasLoadH(sp);
			
 
				+   FfxFloat16x3 e=ee.rgb;pixA=ee.a;
			
 
				+  #else
			
 
				+   FfxFloat16x3 e=FsrRcasLoadH(sp).rgb;
			
 
				+  #endif
			
 
				+  FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb;
			
 
				+  FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb;
			
 
				+  // Rename (32-bit) or regroup (16-bit).
			
 
				+  FfxFloat16 bR=b.r;
			
 
				+  FfxFloat16 bG=b.g;
			
 
				+  FfxFloat16 bB=b.b;
			
 
				+  FfxFloat16 dR=d.r;
			
 
				+  FfxFloat16 dG=d.g;
			
 
				+  FfxFloat16 dB=d.b;
			
 
				+  FfxFloat16 eR=e.r;
			
 
				+  FfxFloat16 eG=e.g;
			
 
				+  FfxFloat16 eB=e.b;
			
 
				+  FfxFloat16 fR=f.r;
			
 
				+  FfxFloat16 fG=f.g;
			
 
				+  FfxFloat16 fB=f.b;
			
 
				+  FfxFloat16 hR=h.r;
			
 
				+  FfxFloat16 hG=h.g;
			
 
				+  FfxFloat16 hB=h.b;
			
 
				+  // Run optional input transform.
			
 
				+  FsrRcasInputH(bR,bG,bB);
			
 
				+  FsrRcasInputH(dR,dG,dB);
			
 
				+  FsrRcasInputH(eR,eG,eB);
			
 
				+  FsrRcasInputH(fR,fG,fB);
			
 
				+  FsrRcasInputH(hR,hG,hB);
			
 
				+  // Luma times 2 (0.5*B + 0.5*R + G).
			
 
				+  FfxFloat16 bL=bB*FFX_BROADCAST_FLOAT16(0.5)+(bR*FFX_BROADCAST_FLOAT16(0.5)+bG);
			
 
				+  FfxFloat16 dL=dB*FFX_BROADCAST_FLOAT16(0.5)+(dR*FFX_BROADCAST_FLOAT16(0.5)+dG);
			
 
				+  FfxFloat16 eL=eB*FFX_BROADCAST_FLOAT16(0.5)+(eR*FFX_BROADCAST_FLOAT16(0.5)+eG);
			
 
				+  FfxFloat16 fL=fB*FFX_BROADCAST_FLOAT16(0.5)+(fR*FFX_BROADCAST_FLOAT16(0.5)+fG);
			
 
				+  FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG);
			
 
				+  // Noise detection: highpass of the luma cross, normalized by the local luma range.
			
 
				+  // The resulting 'nz' only affects the output when FSR_RCAS_DENOISE is defined.
			
 
				+  FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL;
			
 
				+  nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
			
 
				+  nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0);
			
 
				+  // Min and max of ring (the four outer taps; the center 'e' is excluded).
			
 
				+  FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
			
 
				+  FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
			
 
				+  FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
			
 
				+  FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
			
 
				+  FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
			
 
				+  FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
			
 
				+  // Immediate constants for peak range.
			
 
				+  FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
			
 
				+  // Limiters, these need to be high precision RCPs.
			
 
				+  FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
			
 
				+  FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
			
 
				+  FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
			
 
				+  FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
			
 
				+  FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
			
 
				+  FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
			
 
				+  FfxFloat16 lobeR=max(-hitMinR,hitMaxR);
			
 
				+  FfxFloat16 lobeG=max(-hitMinG,hitMaxG);
			
 
				+  FfxFloat16 lobeB=max(-hitMinB,hitMaxB);
			
 
				+  // Take the largest per-channel lobe, clamp it to [-FSR_RCAS_LIMIT, 0], then scale by the first half unpacked from con.y.
			
 
				+  FfxFloat16 lobe=max(FFX_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16(0.0)))*FFX_UINT32_TO_FLOAT16X2(con.y).x;
			
 
				+  // Apply noise removal.
			
 
				+  #ifdef FSR_RCAS_DENOISE
			
 
				+   lobe*=nz;
			
 
				+  #endif
			
 
				+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
			
 
				+  // Each output channel is (lobe*(b+d+h+f)+e)/(4*lobe+1).
			
 
				+  FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16(4.0)*lobe+FFX_BROADCAST_FLOAT16(1.0));
			
 
				+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
			
 
				+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
			
 
				+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
			
 
				+}
			
 
				+#endif
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+//_____________________________________________________________/\_______________________________________________________________
			
 
				+//==============================================================================================================================
			
 
				+//                                                     PACKED 16-BIT VERSION
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU)&& FFX_HALF == 1 && defined(FSR_RCAS_HX2)
			
 
				+ // Input callback prototypes that need to be implemented by the calling shader
			
 
				+ FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p);
			
 
				+ void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b);
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store.
			
 
				+ void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB)
			
 
				+ {
			
 
				+  // Lane .x of every packed channel forms the first pixel, lane .y the second.
			
 
				+  pix0.rgb=FfxFloat16x3(pixR.x,pixG.x,pixB.x);
			
 
				+  pix1.rgb=FfxFloat16x3(pixR.y,pixG.y,pixB.y);
			
 
				+  #ifdef FFX_HLSL
			
 
				+   // Slower path for DX only, since it won't allow uninitialized values: zero both alphas.
			
 
				+   pix1.a=0.0;
			
 
				+   pix0.a=0.0;
			
 
				+  #endif
			
 
				+ }
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Packed 16-bit RCAS pass: sharpens two output pixels at once, one at 'ip' and one 8 pixels to its right, with each FfxFloat16x2 lane holding one of them.
			
 
				+ void FsrRcasHx2(
			
 
				+ // Output values are for 2 8x8 tiles in a 16x8 region.
			
 
				+ //  pix<R,G,B>.x =  left 8x8 tile
			
 
				+ //  pix<R,G,B>.y = right 8x8 tile
			
 
				+ // This enables later processing to easily be packed as well.
			
 
				+ out FfxFloat16x2 pixR,
			
 
				+ out FfxFloat16x2 pixG,
			
 
				+ out FfxFloat16x2 pixB,
			
 
				+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
			
 
				+  out FfxFloat16x2 pixA,
			
 
				+ #endif
			
 
				+ FfxUInt32x2 ip, // Integer pixel position in output.
			
 
				+ FfxUInt32x4 con){ // 'con' is the constant generated by FsrRcasCon(); only con.y is read here.
			
 
				+  // Sharpening (no scaling) uses a minimal 3x3 pixel neighborhood; the 5-tap cross is loaded once per tile.
			
 
				+  // Taps for the first pixel (suffix 0), at 'ip'.
			
 
				+  FfxInt16x2 sp0=FfxInt16x2(ip);
			
 
				+  FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb;
			
 
				+  FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb;
			
 
				+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
			
 
				+   FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0);
			
 
				+   FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a;
			
 
				+  #else
			
 
				+   FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb;
			
 
				+  #endif
			
 
				+  FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb;
			
 
				+  FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb;
			
 
				+  // Taps for the second pixel (suffix 1), 8 pixels to the right of the first.
			
 
				+  FfxInt16x2 sp1=sp0+FfxInt16x2(8,0);
			
 
				+  FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb;
			
 
				+  FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb;
			
 
				+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
			
 
				+   FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1);
			
 
				+   FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a;
			
 
				+  #else
			
 
				+   FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb;
			
 
				+  #endif
			
 
				+  FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb;
			
 
				+  FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb;
			
 
				+  // Arrays of Structures to Structures of Arrays conversion.
			
 
				+  FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r);
			
 
				+  FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g);
			
 
				+  FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b);
			
 
				+  FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r);
			
 
				+  FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g);
			
 
				+  FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b);
			
 
				+  FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r);
			
 
				+  FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g);
			
 
				+  FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b);
			
 
				+  FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r);
			
 
				+  FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g);
			
 
				+  FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b);
			
 
				+  FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r);
			
 
				+  FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g);
			
 
				+  FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b);
			
 
				+  // Run optional input transform.
			
 
				+  FsrRcasInputHx2(bR,bG,bB);
			
 
				+  FsrRcasInputHx2(dR,dG,dB);
			
 
				+  FsrRcasInputHx2(eR,eG,eB);
			
 
				+  FsrRcasInputHx2(fR,fG,fB);
			
 
				+  FsrRcasInputHx2(hR,hG,hB);
			
 
				+  // Luma times 2 (0.5*B + 0.5*R + G).
			
 
				+  FfxFloat16x2 bL=bB*FFX_BROADCAST_FLOAT16X2(0.5)+(bR*FFX_BROADCAST_FLOAT16X2(0.5)+bG);
			
 
				+  FfxFloat16x2 dL=dB*FFX_BROADCAST_FLOAT16X2(0.5)+(dR*FFX_BROADCAST_FLOAT16X2(0.5)+dG);
			
 
				+  FfxFloat16x2 eL=eB*FFX_BROADCAST_FLOAT16X2(0.5)+(eR*FFX_BROADCAST_FLOAT16X2(0.5)+eG);
			
 
				+  FfxFloat16x2 fL=fB*FFX_BROADCAST_FLOAT16X2(0.5)+(fR*FFX_BROADCAST_FLOAT16X2(0.5)+fG);
			
 
				+  FfxFloat16x2 hL=hB*FFX_BROADCAST_FLOAT16X2(0.5)+(hR*FFX_BROADCAST_FLOAT16X2(0.5)+hG);
			
 
				+  // Noise detection: highpass of the luma cross, normalized by the local luma range.
			
 
				+  // The resulting 'nz' only affects the output when FSR_RCAS_DENOISE is defined.
			
 
				+  FfxFloat16x2 nz=FFX_BROADCAST_FLOAT16X2(0.25)*bL+FFX_BROADCAST_FLOAT16X2(0.25)*dL+FFX_BROADCAST_FLOAT16X2(0.25)*fL+FFX_BROADCAST_FLOAT16X2(0.25)*hL-eL;
			
 
				+  nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
			
 
				+  nz=FFX_BROADCAST_FLOAT16X2(-0.5)*nz+FFX_BROADCAST_FLOAT16X2(1.0);
			
 
				+  // Min and max of ring (the four outer taps; the center 'e' is excluded).
			
 
				+  FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
			
 
				+  FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
			
 
				+  FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
			
 
				+  FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
			
 
				+  FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
			
 
				+  FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
			
 
				+  // Immediate constants for peak range.
			
 
				+  FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
			
 
				+  // Limiters, these need to be high precision RCPs.
			
 
				+  FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
			
 
				+  FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
			
 
				+  FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
			
 
				+  FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
			
 
				+  FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
			
 
				+  FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);
			
 
				+  FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR);
			
 
				+  FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG);
			
 
				+  FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB);
			
 
				+  // Take the largest per-channel lobe, clamp it to [-FSR_RCAS_LIMIT, 0], then scale both lanes by the first half unpacked from con.y.
			
 
				+  FfxFloat16x2 lobe=max(FFX_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16X2(0.0)))*FFX_BROADCAST_FLOAT16X2(FFX_UINT32_TO_FLOAT16X2(con.y).x);
			
 
				+  // Apply noise removal.
			
 
				+  #ifdef FSR_RCAS_DENOISE
			
 
				+   lobe*=nz;
			
 
				+  #endif
			
 
				+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
			
 
				+  // Each output channel is (lobe*(b+d+h+f)+e)/(4*lobe+1), evaluated per lane.
			
 
				+  FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(4.0)*lobe+FFX_BROADCAST_FLOAT16X2(1.0));
			
 
				+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
			
 
				+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
			
 
				+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
			
 
				+#endif
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+//_____________________________________________________________/\_______________________________________________________________
			
 
				+//==============================================================================================================================
			
 
				+//
			
 
				+//                                          FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR
			
 
				+//
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts.
			
 
				+// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel.
			
 
				+// The 'Lfga*()' functions provide a convenient way to introduce grain.
			
 
				+// These functions limit grain based on distance to signal limits.
			
 
				+// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality.
			
 
				+// Grain application should be done in a linear colorspace.
			
 
				+// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased).
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// Usage,
			
 
				+//   FsrLfga*(
			
 
				+//    color, // In/out linear colorspace color {0 to 1} ranged.
			
 
				+//    grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain.
			
 
				+//    amount); // Amount of grain {0 to 1} ranged.
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)'
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU)
			
 
				+ // Maximum grain is the minimum distance to the signal limit.
			
 
				+ void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a)
			
 
				+ {
			
 
				+     // Grain input scaled by the requested amount.
			
 
				+     FfxFloat32x3 scaledGrain = t * ffxBroadcast3(a);
			
 
				+     // Per-channel distance from 'c' to the nearer signal limit (0 or 1).
			
 
				+     FfxFloat32x3 headroom = ffxMin(ffxBroadcast3(1.0) - c, c);
			
 
				+     // Limit the grain by that distance before adding it to the color.
			
 
				+     c += scaledGrain * headroom;
			
 
				+ }
			
 
				+#endif
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU)&& FFX_HALF == 1
			
 
				+ // Half precision version (slower).
			
 
				+ // Same operation as the 32-bit version on FfxFloat16x3: grain 't' times 'a', weighted by min(1.0 - c, c).
			
 
				+ void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a)
			
 
				+ {
			
 
				+     FfxFloat16x3 fScaledGrain = t * FFX_BROADCAST_FLOAT16X3(a);
			
 
				+     FfxFloat16x3 fLimit       = min(FFX_BROADCAST_FLOAT16X3(1.0) - c, c);
			
 
				+     c += fScaledGrain * fLimit;
			
 
				+ }
			
 
				+ //------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Packed half precision version (faster).
			
 
				+ // Channels are passed separately (cR/cG/cB with grain tR/tG/tB), each as a two-wide half vector.
			
 
				+ void FsrLfgaHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 tR, FfxFloat16x2 tG, FfxFloat16x2 tB, FfxFloat16 a)
			
 
				+ {
			
 
				+     // The amount and the upper signal limit are the same for every channel.
			
 
				+     FfxFloat16x2 fAmount = FFX_BROADCAST_FLOAT16X2(a);
			
 
				+     FfxFloat16x2 fOne    = FFX_BROADCAST_FLOAT16X2(1.0);
			
 
				+     cR += (tR * fAmount) * min(fOne - cR, cR);
			
 
				+     cG += (tG * fAmount) * min(fOne - cG, cG);
			
 
				+     cB += (tB * fAmount) * min(fOne - cB, cB);
			
 
				+ }
			
 
				+#endif
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+//_____________________________________________________________/\_______________________________________________________________
			
 
				+//==============================================================================================================================
			
 
				+//
			
 
				+//                                          FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER
			
 
				+//
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear.
			
 
				+// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering.
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// Reversible tonemapper usage,
			
 
				+//  FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}.
			
 
				+//  FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}.
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU)
			
 
				+ // Forward tonemap: scales 'c' in place by 1 / (max(r, g, b) + 1).
			
 
				+ void FsrSrtmF(inout FfxFloat32x3 c)
			
 
				+ {
			
 
				+     FfxFloat32 fScale = rcp(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0));
			
 
				+     c *= ffxBroadcast3(fScale);
			
 
				+ }
			
 
				+ // The extra max solves the c=1.0 case (which is a /0).
			
 
				+ // Inverse tonemap: scales 'c' in place by 1 / max(1/32768, 1 - max(r, g, b)).
			
 
				+ void FsrSrtmInvF(inout FfxFloat32x3 c)
			
 
				+ {
			
 
				+     // The denominator is floored at 1/32768 so it can never reach zero.
			
 
				+     FfxFloat32 fDenominator = max(FfxFloat32(1.0 / 32768.0), FfxFloat32(1.0) - ffxMax3(c.r, c.g, c.b));
			
 
				+     c *= ffxBroadcast3(rcp(fDenominator));
			
 
				+ }
			
 
				+#endif
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU )&& FFX_HALF == 1
			
 
				+ // Half precision forward tonemap: scales 'c' in place by 1 / (max(r, g, b) + 1).
			
 
				+ void FsrSrtmH(inout FfxFloat16x3 c)
			
 
				+ {
			
 
				+     FfxFloat16 fScale = ffxReciprocalHalf(ffxMax3Half(c.r, c.g, c.b) + FFX_BROADCAST_FLOAT16(1.0));
			
 
				+     c *= FFX_BROADCAST_FLOAT16X3(fScale);
			
 
				+ }
			
 
				+ // Half precision inverse tonemap: scales 'c' in place by 1 / max(1/32768, 1 - max(r, g, b)).
			
 
				+ void FsrSrtmInvH(inout FfxFloat16x3 c)
			
 
				+ {
			
 
				+     // The denominator is floored at 1/32768 so it can never reach zero.
			
 
				+     FfxFloat16 fDenominator = max(FFX_BROADCAST_FLOAT16(1.0 / 32768.0), FFX_BROADCAST_FLOAT16(1.0) - ffxMax3Half(c.r, c.g, c.b));
			
 
				+     c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(fDenominator));
			
 
				+ }
			
 
				+ //------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Packed forward tonemap: every channel is scaled by the same 1 / (max(cR, cG, cB) + 1).
			
 
				+ void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB)
			
 
				+ {
			
 
				+     FfxFloat16x2 fPeak  = ffxMax3Half(cR, cG, cB);
			
 
				+     FfxFloat16x2 fScale = ffxReciprocalHalf(fPeak + FFX_BROADCAST_FLOAT16X2(1.0));
			
 
				+     cR *= fScale;
			
 
				+     cG *= fScale;
			
 
				+     cB *= fScale;
			
 
				+ }
			
 
				+ // Packed inverse tonemap: every channel is scaled by the same 1 / max(1/32768, 1 - max(cR, cG, cB)).
			
 
				+ void FsrSrtmInvHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB)
			
 
				+ {
			
 
				+     // The denominator is floored at 1/32768 so it can never reach zero.
			
 
				+     FfxFloat16x2 fDenominator = max(FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0), FFX_BROADCAST_FLOAT16X2(1.0) - ffxMax3Half(cR, cG, cB));
			
 
				+     FfxFloat16x2 fScale = ffxReciprocalHalf(fDenominator);
			
 
				+     cR *= fScale;
			
 
				+     cG *= fScale;
			
 
				+     cB *= fScale;
			
 
				+ }
			
 
				+#endif
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
			
 
				+//_____________________________________________________________/\_______________________________________________________________
			
 
				+//==============================================================================================================================
			
 
				+//
			
 
				+//                                       FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER
			
 
				+//
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion.
			
 
				+// Gamma 2.0 is used so that the conversion back to linear is just to square the color.
			
 
				+// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively.
			
 
				+// Given good non-biased temporal blue noise as dither input,
			
 
				+// the output dither will temporally conserve energy.
			
 
				+// This is done by choosing the linear nearest step point instead of perceptual nearest.
			
 
				+// See code below for details.
			
 
				+//------------------------------------------------------------------------------------------------------------------------------
			
 
				+// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION
			
 
				+// ===============================================
			
 
				+// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'.
			
 
				+// - Thus rounding is to nearest.
			
 
				+// - NaN gets converted to zero.
			
 
				+// - INF is clamped to {0.0 to 1.0}.
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU)
			
 
				+ // Hand tuned integer position to dither value, with more values than simple checkerboard.
			
 
				+ // Only 32-bit has enough precision for this computation.
			
 
				+ // Output is {0 to <1}.
			
 
				+ FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f)
			
 
				+ {
			
 
				+     // The 1.61803 golden ratio, applied to the column (which is offset by 'f').
			
 
				+     FfxFloat32 fGolden = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
			
 
				+     // Row multiplier, a number designed to provide a good visual pattern.
			
 
				+     FfxFloat32 fRowScale = FfxFloat32(1.0 / 3.69);
			
 
				+     FfxFloat32 fColumn = FfxFloat32(p.x + f);
			
 
				+     FfxFloat32 fRow = FfxFloat32(p.y);
			
 
				+     // Only the fractional part is kept.
			
 
				+     return ffxFract(fColumn * fGolden + (fRow * fRowScale));
			
 
				+ }
			
 
				+  //------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // This version is 8-bit gamma 2.0.
			
 
				+ // The 'c' input is {0 to 1}.
			
 
				+ // Output is {0 to 1} ready for image store.
			
 
				+ void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit)
			
 
				+ {
			
 
				+     // Size of one 8-bit step.
			
 
				+     FfxFloat32x3 fStep = ffxBroadcast3(1.0 / 255.0);
			
 
				+     // Square root of 'c' (gamma 2.0), floored onto the 8-bit grid.
			
 
				+     FfxFloat32x3 fLower = floor(ffxSqrt(c) * ffxBroadcast3(255.0)) * fStep;
			
 
				+     // Squares of the lower step and of the next step up.
			
 
				+     FfxFloat32x3 fLowerLinear = fLower * fLower;
			
 
				+     FfxFloat32x3 fUpperLinear = fLower + fStep;
			
 
				+     fUpperLinear = fUpperLinear * fUpperLinear;
			
 
				+     // Ratio of the lower to the upper step required to produce 'c'.
			
 
				+     // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges).
			
 
				+     // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL.
			
 
				+     FfxFloat32x3 fRatio = (c - fUpperLinear) * ffxApproximateReciprocalMedium(fLowerLinear - fUpperLinear);
			
 
				+     // Use the ratio as a cutoff to choose the lower or the upper step.
			
 
				+     // ffxIsGreaterThanZero() is a MUL.
			
 
				+     c = ffxSaturate(fLower + ffxIsGreaterThanZero(ffxBroadcast3(dit) - fRatio) * fStep);
			
 
				+ }
			
 
				+ //------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // This version is 10-bit gamma 2.0.
			
 
				+ // The 'c' input is {0 to 1}.
			
 
				+ // Output is {0 to 1} ready for image store.
			
 
				+ void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit)
			
 
				+ {
			
 
				+     // Size of one 10-bit step.
			
 
				+     FfxFloat32x3 fStep = ffxBroadcast3(1.0 / 1023.0);
			
 
				+     // Square root of 'c' (gamma 2.0), floored onto the 10-bit grid.
			
 
				+     FfxFloat32x3 fLower = floor(ffxSqrt(c) * ffxBroadcast3(1023.0)) * fStep;
			
 
				+     // Squares of the lower step and of the next step up.
			
 
				+     FfxFloat32x3 fLowerLinear = fLower * fLower;
			
 
				+     FfxFloat32x3 fUpperLinear = fLower + fStep;
			
 
				+     fUpperLinear = fUpperLinear * fUpperLinear;
			
 
				+     // Ratio of the lower to the upper step required to produce 'c', used as the dither cutoff.
			
 
				+     FfxFloat32x3 fRatio = (c - fUpperLinear) * ffxApproximateReciprocalMedium(fLowerLinear - fUpperLinear);
			
 
				+     c = ffxSaturate(fLower + ffxIsGreaterThanZero(ffxBroadcast3(dit) - fRatio) * fStep);
			
 
				+ }
			
 
				+#endif
			
 
				+//==============================================================================================================================
			
 
				+#if defined(FFX_GPU)&& FFX_HALF == 1
			
 
				+ // Same dither value as the 32-bit version: the math runs in FfxFloat32 and only the result is converted to FfxFloat16.
			
 
				+ FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f)
			
 
				+ {
			
 
				+     // Golden ratio multiplier for the column (which is offset by 'f').
			
 
				+     FfxFloat32 fGolden = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
			
 
				+     // Row multiplier.
			
 
				+     FfxFloat32 fRowScale = FfxFloat32(1.0 / 3.69);
			
 
				+     FfxFloat32 fColumn = FfxFloat32(p.x + f);
			
 
				+     FfxFloat32 fRow = FfxFloat32(p.y);
			
 
				+     return FfxFloat16(ffxFract(fColumn * fGolden + (fRow * fRowScale)));
			
 
				+ }
			
 
				+ //------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Half precision 8-bit gamma 2.0 dither of 'c' in place; 'dit' selects between the two neighbouring steps.
			
 
				+ void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit)
			
 
				+ {
			
 
				+     // Size of one 8-bit step.
			
 
				+     FfxFloat16x3 fStep = FFX_BROADCAST_FLOAT16X3(1.0 / 255.0);
			
 
				+     // Square root of 'c', floored onto the 8-bit grid.
			
 
				+     FfxFloat16x3 fLower = floor(sqrt(c) * FFX_BROADCAST_FLOAT16X3(255.0)) * fStep;
			
 
				+     // Squares of the lower step and of the next step up.
			
 
				+     FfxFloat16x3 fLowerLinear = fLower * fLower;
			
 
				+     FfxFloat16x3 fUpperLinear = fLower + fStep;
			
 
				+     fUpperLinear = fUpperLinear * fUpperLinear;
			
 
				+     // Ratio used as the cutoff between the lower and the upper step.
			
 
				+     FfxFloat16x3 fRatio = (c - fUpperLinear) * ffxApproximateReciprocalMediumHalf(fLowerLinear - fUpperLinear);
			
 
				+     c = ffxSaturate(fLower + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - fRatio) * fStep);
			
 
				+ }
			
 
				+ //------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Half precision 10-bit gamma 2.0 dither of 'c' in place; 'dit' selects between the two neighbouring steps.
			
 
				+ void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit)
			
 
				+ {
			
 
				+     // Size of one 10-bit step.
			
 
				+     FfxFloat16x3 fStep = FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0);
			
 
				+     // Square root of 'c', floored onto the 10-bit grid.
			
 
				+     FfxFloat16x3 fLower = floor(sqrt(c) * FFX_BROADCAST_FLOAT16X3(1023.0)) * fStep;
			
 
				+     // Squares of the lower step and of the next step up.
			
 
				+     FfxFloat16x3 fLowerLinear = fLower * fLower;
			
 
				+     FfxFloat16x3 fUpperLinear = fLower + fStep;
			
 
				+     fUpperLinear = fUpperLinear * fUpperLinear;
			
 
				+     // Ratio used as the cutoff between the lower and the upper step.
			
 
				+     FfxFloat16x3 fRatio = (c - fUpperLinear) * ffxApproximateReciprocalMediumHalf(fLowerLinear - fUpperLinear);
			
 
				+     c = ffxSaturate(fLower + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - fRatio) * fStep);
			
 
				+ }
			
 
				+ //==============================================================================================================================
			
 
				+ // This computes dither for positions 'p' and 'p+{8,0}'.
			
 
				+ FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f)
			
 
				+ {
			
 
				+     // Golden ratio multiplier for the columns and the multiplier for the row.
			
 
				+     FfxFloat32 fGolden = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
			
 
				+     FfxFloat32 fRowScale = FfxFloat32(1.0 / 3.69);
			
 
				+     FfxFloat32 fRow = FfxFloat32(p.y);
			
 
				+     // .x is the column of 'p' (offset by 'f'), .y is the column eight to its right.
			
 
				+     FfxFloat32x2 fColumns;
			
 
				+     fColumns.x = FfxFloat32(p.x + f);
			
 
				+     fColumns.y = fColumns.x + FfxFloat32(8.0);
			
 
				+     // Both positions share the same row term.
			
 
				+     return FfxFloat16x2(ffxFract(fColumns * ffxBroadcast2(fGolden) + ffxBroadcast2(fRow * fRowScale)));
			
 
				+ }
			
 
				+ //------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Packed 8-bit gamma 2.0 dither; the three channels are independent and are processed one after another.
			
 
				+ void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit)
			
 
				+ {
			
 
				+     // Number of 8-bit levels and the size of one step, shared by all channels.
			
 
				+     FfxFloat16x2 fLevels = FFX_BROADCAST_FLOAT16X2(255.0);
			
 
				+     FfxFloat16x2 fStep = FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
			
 
				+     // Red: floored step, squares of the lower and upper step, cutoff ratio, selection.
			
 
				+     FfxFloat16x2 nR = floor(sqrt(cR) * fLevels) * fStep;
			
 
				+     FfxFloat16x2 aR = nR * nR;
			
 
				+     FfxFloat16x2 bR = nR + fStep;
			
 
				+     bR = bR * bR;
			
 
				+     FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR);
			
 
				+     cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * fStep);
			
 
				+     // Green.
			
 
				+     FfxFloat16x2 nG = floor(sqrt(cG) * fLevels) * fStep;
			
 
				+     FfxFloat16x2 aG = nG * nG;
			
 
				+     FfxFloat16x2 bG = nG + fStep;
			
 
				+     bG = bG * bG;
			
 
				+     FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG);
			
 
				+     cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * fStep);
			
 
				+     // Blue.
			
 
				+     FfxFloat16x2 nB = floor(sqrt(cB) * fLevels) * fStep;
			
 
				+     FfxFloat16x2 aB = nB * nB;
			
 
				+     FfxFloat16x2 bB = nB + fStep;
			
 
				+     bB = bB * bB;
			
 
				+     FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB);
			
 
				+     cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * fStep);
			
 
				+ }
			
 
				+ //------------------------------------------------------------------------------------------------------------------------------
			
 
				+ // Packed 10-bit gamma 2.0 dither; the three channels are independent and are processed one after another.
			
 
				+ void FsrTepdC10Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit)
			
 
				+ {
			
 
				+     // Number of 10-bit levels and the size of one step, shared by all channels.
			
 
				+     FfxFloat16x2 fLevels = FFX_BROADCAST_FLOAT16X2(1023.0);
			
 
				+     FfxFloat16x2 fStep = FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0);
			
 
				+     // Red: floored step, squares of the lower and upper step, cutoff ratio, selection.
			
 
				+     FfxFloat16x2 nR = floor(sqrt(cR) * fLevels) * fStep;
			
 
				+     FfxFloat16x2 aR = nR * nR;
			
 
				+     FfxFloat16x2 bR = nR + fStep;
			
 
				+     bR = bR * bR;
			
 
				+     FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR);
			
 
				+     cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * fStep);
			
 
				+     // Green.
			
 
				+     FfxFloat16x2 nG = floor(sqrt(cG) * fLevels) * fStep;
			
 
				+     FfxFloat16x2 aG = nG * nG;
			
 
				+     FfxFloat16x2 bG = nG + fStep;
			
 
				+     bG = bG * bG;
			
 
				+     FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG);
			
 
				+     cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * fStep);
			
 
				+     // Blue.
			
 
				+     FfxFloat16x2 nB = floor(sqrt(cB) * fLevels) * fStep;
			
 
				+     FfxFloat16x2 aB = nB * nB;
			
 
				+     FfxFloat16x2 bB = nB + fStep;
			
 
				+     bB = bB * bB;
			
 
				+     FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB);
			
 
				+     cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * fStep);
			
 
				+ }
			
 
				+#endif
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
@@ -0,0 +1,295 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifndef FFX_FSR2_ACCUMULATE_H
			
 
				+#define FFX_FSR2_ACCUMULATE_H
			
 
				+
			
 
				+// Returns the length of fMotionVector after scaling it component-wise by DisplaySize().
			
 
				+FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector)
			
 
				+{
			
 
				+    const FfxFloat32x2 fScaledMotion = fMotionVector * DisplaySize();
			
 
				+    return length(fScaledMotion);
			
 
				+}
			
 
				+#if FFX_HALF
			
 
				+// FFX_MIN16_F overload: DisplaySize() is converted to FFX_MIN16_F2 before the component-wise scale.
			
 
				+FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector)
			
 
				+{
			
 
				+    const FFX_MIN16_F2 fScaledMotion = fMotionVector * FFX_MIN16_F2(DisplaySize());
			
 
				+    return length(fScaledMotion);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Blends this frame's upsampled color into fHistoryColor and leaves the result in RGB.
			
 
				+//   params                   - not read by this function.
			
 
				+//   fHistoryColor            - in/out; read as YCoCg, written back as RGB.
			
 
				+//   fAccumulation            - accumulated weight so far; modified on a local copy only.
			
 
				+//   fUpsampledColorAndWeight - xyz is this frame's color (YCoCg), w is its weight.
			
 
				+void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight)
			
 
				+{
			
 
				+    // Avoid invalid values when both the accumulation and the upsampled weight are 0
			
 
				+    fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www);
			
 
				+
			
 
				+#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
			
 
				+    //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation)
			
 
				+    fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz)));
			
 
				+    fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor)));
			
 
				+#endif
			
 
				+
			
 
				+    // Blend factor is this frame's weight divided by the total (which already includes this frame's weight).
			
 
				+    const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation;
			
 
				+    fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha);
			
 
				+
			
 
				+    fHistoryColor = YCoCgToRGB(fHistoryColor);
			
 
				+
			
 
				+#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
			
 
				+    // Undo the tonemap applied above, now that the blended color is back in RGB.
			
 
				+    fHistoryColor = InverseTonemap(fHistoryColor);
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+// When fHistoryColor lies outside the (scaled) rectification box, pulls it towards the clamped value
			
 
				+// and reduces fAccumulation; both are left untouched when the history color is inside the box.
			
 
				+//   clippingBox                - supplies boxCenter, boxVec, aabbMin and aabbMax.
			
 
				+//   fHistoryColor              - in/out history color to rectify.
			
 
				+//   fAccumulation              - in/out accumulation weight.
			
 
				+//   fLockContributionThisFrame - with fLumaInstabilityFactor, decides how much unclamped history is kept.
			
 
				+//   fTemporalReactiveFactor    - not read by this function; kept so the signature stays unchanged.
			
 
				+void RectifyHistory(
			
 
				+    const AccumulationPassCommonParams params,
			
 
				+    RectificationBox clippingBox,
			
 
				+    FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
			
 
				+    FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
			
 
				+    FfxFloat32 fLockContributionThisFrame,
			
 
				+    FfxFloat32 fTemporalReactiveFactor,
			
 
				+    FfxFloat32 fLumaInstabilityFactor)
			
 
				+{
			
 
				+    // Box scale used when nothing below pushes it back to 1.0; capped at 20.
			
 
				+    FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f));
			
 
				+
			
 
				+    // Depth clip, accumulation mask and velocity each move the box scale towards 1.0.
			
 
				+    const FfxFloat32 fVelocityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
			
 
				+    const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVelocityFactor));
			
 
				+    FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT);
			
 
				+
			
 
				+    FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
			
 
				+    FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
			
 
				+    FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
			
 
				+
			
 
				+    // The scaled box never extends beyond the box's own AABB.
			
 
				+    boxMin = ffxMax(clippingBox.aabbMin, boxMin);
			
 
				+    boxMax = ffxMin(clippingBox.aabbMax, boxMax);
			
 
				+
			
 
				+    if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) {
			
 
				+
			
 
				+        const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax);
			
 
				+
			
 
				+        FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;
			
 
				+        
			
 
				+        // A higher dilated reactive factor lowers the history contribution.
			
 
				+        const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor;
			
 
				+        const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f);
			
 
				+        fHistoryContribution *= fReactiveContribution;
			
 
				+
			
 
				+        // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection
			
 
				+        fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution));
			
 
				+
			
 
				+        // Scale accumulation using rectification info
			
 
				+        const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f));
			
 
				+        fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution));
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// Forwards fUpscaledColor for pixel iPxHrPos to StoreUpscaledOutput() unchanged.
			
 
				+void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
			
 
				+{
			
 
				+    StoreUpscaledOutput(iPxHrPos, fUpscaledColor);
			
 
				+}
			
 
				+
			
 
				+// Ages or kills the lock for this pixel, then stores it at params.iPxHrPos.
			
 
				+void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight)
			
 
				+{
			
 
				+    // Similar motion is expected next frame, so estimate where this location will be.
			
 
				+    const FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector;
			
 
				+
			
 
				+    if (IsUvInside(fEstimatedUvNextFrame)) {
			
 
				+        // Still inside the screen: decrease the lock lifetime, never going below zero.
			
 
				+        const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame);
			
 
				+        const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax);
			
 
				+        fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease);
			
 
				+    }
			
 
				+    else {
			
 
				+        // Outside the screen: kill the lock so that locks do not get clamped to the screen borders.
			
 
				+        KillLock(fLockStatus);
			
 
				+    }
			
 
				+
			
 
				+    StoreLockStatus(params.iPxHrPos, fLockStatus);
			
 
				+}
			
 
				+
			
 
				+
			
 
				+// Returns the starting accumulation weight for this pixel, replicated to three components.
			
 
				+// lockState is not read by this function.
			
 
				+FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState)
			
 
				+{
			
 
				+    // Start from the maximum accumulation; it becomes zero for samples that are not existing samples
			
 
				+    // and shrinks with the reactive factor and the depth clip factor.
			
 
				+    FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);
			
 
				+
			
 
				+    // Motion last frame, or velocity this frame, limits the weight to at most ten times the upsampled weight.
			
 
				+    const FfxFloat32 fMotionBlend = ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10)));
			
 
				+    fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, fMotionBlend));
			
 
				+
			
 
				+    // Higher velocity limits it further, down to the upsampled weight itself.
			
 
				+    const FfxFloat32 fVelocityBlend = ffxSaturate(params.fHrVelocity / FfxFloat32(20));
			
 
				+    fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, fVelocityBlend));
			
 
				+
			
 
				+    return fBaseAccumulation.xxx;
			
 
				+}
			
 
				+
			
 
				+// Compares this frame's quantized luma (clippingBox.boxCenter.x) with up to four stored history
			
 
				+// values, stores the updated history at params.iPxHrPos and returns a luma instability factor.
			
 
				+//   fThisFrameReactiveFactor - together with params.fAccumulationMask, reduces the returned factor.
			
 
				+//   fLuminanceDiff           - history is only sampled when this and the masks are below 0.1.
			
 
				+FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff)
			
 
				+{
			
 
				+    // One 8-bit UNORM step.
			
 
				+    const FfxFloat32 fUnormThreshold = 1.0f / 255.0f;
			
 
				+    // Component indices into the 4-wide luma history; N_MINUS_1 is the most recently stored value.
			
 
				+    const FfxInt32 N_MINUS_1 = 0;
			
 
				+    const FfxInt32 N_MINUS_2 = 1;
			
 
				+    const FfxInt32 N_MINUS_3 = 2;
			
 
				+    const FfxInt32 N_MINUS_4 = 3;
			
 
				+
			
 
				+    FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x;
			
 
				+
			
 
				+#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
			
 
				+    // Compress the luma as x / (1 + max(0, x)).
			
 
				+    fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma));
			
 
				+#endif
			
 
				+
			
 
				+    // Quantize to 8-bit steps.
			
 
				+    fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f;
			
 
				+
			
 
				+    // History is sampled only for non-new samples whose depth clip factor, accumulation mask and luminance difference are all below 0.1; otherwise it starts as all zeros.
			
 
				+    const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false);
			
 
				+    FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f);
			
 
				+
			
 
				+    FfxFloat32 fLumaInstability = 0.0f;
			
 
				+    // Difference between this frame and the most recently stored luma.
			
 
				+    FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]);
			
 
				+
			
 
				+    FfxFloat32 fMin = abs(fDiffs0);
			
 
				+
			
 
				+    // Only differences of at least one UNORM step are examined further.
			
 
				+    if (fMin >= fUnormThreshold)
			
 
				+    {
			
 
				+        // Look for an older history value that is closer to the current luma and differs in the same direction.
			
 
				+        for (int i = N_MINUS_2; i <= N_MINUS_4; i++) {
			
 
				+            FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]);
			
 
				+
			
 
				+            if (sign(fDiffs0) == sign(fDiffs1)) {
			
 
				+                
			
 
				+                // Scale difference to protect historically similar values
			
 
				+                const FfxFloat32 fMinBias = 1.0f;
			
 
				+                fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias);
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
			
 
				+        const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);
			
 
				+
			
 
				+        // Non-zero only when an older value was closer than the most recent one; then thresholded to 0 or 1.
			
 
				+        fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor;
			
 
				+        fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold);
			
 
				+
			
 
				+        fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f));
			
 
				+    }
			
 
				+
			
 
				+    // Shift the history by one slot and put the current luma in front.
			
 
				+    fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3];
			
 
				+    fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2];
			
 
				+    fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1];
			
 
				+    fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma;
			
 
				+
			
 
				+    StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory);
			
 
				+
			
 
				+    // The factor is zeroed while the oldest slot (after the shift) still holds 0.
			
 
				+    return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0);
			
 
				+}
			
 
				+
			
 
				+// Derives the temporal reactive factor to store for this pixel from fTemporalReactiveFactor.
			
 
				+// The result is returned negated when params.fHrVelocity * 10 saturates to 1.
			
 
				+FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor)
			
 
				+{
			
 
				+    // The incoming factor is capped at 0.99.
			
 
				+    const FfxFloat32 fCappedFactor = ffxMin(0.99f, fTemporalReactiveFactor);
			
 
				+
			
 
				+    // Velocity can raise the factor towards 0.4 but never lowers it.
			
 
				+    const FfxFloat32 fVelocityAdjusted = ffxMax(fCappedFactor, ffxLerp(fCappedFactor, 0.4f, ffxSaturate(params.fHrVelocity)));
			
 
				+
			
 
				+    // The squared factor is floored by the depth clip factor (scaled by 0.1) and the dilated reactive factor.
			
 
				+    const FfxFloat32 fMaskFloor = ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor);
			
 
				+    FfxFloat32 fNewFactor = ffxMax(fVelocityAdjusted * fVelocityAdjusted, fMaskFloor);
			
 
				+
			
 
				+    // Force reactive factor for new samples
			
 
				+    if (params.bIsNewSample) {
			
 
				+        fNewFactor = 1.0f;
			
 
				+    }
			
 
				+
			
 
				+    const FfxBoolean bVelocitySaturated = ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f;
			
 
				+    if (bVelocitySaturated) {
			
 
				+        // Keep the magnitude at least FSR2_EPSILON so the negated value is never zero.
			
 
				+        fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f;
			
 
				+    }
			
 
				+
			
 
				+    return fNewFactor;
			
 
				+}
			
 
				+
			
 
				+// Fills an AccumulationPassCommonParams for the pixel at iPxHrPos: UVs, motion vector and velocity,
			
 
				+// reprojection result, sampled depth clip and reactive masks, and the reset/new-sample flags.
			
 
				+AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos)
			
 
				+{
			
 
				+    AccumulationPassCommonParams params;
			
 
				+
			
 
				+    params.iPxHrPos = iPxHrPos;
			
 
				+    // UV of the pixel center, relative to DisplaySize().
			
 
				+    const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize();
			
 
				+    params.fHrUv = fHrUv;
			
 
				+    
			
 
				+    // Jittered UV, clamped with ClampUv(); used for the SampleDepthClip/SampleDilatedReactiveMasks lookups below.
			
 
				+    const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize();
			
 
				+    params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize());
			
 
				+
			
 
				+    params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
			
 
				+    params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);
			
 
				+
			
 
				+    // Called after the motion vector is set; receives fReprojectedHrUv and bIsExistingSample to fill in.
			
 
				+    ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample);
			
 
				+
			
 
				+    params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler));
			
 
				+    
			
 
				+    // x is used as the dilated reactive factor, y as the accumulation mask.
			
 
				+    const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler);
			
 
				+    params.fDilatedReactiveFactor = fDilatedReactiveMasks.x;
			
 
				+    params.fAccumulationMask = fDilatedReactiveMasks.y;
			
 
				+    // Frame index 0 is treated as a reset frame.
			
 
				+    params.bIsResetFrame = (0 == FrameIndex());
			
 
				+
			
 
				+    // A sample is new when it is not an existing sample, or on a reset frame.
			
 
				+    params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame);
			
 
				+
			
 
				+    return params;
			
 
				+}
			
 
				+
			
 
				+// Runs the accumulation steps for the pixel at iPxHrPos: reprojects history color and lock status,
			
 
				+// updates the lock, computes this frame's upsampled color, rectifies and blends the history, then
			
 
				+// stores the lock status, the internal color with its temporal reactive factor and, when
			
 
				+// FFX_FSR2_OPTION_APPLY_SHARPENING is 0, the upscaled output.
			
 
				+void Accumulate(FfxInt32x2 iPxHrPos)
			
 
				+{
			
 
				+    const AccumulationPassCommonParams params = InitParams(iPxHrPos);
			
 
				+
			
 
				+    // Defaults used when no history is reprojected.
			
 
				+    FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0);
			
 
				+    FfxFloat32x2 fLockStatus;
			
 
				+    InitializeNewLockSample(fLockStatus);
			
 
				+
			
 
				+    FfxFloat32 fTemporalReactiveFactor = 0.0f;
			
 
				+    FfxBoolean bInMotionLastFrame = FFX_FALSE;
			
 
				+    LockState lockState = { FFX_FALSE , FFX_FALSE };
			
 
				+    // History is only reprojected for existing samples outside of a reset frame.
			
 
				+    if (params.bIsExistingSample && !params.bIsResetFrame) {
			
 
				+        ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame);
			
 
				+        lockState = ReprojectHistoryLockStatus(params, fLockStatus);
			
 
				+    }
			
 
				+
			
 
				+    // The larger of the dilated mask value and the reprojected temporal factor is used for this frame.
			
 
				+    FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor);
			
 
				+
			
 
				+    FfxFloat32 fLuminanceDiff = 0.0f;
			
 
				+    FfxFloat32 fLockContributionThisFrame = 0.0f;
			
 
				+    UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff);
			
 
				+
			
 
				+    // Load upsampled input color
			
 
				+    RectificationBox clippingBox;
			
 
				+    FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);
			
 
				+    
			
 
				+    const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff);
			
 
				+
			
 
				+
			
 
				+    FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);
			
 
				+
			
 
				+    // New samples take this frame's color directly; everything else is rectified and then blended with history.
			
 
				+    if (params.bIsNewSample) {
			
 
				+        fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz);
			
 
				+    }
			
 
				+    else {
			
 
				+        RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor);
			
 
				+
			
 
				+        Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight);
			
 
				+    }
			
 
				+
			
 
				+    fHistoryColor = UnprepareRgb(fHistoryColor, Exposure());
			
 
				+
			
 
				+    FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w);
			
 
				+
			
 
				+    // Get new temporal reactive factor
			
 
				+    fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor);
			
 
				+
			
 
				+    // The temporal reactive factor is stored in the fourth component, next to the color.
			
 
				+    StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor));
			
 
				+
			
 
				+    // Output final color when RCAS is disabled
			
 
				+#if FFX_FSR2_OPTION_APPLY_SHARPENING == 0
			
 
				+    WriteUpscaledOutput(iPxHrPos, fHistoryColor);
			
 
				+#endif
			
 
				+    StoreNewLocks(iPxHrPos, 0);
			
 
				+}
			
 
				+
			
 
				+#endif // FFX_FSR2_ACCUMULATE_H
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
@@ -0,0 +1,92 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+
			
 
				+#extension GL_GOOGLE_include_directive : require
			
 
				+#extension GL_EXT_samplerless_texture_functions : require
			
 
				+// Needed for rw_upscaled_output declaration
			
 
				+#extension GL_EXT_shader_image_load_formatted : require
			
 
				+
			
 
				+#define FSR2_BIND_SRV_INPUT_EXPOSURE                         0
			
 
				+#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS                 1
			
 
				+#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
			
 
				+#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS                 2
			
 
				+#else
			
 
				+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS                   2
			
 
				+#endif
			
 
				+#define FSR2_BIND_SRV_INTERNAL_UPSCALED                      3
			
 
				+#define FSR2_BIND_SRV_LOCK_STATUS                            4
			
 
				+#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR                   6
			
 
				+#define FSR2_BIND_SRV_LUMA_INSTABILITY                       7
			
 
				+#define FSR2_BIND_SRV_LANCZOS_LUT                            8
			
 
				+#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT               9
			
 
				+#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS                   10
			
 
				+#define FSR2_BIND_SRV_AUTO_EXPOSURE                          11
			
 
				+#define FSR2_BIND_SRV_LUMA_HISTORY                           12
			
 
				+
			
 
				+#define FSR2_BIND_UAV_INTERNAL_UPSCALED                      13
			
 
				+#define FSR2_BIND_UAV_LOCK_STATUS                            14
			
 
				+#define FSR2_BIND_UAV_UPSCALED_OUTPUT                        15
			
 
				+#define FSR2_BIND_UAV_NEW_LOCKS                              16
			
 
				+#define FSR2_BIND_UAV_LUMA_HISTORY                           17
			
 
				+
			
 
				+#define FSR2_BIND_CB_FSR2                                    18
			
 
				+
			
 
				+// -- GODOT start --
			
 
				+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
			
 
				+#define FSR2_BIND_SRV_INPUT_DEPTH                            5
			
 
				+#endif
			
 
				+// -- GODOT end --
			
 
				+
			
 
				+#include "ffx_fsr2_callbacks_glsl.h"
			
 
				+#include "ffx_fsr2_common.h"
			
 
				+#include "ffx_fsr2_sample.h"
			
 
				+#include "ffx_fsr2_upsample.h"
			
 
				+#include "ffx_fsr2_postprocess_lock_status.h"
			
 
				+#include "ffx_fsr2_reproject.h"
			
 
				+#include "ffx_fsr2_accumulate.h"
			
 
				+
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
			
 
				+#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#ifndef FFX_FSR2_NUM_THREADS
			
 
				+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
			
 
				+
			
 
				+#endif // #ifndef FFX_FSR2_NUM_THREADS
			
 
				+
			
 
				+FFX_FSR2_NUM_THREADS
			
 
				+void main() // Accumulate pass entry point: each invocation calls Accumulate() for one pixel coordinate.
			
 
				+{
			
 
				+	uvec2 uGroupId = gl_WorkGroupID.xy;
			
 
				+    const uint GroupRows = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT; // workgroup rows covering the display height, rounded up
			
 
				+    uGroupId.y = GroupRows - uGroupId.y - 1; // reverse the vertical workgroup order (row 0 becomes the last row)
			
 
				+
			
 
				+    uvec2 uDispatchThreadId = uGroupId * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy; // remapped group origin + local thread offset
			
 
				+
			
 
				+    Accumulate(ivec2(uDispatchThreadId));
			
 
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
@@ -0,0 +1,93 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+
			
 
				+#extension GL_GOOGLE_include_directive : require
			
 
				+#extension GL_EXT_samplerless_texture_functions : require
			
 
				+
			
 
				+#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY                     0
			
 
				+#define FSR2_BIND_SRV_INPUT_COLOR                           1
			
 
				+#define FSR2_BIND_UAV_AUTOREACTIVE                          2
			
 
				+#define FSR2_BIND_CB_REACTIVE                               3
			
 
				+#define FSR2_BIND_CB_FSR2                                   4
			
 
				+
			
 
				+#include "ffx_fsr2_callbacks_glsl.h"
			
 
				+#include "ffx_fsr2_common.h"
			
 
				+
			
 
				+// layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR)  uniform texture2D   r_input_color_pre_alpha;
			
 
				+// layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D   r_input_color_post_alpha;
			
 
				+// layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8)     uniform image2D     rw_output_reactive_mask;
			
 
				+
			
 
				+
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
			
 
				+#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#ifndef FFX_FSR2_NUM_THREADS
			
 
				+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
			
 
				+#endif // #ifndef FFX_FSR2_NUM_THREADS
			
 
				+
			
 
				+#if defined(FSR2_BIND_CB_REACTIVE)
			
 
				+layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t // parameters read by main() of this pass
			
 
				+{
			
 
				+	float   scale; // multiplier applied to the computed reactive value
			
 
				+	float   threshold; // with the APPLY_THRESHOLD flag set, values below this become 0
			
 
				+	float   binaryValue; // with the APPLY_THRESHOLD flag set, value written when not below threshold
			
 
				+	uint    flags; // bitmask tested against the FFX_FSR2_AUTOREACTIVEFLAGS_* constants
			
 
				+} cbGenerateReactive;
			
 
				+#endif // #if defined(FSR2_BIND_CB_REACTIVE)
			
 
				+
			
 
				+FFX_FSR2_NUM_THREADS
			
 
				+void main() // Writes one reactive value per invocation, derived from the difference between the two input colors.
			
 
				+{
			
 
				+    FfxUInt32x2 uDispatchThreadId = gl_GlobalInvocationID.xy;
			
 
				+
			
 
				+    FfxFloat32x3 ColorPreAlpha  = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb; // color from the opaque-only input
			
 
				+    FfxFloat32x3 ColorPostAlpha = LoadInputColor(FFX_MIN16_I2(uDispatchThreadId)).rgb; // color from the regular input color
			
 
				+    
			
 
				+    if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0) // optional: tonemap both colors before comparing
			
 
				+    {
			
 
				+        ColorPreAlpha = Tonemap(ColorPreAlpha);
			
 
				+        ColorPostAlpha = Tonemap(ColorPostAlpha);
			
 
				+    }
			
 
				+
			
 
				+    if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0) // optional: inverse-tonemap both colors; independent of the flag above
			
 
				+    {
			
 
				+        ColorPreAlpha = InverseTonemap(ColorPreAlpha);
			
 
				+        ColorPostAlpha = InverseTonemap(ColorPostAlpha);
			
 
				+    }
			
 
				+
			
 
				+    FfxFloat32 out_reactive_value = 0.f;
			
 
				+    FfxFloat32x3 delta = abs(ColorPostAlpha - ColorPreAlpha); // per-channel absolute difference
			
 
				+    
			
 
				+    out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX)!=0) ? max(delta.x, max(delta.y, delta.z)) : length(delta); // largest channel difference, or the vector length
			
 
				+    out_reactive_value *= cbGenerateReactive.scale;
			
 
				+
			
 
				+    out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < cbGenerateReactive.threshold) ? 0 : cbGenerateReactive.binaryValue) : out_reactive_value; // optional binarization: 0 below threshold, binaryValue otherwise
			
 
				+
			
 
				+    imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value)); // the same value is replicated into all four components
			
 
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
@@ -0,0 +1,704 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+#include "ffx_fsr2_resources.h"
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+#include "ffx_core.h"
			
 
				+#endif // #if defined(FFX_GPU)
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+#ifndef FFX_FSR2_PREFER_WAVE64
			
 
				+#define FFX_FSR2_PREFER_WAVE64
			
 
				+#endif // #ifndef FFX_FSR2_PREFER_WAVE64 -- NOTE(review): the enclosing #if defined(FFX_GPU) is still open after this line; confirm it is closed further down the file
			
 
				+
			
 
				+#if defined(FSR2_BIND_CB_FSR2)
			
 
				+	layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t // shared FSR2 constants, exposed through the accessor functions that follow (RenderSize(), Jitter(), ...)
			
 
				+	{
			
 
				+		FfxInt32x2    iRenderSize;
			
 
				+		FfxInt32x2    iMaxRenderSize;
			
 
				+		FfxInt32x2    iDisplaySize;
			
 
				+		FfxInt32x2    iInputColorResourceDimensions;
			
 
				+		FfxInt32x2    iLumaMipDimensions;
			
 
				+		FfxInt32      iLumaMipLevelToUse;
			
 
				+		FfxInt32      iFrameIndex;
			
 
				+
			
 
				+		FfxFloat32x4  fDeviceToViewDepth;
			
 
				+		FfxFloat32x2  fJitter;
			
 
				+		FfxFloat32x2  fMotionVectorScale;
			
 
				+		FfxFloat32x2  fDownscaleFactor;
			
 
				+		FfxFloat32x2  fMotionVectorJitterCancellation;
			
 
				+		FfxFloat32    fPreExposure;
			
 
				+		FfxFloat32    fPreviousFramePreExposure;
			
 
				+		FfxFloat32    fTanHalfFOV;
			
 
				+		FfxFloat32    fJitterSequenceLength;
			
 
				+		FfxFloat32    fDeltaTime;
			
 
				+		FfxFloat32    fDynamicResChangeFactor;
			
 
				+		FfxFloat32    fViewSpaceToMetersFactor;
			
 
				+		
			
 
				+		// -- GODOT start --
			
 
				+		FfxFloat32    fPad; // explicit padding; assuming the Ffx* aliases are the plain GLSL int/float types, it occupies std140 bytes 124..127 so the mat4 starts at 128 -- TODO confirm against the CPU-side struct
			
 
				+		mat4          mReprojectionMatrix; // passed to FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION in LoadInputMotionVector()
			
 
				+		// -- GODOT end --
			
 
				+	} cbFSR2;
			
 
				+#endif // #if defined(FSR2_BIND_CB_FSR2) -- NOTE(review): the accessor functions after this point read cbFSR2 but are not inside this guard
			
 
				+
			
 
				+FfxInt32x2 RenderSize()
			
 
				+{
			
 
				+	return cbFSR2.iRenderSize;
			
 
				+}
			
 
				+
			
 
				+FfxInt32x2 MaxRenderSize()
			
 
				+{
			
 
				+	return cbFSR2.iMaxRenderSize;
			
 
				+}
			
 
				+
			
 
				+FfxInt32x2 DisplaySize()
			
 
				+{
			
 
				+	return cbFSR2.iDisplaySize;
			
 
				+}
			
 
				+
			
 
				+FfxInt32x2 InputColorResourceDimensions()
			
 
				+{
			
 
				+	return cbFSR2.iInputColorResourceDimensions;
			
 
				+}
			
 
				+
			
 
				+FfxInt32x2 LumaMipDimensions()
			
 
				+{
			
 
				+	return cbFSR2.iLumaMipDimensions;
			
 
				+}
			
 
				+
			
 
				+FfxInt32  LumaMipLevelToUse()
			
 
				+{
			
 
				+	return cbFSR2.iLumaMipLevelToUse;
			
 
				+}
			
 
				+
			
 
				+FfxInt32 FrameIndex()
			
 
				+{
			
 
				+	return cbFSR2.iFrameIndex;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x4 DeviceToViewSpaceTransformFactors()
			
 
				+{
			
 
				+	return cbFSR2.fDeviceToViewDepth;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 Jitter()
			
 
				+{
			
 
				+	return cbFSR2.fJitter;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 MotionVectorScale()
			
 
				+{
			
 
				+	return cbFSR2.fMotionVectorScale;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 DownscaleFactor()
			
 
				+{
			
 
				+	return cbFSR2.fDownscaleFactor;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 MotionVectorJitterCancellation()
			
 
				+{
			
 
				+	return cbFSR2.fMotionVectorJitterCancellation;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 PreExposure()
			
 
				+{
			
 
				+	return cbFSR2.fPreExposure;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 PreviousFramePreExposure()
			
 
				+{
			
 
				+	return cbFSR2.fPreviousFramePreExposure;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 TanHalfFoV()
			
 
				+{
			
 
				+	return cbFSR2.fTanHalfFOV;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 JitterSequenceLength()
			
 
				+{
			
 
				+	return cbFSR2.fJitterSequenceLength;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 DeltaTime()
			
 
				+{
			
 
				+	return cbFSR2.fDeltaTime;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 DynamicResChangeFactor()
			
 
				+{
			
 
				+	return cbFSR2.fDynamicResChangeFactor;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 ViewSpaceToMetersFactor()
			
 
				+{
			
 
				+	return cbFSR2.fViewSpaceToMetersFactor;
			
 
				+}
			
 
				+
			
 
				+layout (set = 0, binding = 0) uniform sampler s_PointClamp;
			
 
				+layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
			
 
				+
			
 
				+// SRVs
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)                       uniform texture2D  r_input_opaque_only;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR)                             uniform texture2D  r_input_color_jittered;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS)                    uniform texture2D  r_input_motion_vectors;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH)                             uniform texture2D  r_input_depth;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE)                          uniform texture2D  r_input_exposure;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
			
 
				+	layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE)                            uniform texture2D  r_auto_exposure;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK)                           uniform texture2D  r_reactive_mask;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)       uniform texture2D  r_transparency_and_composition_mask;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)        uniform utexture2D r_reconstructed_previous_nearest_depth;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS)                  uniform texture2D  r_dilated_motion_vectors;
			
 
				+#endif
			
 
				+#if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
			
 
				+	layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)          uniform texture2D  r_previous_dilated_motion_vectors;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH)                           uniform texture2D  r_dilatedDepth;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED)                       uniform texture2D  r_internal_upscaled_color;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS)                             uniform texture2D  r_lock_status;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA)                         uniform texture2D  r_lock_input_luma;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_NEW_LOCKS)
			
 
				+	layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS)                                uniform texture2D  r_new_locks;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR)                    uniform texture2D  r_prepared_input_color;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_LUMA_HISTORY)                            uniform texture2D  r_luma_history;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_RCAS_INPUT)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_RCAS_INPUT)                              uniform texture2D  r_rcas_input;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT)                             uniform texture2D  r_lanczos_lut;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)                    uniform texture2D  r_imgMips;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)                uniform texture2D  r_upsample_maximum_bias_lut;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
			
 
				+	layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)                  uniform texture2D  r_dilated_reactive_masks;
			
 
				+#endif			 
			
 
				+#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
			
 
				+	layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) 				      uniform texture2D  r_input_prev_color_pre_alpha;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
			
 
				+	layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) 				      uniform texture2D  r_input_prev_color_post_alpha;
			
 
				+#endif
			
 
				+
			
 
				+// UAV
			
 
				+#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D   rw_reconstructed_previous_nearest_depth;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f)           writeonly uniform image2D  rw_dilated_motion_vectors;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_DILATED_DEPTH
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f)                     writeonly uniform image2D  rw_dilatedDepth;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f)              writeonly uniform image2D  rw_internal_upscaled_color;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_LOCK_STATUS
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f)                      uniform image2D    rw_lock_status;
			
 
				+#endif
			
 
				+#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
			
 
				+	layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f)                    writeonly uniform image2D    rw_lock_input_luma;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_NEW_LOCKS
			
 
				+	layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8)				 		      uniform image2D    rw_new_locks;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16)            writeonly uniform image2D  rw_prepared_input_color;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_LUMA_HISTORY
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8)                     uniform image2D  rw_luma_history;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D  rw_upscaled_output;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f)              coherent uniform image2D  rw_img_mip_shading_change;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f)                        coherent uniform image2D  rw_img_mip_5;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8)                 writeonly uniform image2D	 rw_dilated_reactive_masks;
			
 
				+#endif 
			
 
				+#if defined FSR2_BIND_UAV_EXPOSURE 
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f)                         uniform image2D    rw_exposure;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
			
 
				+	layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f)                         uniform image2D    rw_auto_exposure;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 
			
 
				+	layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui)       coherent uniform uimage2D   rw_spd_global_atomic;
			
 
				+#endif
			
 
				+
			
 
				+#if defined FSR2_BIND_UAV_AUTOREACTIVE
			
 
				+	layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f)                       uniform image2D   	    rw_output_autoreactive;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_AUTOCOMPOSITION
			
 
				+	layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f)                    uniform image2D   	    rw_output_autocomposition;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
			
 
				+	layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f)     uniform image2D   	    rw_output_prev_color_pre_alpha;
			
 
				+#endif
			
 
				+#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
			
 
				+	layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f)    uniform image2D   	    rw_output_prev_color_post_alpha;
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
			
 
				+FfxFloat32 LoadMipLuma(FfxInt32x2 iPxPos, FfxInt32 mipLevel)
			
 
				+{
			
 
				+	return texelFetch(r_imgMips, iPxPos, FfxInt32(mipLevel)).r;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
			
 
				+FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxInt32 mipLevel)
			
 
				+{
			
 
				+	return textureLod(sampler2D(r_imgMips, s_LinearClamp), fUV, FfxFloat32(mipLevel)).r;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
			
 
				+FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+	return texelFetch(r_input_depth, iPxPos, 0).r;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_REACTIVE_MASK) 
			
 
				+FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) // Returns the red channel of r_reactive_mask at texel iPxPos (mip 0).
			
 
				+{
			
 
				+// -- GODOT start --
			
 
				+#if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP
			
 
				+	return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r, 0.9f); // Godot option: cap the fetched value at 0.9
			
 
				+#else
			
 
				+	return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; // unclamped value
			
 
				+#endif
			
 
				+// -- GODOT end --
			
 
				+}
			
 
				+#endif // #if defined(FSR2_BIND_SRV_REACTIVE_MASK)
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
			
 
				+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) // Returns the red channel of the mask at texel iPxPos (mip 0); note the unsigned parameter type, unlike the neighbouring Load* helpers.
			
 
				+{
			
 
				+	return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r; // converted to the signed coordinate type used for the fetch
			
 
				+}
			
 
				+#endif // #if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
			
 
				+FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+	return texelFetch(r_input_color_jittered, iPxPos, 0).rgb;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
			
 
				+FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+	return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0f).rgb;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
			
 
				+FfxFloat32x3 LoadPreparedInputColor(FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+	return texelFetch(r_prepared_input_color, iPxPos, 0).xyz;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
			
 
				+FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) // Returns the motion vector at the given texel, multiplied by MotionVectorScale().
			
 
				+{
			
 
				+	FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;
			
 
				+
			
 
				+// -- GODOT start --
			
 
				+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
			
 
				+	bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f))); // both components <= -1 are treated as "no valid vector stored"
			
 
				+	if (bInvalidMotionVector)
			
 
				+	{
			
 
				+		FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos); // LoadInputDepth() only exists when FSR2_BIND_SRV_INPUT_DEPTH is defined
			
 
				+		FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize(); // texel centre divided by the render size
			
 
				+		fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix); // replace the stored vector with one derived from UV, depth and the reprojection matrix
			
 
				+	}
			
 
				+#endif
			
 
				+// -- GODOT end --
			
 
				+
			
 
				+	FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
			
 
				+
			
 
				+#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
			
 
				+	fUvMotionVector -= MotionVectorJitterCancellation(); // subtract the jitter-cancellation term from the scaled vector
			
 
				+#endif
			
 
				+
			
 
				+	return fUvMotionVector;
			
 
				+}
			
 
				+#endif // #if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
			
 
				+FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory)
			
 
				+{
			
 
				+	return texelFetch(r_internal_upscaled_color, iPxHistory, 0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_LUMA_HISTORY)
			
 
				+void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
			
 
				+{
			
 
				+	imageStore(rw_luma_history, FfxInt32x2(iPxPos), fLumaHistory);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
			
 
				+FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+	return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
			
 
				+void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory)
			
 
				+{
			
 
				+	imageStore(rw_internal_upscaled_color, iPxHistory, fHistory);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
			
 
				+void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
			
 
				+{
			
 
				+	imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), fColorAndWeight);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT)
			
 
				+void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor)
			
 
				+{
			
 
				+    imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
			
 
				+FfxFloat32x2 LoadLockStatus(FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+	FfxFloat32x2 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rg;
			
 
				+
			
 
				+    return fLockStatus;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_LOCK_STATUS)
			
 
				+void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x2 fLockstatus)
			
 
				+{
			
 
				+	imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f, 0.0f));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
			
 
				+FfxFloat32 LoadLockInputLuma(FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+	return texelFetch(r_lock_input_luma, iPxPos, 0).r;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
			
 
				+void StoreLockInputLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma)
			
 
				+{
			
 
				+	imageStore(rw_lock_input_luma, iPxPos, vec4(fLuma, 0, 0, 0));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_NEW_LOCKS)
			
 
				+FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+	return texelFetch(r_new_locks, iPxPos, 0).r;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
			
 
				+FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+	return imageLoad(rw_new_locks, iPxPos).r;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
			
 
				+void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock)
			
 
				+{
			
 
				+	imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR)
			
 
				+void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
			
 
				+{
			
 
				+	imageStore(rw_prepared_input_color, iPxPos, fTonemapped);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
			
 
				+FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+	return textureLod(sampler2D(r_prepared_input_color, s_LinearClamp), fUV, 0.0f).w;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
			
 
				+FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+	FfxFloat32x2 fLockStatus = textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rg;
			
 
				+	return fLockStatus;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DEPTH)
			
 
				+FfxFloat32 LoadSceneDepth(FfxInt32x2 iPxInput)
			
 
				+{
			
 
				+	return texelFetch(r_input_depth, iPxInput, 0).r;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
			
 
				+FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+	return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
			
 
				+void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth)
			
 
				+{
			
 
				+	FfxUInt32 uDepth = floatBitsToUint(fDepth);
			
 
				+
			
 
				+	#if FFX_FSR2_OPTION_INVERTED_DEPTH
			
 
				+		imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth);
			
 
				+	#else
			
 
				+		imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth
			
 
				+	#endif
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
			
 
				+void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue)
			
 
				+{
			
 
				+	imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
			
 
				+void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
			
 
				+{
			
 
				+	//FfxUInt32 uDepth = f32tof16(fDepth);
			
 
				+	imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) 
			
 
				+void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
			
 
				+{
			
 
				+	imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
			
 
				+FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput)
			
 
				+{
			
 
				+	return texelFetch(r_dilated_motion_vectors, iPxInput, 0).rg;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
			
 
				+FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+	return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).rg;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
			
 
				+FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxInt32x2 iPxInput)
			
 
				+{
			
 
				+	return texelFetch(r_previous_dilated_motion_vectors, iPxInput, 0).rg;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+	return textureLod(sampler2D(r_previous_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).xy;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
			
 
				+FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
			
 
				+{
			
 
				+	return texelFetch(r_dilatedDepth, iPxInput, 0).r;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
			
 
				+FfxFloat32 Exposure()
			
 
				+{
			
 
				+	FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x;
			
 
				+
			
 
				+	if (exposure == 0.0f) {
			
 
				+		exposure = 1.0f;
			
 
				+	}
			
 
				+
			
 
				+	return exposure;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
			
 
				+FfxFloat32 AutoExposure()
			
 
				+{
			
 
				+	FfxFloat32 exposure = texelFetch(r_auto_exposure, FfxInt32x2(0, 0), 0).x;
			
 
				+
			
 
				+	if (exposure == 0.0f) {
			
 
				+		exposure = 1.0f;
			
 
				+	}
			
 
				+
			
 
				+	return exposure;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
			
 
				+{
			
 
				+#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
			
 
				+	return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x; 
			
 
				+#else
			
 
				+    return 0.f;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
			
 
				+FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
			
 
				+{
			
 
				+    // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
			
 
				+    return FfxFloat32(2.0f) * FfxFloat32(textureLod(sampler2D(r_upsample_maximum_bias_lut, s_LinearClamp), abs(uv) * 2.0f, 0.0f).r);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
			
 
				+FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+	return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0f).rg;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
			
 
				+FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
			
 
				+{
			
 
				+    return texelFetch(r_dilated_reactive_masks, iPxPos, 0).rg;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS)
			
 
				+void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
			
 
				+{
			
 
				+    imageStore(rw_dilated_reactive_masks, iPxPos, vec4(fDilatedReactiveMasks, 0.0f, 0.0f));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FFX_INTERNAL)
			
 
				+FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+    return textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
			
 
				+FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
			
 
				+{
			
 
				+	return texelFetch(r_input_opaque_only, iPxPos, 0).xyz;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
			
 
				+FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
			
 
				+{
			
 
				+	return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
			
 
				+FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
			
 
				+{
			
 
				+	return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_AUTOREACTIVE)
			
 
				+#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION)
			
 
				+void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
			
 
				+{
			
 
				+	imageStore(rw_output_autoreactive, iPxPos, vec4(FfxFloat32(fReactive.x), 0.0f, 0.0f, 0.0f));
			
 
				+
			
 
				+	imageStore(rw_output_autocomposition, iPxPos, vec4(FfxFloat32(fReactive.y), 0.0f, 0.0f, 0.0f));
			
 
				+}
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR)
			
 
				+void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
			
 
				+{
			
 
				+	imageStore(rw_output_prev_color_pre_alpha, iPxPos, vec4(color, 0.0f));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR)
			
 
				+void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
			
 
				+{
			
 
				+	imageStore(rw_output_prev_color_post_alpha, iPxPos, vec4(color, 0.0f));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif // #if defined(FFX_GPU)
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
@@ -0,0 +1,799 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#include "ffx_fsr2_resources.h"
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+#ifdef __hlsl_dx_compiler
			
 
				+#pragma dxc diagnostic push
			
 
				+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
			
 
				+#endif //__hlsl_dx_compiler
			
 
				+#include "ffx_core.h"
			
 
				+#ifdef __hlsl_dx_compiler
			
 
				+#pragma dxc diagnostic pop
			
 
				+#endif //__hlsl_dx_compiler
			
 
				+#endif // #if defined(FFX_GPU)
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+#ifndef FFX_FSR2_PREFER_WAVE64
			
 
				+#define FFX_FSR2_PREFER_WAVE64
			
 
				+#endif // #ifndef FFX_FSR2_PREFER_WAVE64
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+#pragma warning(disable: 3205)  // conversion from larger type to smaller
			
 
				+#endif // #if defined(FFX_GPU)
			
 
				+
			
 
				+#define DECLARE_SRV_REGISTER(regIndex)  t##regIndex
			
 
				+#define DECLARE_UAV_REGISTER(regIndex)  u##regIndex
			
 
				+#define DECLARE_CB_REGISTER(regIndex)   b##regIndex
			
 
				+#define FFX_FSR2_DECLARE_SRV(regIndex)  register(DECLARE_SRV_REGISTER(regIndex))
			
 
				+#define FFX_FSR2_DECLARE_UAV(regIndex)  register(DECLARE_UAV_REGISTER(regIndex))
			
 
				+#define FFX_FSR2_DECLARE_CB(regIndex)   register(DECLARE_CB_REGISTER(regIndex))
			
 
				+
			
 
				+#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL)
			
 
				+    cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2)
			
 
				+    {
			
 
				+        FfxInt32x2    iRenderSize;
			
 
				+        FfxInt32x2    iMaxRenderSize;
			
 
				+        FfxInt32x2    iDisplaySize;
			
 
				+        FfxInt32x2    iInputColorResourceDimensions;
			
 
				+        FfxInt32x2    iLumaMipDimensions;
			
 
				+        FfxInt32      iLumaMipLevelToUse;
			
 
				+        FfxInt32      iFrameIndex;
			
 
				+
			
 
				+        FfxFloat32x4  fDeviceToViewDepth;
			
 
				+        FfxFloat32x2  fJitter;
			
 
				+        FfxFloat32x2  fMotionVectorScale;
			
 
				+        FfxFloat32x2  fDownscaleFactor;
			
 
				+        FfxFloat32x2  fMotionVectorJitterCancellation;
			
 
				+        FfxFloat32    fPreExposure;
			
 
				+        FfxFloat32    fPreviousFramePreExposure;
			
 
				+        FfxFloat32    fTanHalfFOV;
			
 
				+        FfxFloat32    fJitterSequenceLength;
			
 
				+        FfxFloat32    fDeltaTime;
			
 
				+        FfxFloat32    fDynamicResChangeFactor;
			
 
				+        FfxFloat32    fViewSpaceToMetersFactor;
			
 
				+    };
			
 
				+
			
 
				+#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4)  // Number of 32-bit values. This must be kept in sync with the cbFSR2 size.
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p)
			
 
				+#define FFX_FSR2_ROOTSIG_STR(p) #p
			
 
				+#define FFX_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
			
 
				+                                    "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
			
 
				+                                    "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
			
 
				+                                    "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
			
 
				+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "comparisonFunc = COMPARISON_NEVER, " \
			
 
				+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
			
 
				+                                    "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
			
 
				+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "comparisonFunc = COMPARISON_NEVER, " \
			
 
				+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
			
 
				+
			
 
				+#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6  // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
			
 
				+
			
 
				+#define FFX_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
			
 
				+                                    "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
			
 
				+                                    "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
			
 
				+                                    "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \
			
 
				+                                    "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
			
 
				+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "comparisonFunc = COMPARISON_NEVER, " \
			
 
				+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
			
 
				+                                    "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
			
 
				+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
			
 
				+                                                      "comparisonFunc = COMPARISON_NEVER, " \
			
 
				+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
			
 
				+#if defined(FFX_FSR2_EMBED_ROOTSIG)
			
 
				+#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG
			
 
				+#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG
			
 
				+#else
			
 
				+#define FFX_FSR2_EMBED_ROOTSIG_CONTENT
			
 
				+#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT
			
 
				+#endif // #if defined(FFX_FSR2_EMBED_ROOTSIG)
			
 
				+#endif // #if defined(FFX_GPU)
			
 
				+
			
 
				+/* Define getter functions in the order they are defined in the CB! */
			
 
				+FfxInt32x2 RenderSize()
			
 
				+{
			
 
				+    return iRenderSize;
			
 
				+}
			
 
				+
			
 
				+FfxInt32x2 MaxRenderSize()
			
 
				+{
			
 
				+    return iMaxRenderSize;
			
 
				+}
			
 
				+
			
 
				+FfxInt32x2 DisplaySize()
			
 
				+{
			
 
				+    return iDisplaySize;
			
 
				+}
			
 
				+
			
 
				+FfxInt32x2 InputColorResourceDimensions()
			
 
				+{
			
 
				+    return iInputColorResourceDimensions;
			
 
				+}
			
 
				+
			
 
				+FfxInt32x2 LumaMipDimensions()
			
 
				+{
			
 
				+    return iLumaMipDimensions;
			
 
				+}
			
 
				+
			
 
				+FfxInt32  LumaMipLevelToUse()
			
 
				+{
			
 
				+    return iLumaMipLevelToUse;
			
 
				+}
			
 
				+
			
 
				+FfxInt32 FrameIndex()
			
 
				+{
			
 
				+    return iFrameIndex;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 Jitter()
			
 
				+{
			
 
				+    return fJitter;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x4 DeviceToViewSpaceTransformFactors()
			
 
				+{
			
 
				+    return fDeviceToViewDepth;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 MotionVectorScale()
			
 
				+{
			
 
				+    return fMotionVectorScale;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 DownscaleFactor()
			
 
				+{
			
 
				+    return fDownscaleFactor;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 MotionVectorJitterCancellation()
			
 
				+{
			
 
				+    return fMotionVectorJitterCancellation;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 PreExposure()
			
 
				+{
			
 
				+    return fPreExposure;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 PreviousFramePreExposure()
			
 
				+{
			
 
				+    return fPreviousFramePreExposure;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 TanHalfFoV()
			
 
				+{
			
 
				+    return fTanHalfFOV;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 JitterSequenceLength()
			
 
				+{
			
 
				+    return fJitterSequenceLength;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 DeltaTime()
			
 
				+{
			
 
				+    return fDeltaTime;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 DynamicResChangeFactor()
			
 
				+{
			
 
				+    return fDynamicResChangeFactor;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 ViewSpaceToMetersFactor()
			
 
				+{
			
 
				+    return fViewSpaceToMetersFactor;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+SamplerState s_PointClamp : register(s0);
			
 
				+SamplerState s_LinearClamp : register(s1);
			
 
				+
			
 
				+// SRVs
			
 
				+#if defined(FFX_INTERNAL)
			
 
				+    Texture2D<FfxFloat32x4>                       r_input_opaque_only                       : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY);
			
 
				+    Texture2D<FfxFloat32x4>                       r_input_color_jittered                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
			
 
				+    Texture2D<FfxFloat32x4>                       r_input_motion_vectors                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
			
 
				+    Texture2D<FfxFloat32>                         r_input_depth                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
			
 
				+    Texture2D<FfxFloat32x2>                       r_input_exposure                          : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
			
 
				+    Texture2D<FfxFloat32x2>                       r_auto_exposure                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
			
 
				+    Texture2D<FfxFloat32>                         r_reactive_mask                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
			
 
				+    Texture2D<FfxFloat32>                         r_transparency_and_composition_mask       : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
			
 
				+    Texture2D<FfxUInt32>                          r_reconstructed_previous_nearest_depth    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
			
 
				+    Texture2D<FfxFloat32x2>                       r_dilated_motion_vectors                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
			
 
				+    Texture2D<FfxFloat32x2>                       r_previous_dilated_motion_vectors         : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS);
			
 
				+    Texture2D<FfxFloat32>                         r_dilatedDepth                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
			
 
				+    Texture2D<FfxFloat32x4>                       r_internal_upscaled_color                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
			
 
				+    Texture2D<unorm FfxFloat32x2>                 r_lock_status                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
			
 
				+    Texture2D<FfxFloat32>                         r_lock_input_luma                         : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
			
 
				+    Texture2D<unorm FfxFloat32>                   r_new_locks                               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
			
 
				+    Texture2D<FfxFloat32x4>                       r_prepared_input_color                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
			
 
				+    Texture2D<FfxFloat32x4>                       r_luma_history                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
			
 
				+    Texture2D<FfxFloat32x4>                       r_rcas_input                              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
			
 
				+    Texture2D<FfxFloat32>                         r_lanczos_lut                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
			
 
				+    Texture2D<FfxFloat32>                         r_imgMips                                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE);
			
 
				+    Texture2D<FfxFloat32>                         r_upsample_maximum_bias_lut               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
			
 
				+    Texture2D<unorm FfxFloat32x2>                 r_dilated_reactive_masks                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
			
 
				+    Texture2D<float3>                             r_input_prev_color_pre_alpha              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
			
 
				+    Texture2D<float3>                             r_input_prev_color_post_alpha             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
			
 
				+
			
 
				+    Texture2D<FfxFloat32x4>                       r_debug_out                               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
			
 
				+
			
 
				+    // UAV declarations
			
 
				+    RWTexture2D<FfxUInt32>                        rw_reconstructed_previous_nearest_depth   : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
			
 
				+    RWTexture2D<FfxFloat32x2>                     rw_dilated_motion_vectors                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
			
 
				+    RWTexture2D<FfxFloat32>                       rw_dilatedDepth                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
			
 
				+    RWTexture2D<FfxFloat32x4>                     rw_internal_upscaled_color                : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
			
 
				+    RWTexture2D<unorm FfxFloat32x2>               rw_lock_status                            : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
			
 
				+    RWTexture2D<FfxFloat32>                       rw_lock_input_luma                        : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
			
 
				+    RWTexture2D<unorm FfxFloat32>                 rw_new_locks                              : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
			
 
				+    RWTexture2D<FfxFloat32x4>                     rw_prepared_input_color                   : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
			
 
				+    RWTexture2D<FfxFloat32x4>                     rw_luma_history                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
			
 
				+    RWTexture2D<FfxFloat32x4>                     rw_upscaled_output                        : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
			
 
				+
			
 
				+    globallycoherent RWTexture2D<FfxFloat32>      rw_img_mip_shading_change                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE);
			
 
				+    globallycoherent RWTexture2D<FfxFloat32>      rw_img_mip_5                              : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5);
			
 
				+    RWTexture2D<unorm FfxFloat32x2>               rw_dilated_reactive_masks                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
			
 
				+    RWTexture2D<FfxFloat32x2>                     rw_auto_exposure                          : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
			
 
				+    globallycoherent RWTexture2D<FfxUInt32>       rw_spd_global_atomic                      : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT);
			
 
				+    RWTexture2D<FfxFloat32x4>                     rw_debug_out                              : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
			
 
				+    
			
 
				+    RWTexture2D<float>                            rw_output_autoreactive                    : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE);
			
 
				+    RWTexture2D<float>                            rw_output_autocomposition                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION);
			
 
				+    RWTexture2D<float3>                           rw_output_prev_color_pre_alpha            : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
			
 
				+    RWTexture2D<float3>                           rw_output_prev_color_post_alpha           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);  
			
 
				+
			
 
				+#else // #if defined(FFX_INTERNAL)
			
 
				+    #if defined FSR2_BIND_SRV_INPUT_COLOR
			
 
				+        Texture2D<FfxFloat32x4>                   r_input_color_jittered                    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY
			
 
				+        Texture2D<FfxFloat32x4>                   r_input_opaque_only                       : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS
			
 
				+        Texture2D<FfxFloat32x4>                   r_input_motion_vectors                    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_INPUT_DEPTH
			
 
				+        Texture2D<FfxFloat32>                     r_input_depth                             : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH);
			
 
				+    #endif 
			
 
				+    #if defined FSR2_BIND_SRV_INPUT_EXPOSURE
			
 
				+        Texture2D<FfxFloat32x2>                   r_input_exposure                          : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_AUTO_EXPOSURE
			
 
				+        Texture2D<FfxFloat32x2>                   r_auto_exposure                           : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_REACTIVE_MASK
			
 
				+        Texture2D<FfxFloat32>                     r_reactive_mask                           : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK);
			
 
				+    #endif 
			
 
				+    #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK
			
 
				+        Texture2D<FfxFloat32>                     r_transparency_and_composition_mask       : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH
			
 
				+        Texture2D<FfxUInt32>                      r_reconstructed_previous_nearest_depth    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
			
 
				+    #endif 
			
 
				+    #if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS
			
 
				+       Texture2D<FfxFloat32x2>                    r_dilated_motion_vectors                  : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS
			
 
				+           Texture2D<FfxFloat32x2>                r_previous_dilated_motion_vectors         : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_DILATED_DEPTH
			
 
				+        Texture2D<FfxFloat32>                     r_dilatedDepth                            : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_INTERNAL_UPSCALED
			
 
				+        Texture2D<FfxFloat32x4>                   r_internal_upscaled_color                 : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_LOCK_STATUS
			
 
				+        Texture2D<unorm FfxFloat32x2>             r_lock_status                             : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA
			
 
				+        Texture2D<FfxFloat32>                     r_lock_input_luma                         : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_NEW_LOCKS
			
 
				+        Texture2D<unorm FfxFloat32>               r_new_locks                               : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
			
 
				+        Texture2D<FfxFloat32x4>                  r_prepared_input_color                    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_LUMA_HISTORY
			
 
				+        Texture2D<unorm FfxFloat32x4>             r_luma_history                            : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_RCAS_INPUT
			
 
				+        Texture2D<FfxFloat32x4>                   r_rcas_input                              : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_LANCZOS_LUT
			
 
				+        Texture2D<FfxFloat32>                     r_lanczos_lut                             : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS
			
 
				+        Texture2D<FfxFloat32>                     r_imgMips                                 : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT
			
 
				+        Texture2D<FfxFloat32>                     r_upsample_maximum_bias_lut               : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS
			
 
				+        Texture2D<unorm FfxFloat32x2>             r_dilated_reactive_masks                  : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS);
			
 
				+    #endif
			
 
				+
			
 
				+    #if defined FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR
			
 
				+        Texture2D<float3>                         r_input_prev_color_pre_alpha              : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR); // register taken from the bind-slot define that guards this declaration, matching every other SRV and the UAV counterpart
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR
			
 
				+        Texture2D<float3>                         r_input_prev_color_post_alpha             : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR); // register taken from the bind-slot define that guards this declaration, matching every other SRV and the UAV counterpart
			
 
				+    #endif
			
 
				+   
			
 
				+    // UAV declarations
			
 
				+    #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
			
 
				+        RWTexture2D<FfxUInt32>                    rw_reconstructed_previous_nearest_depth   : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
			
 
				+        RWTexture2D<FfxFloat32x2>                 rw_dilated_motion_vectors                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_DILATED_DEPTH
			
 
				+        RWTexture2D<FfxFloat32>                   rw_dilatedDepth                           : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
			
 
				+        RWTexture2D<FfxFloat32x4>                 rw_internal_upscaled_color                : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_LOCK_STATUS
			
 
				+        RWTexture2D<unorm FfxFloat32x2>           rw_lock_status                            : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA
			
 
				+        RWTexture2D<FfxFloat32>                   rw_lock_input_luma                        : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_NEW_LOCKS
			
 
				+        RWTexture2D<unorm FfxFloat32>             rw_new_locks                              : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
			
 
				+        RWTexture2D<FfxFloat32x4>                 rw_prepared_input_color                   : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_LUMA_HISTORY
			
 
				+        RWTexture2D<FfxFloat32x4>                 rw_luma_history                           : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
			
 
				+        RWTexture2D<FfxFloat32x4>                 rw_upscaled_output                        : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
			
 
				+        globallycoherent RWTexture2D<FfxFloat32>  rw_img_mip_shading_change                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
			
 
				+        globallycoherent RWTexture2D<FfxFloat32>  rw_img_mip_5                              : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
			
 
				+        RWTexture2D<unorm FfxFloat32x2>           rw_dilated_reactive_masks                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_EXPOSURE
			
 
				+        RWTexture2D<FfxFloat32x2>                 rw_exposure                               : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
			
 
				+        RWTexture2D<FfxFloat32x2>                 rw_auto_exposure                          : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
			
 
				+        globallycoherent RWTexture2D<FfxUInt32>   rw_spd_global_atomic                      : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC);
			
 
				+    #endif
			
 
				+
			
 
				+    #if defined FSR2_BIND_UAV_AUTOREACTIVE
			
 
				+        RWTexture2D<float>                        rw_output_autoreactive                    : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_AUTOCOMPOSITION
			
 
				+        RWTexture2D<float>                        rw_output_autocomposition                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOCOMPOSITION);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
			
 
				+        RWTexture2D<float3>                       rw_output_prev_color_pre_alpha            : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR);
			
 
				+    #endif
			
 
				+    #if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
			
 
				+        RWTexture2D<float3>                       rw_output_prev_color_post_alpha           : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR);
			
 
				+    #endif
			
 
				+#endif // #if defined(FFX_INTERNAL)
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel) // Reads the texel at iPxPos from mip level mipLevel of r_imgMips (direct fetch, no sampler).
			
 
				+{
			
 
				+    return r_imgMips.mips[mipLevel][iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) // Samples r_imgMips at fUV on mip level mipLevel through the s_LinearClamp sampler.
			
 
				+{
			
 
				+    return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) // Reads the texel at iPxPos from r_input_depth.
			
 
				+{
			
 
				+    return r_input_depth[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) // Samples r_input_depth at fUV (mip 0) through s_LinearClamp and returns the .x channel.
			
 
				+{
			
 
				+    return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) // Reads the texel at iPxPos from r_reactive_mask.
			
 
				+{
			
 
				+    return r_reactive_mask[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) // Reads the texel at iPxPos from r_transparency_and_composition_mask.
			
 
				+{
			
 
				+    return r_transparency_and_composition_mask[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) // Reads the texel at iPxPos from r_input_color_jittered and returns its .rgb channels.
			
 
				+{
			
 
				+    return r_input_color_jittered[iPxPos].rgb;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) // Samples r_input_color_jittered at fUV (mip 0) through s_LinearClamp and returns the .rgb channels.
			
 
				+{
			
 
				+    return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) // Reads the texel at iPxPos from r_prepared_input_color and returns its .xyz channels (the .w channel is read separately by SampleDepthClip).
			
 
				+{
			
 
				+    return r_prepared_input_color[iPxPos].xyz;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) // Reads the input motion vector at the given texel, scales it by MotionVectorScale() and, when enabled, subtracts the jitter cancellation term.
			
 
				+{
			
 
				+    FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy; // raw .xy of the bound motion-vector texture
			
 
				+
			
 
				+    FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); // NOTE(review): the name suggests the scaled value is in UV units - confirm against MotionVectorScale()
			
 
				+
			
 
				+#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
			
 
				+    fUvMotionVector -= MotionVectorJitterCancellation(); // compiled in only when the option flags the motion vectors as jittered
			
 
				+#endif
			
 
				+
			
 
				+    return fUvMotionVector;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) // Reads the texel at iPxHistory from r_internal_upscaled_color.
			
 
				+{
			
 
				+    return r_internal_upscaled_color[iPxHistory];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL)
			
 
				+void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) // Writes fLumaHistory to rw_luma_history at iPxPos.
			
 
				+{
			
 
				+    rw_luma_history[iPxPos] = fLumaHistory;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) // Samples r_luma_history at fUV (mip 0) through s_LinearClamp.
			
 
				+{
			
 
				+    return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FFX_INTERNAL)
			
 
				+FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV) // Samples r_debug_out at fUV (mip 0) through s_LinearClamp; only built when FFX_INTERNAL is defined.
			
 
				+{
			
 
				+    return r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w; // NOTE(review): a single .w channel is returned through a FfxFloat32x4 return type - confirm the implicit scalar-to-vector conversion is intended
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
			
 
				+void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) // Writes fHistory to rw_internal_upscaled_color at iPxHistory (same target as StoreInternalColorAndWeight).
			
 
				+{
			
 
				+    rw_internal_upscaled_color[iPxHistory] = fHistory;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
			
 
				+void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) // Writes fColorAndWeight to rw_internal_upscaled_color at iPxPos (same target as StoreReprojectedHistory).
			
 
				+{
			
 
				+    rw_internal_upscaled_color[iPxPos] = fColorAndWeight;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL)
			
 
				+void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) // Writes fColor to rw_upscaled_output at iPxPos.
			
 
				+{
			
 
				+    rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); // fourth channel is always written as 1
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+//LOCK_LIFETIME_REMAINING == 0
			
 
				+//Should make LockInitialLifetime() return a const 1.0f later
			
 
				+#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) // Reads the two-channel texel at iPxPos from r_lock_status.
			
 
				+{
			
 
				+    return r_lock_status[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL)
			
 
				+void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) // Writes fLockStatus to rw_lock_status at iPxPos.
			
 
				+{
			
 
				+    rw_lock_status[iPxPos] = fLockStatus;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos) // Reads the texel at iPxPos from r_lock_input_luma.
			
 
				+{
			
 
				+    return r_lock_input_luma[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
			
 
				+void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) // Writes fLuma to rw_lock_input_luma at iPxPos.
			
 
				+{
			
 
				+    rw_lock_input_luma[iPxPos] = fLuma;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) // Reads the texel at iPxPos from the read-only view r_new_locks.
			
 
				+{
			
 
				+    return r_new_locks[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) // Reads the texel at iPxPos through the read-write view rw_new_locks (LoadNewLocks uses the read-only view instead).
			
 
				+{
			
 
				+    return rw_new_locks[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
			
 
				+void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) // Writes newLock to rw_new_locks at iPxPos.
			
 
				+{
			
 
				+    rw_new_locks[iPxPos] = newLock;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
			
 
				+void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) // Writes the four-channel fTonemapped value to rw_prepared_input_color at iPxPos.
			
 
				+{
			
 
				+    rw_prepared_input_color[iPxPos] = fTonemapped;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) // Samples r_prepared_input_color at fUV (mip 0) through s_LinearClamp and returns only the .w channel.
			
 
				+{
			
 
				+    return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
			
 
				+// Samples r_lock_status at fUV (mip 0) through the s_LinearClamp sampler and returns both channels.
			
 
				+FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
			
 
				+{
			
 
				+    return r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) // Reads the unsigned-integer texel at iPxPos from r_reconstructed_previous_nearest_depth and reinterprets its bits as a float.
			
 
				+{
			
 
				+    return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); // bit reinterpretation, the inverse of the asuint() used in StoreReconstructedDepth
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
			
 
				+void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) // Atomically merges fDepth into rw_reconstructed_previous_nearest_depth at iPxSample using an interlocked min or max on the float's bit pattern.
			
 
				+{
			
 
				+    FfxUInt32 uDepth = asuint(fDepth); // the target texture holds unsigned integers, so the depth is stored as raw float bits
			
 
				+
			
 
				+    #if FFX_FSR2_OPTION_INVERTED_DEPTH
			
 
				+        InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth);
			
 
				+    #else
			
 
				+        InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth
			
 
				+    #endif
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
			
 
				+void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) // Overwrites the texel at iPxSample in rw_reconstructed_previous_nearest_depth with the raw value uValue (plain store, unlike the interlocked StoreReconstructedDepth).
			
 
				+{
			
 
				+    rw_reconstructed_previous_nearest_depth[iPxSample] = uValue;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL)
			
 
				+void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) // Writes fDepth to rw_dilatedDepth at iPxPos.
			
 
				+{
			
 
				+    rw_dilatedDepth[iPxPos] = fDepth;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
			
 
				+void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) // Writes fMotionVector to rw_dilated_motion_vectors at iPxPos.
			
 
				+{
			
 
				+    rw_dilated_motion_vectors[iPxPos] = fMotionVector;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) // Reads the .xy channels at iPxInput from r_dilated_motion_vectors.
			
 
				+{
			
 
				+    return r_dilated_motion_vectors[iPxInput].xy;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) // Reads the .xy channels at iPxInput from r_previous_dilated_motion_vectors.
			
 
				+{
			
 
				+    return r_previous_dilated_motion_vectors[iPxInput].xy;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) // Samples r_previous_dilated_motion_vectors at uv (mip 0) through s_LinearClamp and returns the .xy channels.
			
 
				+{
			
 
				+    return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) // Reads the texel at iPxInput from r_dilatedDepth.
			
 
				+{
			
 
				+    return r_dilatedDepth[iPxInput];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL)
			
 
				+// Returns the .x value stored at texel (0, 0) of r_input_exposure; a stored value of exactly zero is replaced by 1.0.
			
 
				+FfxFloat32 Exposure()
			
 
				+{
			
 
				+    const FfxFloat32 fStoredExposure = r_input_exposure[FfxUInt32x2(0, 0)].x;
			
 
				+
			
 
				+    // A zero exposure falls back to 1.0 so callers never receive 0.
			
 
				+    return (fStoredExposure == 0.0f) ? 1.0f : fStoredExposure;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL)
			
 
				+// Returns the .x value stored at texel (0, 0) of r_auto_exposure; a stored value of exactly zero is replaced by 1.0.
			
 
				+FfxFloat32 AutoExposure()
			
 
				+{
			
 
				+    const FfxFloat32 fStoredExposure = r_auto_exposure[FfxUInt32x2(0, 0)].x;
			
 
				+
			
 
				+    // A zero exposure falls back to 1.0 so callers never receive 0.
			
 
				+    return (fStoredExposure == 0.0f) ? 1.0f : fStoredExposure;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) // Looks up a weight for x in the r_lanczos_lut texture; always defined, but returns 0 when the LUT is not bound.
			
 
				+{
			
 
				+#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL)
			
 
				+    return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); // x is halved to form the horizontal coordinate; the vertical coordinate is fixed at 0.5
			
 
				+#else
			
 
				+    return 0.f; // fallback when no LUT resource is declared for this permutation
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) // Samples r_upsample_maximum_bias_lut at abs(uv) * 2 (mip 0) through s_LinearClamp and doubles the result.
			
 
				+{
			
 
				+    // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
			
 
				+    return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) // Samples both channels of r_dilated_reactive_masks at fUV (mip 0) through s_LinearClamp.
			
 
				+{
			
 
				+	return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos) // Reads the two-channel texel at iPxPos from r_dilated_reactive_masks.
			
 
				+{
			
 
				+    return r_dilated_reactive_masks[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
			
 
				+void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) // Writes fDilatedReactiveMasks to rw_dilated_reactive_masks at iPxPos.
			
 
				+{
			
 
				+    rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) // Reads the .xyz channels at iPxPos from r_input_opaque_only.
			
 
				+{
			
 
				+    return r_input_opaque_only[iPxPos].xyz;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) // Reads the three-channel texel at iPxPos from r_input_prev_color_pre_alpha.
			
 
				+{
			
 
				+    return r_input_prev_color_pre_alpha[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
			
 
				+FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) // Reads the three-channel texel at iPxPos from r_input_prev_color_post_alpha.
			
 
				+{
			
 
				+    return r_input_prev_color_post_alpha[iPxPos];
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL)
			
 
				+#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL)
			
 
				+void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) // Splits fReactive across two outputs at iPxPos; compiled only when both the autoreactive and autocomposition UAVs are bound (or FFX_INTERNAL).
			
 
				+{
			
 
				+    rw_output_autoreactive[iPxPos] = fReactive.x; // first component goes to the autoreactive output
			
 
				+
			
 
				+    rw_output_autocomposition[iPxPos] = fReactive.y; // second component goes to the autocomposition output
			
 
				+}
			
 
				+#endif
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
			
 
				+void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) // Writes color to rw_output_prev_color_pre_alpha at iPxPos.
			
 
				+{
			
 
				+    rw_output_prev_color_pre_alpha[iPxPos] = color;
			
 
				+
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
			
 
				+void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) // Writes color to rw_output_prev_color_post_alpha at iPxPos.
			
 
				+{
			
 
				+    rw_output_prev_color_post_alpha[iPxPos] = color;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#endif // #if defined(FFX_GPU)
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
@@ -0,0 +1,565 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#if !defined(FFX_FSR2_COMMON_H)
			
 
				+#define FFX_FSR2_COMMON_H
			
 
				+
			
 
				+#if defined(FFX_CPU) || defined(FFX_GPU)
			
 
				+//Locks
			
 
				+#define LOCK_LIFETIME_REMAINING 0
			
 
				+#define LOCK_TEMPORAL_LUMA 1
			
 
				+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
			
 
				+
			
 
				+#if defined(FFX_GPU)
			
 
				+FFX_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f; // approximately the smallest positive normal 16-bit float
			
 
				+FFX_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f; // largest finite 16-bit float
			
 
				+FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f; // general-purpose tolerance, e.g. the weight threshold in RectificationBoxComputeVarianceBoxData
			
 
				+FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX; // reciprocal of the largest finite 16-bit float
			
 
				+FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f; // largest finite 32-bit float
			
 
				+FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; // smallest positive normal 32-bit float
			
 
				+
			
 
				+// treat vector truncation warnings as errors
			
 
				+#pragma warning(error: 3206)
			
 
				+
			
 
				+// suppress warnings
			
 
				+#pragma warning(disable: 3205)  // conversion from larger type to smaller
			
 
				+#pragma warning(disable: 3571)  // in ffxPow(f, e), f could be negative
			
 
				+
			
 
				+// Reconstructed depth usage
			
 
				+FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f;
			
 
				+
			
 
				+// Accumulation
			
 
				+FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f;
			
 
				+FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f;
			
 
				+FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples
			
 
				+FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale;
			
 
				+
			
 
				+// Auto exposure
			
 
				+FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f;
			
 
				+
			
 
				+struct AccumulationPassCommonParams // Bundle of per-pixel values passed around together; NOTE(review): field meanings below are inferred from names only - confirm against the accumulation pass.
			
 
				+{
			
 
				+    FfxInt32x2 iPxHrPos; // presumably the pixel position in the high-resolution target
			
 
				+    FfxFloat32x2 fHrUv; // presumably the matching high-resolution UV
			
 
				+    FfxFloat32x2 fLrUv_HwSampler; // presumably the low-resolution UV adjusted for hardware sampling
			
 
				+    FfxFloat32x2 fMotionVector;
			
 
				+    FfxFloat32x2 fReprojectedHrUv;
			
 
				+    FfxFloat32 fHrVelocity;
			
 
				+    FfxFloat32 fDepthClipFactor;
			
 
				+    FfxFloat32 fDilatedReactiveFactor;
			
 
				+    FfxFloat32 fAccumulationMask;
			
 
				+
			
 
				+    FfxBoolean bIsResetFrame;
			
 
				+    FfxBoolean bIsExistingSample;
			
 
				+    FfxBoolean bIsNewSample;
			
 
				+};
			
 
				+
			
 
				+struct LockState // Pair of flags describing a pixel's lock in the current and the previous frame.
			
 
				+{
			
 
				+    FfxBoolean NewLock; //Set for both unique new and re-locked new
			
 
				+    FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock)
			
 
				+};
			
 
				+
			
 
				+void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus) // Output-only: sets both lock-status components to zero (full-precision overload).
			
 
				+{
			
 
				+    fLockStatus = FfxFloat32x2(0, 0);
			
 
				+}
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus) // Output-only: sets both lock-status components to zero (min16 overload, built only when FFX_HALF is enabled).
			
 
				+{
			
 
				+    fLockStatus = FFX_MIN16_F2(0, 0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus) // Zeroes the LOCK_LIFETIME_REMAINING component of fLockStatus and leaves the other component untouched.
			
 
				+{
			
 
				+    fLockStatus[LOCK_LIFETIME_REMAINING] = 0;
			
 
				+}
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus) // min16 overload: zeroes the LOCK_LIFETIME_REMAINING component of fLockStatus and leaves the other component untouched.
			
 
				+{
			
 
				+    fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+struct RectificationBox // Running colour statistics filled by the RectificationBox* helpers below.
			
 
				+{
			
 
				+    FfxFloat32x3 boxCenter; // weighted sum of samples; divided by the total weight in RectificationBoxComputeVarianceBoxData
			
 
				+    FfxFloat32x3 boxVec; // weighted sum of squared samples; replaced by the standard deviation in RectificationBoxComputeVarianceBoxData
			
 
				+    FfxFloat32x3 aabbMin; // component-wise minimum of all added samples
			
 
				+    FfxFloat32x3 aabbMax; // component-wise maximum of all added samples
			
 
				+    FfxFloat32 fBoxCenterWeight; // sum of the sample weights
			
 
				+};
			
 
				+#if FFX_HALF
			
 
				+struct RectificationBoxMin16 // min16 counterpart of RectificationBox with the same fields, built only when FFX_HALF is enabled.
			
 
				+{
			
 
				+    FFX_MIN16_F3 boxCenter; // weighted sum of samples; divided by the total weight in RectificationBoxComputeVarianceBoxData
			
 
				+    FFX_MIN16_F3 boxVec; // weighted sum of squared samples; replaced by the standard deviation in RectificationBoxComputeVarianceBoxData
			
 
				+    FFX_MIN16_F3 aabbMin; // component-wise minimum of all added samples
			
 
				+    FFX_MIN16_F3 aabbMax; // component-wise maximum of all added samples
			
 
				+    FFX_MIN16_F fBoxCenterWeight; // sum of the sample weights
			
 
				+};
			
 
				+#endif
			
 
				+
			
 
				+void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox) // Puts the box into its empty state: zero weight and sums, bounds set to opposite extremes.
			
 
				+{
			
 
				+    rectificationBox.fBoxCenterWeight = FfxFloat32(0);
			
 
				+
			
 
				+    rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0);
			
 
				+    rectificationBox.boxVec = FfxFloat32x3(0, 0, 0);
			
 
				+    rectificationBox.aabbMin = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); // starts at +max so a later ffxMin against a sample yields the sample
			
 
				+    rectificationBox.aabbMax = -FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); // starts at -max so a later ffxMax against a sample yields the sample
			
 
				+}
			
 
				+#if FFX_HALF
			
 
				+void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) // min16 overload: zero weight and sums, bounds set to opposite extremes of the 16-bit float range.
			
 
				+{
			
 
				+    rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0);
			
 
				+
			
 
				+    rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0);
			
 
				+    rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0);
			
 
				+    rectificationBox.aabbMin = FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX); // uses the FP16 limit rather than FSR2_FLT_MAX
			
 
				+    rectificationBox.aabbMax = -FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) // Overwrites every field of the box with the statistics of a single weighted sample (no dependence on prior contents).
			
 
				+{
			
 
				+    rectificationBox.aabbMin = colorSample;
			
 
				+    rectificationBox.aabbMax = colorSample;
			
 
				+
			
 
				+    FfxFloat32x3 weightedSample = colorSample * fSampleWeight;
			
 
				+    rectificationBox.boxCenter = weightedSample;
			
 
				+    rectificationBox.boxVec = colorSample * weightedSample; // weight * sample^2, component-wise
			
 
				+    rectificationBox.fBoxCenterWeight = fSampleWeight;
			
 
				+}
			
 
				+
			
 
				+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) // Adds one weighted sample: the first sample replaces the box contents, later samples are accumulated.
			
 
				+{
			
 
				+    if (bInitialSample) {
			
 
				+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
			
 
				+    } else {
			
 
				+        rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); // grow the bounds to include the sample
			
 
				+        rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
			
 
				+
			
 
				+        FfxFloat32x3 weightedSample = colorSample * fSampleWeight;
			
 
				+        rectificationBox.boxCenter += weightedSample;
			
 
				+        rectificationBox.boxVec += colorSample * weightedSample; // accumulates weight * sample^2, component-wise
			
 
				+        rectificationBox.fBoxCenterWeight += fSampleWeight;
			
 
				+    }
			
 
				+}
			
 
				+#if FFX_HALF
			
 
				+void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) // min16 overload: overwrites every field of the box with the statistics of a single weighted sample.
			
 
				+{
			
 
				+    rectificationBox.aabbMin = colorSample;
			
 
				+    rectificationBox.aabbMax = colorSample;
			
 
				+
			
 
				+    FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight;
			
 
				+    rectificationBox.boxCenter = weightedSample;
			
 
				+    rectificationBox.boxVec = colorSample * weightedSample; // weight * sample^2, component-wise
			
 
				+    rectificationBox.fBoxCenterWeight = fSampleWeight;
			
 
				+}
			
 
				+
			
 
				+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) // min16 overload: the first sample replaces the box contents, later samples are accumulated.
			
 
				+{
			
 
				+    if (bInitialSample) {
			
 
				+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
			
 
				+    } else {
			
 
				+        rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); // grow the bounds to include the sample
			
 
				+        rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
			
 
				+
			
 
				+        FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight;
			
 
				+        rectificationBox.boxCenter += weightedSample;
			
 
				+        rectificationBox.boxVec += colorSample * weightedSample; // accumulates weight * sample^2, component-wise
			
 
				+        rectificationBox.fBoxCenterWeight += fSampleWeight;
			
 
				+    }
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Finalises an accumulated rectification box in place.
				+// On return boxCenter holds the weighted mean and boxVec the per-channel standard deviation.
				+// fBoxCenterWeight is replaced by 1 when its magnitude is not above FSR2_EPSILON,
				+// which keeps the two divisions below away from a (near-)zero divisor.
				+void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
				+{
				+    const FfxFloat32 fAccumulatedWeight = rectificationBox.fBoxCenterWeight;
				+    const FfxFloat32 fSafeWeight = (abs(fAccumulatedWeight) > FfxFloat32(FSR2_EPSILON)) ? fAccumulatedWeight : FfxFloat32(1.f);
				+
				+    rectificationBox.fBoxCenterWeight = fSafeWeight;
				+    rectificationBox.boxCenter /= fSafeWeight;
				+    rectificationBox.boxVec /= fSafeWeight;
				+
				+    // variance = E[c^2] - E[c]^2; abs() guards the sqrt against a slightly negative difference.
				+    const FfxFloat32x3 fVariance = rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter;
				+    rectificationBox.boxVec = sqrt(abs(fVariance));
				+}
			
 
				+#if FFX_HALF
			
 
				+// Min16 overload: finalises an accumulated rectification box in place.
				+// On return boxCenter holds the weighted mean and boxVec the per-channel standard deviation.
				+// fBoxCenterWeight is replaced by 1 when its magnitude is not above FSR2_EPSILON,
				+// which keeps the two divisions below away from a (near-)zero divisor.
				+void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
				+{
				+    const FFX_MIN16_F fAccumulatedWeight = rectificationBox.fBoxCenterWeight;
				+    const FFX_MIN16_F fSafeWeight = (abs(fAccumulatedWeight) > FFX_MIN16_F(FSR2_EPSILON)) ? fAccumulatedWeight : FFX_MIN16_F(1.f);
				+
				+    rectificationBox.fBoxCenterWeight = fSafeWeight;
				+    rectificationBox.boxCenter /= fSafeWeight;
				+    rectificationBox.boxVec /= fSafeWeight;
				+
				+    // variance = E[c^2] - E[c]^2; abs() guards the sqrt against a slightly negative difference.
				+    const FFX_MIN16_F3 fVariance = rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter;
				+    rectificationBox.boxVec = sqrt(abs(fVariance));
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Component-wise reciprocal of v.
				+// Returns the zero vector as soon as ANY component of v is zero (not just for the zero component).
				+FfxFloat32x3 SafeRcp3(FfxFloat32x3 v)
				+{
				+    const FfxFloat32x3 fZero = FfxFloat32x3(0, 0, 0);
				+
				+    if (all(FFX_NOT_EQUAL(v, fZero))) {
				+        return FfxFloat32x3(1, 1, 1) / v;
				+    }
				+
				+    return fZero;
				+}
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v)
			
 
				+{
			
 
				+    return (all(FFX_NOT_EQUAL(v, FFX_MIN16_F3(0, 0, 0)))) ? (FFX_MIN16_F3(1, 1, 1) / v) : FFX_MIN16_F3(0, 0, 0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Returns min(v0, v1) / max(v0, v1), or 0 when the larger value is exactly 0.
				+FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
				+{
				+    const FfxFloat32 fLarger = ffxMax(v0, v1);
				+
				+    if (fLarger != 0) {
				+        return ffxMin(v0, v1) / fLarger;
				+    }
				+
				+    // Avoid dividing by zero.
				+    return FfxFloat32(0);
				+}
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1)
			
 
				+{
			
 
				+    const FFX_MIN16_F m = ffxMax(v0, v1);
			
 
				+    return m != FFX_MIN16_F(0) ? ffxMin(v0, v1) / m : FFX_MIN16_F(0);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Converts a colour from YCoCg (x = Y, y = Co, z = Cg) to RGB.
				+FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg)
				+{
				+    const FfxFloat32 fY = fYCoCg.x;
				+    const FfxFloat32 fCo = fYCoCg.y;
				+    const FfxFloat32 fCg = fYCoCg.z;
				+
				+    return FfxFloat32x3(
				+        fY + fCo - fCg,
				+        fY + fCg,
				+        fY - fCo - fCg);
				+}
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg)
			
 
				+{
			
 
				+    FFX_MIN16_F3 fRgb;
			
 
				+
			
 
				+    fRgb = FFX_MIN16_F3(
			
 
				+        fYCoCg.x + fYCoCg.y - fYCoCg.z,
			
 
				+        fYCoCg.x + fYCoCg.z,
			
 
				+        fYCoCg.x - fYCoCg.y - fYCoCg.z);
			
 
				+
			
 
				+    return fRgb;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Converts a colour from RGB to YCoCg (x = Y, y = Co, z = Cg).
				+FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb)
				+{
				+    const FfxFloat32 fY = 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b;
				+    const FfxFloat32 fCo = 0.5f * fRgb.r - 0.5f * fRgb.b;
				+    const FfxFloat32 fCg = -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b;
				+
				+    return FfxFloat32x3(fY, fCo, fCg);
				+}
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb)
			
 
				+{
			
 
				+    FFX_MIN16_F3 fYCoCg;
			
 
				+
			
 
				+    fYCoCg = FFX_MIN16_F3(
			
 
				+        0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b,
			
 
				+        0.5 * fRgb.r - 0.5 * fRgb.b,
			
 
				+        -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b);
			
 
				+
			
 
				+    return fYCoCg;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
			
 
				+{
			
 
				+    return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f));
			
 
				+}
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb)
			
 
				+{
			
 
				+    return dot(fLinearRgb, FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Maps linear RGB to a perceptual lightness value.
				+// Luminance comes from RGBToLuma(); it is then run through a piecewise curve (linear below
				+// 216/24389, cube root above) and scaled by 0.01. The constants match the CIE 1976 L* formula.
				+FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb)
				+{
				+    const FfxFloat32 fLinearSegmentEnd = 216.0f / 24389.0f;
				+    const FfxFloat32 fLinearSegmentSlope = 24389.0f / 27.0f;
				+
				+    const FfxFloat32 fLuminance = RGBToLuma(fLinearRgb);
				+
				+    FfxFloat32 fPerceivedLuminance = 0;
				+    if (fLuminance <= fLinearSegmentEnd) {
				+        // Dark end: straight line.
				+        fPerceivedLuminance = fLuminance * fLinearSegmentSlope;
				+    } else {
				+        // Everything else: cube-root segment.
				+        fPerceivedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f;
				+    }
				+
				+    return fPerceivedLuminance * 0.01f;
				+}
			
 
				+#if FFX_HALF
			
 
				+// Min16 overload: maps linear RGB to a perceptual lightness value.
				+// Luminance comes from RGBToLuma(); it is then run through a piecewise curve (linear below
				+// 216/24389, cube root above) and scaled by 0.01. The constants match the CIE 1976 L* formula.
				+FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb)
				+{
				+    const FFX_MIN16_F fLinearSegmentEnd = FFX_MIN16_F(216.0f / 24389.0f);
				+    const FFX_MIN16_F fLinearSegmentSlope = FFX_MIN16_F(24389.0f / 27.0f);
				+
				+    const FFX_MIN16_F fLuminance = RGBToLuma(fLinearRgb);
				+
				+    FFX_MIN16_F fPerceivedLuminance = FFX_MIN16_F(0);
				+    if (fLuminance <= fLinearSegmentEnd) {
				+        // Dark end: straight line.
				+        fPerceivedLuminance = fLuminance * fLinearSegmentSlope;
				+    } else {
				+        // Everything else: cube-root segment.
				+        fPerceivedLuminance = ffxPow(fLuminance, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f);
				+    }
				+
				+    return fPerceivedLuminance * FFX_MIN16_F(0.01f);
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Compresses fRgb by dividing every channel by (1 + max(0, r, g, b)).
				+FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
				+{
				+    // The largest channel, floored at zero so the divisor never drops below 1.
				+    const FfxFloat32 fMaxChannel = ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b));
				+    const FfxFloat32 fDivisor = fMaxChannel + 1.f;
				+
				+    return fRgb / FfxFloat32x3(fDivisor, fDivisor, fDivisor);
				+}
			
 
				+
			
 
				+// Expands fRgb by dividing every channel by (1 - max(r, g, b)).
				+// The divisor is floored at FSR2_TONEMAP_EPSILON so a max channel at or above 1 cannot
				+// produce a zero or negative divisor.
				+FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
				+{
				+    const FfxFloat32 fMaxChannel = ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b));
				+    const FfxFloat32 fDivisor = ffxMax(FSR2_TONEMAP_EPSILON, 1.f - fMaxChannel);
				+
				+    return fRgb / FfxFloat32x3(fDivisor, fDivisor, fDivisor);
				+}
			
 
				+
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb)
			
 
				+{
			
 
				+    return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx;
			
 
				+}
			
 
				+
			
 
				+FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb)
			
 
				+{
			
 
				+    return fRgb / ffxMax(FFX_MIN16_F(FSR2_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Returns iPxSample + iPxOffset, clamped per axis only on the side the offset points to:
				+// a negative offset is clamped against 0, a positive one against iTextureSize - 1, and a
				+// zero offset leaves that axis untouched.
				+// A two-sided clamp, ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)),
				+// exists upstream only as commented-out code.
				+FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
				+{
				+    FfxInt32x2 iPxClamped = iPxSample + iPxOffset;
				+
				+    // Horizontal axis.
				+    if (iPxOffset.x < 0) {
				+        iPxClamped.x = ffxMax(iPxClamped.x, 0);
				+    }
				+    if (iPxOffset.x > 0) {
				+        iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - 1);
				+    }
				+
				+    // Vertical axis.
				+    if (iPxOffset.y < 0) {
				+        iPxClamped.y = ffxMax(iPxClamped.y, 0);
				+    }
				+    if (iPxOffset.y > 0) {
				+        iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - 1);
				+    }
				+
				+    return iPxClamped;
				+}
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
			
 
				+{
			
 
				+    FFX_MIN16_I2 result = iPxSample + iPxOffset;
			
 
				+    result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x;
			
 
				+    result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x;
			
 
				+    result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y;
			
 
				+    result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y;
			
 
				+    return result;
			
 
				+
			
 
				+    // return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Clamps a UV so the sample position stays at least half a texel inside the
				+// [0, iTextureSize] region, then re-normalises it against iResourceSize.
				+//   fUv           - UV expressed relative to iTextureSize.
				+//   iTextureSize  - size in texels of the region being sampled.
				+//   iResourceSize - size in texels the returned UV is normalised by.
				+FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize)
				+{
				+    const FfxFloat32x2 fHalfTexel = FfxFloat32x2(0.5f, 0.5f);
				+    const FfxFloat32x2 fUpperBound = FfxFloat32x2(iTextureSize) - fHalfTexel;
				+
				+    const FfxFloat32x2 fTexelPos = fUv * iTextureSize;
				+    const FfxFloat32x2 fClampedTexelPos = ffxMax(fHalfTexel, ffxMin(fTexelPos, fUpperBound));
				+
				+    return fClampedTexelPos / FfxFloat32x2(iResourceSize);
				+}
			
 
				+
			
 
				+FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
			
 
				+{
			
 
				+    return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size)));
			
 
				+}
			
 
				+#if FFX_HALF
			
 
				+FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size)
			
 
				+{
			
 
				+    return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size)));
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Derives an exposure multiplier from an average luminance.
				+// Lavg is passed through exp() first, i.e. the input is treated as a natural-log average.
				+// Returns 1 / Lmax, where Lmax is computed from the ISO-100 exposure value below.
				+FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg)
				+{
				+    const FfxFloat32 S = 100.0f; // ISO arithmetic speed
				+    const FfxFloat32 K = 12.5f;
				+    const FfxFloat32 q = 0.65f;
				+
				+    const FfxFloat32 fLinearLavg = exp(Lavg);
				+    const FfxFloat32 fExposureISO100 = log2((fLinearLavg * S) / K);
				+    const FfxFloat32 fLmax = (78.0f / (q * S)) * ffxPow(2.0f, fExposureISO100);
				+
				+    return 1 / fLmax;
				+}
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg)
			
 
				+{
			
 
				+    Lavg = exp(Lavg);
			
 
				+
			
 
				+    const FFX_MIN16_F S = FFX_MIN16_F(100.0f); //ISO arithmetic speed
			
 
				+    const FFX_MIN16_F K = FFX_MIN16_F(12.5f);
			
 
				+    const FFX_MIN16_F ExposureISO100 = log2((Lavg * S) / K);
			
 
				+
			
 
				+    const FFX_MIN16_F q = FFX_MIN16_F(0.65f);
			
 
				+    const FFX_MIN16_F Lmax = (FFX_MIN16_F(78.0f) / (q * S)) * ffxPow(FFX_MIN16_F(2.0f), ExposureISO100);
			
 
				+
			
 
				+    return FFX_MIN16_F(1) / Lmax;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Maps a render-resolution pixel coordinate to a display-resolution pixel coordinate.
				+// The pixel centre (+0.5) has Jitter() subtracted, is rescaled by DisplaySize() / RenderSize(),
				+// and is floored to an integer pixel.
				+FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
				+{
				+    const FfxFloat32x2 fJitteredLrCenter = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter();
				+    const FfxFloat32x2 fHrPos = (fJitteredLrCenter / RenderSize()) * DisplaySize();
				+
				+    return FfxInt32x2(floor(fHrPos));
				+}
			
 
				+#if FFX_HALF
			
 
				+FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos)
			
 
				+{
			
 
				+    FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter());
			
 
				+    FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize());
			
 
				+    FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr));
			
 
				+    return iPxHrPos;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
			
 
				+{
			
 
				+    return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f);
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth)
			
 
				+{
			
 
				+    const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors();
			
 
				+
			
 
				+    // fDeviceToViewDepth details found in ffx_fsr2.cpp
			
 
				+    return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0]));
			
 
				+}
			
 
				+
			
 
				+FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth)
			
 
				+{
			
 
				+    return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor();
			
 
				+}
			
 
				+
			
 
				+// Reconstructs a view-space position from a viewport pixel and its device depth.
				+// Z comes from GetViewSpaceDepth(); X and Y are the pixel's NDC coordinates scaled by Z and by
				+// components [2] and [3] of DeviceToViewSpaceTransformFactors().
				+FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
				+{
				+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
				+    const FfxFloat32x2 fNdc = ComputeNdc(iViewportPos, iViewportSize);
				+    const FfxFloat32 fViewZ = GetViewSpaceDepth(fDeviceDepth);
				+
				+    const FfxFloat32 fViewX = fFactors[2] * fNdc.x * fViewZ;
				+    const FfxFloat32 fViewY = fFactors[3] * fNdc.y * fViewZ;
				+
				+    return FfxFloat32x3(fViewX, fViewY, fViewZ);
				+}
			
 
				+
			
 
				+FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
			
 
				+{
			
 
				+    return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor();
			
 
				+}
			
 
				+
			
 
				+// Returns, in meters, the view-space depth of device depth 0.0 when
				+// FFX_FSR2_OPTION_INVERTED_DEPTH is set and of device depth 1.0 otherwise.
				+FfxFloat32 GetMaxDistanceInMeters()
				+{
				+#if FFX_FSR2_OPTION_INVERTED_DEPTH
				+    const FfxFloat32 fFarthestDeviceDepth = 0.0f;
				+#else
				+    const FfxFloat32 fFarthestDeviceDepth = 1.0f;
				+#endif
				+
				+    return GetViewSpaceDepth(fFarthestDeviceDepth) * ViewSpaceToMetersFactor();
				+}
			
 
				+
			
 
				+FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure)
			
 
				+{
			
 
				+    fRgb /= fPreExposure;
			
 
				+    fRgb *= fExposure;
			
 
				+
			
 
				+    fRgb = clamp(fRgb, 0.0f, FSR2_FP16_MAX);
			
 
				+
			
 
				+    return fRgb;
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure)
			
 
				+{
			
 
				+    fRgb /= fExposure;
			
 
				+    fRgb *= PreExposure();
			
 
				+
			
 
				+    return fRgb;
			
 
				+}
			
 
				+
			
 
				+
			
 
				+struct BilinearSamplingData
			
 
				+{
			
 
				+    FfxInt32x2 iOffsets[4];
			
 
				+    FfxFloat32 fWeights[4];
			
 
				+    FfxInt32x2 iBasePos;
			
 
				+};
			
 
				+
			
 
				+// Builds the 2x2 footprint for manually bilinear-filtering a texture of iSize texels at fUv.
				+// iBasePos is the footprint's first texel, iOffsets[i] the offset of tap i from it and
				+// fWeights[i] the matching bilinear weight. Taps are ordered (0,0), (1,0), (0,1), (1,1).
				+BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
				+{
				+    BilinearSamplingData data;
				+
				+    // Shift by half a texel so integer coordinates land on texel centres.
				+    const FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
				+    const FfxFloat32x2 fFrac = ffxFract(fPxSample);
				+
				+    // Per-axis weights of the near (0) and far (1) texel.
				+    const FfxFloat32 fWeightX0 = (1 - fFrac.x);
				+    const FfxFloat32 fWeightX1 = (fFrac.x);
				+    const FfxFloat32 fWeightY0 = (1 - fFrac.y);
				+    const FfxFloat32 fWeightY1 = (fFrac.y);
				+
				+    data.iBasePos = FfxInt32x2(floor(fPxSample));
				+
				+    data.iOffsets[0] = FfxInt32x2(0, 0);
				+    data.fWeights[0] = fWeightX0 * fWeightY0;
				+
				+    data.iOffsets[1] = FfxInt32x2(1, 0);
				+    data.fWeights[1] = fWeightX1 * fWeightY0;
				+
				+    data.iOffsets[2] = FfxInt32x2(0, 1);
				+    data.fWeights[2] = fWeightX0 * fWeightY1;
				+
				+    data.iOffsets[3] = FfxInt32x2(1, 1);
				+    data.fWeights[3] = fWeightX1 * fWeightY1;
				+
				+    return data;
				+}
			
 
				+
			
 
				+struct PlaneData
			
 
				+{
			
 
				+    FfxFloat32x3 fNormal;
			
 
				+    FfxFloat32 fDistanceFromOrigin;
			
 
				+};
			
 
				+
			
 
				+// Builds the plane through three points.
				+// fNormal is the normalised cross product of (fP0 - fP1) and (fP0 - fP2);
				+// fDistanceFromOrigin is -dot(fP0, fNormal), so dot(fNormal, p) + fDistanceFromOrigin is 0 on the plane.
				+// No guard is applied for collinear points, where the cross product is the zero vector.
				+PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2)
				+{
				+    const FfxFloat32x3 fEdge01 = fP0 - fP1;
				+    const FfxFloat32x3 fEdge02 = fP0 - fP2;
				+
				+    PlaneData plane;
				+    plane.fNormal = normalize(cross(fEdge01, fEdge02));
				+    plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal);
				+    return plane;
				+}
			
 
				+
			
 
				+FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint)
			
 
				+{
			
 
				+    return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin);
			
 
				+}
			
 
				+
			
 
				+#endif // #if defined(FFX_GPU)
			
 
				+
			
 
				+#endif //!defined(FFX_FSR2_COMMON_H)
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
@@ -0,0 +1,189 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+FFX_GROUPSHARED FfxUInt32 spdCounter;
			
 
				+
			
 
				+#ifndef SPD_PACKED_ONLY
			
 
				+FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
			
 
				+FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
			
 
				+FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
			
 
				+FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
			
 
				+
			
 
				+// SPD source callback: returns the log-luminance of the input colour at texel `tex` in .x
				+// (the other components are 0). `slice` is not used.
				+FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)
				+{
				+    // Jittered texel centre -> UV, kept inside the rendered region of the colour resource.
				+    const FfxFloat32x2 fUnclampedUv = (tex + 0.5f + Jitter()) / RenderSize();
				+    const FfxFloat32x2 fUv = ClampUv(fUnclampedUv, RenderSize(), InputColorResourceDimensions());
				+
				+    // Undo the application's pre-exposure before measuring luminance.
				+    const FfxFloat32x3 fRgb = SampleInputColor(fUv) / PreExposure();
				+
				+    // Log luma; the FSR2_EPSILON floor keeps log() away from zero.
				+    const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb)));
				+
				+    // Texels outside the render area must not contribute to the result.
				+    const FfxBoolean bInsideRenderArea = all(FFX_LESS_THAN(tex, RenderSize()));
				+
				+    return FfxFloat32x4(bInsideRenderArea ? fLogLuma : 0.0f, 0, 0, 0);
				+}
			
 
				+
			
 
				+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
			
 
				+{
			
 
				+    return SPD_LoadMipmap5(tex);
			
 
				+}
			
 
				+
			
 
				+// SPD store callback for mip `index` at pixel `pix`. `slice` is not used.
				+//  - Only mip LumaMipLevelToUse() and mip 5 are written out, via SPD_SetMipmap (red channel only).
				+//  - On the last mip (MipCount() - 1), pixel (0, 0) also updates the exposure buffer with
				+//    (exposure, Lavg), where Lavg is outValue.r, optionally smoothed against the stored value.
				+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
				+{
				+    const FfxBoolean bIsStoredMip = (index == LumaMipLevelToUse() || index == 5);
				+    if (bIsStoredMip)
				+    {
				+        SPD_SetMipmap(pix, index, outValue.r);
				+    }
				+
				+    // Everything below only applies to the single texel of the 1x1 level.
				+    if (index != MipCount() - 1)
				+    {
				+        return;
				+    }
				+    if (!all(FFX_EQUAL(pix, FfxInt32x2(0, 0))))
				+    {
				+        return;
				+    }
				+
				+    const FfxFloat32 fPrevLavg = SPD_LoadExposureBuffer().y;
				+    FfxFloat32 fLavg = outValue.r;
				+
				+    // Lavg values are small or negative, so a stored value below the reset constant is a real
				+    // average to smooth against; otherwise the new value is taken unsmoothed.
				+    if (fPrevLavg < resetAutoExposureAverageSmoothing)
				+    {
				+        const FfxFloat32 fRate = 1.0f;
				+        const FfxFloat32 fBlend = (1 - exp(-DeltaTime() * fRate));
				+        fLavg = fPrevLavg + (fLavg - fPrevLavg) * fBlend;
				+    }
				+
				+    SPD_SetExposureBuffer(FfxFloat32x2(ComputeAutoExposureFromLavg(fLavg), fLavg));
				+}
			
 
				+
			
 
				+void SpdIncreaseAtomicCounter(FfxUInt32 slice)
			
 
				+{
			
 
				+    SPD_IncreaseAtomicCounter(spdCounter);
			
 
				+}
			
 
				+
			
 
				+FfxUInt32 SpdGetAtomicCounter()
			
 
				+{
			
 
				+    return spdCounter;
			
 
				+}
			
 
				+
			
 
				+void SpdResetAtomicCounter(FfxUInt32 slice)
			
 
				+{
			
 
				+    SPD_ResetAtomicCounter();
			
 
				+}
			
 
				+
			
 
				+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
			
 
				+{
			
 
				+    return FfxFloat32x4(
			
 
				+        spdIntermediateR[x][y],
			
 
				+        spdIntermediateG[x][y],
			
 
				+        spdIntermediateB[x][y],
			
 
				+        spdIntermediateA[x][y]);
			
 
				+}
			
 
				+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
			
 
				+{
			
 
				+    spdIntermediateR[x][y] = value.x;
			
 
				+    spdIntermediateG[x][y] = value.y;
			
 
				+    spdIntermediateB[x][y] = value.z;
			
 
				+    spdIntermediateA[x][y] = value.w;
			
 
				+}
			
 
				+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
			
 
				+{
			
 
				+    return (v0 + v1 + v2 + v3) * 0.25f;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+// define fetch and store functions Packed
			
 
				+#if FFX_HALF
			
 
				+#error Callback must be implemented
			
 
				+
			
 
				+FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];
			
 
				+FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];
			
 
				+
			
 
				+FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
			
 
				+{
			
 
				+    return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]);
			
 
				+}
			
 
				+FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
			
 
				+{
			
 
				+    return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]);
			
 
				+}
			
 
				+// Packed (FFX_HALF) SPD store callback for mip `mip` at pixel `p` of array slice `slice`.
				+// Mip LumaMipLevelToUse() and mip 5 are written to imgDst6; every other mip goes to imgDst[mip + 1].
				+// The mip test uses the `mip` parameter: this function declares no `index`, which is the
				+// parameter name in the non-packed SpdStore.
				+// NOTE(review): this whole FFX_HALF section sits behind an `#error`, so it is not compiled today.
				+void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)
				+{
				+    if (mip == LumaMipLevelToUse() || mip == 5)
				+    {
				+        imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
				+        return;
				+    }
				+    imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value);
				+}
			
 
				+void SpdIncreaseAtomicCounter(FfxUInt32 slice)
			
 
				+{
			
 
				+    InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter);
			
 
				+}
			
 
				+FfxUInt32 SpdGetAtomicCounter()
			
 
				+{
			
 
				+    return spdCounter;
			
 
				+}
			
 
				+void SpdResetAtomicCounter(FfxUInt32 slice)
			
 
				+{
			
 
				+    rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0;
			
 
				+}
			
 
				+FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
			
 
				+{
			
 
				+    return FfxFloat16x4(
			
 
				+        spdIntermediateRG[x][y].x,
			
 
				+        spdIntermediateRG[x][y].y,
			
 
				+        spdIntermediateBA[x][y].x,
			
 
				+        spdIntermediateBA[x][y].y);
			
 
				+}
			
 
				+void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
			
 
				+{
			
 
				+    spdIntermediateRG[x][y] = value.xy;
			
 
				+    spdIntermediateBA[x][y] = value.zw;
			
 
				+}
			
 
				+FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
			
 
				+{
			
 
				+    return (v0 + v1 + v2 + v3) * FfxFloat16(0.25);
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#include "ffx_spd.h"
			
 
				+
			
 
				+void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
			
 
				+{
			
 
				+#if FFX_HALF
			
 
				+    SpdDownsampleH(
			
 
				+        FfxUInt32x2(WorkGroupId.xy),
			
 
				+        FfxUInt32(LocalThreadIndex),
			
 
				+        FfxUInt32(MipCount()),
			
 
				+        FfxUInt32(NumWorkGroups()),
			
 
				+        FfxUInt32(WorkGroupId.z),
			
 
				+        FfxUInt32x2(WorkGroupOffset()));
			
 
				+#else
			
 
				+    SpdDownsample(
			
 
				+        FfxUInt32x2(WorkGroupId.xy),
			
 
				+        FfxUInt32(LocalThreadIndex),
			
 
				+        FfxUInt32(MipCount()),
			
 
				+        FfxUInt32(NumWorkGroups()),
			
 
				+        FfxUInt32(WorkGroupId.z),
			
 
				+        FfxUInt32x2(WorkGroupOffset()));
			
 
				+#endif
			
 
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
@@ -0,0 +1,134 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+
			
 
				+#extension GL_GOOGLE_include_directive : require
			
 
				+#extension GL_EXT_samplerless_texture_functions : require
			
 
				+
			
 
				+#define FSR2_BIND_SRV_INPUT_COLOR                     0
			
 
				+#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC               1
			
 
				+#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE        2
			
 
				+#define FSR2_BIND_UAV_EXPOSURE_MIP_5                  3
			
 
				+#define FSR2_BIND_UAV_AUTO_EXPOSURE                   4
			
 
				+#define FSR2_BIND_CB_FSR2                             5
			
 
				+#define FSR2_BIND_CB_SPD                              6
			
 
				+
			
 
				+#include "ffx_fsr2_callbacks_glsl.h"
			
 
				+#include "ffx_fsr2_common.h"
			
 
				+
			
 
				+#if defined(FSR2_BIND_CB_SPD)
			
 
				+	layout (set = 1, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t
			
 
				+	{
			
 
				+		uint mips;
			
 
				+		uint numWorkGroups;
			
 
				+		uvec2 workGroupOffset;
			
 
				+		uvec2 renderSize;
			
 
				+	} cbSPD;
			
 
				+
			
 
				+	uint MipCount()
			
 
				+	{
			
 
				+		return cbSPD.mips;
			
 
				+	}
			
 
				+
			
 
				+	uint NumWorkGroups()
			
 
				+	{
			
 
				+		return cbSPD.numWorkGroups;
			
 
				+	}
			
 
				+
			
 
				+	uvec2 WorkGroupOffset()
			
 
				+	{
			
 
				+		return cbSPD.workGroupOffset;
			
 
				+	}
			
 
				+
			
 
				+	uvec2 SPD_RenderSize()
			
 
				+	{
			
 
				+		return cbSPD.renderSize;
			
 
				+	}
			
 
				+#endif
			
 
				+
			
 
				+vec2 SPD_LoadExposureBuffer()
			
 
				+{
			
 
				+	return imageLoad(rw_auto_exposure, ivec2(0,0)).xy;
			
 
				+}
			
 
				+
			
 
				+void SPD_SetExposureBuffer(vec2 value)
			
 
				+{
			
 
				+	imageStore(rw_auto_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f));
			
 
				+}
			
 
				+
			
 
				+vec4 SPD_LoadMipmap5(ivec2 iPxPos)
			
 
				+{
			
 
				+	return vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f);
			
 
				+}
			
 
				+
			
 
				+void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value)
			
 
				+{
			
 
				+	switch (slice)
			
 
				+	{
			
 
				+	case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL:
			
 
				+		imageStore(rw_img_mip_shading_change, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f));
			
 
				+		break;
			
 
				+	case 5:
			
 
				+		imageStore(rw_img_mip_5, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f));
			
 
				+		break;
			
 
				+	default:
			
 
				+
			
 
				+        // avoid flattened side effect
			
 
				+#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
			
 
				+		imageStore(rw_img_mip_shading_change, iPxPos, vec4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f));
			
 
				+#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
			
 
				+		imageStore(rw_img_mip_5, iPxPos, vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f));
			
 
				+#endif
			
 
				+		break;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+void SPD_IncreaseAtomicCounter(inout uint spdCounter)
			
 
				+{
			
 
				+	spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1);
			
 
				+}
			
 
				+
			
 
				+void SPD_ResetAtomicCounter()
			
 
				+{
			
 
				+	imageStore(rw_spd_global_atomic, ivec2(0,0), uvec4(0));
			
 
				+}
			
 
				+
			
 
				+#include "ffx_fsr2_compute_luminance_pyramid.h"
			
 
				+
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_WIDTH 256
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#ifndef FFX_FSR2_NUM_THREADS
			
 
				+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
			
 
				+#endif // #ifndef FFX_FSR2_NUM_THREADS
			
 
				+
			
 
				+FFX_FSR2_NUM_THREADS
			
 
				+void main()
			
 
				+{
			
 
				+    ComputeAutoExposure(gl_WorkGroupID.xyz, gl_LocalInvocationIndex);
			
 
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
@@ -0,0 +1,258 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifndef FFX_FSR2_DEPTH_CLIP_H
			
 
				+#define FFX_FSR2_DEPTH_CLIP_H
			
 
				+
			
 
				+FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f;
			
 
				+
			
 
				+// Compares the current depth sample against the reconstructed previous-frame
				+// depth around fUvSample and derives a depth-clip factor from it.
				+//
				+// fUvSample            UV whose four bilinear taps (from GetBilinearSamplingData)
				+//                      are examined in the reconstructed previous depth.
				+// fCurrentDepthSample  Depth-buffer value of the current sample.
				+//
				+// Only taps that are on screen, whose bilinear weight exceeds
				+// fReconstructedDepthBilinearWeightThreshold, and for which the current
				+// view-space depth is larger than the previous one contribute.
				+// Returns saturate(1 - weighted average), or 0 when no tap contributed.
				+FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
				+{
				+    FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
				+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());
				+
				+    FfxFloat32 fDepth = 0.0f;
				+    FfxFloat32 fWeightSum = 0.0f;
				+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
				+
				+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
				+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
				+
				+        if (IsOnScreen(iSamplePos, RenderSize())) {
				+            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
				+            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
				+
				+                const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
				+                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);
				+
				+                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
				+
				+                if (fDepthDiff > 0.0f) {
				+
				+                    // Pick one of the two depth-buffer values as the reference plane;
				+                    // which one depends on the depth convention.
				+#if FFX_FSR2_OPTION_INVERTED_DEPTH
				+                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
				+#else
				+                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
				+#endif
				+
				+                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
				+                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);
				+
				+                    // Despite its name, this is the length of the full render-size vector.
				+                    const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()));
				+                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
				+
				+                    const FfxFloat32 Ksep = 1.37e-05f;
				+                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
				+                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold;
				+
				+                    // The exponent goes from 1 to 3 as the render-size vector length
				+                    // approaches that of 1920x1080 (saturated beyond that).
				+                    const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f)));
				+                    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);
				+                    fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
				+                    fWeightSum += fWeight;
				+                }
				+            }
				+        }
				+    }
				+
				+    return (fWeightSum > 0.0f) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
				+}
			
 
				+
			
 
				+// Measures how much the input motion vectors in the 3x3 neighbourhood of
				+// iPxPos disagree with the motion vector at iPxPos itself.
				+//
				+// iPxPos                    Pixel whose motion vector is the reference ("nucleus").
				+// iPxInputMotionVectorSize  Size passed to ClampLoad when forming neighbour positions.
				+//
				+// Returns saturate(1 - minimum dot product) * saturate(max velocity / 0.01).
				+// The neighbourhood is skipped (minconvergence stays 1, so the result is 0)
				+// when the nucleus velocity scaled by RenderSize() is not above the epsilon.
				+FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
				+{
				+    FfxFloat32 minconvergence = 1.0f;
				+
				+    FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos);
				+    FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize());
				+    FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus);
				+
				+    const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f;
				+
				+    if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) {
				+        for (FfxInt32 y = -1; y <= 1; ++y) {
				+            for (FfxInt32 x = -1; x <= 1; ++x) {
				+
				+                FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize);
				+
				+                FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp);
				+                FfxFloat32 fVelocityUv = length(fMotionVector);
				+
				+                // Order matters: the running maximum is updated first, so the next
				+                // line makes fVelocityUv equal to that running maximum. Both vectors
				+                // below are therefore divided by the largest length seen so far,
				+                // not by their own lengths.
				+                fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
				+                fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
				+                minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv));
				+            }
				+        }
				+    }
				+
				+    return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f);
				+}
			
 
				+
			
 
				+// Returns 1 - (min depth / max depth) over the 3x3 neighbourhood of iPxPos,
				+// using dilated depth converted by GetViewSpaceDepthInMeters.
				+// The result is forced to 0 if any sample equals GetMaxDistanceInMeters().
				+FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
				+{
				+    const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters();
				+    FfxFloat32 fDepthMax = 0.0f;
				+    FfxFloat32 fDepthMin = fMaxDistInMeters;
				+
				+    FfxInt32 iMaxDistFound = 0;
				+
				+    for (FfxInt32 y = -1; y < 2; y++) {
				+        for (FfxInt32 x = -1; x < 2; x++) {
				+
				+            const FfxInt32x2 iOffset = FfxInt32x2(x, y);
				+            const FfxInt32x2 iSamplePos = iPxPos + iOffset;
				+
				+            // Off-screen samples are multiplied by 0, so they enter the min/max as depth 0.
				+            const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 1.0f : 0.0f;
				+            FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor;
				+
				+            // Remember whether any sample sits exactly at the maximum distance.
				+            iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth);
				+
				+            fDepthMin = ffxMin(fDepthMin, fDepth);
				+            fDepthMax = ffxMax(fDepthMax, fDepth);
				+        }
				+    }
				+
				+    // NOTE(review): fDepthMax starts at 0 and is the divisor here; if every sample
				+    // evaluated to 0 this would divide by zero - confirm that cannot happen.
				+    return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f);
				+}
			
 
				+
			
 
				+// Compares the dilated motion vector at iPxPos with the previous frame's
				+// dilated motion vector sampled at the reprojected UV.
				+// Returns 0 unless length(fMotionVector * DisplaySize()) is greater than 1;
				+// otherwise blends from 0 towards (1 - saturated length ratio) with a weight
				+// of saturate((pixel distance / 20)^3).
				+FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
				+{
				+    const FfxFloat32x2 fPixelCenterUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize();
				+
				+    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
				+
				+    // Follow the motion vector, then pass the result through ClampUv before sampling.
				+    FfxFloat32x2 fPrevFrameUv = ClampUv(fPixelCenterUv + fMotionVector, RenderSize(), MaxRenderSize());
				+    FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fPrevFrameUv);
				+
				+    float fPxDistance = length(fMotionVector * DisplaySize());
				+    if (fPxDistance > 1.0f) {
				+        const FfxFloat32 fLengthRatio = ffxSaturate(length(fPrevMotionVector) / length(fMotionVector));
				+        const FfxFloat32 fBlend = ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f));
				+        return ffxLerp(0.0f, 1.0f - fLengthRatio, fBlend);
				+    }
				+
				+    return 0.0f;
				+}
			
 
				+
			
 
				+// Builds the two-component dilated reactive mask for iPxLrPos and stores it
				+// with StoreDilatedReactiveMasks.
				+//
				+// iPxLrPos           Pixel being processed; also the centre of the 3x3 gather.
				+// fMotionDivergence  Initial value of the second (y) component of the result.
				+//
				+// x starts at 0 and y at fMotionDivergence; each is then raised to the maximum
				+// of the similarity-weighted reactive / transparency-and-composition samples.
				+void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
				+{
				+    // Compensate for bilinear sampling in accumulation pass
				+
				+    FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz;
				+    FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence);
				+
				+    float fMasksSum = 0.0f;
				+
				+    FfxFloat32x3 fColorSamples[9];
				+    FfxFloat32 fReactiveSamples[9];
				+    FfxFloat32 fTransparencyAndCompositionSamples[9];
				+
				+    // First pass: gather the 3x3 neighbourhood (positions from ClampLoad) into
				+    // local arrays and sum both masks.
				+    FFX_UNROLL
				+    for (FfxInt32 y = -1; y < 2; y++) {
				+        FFX_UNROLL
				+        for (FfxInt32 x = -1; x < 2; x++) {
				+
				+            const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
				+
				+            // Row-major index 0..8 into the sample arrays.
				+            FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;
				+
				+            FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz;
				+            FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord);
				+            FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord);
				+
				+            fColorSamples[sampleIdx] = fColorSample;
				+            fReactiveSamples[sampleIdx] = fReactiveSample;
				+            fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample;
				+
				+            fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample);
				+        }
				+    }
				+
				+    // Second pass runs only when the mask sum over the neighbourhood is positive.
				+    if (fMasksSum > 0)
				+    {
				+        for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++)
				+        {
				+            FfxFloat32x3 fColorSample = fColorSamples[sampleIdx];
				+            FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx];
				+            FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx];
				+
				+            // Dot product of reference and sample colour, divided by the larger
				+            // of the two squared lengths.
				+            const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample));
				+            const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq;
				+
				+            // Increase power for non-similar samples
				+            // (exponent is 1 at similarity 1 and 1 + fPowerBiasMax at similarity 0).
				+            const FfxFloat32 fPowerBiasMax = 6.0f;
				+            const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);
				+            const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower);
				+            const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower);
				+
				+            fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample));
				+        }
				+    }
				+
				+    StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor);
				+}
			
 
				+
			
 
				+// Loads the input colour at iPxLrPos, clamps negative components to zero,
				+// runs it through PrepareRgb with the current exposure values and returns
				+// the result converted by RGBToYCoCg.
				+FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
				+{
				+    // Input is assumed to be linear. Non-linear input (sRGB, ...) should be
				+    // converted to linear first and back to sRGB on output.
				+    const FfxFloat32x3 fZero = FfxFloat32x3(0, 0, 0);
				+    FfxFloat32x3 fLinearRgb = ffxMax(fZero, LoadInputColor(iPxLrPos));
				+
				+    fLinearRgb = PrepareRgb(fLinearRgb, Exposure(), PreExposure());
				+
				+    return RGBToYCoCg(fLinearRgb);
				+}
			
 
				+
			
 
				+// Looks at the reconstructed previous depth at rows y-1, y and y+1 of iPxPos.
				+// Returns 0 when view-space depth drops by more than 1% of the following
				+// sample at both steps (y-1 -> y and y -> y+1), and 1 otherwise.
				+// fMotionVector is accepted for interface compatibility but is not read.
				+FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
				+{
				+    const FfxFloat32 fDepthRowMinus = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
				+    const FfxFloat32 fDepthRowCenter = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
				+    const FfxFloat32 fDepthRowPlus = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));
				+
				+    const FfxBoolean bFirstStepDrops = (fDepthRowMinus - fDepthRowCenter) > (fDepthRowCenter * 0.01f);
				+    const FfxBoolean bSecondStepDrops = (fDepthRowCenter - fDepthRowPlus) > (fDepthRowPlus * 0.01f);
				+
				+    return 1.0f - FfxFloat32(bFirstStepDrops && bSecondStepDrops);
				+}
			
 
				+
			
 
				+// Per-pixel body of the depth clip pass for iPxPos. It:
				+//  1. stores the prepared YCoCg input colour with the depth-clip factor in
				+//     the fourth component (StorePreparedInputColor), and
				+//  2. computes and stores the dilated reactive masks (PreProcessReactiveMasks).
				+void DepthClip(FfxInt32x2 iPxPos)
				+{
				+    FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
				+    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
				+
				+    // Discard tiny mvs
				+    // (the comparison converts to 0 or 1, zeroing the vector when it is not above 0.01)
				+    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);
				+
				+    const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
				+    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
				+    // NOTE(review): fCurrentDepthViewSpace is not read anywhere below in this function.
				+    const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos));
				+
				+    // Compute prepared input color and depth clip
				+    FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
				+    FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
				+    StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip));
				+
				+    // Compute dilated reactive mask
				+    // The motion-vector sample position is iPxPos itself with low-resolution
				+    // motion vectors, otherwise the position returned by ComputeHrPosFromLrPos.
				+#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
				+    FfxInt32x2 iSamplePos = iPxPos;
				+#else
				+    FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
				+#endif
				+
				+    FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize());
				+    FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));
				+
				+    // The larger of the two divergence terms seeds the second mask component.
				+    PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
				+}
			
 
				+
			
 
				+#endif //!defined( FFX_FSR2_DEPTH_CLIP_H )
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
@@ -0,0 +1,67 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+
			
 
				+#extension GL_GOOGLE_include_directive : require
			
 
				+#extension GL_EXT_samplerless_texture_functions : require
			
 
				+
			
 
				+#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH      0
			
 
				+#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS                1
			
 
				+#define FSR2_BIND_SRV_DILATED_DEPTH                         2
			
 
				+#define FSR2_BIND_SRV_REACTIVE_MASK                         3
			
 
				+#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK     4
			
 
				+#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR                  5
			
 
				+#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS       6
			
 
				+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS                  7
			
 
				+#define FSR2_BIND_SRV_INPUT_COLOR                           8
			
 
				+#define FSR2_BIND_SRV_INPUT_DEPTH                           9
			
 
				+#define FSR2_BIND_SRV_INPUT_EXPOSURE                        10
			
 
				+
			
 
				+#define FSR2_BIND_UAV_DEPTH_CLIP                            11
			
 
				+#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS                12
			
 
				+#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR                  13
			
 
				+
			
 
				+#define FSR2_BIND_CB_FSR2                                   14
			
 
				+
			
 
				+#include "ffx_fsr2_callbacks_glsl.h"
			
 
				+#include "ffx_fsr2_common.h"
			
 
				+#include "ffx_fsr2_sample.h"
			
 
				+#include "ffx_fsr2_depth_clip.h"
			
 
				+
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#ifndef FFX_FSR2_NUM_THREADS
			
 
				+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
			
 
				+#endif // #ifndef FFX_FSR2_NUM_THREADS
			
 
				+
			
 
				+// Compute entry point of the depth clip pass: runs DepthClip for the pixel
				+// addressed by this invocation's global ID. The local size comes from the
				+// FFX_FSR2_NUM_THREADS layout macro (8x8x1 unless overridden).
				+FFX_FSR2_NUM_THREADS
				+void main()
				+{
				+	DepthClip(ivec2(gl_GlobalInvocationID.xy));
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h
@@ -0,0 +1 @@
 
				+// This file doesn't exist in this version of FSR.
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h
@@ -0,0 +1 @@
 
				+// This file doesn't exist in this version of FSR.
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
@@ -0,0 +1,115 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifndef FFX_FSR2_LOCK_H
			
 
				+#define FFX_FSR2_LOCK_H
			
 
				+
			
 
				+// Writes the far-plane bit pattern into the reconstructed depth at iPxHrPos
				+// via SetReconstructedDepth. Does nothing when any component of iPxHrPos is
				+// not less than the corresponding component of RenderSize().
				+void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos)
				+{
				+    // 0x3f800000 is the IEEE-754 bit pattern of 1.0f; inverted depth uses all-zero bits.
				+#if FFX_FSR2_OPTION_INVERTED_DEPTH
				+    const FfxUInt32 uFarDepthBits = 0x0;
				+#else
				+    const FfxUInt32 uFarDepthBits = 0x3f800000;
				+#endif
				+
				+    const FfxBoolean bInsideRenderArea = all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())));
				+    if (!bInsideRenderArea)
				+    {
				+        return;
				+    }
				+
				+    SetReconstructedDepth(iPxHrPos, uFarDepthBits);
				+}
			
 
				+
			
 
				+// Decides from the 3x3 lock-input-luma neighbourhood of pos whether the
				+// centre pixel qualifies as a thin feature.
				+//
				+// Each neighbour whose luma ratio to the centre (larger / smaller) is below
				+// similar_threshold sets its bit in mask; the others feed the dissimilar
				+// min/max. Returns true only if the centre luma lies outside the dissimilar
				+// [min, max] range AND no 2x2 quadrant containing the centre is fully similar.
				+FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos)
				+{
				+    const FfxInt32 RADIUS = 1;
				+
				+    FfxFloat32 fNucleus = LoadLockInputLuma(pos);
				+
				+    FfxFloat32 similar_threshold = 1.05f;
				+    FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX;
				+    FfxFloat32 dissimilarLumaMax = 0;
				+
				+    /*
				+     0 1 2
				+     3 4 5
				+     6 7 8
				+    */
				+
				+    // The argument is parenthesized so an expression argument keeps its own precedence.
				+    #define SETBIT(x) (1U << (x))
				+
				+    FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar
				+
				+    // Bit sets of the four 2x2 quadrants that include the centre (bit 4).
				+    const FfxUInt32 uNumRejectionMasks = 4;
				+    const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
				+        SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left
				+        SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right
				+        SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left
				+        SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right
				+    };
				+
				+    // idx advances in the loop increment, so it still counts the skipped centre
				+    // and always matches the 0..8 layout above.
				+    FfxInt32 idx = 0;
				+    FFX_UNROLL
				+    for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) {
				+        FFX_UNROLL
				+        for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) {
				+            if (x == 0 && y == 0) continue;
				+
				+            FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
				+
				+            FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos);
				+            FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus);
				+
				+            if (difference > 0 && (difference < similar_threshold)) {
				+                mask |= SETBIT(idx);
				+            } else {
				+                dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma);
				+                dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma);
				+            }
				+        }
				+    }
				+
				+    // The centre must be brighter than every dissimilar neighbour or darker than all of them.
				+    FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin;
				+
				+    if (FFX_FALSE == isRidge) {
				+
				+        return false;
				+    }
				+
				+    // Reject when any centre-containing 2x2 quadrant is entirely similar.
				+    FFX_UNROLL
				+    for (FfxInt32 i = 0; i < 4; i++) {
				+
				+        if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) {
				+            return false;
				+        }
				+    }
				+
				+    return true;
				+}
			
 
				+
			
 
				+// Per-pixel body of the lock pass for iPxLrPos: stores a new lock of 1.0 at
				+// the position returned by ComputeHrPosFromLrPos when
				+// ComputeThinFeatureConfidence accepts the pixel, then always calls
				+// ClearResourcesForNextFrame.
				+void ComputeLock(FfxInt32x2 iPxLrPos)
				+{
				+    if (ComputeThinFeatureConfidence(iPxLrPos))
				+    {
				+        StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f);
				+    }
				+
				+    // Note: the callee names its parameter iPxHrPos, but the position passed
				+    // here is iPxLrPos, unconverted.
				+    ClearResourcesForNextFrame(iPxLrPos);
				+}
			
 
				+
			
 
				+#endif // FFX_FSR2_LOCK_H
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
@@ -0,0 +1,56 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+
			
 
				+#extension GL_GOOGLE_include_directive : require
			
 
				+#extension GL_EXT_samplerless_texture_functions : require
			
 
				+
			
 
				+#define FSR2_BIND_SRV_LOCK_INPUT_LUMA                       0
			
 
				+#define FSR2_BIND_UAV_NEW_LOCKS                             1
			
 
				+#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH      2
			
 
				+#define FSR2_BIND_CB_FSR2                                   3
			
 
				+
			
 
				+#include "ffx_fsr2_callbacks_glsl.h"
			
 
				+#include "ffx_fsr2_common.h"
			
 
				+#include "ffx_fsr2_sample.h"
			
 
				+#include "ffx_fsr2_lock.h"
			
 
				+
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
			
 
				+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
			
 
				+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
			
 
				+#ifndef FFX_FSR2_NUM_THREADS
			
 
				+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
			
 
				+#endif // #ifndef FFX_FSR2_NUM_THREADS
			
 
				+
			
 
				+// Compute entry point of the lock pass. Builds the dispatch thread ID from
				+// the work-group ID, the thread-group width/height macros and the local
				+// invocation ID, then runs ComputeLock for that pixel.
				+FFX_FSR2_NUM_THREADS
				+void main()
				+{
				+    uvec2 uDispatchThreadId = gl_WorkGroupID.xy * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy;
				+
				+    ComputeLock(ivec2(uDispatchThreadId));
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
@@ -0,0 +1,106 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifndef FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
			
 
				+#define FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
			
 
				+
			
 
				+// Loads the luma mip at iPxSample (mip level from LumaMipLevelToUse) and
				+// returns it in the first component of a 4-vector; the rest are zero.
				+FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample)
				+{
				+    return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
				+}
				+
				+// Same wrapper using the FFX_MIN16 types; compiled only when FFX_HALF is set.
				+#if FFX_HALF
				+FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample)
				+{
				+    return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
				+}
				+#endif
			
 
				+
			
 
				+// Declares FetchShadingChangeLumaSamples (built on WrapShadingChangeLuma) and
				+// ShadingChangeLumaSample through declaration macros defined outside this file.
				+// The fetch variant depends on the half-precision options below.
				+#if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF
				+DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
				+#else
				+DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
				+#endif
				+DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples)
			
 
				+
			
 
				+// Returns (Exposure() * exp(sampled mip luma)) raised to the power 1/6 for
				+// the UV fUvCoord. iPxHrPos is part of the signature but is not read here.
				+FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord)
				+{
				+    FfxFloat32 fShadingChangeLuma = 0;
				+
				+    // The first branch is compiled out; only the #else path is active.
				+#if 0
				+    fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x);
				+#else
				+
				+    // 2 << level equals 2^(level + 1): the divisor applied to the render size.
				+    const FfxFloat32 fDiv = FfxFloat32(2 << LumaMipLevelToUse());
				+    FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv);
				+
				+    fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions());
				+    fShadingChangeLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse())));
				+#endif
				+
				+    fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f);
				+
				+    return fShadingChangeLuma;
				+}
			
 
				+
			
 
				+// Updates the lock status of one pixel and derives this frame's lock contribution.
				+//
				+// params                      Reads iPxHrPos, fHrUv, fAccumulationMask and fDepthClipFactor.
				+// fReactiveFactor (in/out)    Raised to at least saturate((fLuminanceDiff - 0.1) * 10).
				+// state                       Only state.NewLock is read.
				+// fLockStatus (in/out)        Indexed by LOCK_TEMPORAL_LUMA and LOCK_LIFETIME_REMAINING.
				+// fLockContributionThisFrame  (out) Saturated product of lifetime and shading-change terms.
				+// fLuminanceDiff              (out) 1 - MinDividedByMax(previous luma, current luma).
				+void UpdateLockStatus(AccumulationPassCommonParams params,
				+    FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state,
				+    FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus,
				+    FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame,
				+    FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) {
				+
				+    const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);
				+
				+    //init temporal shading change factor, init to -1 or so in reproject to know if "true new"?
				+    // A stored temporal luma of exactly 0 is replaced by the current value.
				+    fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA];
				+
				+    FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA];
				+
				+    fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma);
				+
				+    // Three cases: a new lock resets the temporal luma and sets the lifetime to
				+    // 2 (if it was non-zero) or 1; a lifetime <= 1 blends the temporal luma
				+    // halfway towards the current value; otherwise the lock is killed when the
				+    // luminance difference exceeds 0.1.
				+    if (state.NewLock) {
				+        fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma;
				+
				+        fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f;
				+    }
				+    else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) {
				+        fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f);
				+    }
				+    else {
				+        if (fLuminanceDiff > 0.1f) {
				+            KillLock(fLockStatus);
				+        }
				+    }
				+
				+    // Scale the remaining lifetime down by reactivity and the accumulation mask;
				+    // the last factor is a comparison converted to 0 or 1, so the lifetime
				+    // becomes 0 unless fDepthClipFactor is below 0.1.
				+    fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f));
				+    fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor);
				+
				+    fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask);
				+    fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f);
				+
				+    // Compute this frame lock contribution
				+    const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f);
				+    const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma));
				+
				+    fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution);
				+}
			
 
				+
			
 
				+#endif //!defined( FFX_FSR2_POSTPROCESS_LOCK_STATUS_H )
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h
@@ -0,0 +1,67 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#define GROUP_SIZE  8
			
 
				+
			
 
				+#define FSR_RCAS_DENOISE 1
			
 
				+
			
 
				+// Writes one output pixel. The unsigned position is converted to
				+// FFX_MIN16_I2 before it is handed to StoreUpscaledOutput.
				+void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
				+{
				+    const FFX_MIN16_I2 iStorePos = FFX_MIN16_I2(iPxHrPos);
				+    StoreUpscaledOutput(iStorePos, fUpscaledColor);
				+}
			
 
				+
			
 
				+#define FSR_RCAS_F
			
 
				+// Loads the RCAS input texel at integer position p. The rgb channels are
				+// passed through PrepareRgb with the current exposure and pre-exposure;
				+// the alpha channel is returned exactly as loaded.
				+FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p)
				+{
				+    const FfxFloat32x4 fTexel = LoadRCAS_Input(p);
				+    const FfxFloat32x3 fPreparedRgb = PrepareRgb(fTexel.rgb, Exposure(), PreExposure());
				+
				+    return FfxFloat32x4(fPreparedRgb, fTexel.a);
				+}
			
 
				+
			
 
				+// Input-transform callback with an empty body: r, g and b are left untouched.
				+// NOTE(review): presumably the hook ffx_fsr1.h expects when FSR_RCAS_F is defined - confirm.
				+void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b)
				+{
				+}
			
 
				+
			
 
				+#include "ffx_fsr1.h"
			
 
				+
			
 
				+
			
 
				+// Runs FsrRcasF for the pixel at pos using RCASConfig(), passes the result
				+// through UnprepareRgb with the current exposure, and writes it out.
				+void CurrFilter(FFX_MIN16_U2 pos)
				+{
				+    FfxFloat32x3 fSharpened;
				+    FsrRcasF(fSharpened.r, fSharpened.g, fSharpened.b, pos, RCASConfig());
				+
				+    const FfxFloat32x3 fOutputColor = UnprepareRgb(fSharpened, Exposure());
				+    WriteUpscaledOutput(pos, fOutputColor);
				+}
			
 
				+
			
 
				+// Pass entry point. Each invocation filters four pixels: its remapped
				+// position inside the work group's tile (tile origin = WorkGroupId.xy << 4)
				+// and the positions 8 pixels to the right, 8 right and 8 down, and 8 down.
				+// Only LocalThreadId.x and WorkGroupId.xy are read; Dtid is unused.
				+void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid)
				+{
				+    // Remap the local index so neighbouring invocations form a more PS-like swizzle pattern.
				+    const FfxUInt32x2 uTileOrigin = FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
				+    const FfxUInt32x2 uBasePos = ffxRemapForQuad(LocalThreadId.x) + uTileOrigin;
				+
				+    // Same visiting order as stepping x += 8, y += 8, x -= 8 from the base position.
				+    CurrFilter(FFX_MIN16_U2(uBasePos));
				+    CurrFilter(FFX_MIN16_U2(uBasePos + FfxUInt32x2(8u, 0u)));
				+    CurrFilter(FFX_MIN16_U2(uBasePos + FfxUInt32x2(8u, 8u)));
				+    CurrFilter(FFX_MIN16_U2(uBasePos + FfxUInt32x2(0u, 8u)));
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
@@ -0,0 +1,80 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+
			
 
				+#extension GL_GOOGLE_include_directive : require
			
 
				+#extension GL_EXT_samplerless_texture_functions : require
			
 
				+// Needed for rw_upscaled_output declaration
			
 
				+#extension GL_EXT_shader_image_load_formatted : require
			
 
				+
			
 
				+// Binding indices used by the RCAS pass. They are defined before
				+// ffx_fsr2_callbacks_glsl.h is included; FSR2_BIND_CB_RCAS is also used
				+// directly by the cbRCAS uniform block declared in this file.
				+// NOTE(review): presumably the callbacks header only declares a resource
				+// when its FSR2_BIND_* macro is defined - confirm against that header.
				+#define FSR2_BIND_SRV_INPUT_EXPOSURE        0
				+#define FSR2_BIND_SRV_RCAS_INPUT            1
				+#define FSR2_BIND_UAV_UPSCALED_OUTPUT       2
				+#define FSR2_BIND_CB_FSR2                   3
				+#define FSR2_BIND_CB_RCAS                   4
			
 
				+
			
 
				+#include "ffx_fsr2_callbacks_glsl.h"
			
 
				+#include "ffx_fsr2_common.h"
			
 
				+
			
 
				+//Move to prototype shader!
			
 
				+// RCAS constants. The uniform block exists only when a binding slot has been
				+// assigned through FSR2_BIND_CB_RCAS; without one RCASConfig() returns zeros.
				+#ifdef FSR2_BIND_CB_RCAS
				+    layout (set = 1, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t
				+    {
				+        uvec4 rcasConfig;
				+    } cbRCAS;
				+#endif
				+
				+// Returns the RCAS configuration vector (all zero when no block is bound).
				+uvec4 RCASConfig()
				+{
				+#ifdef FSR2_BIND_CB_RCAS
				+    return cbRCAS.rcasConfig;
				+#else
				+    return uvec4(0);
				+#endif
				+}
			
 
				+
			
 
				+// Fetches mip 0 of r_rcas_input at the given integer pixel position.
				+vec4 LoadRCAS_Input(FfxInt32x2 iPxPos)
				+{
				+    const vec4 fTexel = texelFetch(r_rcas_input, iPxPos, 0);
				+    return fTexel;
				+}
			
 
				+
			
 
				+#include "ffx_fsr2_rcas.h"
			
 
				+
			
 
				+// Work-group dimensions for this pass. Each macro is only a default and is
				+// skipped when the macro is already defined. Defaults: 64 x 1 x 1.
				+#if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)
				+#define FFX_FSR2_THREAD_GROUP_WIDTH 64
				+#endif
				+#if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)
				+#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
				+#endif
				+#if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)
				+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
				+#endif
				+// Expands to the compute-shader local size declaration built from the three macros above.
				+#if !defined(FFX_FSR2_NUM_THREADS)
				+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
				+#endif
			
 
				+
			
 
				+// Compute entry point: forwards the built-in invocation ids to RCAS().
				+FFX_FSR2_NUM_THREADS
				+void main()
				+{
				+    RCAS(gl_LocalInvocationID, gl_WorkGroupID, gl_GlobalInvocationID);
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
@@ -0,0 +1,145 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifndef FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
			
 
				+#define FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
			
 
				+
			
 
				+// Writes fDepth to the pixel(s) the current pixel maps to after applying
				+// its motion vector.
				+//   iPxPos        - pixel whose depth is being pushed.
				+//   fDepth        - depth value to store.
				+//   fMotionVector - offset added to the pixel UV; replaced by zero unless its
				+//                   length, after scaling by DisplaySize(), exceeds 0.1.
				+//   iPxDepthSize  - extent used for the UV conversion and the on-screen test.
				+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
				+{
				+    // Multiplying by 0.0 or 1.0 discards negligible motion.
				+    const FfxFloat32 fKeepMotion = FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f);
				+    fMotionVector *= fKeepMotion;
				+
				+    // The 0.5 offset addresses the centre of the pixel.
				+    const FfxFloat32x2 fPixelUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize;
				+    const FfxFloat32x2 fTargetUv = fPixelUv + fMotionVector;
				+
				+    BilinearSamplingData footprint = GetBilinearSamplingData(fTargetUv, RenderSize());
				+
				+    // Project current depth into previous frame locations: every tap whose
				+    // bilinear weight is above the threshold and that lies on screen receives it.
				+    for (FfxInt32 iTap = 0; iTap < 4; iTap++) {
				+        const FfxInt32x2 iTapOffset = footprint.iOffsets[iTap];
				+        const FfxFloat32 fTapWeight = footprint.fWeights[iTap];
				+
				+        if (!(fTapWeight > fReconstructedDepthBilinearWeightThreshold)) {
				+            continue;
				+        }
				+
				+        const FfxInt32x2 iTargetPos = footprint.iBasePos + iTapOffset;
				+        if (IsOnScreen(iTargetPos, iPxDepthSize)) {
				+            StoreReconstructedDepth(iTargetPos, fDepth);
				+        }
				+    }
				+}
			
 
				+
			
 
				+// Finds the nearest depth in the 3x3 neighbourhood centred on iPxPos.
				+//   iPxPos             - centre pixel.
				+//   iPxSize            - extent passed to IsOnScreen() to reject neighbours outside the image.
				+//   fNearestDepth      - [out] nearest depth found; the centre sample is always a candidate.
				+//   fNearestDepthCoord - [out] integer pixel coordinate that depth was loaded from.
				+// "Nearest" is the largest value when FFX_FSR2_OPTION_INVERTED_DEPTH is set
				+// and the smallest value otherwise.
				+void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord)
				+{
				+    const FfxInt32 iSampleCount = 9;
				+    const FfxInt32x2 iSampleOffsets[iSampleCount] = {
				+        FfxInt32x2(+0, +0),
				+        FfxInt32x2(+1, +0),
				+        FfxInt32x2(+0, +1),
				+        FfxInt32x2(+0, -1),
				+        FfxInt32x2(-1, +0),
				+        FfxInt32x2(-1, +1),
				+        FfxInt32x2(+1, +1),
				+        FfxInt32x2(-1, -1),
				+        FfxInt32x2(+1, -1),
				+    };
				+
				+    // Issue all depth loads up front so the compiler can batch them.
				+    // The array is sized by iSampleCount so it cannot drift from the offset table.
				+    FfxFloat32 depth[iSampleCount];
				+    FfxInt32 iSampleIndex = 0;
				+    FFX_UNROLL
				+    for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
				+
				+        FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
				+        depth[iSampleIndex] = LoadInputDepth(iPos);
				+    }
				+
				+    // Start from the centre sample, then let on-screen neighbours replace it.
				+    fNearestDepthCoord = iPxPos;
				+    fNearestDepth = depth[0];
				+    FFX_UNROLL
				+    for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {
				+
				+        FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
				+        if (IsOnScreen(iPos, iPxSize)) {
				+
				+            FfxFloat32 fNdDepth = depth[iSampleIndex];
				+            // Only the comparison direction depends on the depth convention.
				+#if FFX_FSR2_OPTION_INVERTED_DEPTH
				+            const FfxBoolean bIsNearer = fNdDepth > fNearestDepth;
				+#else
				+            const FfxBoolean bIsNearer = fNdDepth < fNearestDepth;
				+#endif
				+            if (bIsNearer) {
				+                fNearestDepthCoord = iPos;
				+                fNearestDepth = fNdDepth;
				+            }
				+        }
				+    }
				+}
			
 
				+
			
 
				+// Computes the luma value used by the pixel-locking logic for one
				+// low-resolution pixel: the input colour is clamped to be non-negative,
				+// divided by PreExposure(), multiplied by Exposure(), tonemapped when
				+// FFX_FSR2_OPTION_HDR_COLOR_INPUT is set, converted to perceived luma and
				+// raised to the power 1/6.
				+FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos)
				+{
				+    // The input is assumed to be linear. Non-linear input (sRGB, ...) should
				+    // be converted to linear first and back to sRGB on output.
				+    FfxFloat32x3 fColor = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
				+
				+    // Use internal auto exposure for the locking logic.
				+    fColor /= PreExposure();
				+    fColor *= Exposure();
				+
				+#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
				+    fColor = Tonemap(fColor);
				+#endif
				+
				+    // The ffxPow belongs to the lock logic; move it if this luma is reused elsewhere.
				+    return ffxPow(RGBToPerceivedLuma(fColor), FfxFloat32(1.0 / 6.0));
				+}
			
 
				+
			
 
				+// Per-pixel body of the reconstruct-and-dilate pass for low-resolution pixel
				+// iPxLrPos. It
				+//   1. finds the nearest depth in the surrounding 3x3 block and its coordinate,
				+//   2. loads the motion vector at that coordinate (converted with
				+//      ComputeHrPosFromLrPos unless FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS is set),
				+//   3. stores the dilated depth and dilated motion vector,
				+//   4. pushes the depth to its reprojected location(s), and
				+//   5. stores the lock input luma for the pixel.
				+void ReconstructAndDilate(FfxInt32x2 iPxLrPos)
				+{
				+    FfxFloat32 fDilatedDepth;
				+    FfxInt32x2 iNearestDepthCoord;
				+
				+    FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord);
				+
				+    // The previously computed iSamplePos local was never read and has been removed.
				+#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
				+    FfxInt32x2 iMotionVectorPos = iNearestDepthCoord;
				+#else
				+    FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord);
				+#endif
				+
				+    FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);
				+
				+    StoreDilatedDepth(iPxLrPos, fDilatedDepth);
				+    StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector);
				+
				+    ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize());
				+
				+    FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos);
				+    StoreLockInputLuma(iPxLrPos, fLockInputLuma);
				+}
			
 
				+
			
 
				+
			
 
				+#endif //!defined( FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
@@ -0,0 +1,65 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+
			
 
				+#extension GL_GOOGLE_include_directive : require
			
 
				+#extension GL_EXT_samplerless_texture_functions : require
			
 
				+
			
 
				+// Binding indices for the reconstruct-previous-depth pass: read-only inputs
				+// (SRV) occupy 0-4, writable resources (UAV) 5-11 and the FSR2 constant
				+// buffer 12. They are defined before ffx_fsr2_callbacks_glsl.h is included.
				+// NOTE(review): presumably the callbacks header only declares a resource
				+// when its FSR2_BIND_* macro is defined - confirm against that header.
				+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS                  0
				+#define FSR2_BIND_SRV_INPUT_DEPTH                           1
				+#define FSR2_BIND_SRV_INPUT_COLOR                           2
				+#define FSR2_BIND_SRV_INPUT_EXPOSURE                        3
				+#define FSR2_BIND_SRV_LUMA_HISTORY                          4
				+
				+#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH      5
				+#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS                6
				+#define FSR2_BIND_UAV_DILATED_DEPTH                         7
				+#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR                  8
				+#define FSR2_BIND_UAV_LUMA_HISTORY                          9
				+#define FSR2_BIND_UAV_LUMA_INSTABILITY                      10
				+#define FSR2_BIND_UAV_LOCK_INPUT_LUMA                       11
				+
				+#define FSR2_BIND_CB_FSR2                                   12
			
 
				+
			
 
				+#include "ffx_fsr2_callbacks_glsl.h"
			
 
				+#include "ffx_fsr2_common.h"
			
 
				+#include "ffx_fsr2_sample.h"
			
 
				+#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h"
			
 
				+
			
 
				+// Work-group dimensions for this pass. Each macro is only a default and is
				+// skipped when the macro is already defined. Defaults: 8 x 8 x 1.
				+#if !defined(FFX_FSR2_THREAD_GROUP_WIDTH)
				+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
				+#endif
				+#if !defined(FFX_FSR2_THREAD_GROUP_HEIGHT)
				+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
				+#endif
				+#if !defined(FFX_FSR2_THREAD_GROUP_DEPTH)
				+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
				+#endif
				+// Expands to the compute-shader local size declaration built from the three macros above.
				+#if !defined(FFX_FSR2_NUM_THREADS)
				+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
				+#endif
			
 
				+
			
 
				+// Compute entry point: one invocation handles the pixel at its global xy id.
				+FFX_FSR2_NUM_THREADS
				+void main()
				+{
				+    const FFX_MIN16_I2 iPxLrPos = FFX_MIN16_I2(gl_GlobalInvocationID.xy);
				+    ReconstructAndDilate(iPxLrPos);
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
@@ -0,0 +1,136 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifndef FFX_FSR2_REPROJECT_H
			
 
				+#define FFX_FSR2_REPROJECT_H
			
 
				+
			
 
				+// Selects the Lanczos sampler variant used for history reprojection.
				+// Defaults to 0 (marked "Reference") unless already defined.
				+#if !defined(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE)
				+#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference
				+#endif
			
 
				+
			
 
				+// Fetch callback for the history samplers: returns LoadHistory() at the given pixel.
				+FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample)
				+{
				+    const FfxFloat32x4 fHistoryTexel = LoadHistory(iPxSample);
				+    return fHistoryTexel;
				+}
				+
				+#if FFX_HALF
				+// Reduced-precision overload: same fetch, converted to FFX_MIN16_F4.
				+FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample)
				+{
				+    const FFX_MIN16_F4 fHistoryTexel = FFX_MIN16_F4(LoadHistory(iPxSample));
				+    return fHistoryTexel;
				+}
				+#endif
			
 
				+
			
 
				+
			
 
				+// Instantiates FetchHistorySamples (bicubic fetch through WrapHistory) and
				+// HistorySample (Lanczos sampler chosen by FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE).
				+// The Min16 variants are used only when both
				+// FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF and FFX_HALF are enabled.
				+#if !(FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF)
				+DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory)
				+DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
				+#else
				+DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory)
				+DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
				+#endif
			
 
				+
			
 
				+// Fetch callback for the lock-status samplers: widens the two-component
				+// result of LoadLockStatus() to four components by appending two zeros.
				+FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample)
				+{
				+    return FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f);
				+}
				+
				+#if FFX_HALF
				+// Reduced-precision overload of the same fetch.
				+FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample)
				+{
				+    return FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0);
				+}
				+#endif
			
 
				+
			
 
				+// Instantiates FetchLockStatusSamples and LockStatusSample on top of
				+// WrapLockStatus. The outer "#if 1" hard-selects the bilinear variant; the
				+// bicubic/Lanczos variant in the "#else" branch is never compiled.
				+#if 1 // bilinear lock-status sampling (always taken)
				+#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
				+DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
				+DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples)
				+#else
				+DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus)
				+DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples)
				+#endif
				+#else // bicubic + Lanczos lock-status sampling (compiled out by the "#if 1" above)
				+#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
				+DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
				+DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
				+#else
				+DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus)
				+DeclareCustomTextureSample(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
				+#endif
				+#endif
			
 
				+
			
 
				+// Returns the motion vector for an output pixel.
				+//   iPxHrPos - pixel position; used only when motion vectors are NOT low resolution.
				+//   fHrUv    - UV of the pixel; used only with low-resolution motion vectors, where
				+//              it is scaled by RenderSize() to address the dilated motion vectors.
				+FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv)
				+{
				+#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
				+    const FFX_MIN16_I2 iPxLrPos = FFX_MIN16_I2(fHrUv * RenderSize());
				+    return LoadDilatedMotionVector(iPxLrPos);
				+#else
				+    return LoadInputMotionVector(iPxHrPos);
				+#endif
				+}
			
 
				+
			
 
				+// True when both components of fUv lie in the closed range [0, 1].
				+FfxBoolean IsUvInside(FfxFloat32x2 fUv)
				+{
				+    const FfxBoolean bInsideX = (fUv.x >= 0.0f && fUv.x <= 1.0f);
				+    const FfxBoolean bInsideY = (fUv.y >= 0.0f && fUv.y <= 1.0f);
				+
				+    return bInsideX && bInsideY;
				+}
			
 
				+
			
 
				+// Applies the motion vector to the pixel UV.
				+//   fReprojectedHrUv  - [out] params.fHrUv + params.fMotionVector.
				+//   bIsExistingSample - [out] whether that UV lies inside [0, 1] on both axes.
				+void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample)
				+{
				+    const FfxFloat32x2 fShiftedUv = params.fHrUv + params.fMotionVector;
				+
				+    fReprojectedHrUv = fShiftedUv;
				+    bIsExistingSample = IsUvInside(fShiftedUv);
				+}
			
 
				+
			
 
				+// Samples the history buffer at the reprojected UV.
				+//   fHistoryColor           - [out] history rgb after PrepareRgb (current exposure,
				+//                             previous frame pre-exposure) and conversion to YCoCg.
				+//   fTemporalReactiveFactor - [out] saturate(|w|) of the history sample.
				+//   bInMotionLastFrame      - [out] true when the history sample's w is negative.
				+void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame)
				+{
				+    const FfxFloat32x4 fHistorySample = HistorySample(params.fReprojectedHrUv, DisplaySize());
				+
				+    fHistoryColor = RGBToYCoCg(PrepareRgb(fHistorySample.rgb, Exposure(), PreviousFramePreExposure()));
				+
				+    // The w channel carries two values: its magnitude and its sign.
				+    const FfxFloat32 fSignedReactivity = fHistorySample.w;
				+    fTemporalReactiveFactor = ffxSaturate(abs(fSignedReactivity));
				+    bInMotionLastFrame = (fSignedReactivity < 0.0f);
				+}
			
 
				+
			
 
				+// Gathers the lock state for the current output pixel.
				+//   params                 - supplies iPxHrPos (new-lock lookup) and fReprojectedHrUv
				+//                            (lock-status sampling position).
				+//   fReprojectedLockStatus - [out] lock status sampled at the reprojected UV.
				+// Returns a LockState where
				+//   NewLock            is set when the new-lock value at the pixel exceeds 127/255, and
				+//   WasLockedPrevFrame is set when the sampled LOCK_LIFETIME_REMAINING is non-zero.
				+LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus)
				+{
				+    LockState state = { FFX_FALSE, FFX_FALSE };
				+    const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos);
				+    state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
				+
				+    // The unused fInPlaceLockLifetime local has been removed.
				+
				+    fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv);
				+
				+    // Any remaining lifetime means the pixel was locked in the previous frame.
				+    state.WasLockedPrevFrame = (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f));
				+
				+    return state;
				+}
			
 
				+
			
 
				+#endif //!defined( FFX_FSR2_REPROJECT_H )
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
@@ -0,0 +1,105 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifndef FFX_FSR2_RESOURCES_H
			
 
				+#define FFX_FSR2_RESOURCES_H
			
 
				+
			
 
				+#if defined(FFX_CPU) || defined(FFX_GPU)
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_NULL                                           0
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY                              1
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR                                    2
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS                           3
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH                                    4
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE                                 5
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK                            6
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK        7
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH           8
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS                         9
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH                                  10
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR                        11
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS                                    12
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS                                      13
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR                           14
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY                                   15
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT                                   16
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT                                    17
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT                               18
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT                                19
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT                                     20
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1                                  21
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2                                  22
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1                      23
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2                      24
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY                    25
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION  26
			
 
				+#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT                      27 // NOTE(review): spelled "IDENTITIER", unlike its siblings; check every user of this name before renaming
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS                         28
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE                                29 // same as FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0                       29
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1                       30
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2                       31
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3                       32
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4                       33
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5                       34
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6                       35
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7                       36
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8                       37
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9                       38
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10                      39
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11                      40
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12                      41
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE                      42
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE                                  43
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE                                   44
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION                                45
			
 
				+
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR                           46
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR                          47
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1                         48
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1                        49
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2                         50
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2                        51
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS                52
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1              53
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2              54
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1                                 55
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2                                 56
			
 
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA                                57
			
 
				+
			
 
				+// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12]
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE          FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4
				+#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL                                           (FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) // selected identifier minus the mip-0 identifier: 33 - 29 = 4 with the values in this header
				+
				+#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT                                          58 // one more than the highest identifier in this header (57)
			
 
				+
			
 
				+#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2                                     0
			
 
				+#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD                                      1
			
 
				+#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS                                     2
			
 
				+#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE                              3
			
 
				+
			
 
				+// Auto-reactive option flags. Each value is a distinct power of two.
				+// NOTE(review): presumably meant to be OR-ed together into one bitmask - confirm against the code that consumes them.
				+#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP                                    1
				+#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP                             2
				+#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD                                  4
				+#define FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX                               8
			
 
				+
			
 
				+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
			
 
				+
			
 
				+#endif //!defined( FFX_FSR2_RESOURCES_H )
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
@@ -0,0 +1,605 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#ifndef FFX_FSR2_SAMPLE_H
			
 
				+#define FFX_FSR2_SAMPLE_H
			
 
				+
			
 
				+// suppress warnings
			
 
				+#ifdef FFX_HLSL
			
 
				+#pragma warning(disable: 4008) // potentially divide by zero
			
 
				+#endif //FFX_HLSL
			
 
				+
			
 
				+// 2x2 block of texels consumed by Bilinear().
				+// The two digits of each field name are the x and y index inside the block:
				+// fColor10 is the texel at offset (+1, +0) and fColor01 the one at (+0, +1),
				+// matching the offsets used by DeclareCustomFetchBilinearSamplesWithType.
				+struct FetchedBilinearSamples
				+{
				+    // y index 0
				+    FfxFloat32x4 fColor00;
				+    FfxFloat32x4 fColor10;
				+    // y index 1
				+    FfxFloat32x4 fColor01;
				+    FfxFloat32x4 fColor11;
				+};
			
 
				+
			
 
				+// 4x4 block of texels consumed by the Lanczos2* samplers.
				+// The two digits of each field name are the x and y index inside the block.
				+// DeclareCustomFetchBicubicSamplesWithType maps index 0..3 to texel offsets
				+// -1..+2, so fColor11 is the base texel itself.
				+struct FetchedBicubicSamples
				+{
				+    // y index 0
				+    FfxFloat32x4 fColor00;
				+    FfxFloat32x4 fColor10;
				+    FfxFloat32x4 fColor20;
				+    FfxFloat32x4 fColor30;
				+    // y index 1
				+    FfxFloat32x4 fColor01;
				+    FfxFloat32x4 fColor11;
				+    FfxFloat32x4 fColor21;
				+    FfxFloat32x4 fColor31;
				+    // y index 2
				+    FfxFloat32x4 fColor02;
				+    FfxFloat32x4 fColor12;
				+    FfxFloat32x4 fColor22;
				+    FfxFloat32x4 fColor32;
				+    // y index 3
				+    FfxFloat32x4 fColor03;
				+    FfxFloat32x4 fColor13;
				+    FfxFloat32x4 fColor23;
				+    FfxFloat32x4 fColor33;
				+};
			
 
				+
			
 
				+#if FFX_HALF
				+// FFX_MIN16_F4 counterpart of FetchedBilinearSamples (same field naming).
				+struct FetchedBilinearSamplesMin16
				+{
				+    // y index 0
				+    FFX_MIN16_F4 fColor00;
				+    FFX_MIN16_F4 fColor10;
				+    // y index 1
				+    FFX_MIN16_F4 fColor01;
				+    FFX_MIN16_F4 fColor11;
				+};
				+
				+// FFX_MIN16_F4 counterpart of FetchedBicubicSamples (same field naming).
				+struct FetchedBicubicSamplesMin16
				+{
				+    // y index 0
				+    FFX_MIN16_F4 fColor00;
				+    FFX_MIN16_F4 fColor10;
				+    FFX_MIN16_F4 fColor20;
				+    FFX_MIN16_F4 fColor30;
				+    // y index 1
				+    FFX_MIN16_F4 fColor01;
				+    FFX_MIN16_F4 fColor11;
				+    FFX_MIN16_F4 fColor21;
				+    FFX_MIN16_F4 fColor31;
				+    // y index 2
				+    FFX_MIN16_F4 fColor02;
				+    FFX_MIN16_F4 fColor12;
				+    FFX_MIN16_F4 fColor22;
				+    FFX_MIN16_F4 fColor32;
				+    // y index 3
				+    FFX_MIN16_F4 fColor03;
				+    FFX_MIN16_F4 fColor13;
				+    FFX_MIN16_F4 fColor23;
				+    FFX_MIN16_F4 fColor33;
				+};
				+#else //FFX_HALF
				+// Without FFX_HALF the Min16 names simply alias the full-precision structs.
				+#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
				+#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
				+#endif //FFX_HALF
			
 
				+
			
 
				+// Linear interpolation: returns A at t = 0 and B at t = 1. t is not clamped.
				+FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
				+{
				+    FfxFloat32x4 fDelta = B - A;
				+    return A + fDelta * t;
				+}
			
 
				+
			
 
				+// Bilinear blend of a 2x2 texel block.
				+// fPxFrac.x weights between the x index 0 and 1 texels, fPxFrac.y between the two rows.
				+FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
				+{
				+    // Blend along x inside each row first ...
				+    FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
				+    FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
				+    // ... then blend the two row results along y.
				+    return Linear(fRow0, fRow1, fPxFrac.y);
				+}
			
 
				+
			
 
				+#if FFX_HALF
				+// FFX_MIN16 overload of Linear(): returns A at t = 0 and B at t = 1, t is not clamped.
				+FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
				+{
				+    FFX_MIN16_F4 fDelta = B - A;
				+    return A + fDelta * t;
				+}
				+
				+// FFX_MIN16 overload of Bilinear(): blends along x inside each row, then along y.
				+FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
				+{
				+    FFX_MIN16_F4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
				+    FFX_MIN16_F4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
				+    return Linear(fRow0, fRow1, fPxFrac.y);
				+}
				+#endif
			
 
				+
			
 
				+// Lanczos-2 kernel weight: sinc(PI * x) * sinc(PI * x / 2), with sinc(v) = sin(v) / v.
				+// Returns 1 when |x| is below FSR2_EPSILON, where the quotient would be 0 / 0.
				+// The caller is responsible for keeping x inside the kernel support.
				+FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
				+{
				+    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants
				+    FfxFloat32 fPiX = PI * x;
				+    FfxFloat32 fHalfPiX = 0.5f * PI * x;
				+    FfxFloat32 fSincProduct = (sin(fPiX) / fPiX) * (sin(fHalfPiX) / fHalfPiX);
				+    return abs(x) < FSR2_EPSILON ? 1.f : fSincProduct;
				+}
			
 
				+
			
 
				+// Lanczos-2 kernel weight for any x: |x| is limited to 2 before evaluating the kernel.
				+FfxFloat32 Lanczos2(FfxFloat32 x)
				+{
				+    return Lanczos2NoClamp(ffxMin(abs(x), 2.0f));
				+}
			
 
				+
			
 
				+#if FFX_HALF
				+
				+// The FFX_MIN16 overload of Lanczos2NoClamp is compiled out.
				+#if 0
				+FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
				+{
				+    const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
				+    return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
				+}
				+#endif
				+
				+// FFX_MIN16 overload of Lanczos2(): limits |x| to 2, evaluates the kernel and
				+// converts the result back to FFX_MIN16_F.
				+// NOTE(review): with the overload above disabled, the Lanczos2NoClamp call presumably
				+// resolves to the FfxFloat32 version - confirm.
				+FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
				+{
				+    FFX_MIN16_F fLimited = ffxMin(abs(x), FFX_MIN16_F(2.0f));
				+    return FFX_MIN16_F(Lanczos2NoClamp(fLimited));
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+// FSR1 lanczos approximation. Input is x*x and must be <= 4.
				+// Evaluates ((25/16) * a^2 - (25/16 - 1)) * b^2 with a = (2/5) * x2 - 1 and b = (1/4) * x2 - 1.
				+FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
				+{
				+    FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
				+    FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;
				+    FfxFloat32 fBase = (25.0f / 16.0f) * fTermA * fTermA - (25.0f / 16.0f - 1);
				+    return fBase * (fTermB * fTermB);
				+}
				+
				+#if FFX_HALF
				+// FFX_MIN16 overload of the approximation above; same formula, same input contract.
				+FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
				+{
				+    FFX_MIN16_F fTermA = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
				+    FFX_MIN16_F fTermB = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);
				+    FFX_MIN16_F fBase = FFX_MIN16_F(25.0f / 16.0f) * fTermA * fTermA - FFX_MIN16_F(25.0f / 16.0f - 1);
				+    return fBase * (fTermB * fTermB);
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+// Approximate Lanczos-2 weight for a squared distance x2; x2 is limited to 4 first.
				+FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
				+{
				+    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
				+}
				+
				+#if FFX_HALF
				+// FFX_MIN16 overload: limits x2 to 4, then evaluates the approximation.
				+FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
				+{
				+    return Lanczos2ApproxSqNoClamp(ffxMin(x2, FFX_MIN16_F(4.0f)));
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+// Approximate Lanczos-2 weight for a signed distance x, without limiting the squared
				+// distance; see the input contract of Lanczos2ApproxSqNoClamp.
				+FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
				+{
				+    FfxFloat32 fSquared = x * x;
				+    return Lanczos2ApproxSqNoClamp(fSquared);
				+}
				+
				+#if FFX_HALF
				+// FFX_MIN16 overload: squares x and forwards it unclamped.
				+FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
				+{
				+    FFX_MIN16_F fSquared = x * x;
				+    return Lanczos2ApproxSqNoClamp(fSquared);
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+// Approximate Lanczos-2 weight for a signed distance x; the squared distance is
				+// clamped inside Lanczos2ApproxSq.
				+FfxFloat32 Lanczos2Approx(FfxFloat32 x)
				+{
				+    FfxFloat32 fSquared = x * x;
				+    return Lanczos2ApproxSq(fSquared);
				+}
				+
				+#if FFX_HALF
				+// FFX_MIN16 overload: squares x and forwards it to the clamped approximation.
				+FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
				+{
				+    FFX_MIN16_F fSquared = x * x;
				+    return Lanczos2ApproxSq(fSquared);
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+// Kernel weight for distance x obtained through SampleLanczos2Weight(), which is
				+// called with |x|.
				+FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
				+{
				+    FfxFloat32 fDistance = abs(x);
				+    return SampleLanczos2Weight(fDistance);
				+}
				+
				+#if FFX_HALF
				+// FFX_MIN16 overload: same lookup, result converted to FFX_MIN16_F.
				+FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
				+{
				+    FFX_MIN16_F fDistance = abs(x);
				+    return FFX_MIN16_F(SampleLanczos2Weight(fDistance));
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+// 4-tap 1D filter using Lanczos2_UseLUT() weights.
				+// fColor0..fColor3 are the taps at positions -1, 0, +1, +2 and t is the sample position
				+// relative to tap 1. The weighted sum is divided by the sum of the weights.
				+FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
				+{
				+    FfxFloat32 fW0 = Lanczos2_UseLUT(-1.f - t);
				+    FfxFloat32 fW1 = Lanczos2_UseLUT(-0.f - t);
				+    FfxFloat32 fW2 = Lanczos2_UseLUT(+1.f - t);
				+    FfxFloat32 fW3 = Lanczos2_UseLUT(+2.f - t);
				+
				+    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
				+    FfxFloat32 fWeightTotal = fW0 + fW1 + fW2 + fW3;
				+    return fWeightedSum / fWeightTotal;
				+}
				+#if FFX_HALF
				+// FFX_MIN16 overload of the 4-tap lookup-weighted filter above.
				+FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
				+{
				+    FFX_MIN16_F fW0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
				+    FFX_MIN16_F fW1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
				+    FFX_MIN16_F fW2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
				+    FFX_MIN16_F fW3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);
				+
				+    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
				+    FFX_MIN16_F fWeightTotal = fW0 + fW1 + fW2 + fW3;
				+    return fWeightedSum / fWeightTotal;
				+}
				+#endif
			
 
				+
			
 
				+// 4-tap 1D Lanczos-2 filter.
				+// fColor0..fColor3 are the taps at positions -1, 0, +1, +2 and t is the sample position
				+// relative to tap 1. The weighted sum is divided by the sum of the weights.
				+FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
				+{
				+    FfxFloat32 fW0 = Lanczos2(-1.f - t);
				+    FfxFloat32 fW1 = Lanczos2(-0.f - t);
				+    FfxFloat32 fW2 = Lanczos2(+1.f - t);
				+    FfxFloat32 fW3 = Lanczos2(+2.f - t);
				+
				+    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
				+    FfxFloat32 fWeightTotal = fW0 + fW1 + fW2 + fW3;
				+    return fWeightedSum / fWeightTotal;
				+}
			
 
				+
			
 
				+// Separable 2D Lanczos-2 filter over a 4x4 texel block, followed by deringing.
				+// fPxFrac is the fractional sample position relative to the base texel (fColor11).
				+FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
				+{
				+    // Filter each of the four rows along x.
				+    FfxFloat32x4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
				+    FfxFloat32x4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
				+    FfxFloat32x4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
				+    FfxFloat32x4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
				+    // Filter the four row results along y.
				+    FfxFloat32x4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
				+
				+    // Deringing: restrict the result to the component-wise range of the four central texels.
				+    // TODO: only use 4 by checking jitter
				+    FfxFloat32x4 fRangeMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+    FfxFloat32x4 fRangeMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+
				+    return clamp(fFiltered, fRangeMin, fRangeMax);
				+}
			
 
				+
			
 
				+#if FFX_HALF
				+// FFX_MIN16 overload of the 4-tap 1D Lanczos-2 filter: taps sit at -1, 0, +1, +2
				+// relative to t, and the weighted sum is divided by the sum of the weights.
				+FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
				+{
				+    FFX_MIN16_F fW0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
				+    FFX_MIN16_F fW1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
				+    FFX_MIN16_F fW2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
				+    FFX_MIN16_F fW3 = Lanczos2(FFX_MIN16_F(+2.f) - t);
				+
				+    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
				+    FFX_MIN16_F fWeightTotal = fW0 + fW1 + fW2 + fW3;
				+    return fWeightedSum / fWeightTotal;
				+}
				+
				+// FFX_MIN16 overload of the separable 2D Lanczos-2 filter with deringing.
				+FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
				+{
				+    // Filter each of the four rows along x.
				+    FFX_MIN16_F4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
				+    FFX_MIN16_F4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
				+    FFX_MIN16_F4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
				+    FFX_MIN16_F4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
				+    // Filter the four row results along y.
				+    FFX_MIN16_F4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
				+
				+    // Deringing: restrict the result to the component-wise range of the four central texels.
				+    // TODO: only use 4 by checking jitter
				+    FFX_MIN16_F4 fRangeMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+    FFX_MIN16_F4 fRangeMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+
				+    return clamp(fFiltered, fRangeMin, fRangeMax);
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+
			
 
				+// Separable 2D filter over a 4x4 texel block using Lanczos2_UseLUT() weights,
				+// followed by deringing. fPxFrac is the fractional position relative to fColor11.
				+FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
				+{
				+    // Filter each of the four rows along x.
				+    FfxFloat32x4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
				+    FfxFloat32x4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
				+    FfxFloat32x4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
				+    FfxFloat32x4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
				+    // Filter the four row results along y.
				+    FfxFloat32x4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
				+
				+    // Deringing: restrict the result to the component-wise range of the four central texels.
				+    // TODO: only use 4 by checking jitter
				+    FfxFloat32x4 fRangeMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+    FfxFloat32x4 fRangeMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+
				+    return clamp(fFiltered, fRangeMin, fRangeMax);
				+}
			
 
				+
			
 
				+#if FFX_HALF
				+// FFX_MIN16 overload of Lanczos2LUT(): separable lookup-weighted filter with deringing.
				+FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
				+{
				+    // Filter each of the four rows along x.
				+    FFX_MIN16_F4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
				+    FFX_MIN16_F4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
				+    FFX_MIN16_F4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
				+    FFX_MIN16_F4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
				+    // Filter the four row results along y.
				+    FFX_MIN16_F4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
				+
				+    // Deringing: restrict the result to the component-wise range of the four central texels.
				+    // TODO: only use 4 by checking jitter
				+    FFX_MIN16_F4 fRangeMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+    FFX_MIN16_F4 fRangeMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+
				+    return clamp(fFiltered, fRangeMin, fRangeMax);
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+
			
 
				+
			
 
				+// 4-tap 1D filter using the unclamped Lanczos-2 approximation for the weights.
				+// fColor0..fColor3 are the taps at positions -1, 0, +1, +2 and t is the sample position
				+// relative to tap 1. The weighted sum is divided by the sum of the weights.
				+FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
				+{
				+    FfxFloat32 fW0 = Lanczos2ApproxNoClamp(-1.f - t);
				+    FfxFloat32 fW1 = Lanczos2ApproxNoClamp(-0.f - t);
				+    FfxFloat32 fW2 = Lanczos2ApproxNoClamp(+1.f - t);
				+    FfxFloat32 fW3 = Lanczos2ApproxNoClamp(+2.f - t);
				+
				+    FfxFloat32x4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
				+    FfxFloat32 fWeightTotal = fW0 + fW1 + fW2 + fW3;
				+    return fWeightedSum / fWeightTotal;
				+}
				+
				+#if FFX_HALF
				+// FFX_MIN16 overload of the 4-tap approximate filter above.
				+FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
				+{
				+    FFX_MIN16_F fW0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
				+    FFX_MIN16_F fW1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
				+    FFX_MIN16_F fW2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
				+    FFX_MIN16_F fW3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);
				+
				+    FFX_MIN16_F4 fWeightedSum = fW0 * fColor0 + fW1 * fColor1 + fW2 * fColor2 + fW3 * fColor3;
				+    FFX_MIN16_F fWeightTotal = fW0 + fW1 + fW2 + fW3;
				+    return fWeightedSum / fWeightTotal;
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+// Separable 2D filter over a 4x4 texel block using the Lanczos-2 approximation,
				+// followed by deringing. fPxFrac is the fractional position relative to fColor11.
				+FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
				+{
				+    // Filter each of the four rows along x.
				+    FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
				+    FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
				+    FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
				+    FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
				+    // Filter the four row results along y.
				+    FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
				+
				+    // Deringing: restrict the result to the component-wise range of the four central texels.
				+    // TODO: only use 4 by checking jitter
				+    FfxFloat32x4 fRangeMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+    FfxFloat32x4 fRangeMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+
				+    return clamp(fFiltered, fRangeMin, fRangeMax);
				+}
			
 
				+
			
 
				+#if FFX_HALF
				+// FFX_MIN16 overload of the separable approximate Lanczos-2 filter with deringing.
				+FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
				+{
				+    // Filter each of the four rows along x.
				+    FFX_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
				+    FFX_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
				+    FFX_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
				+    FFX_MIN16_F4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
				+    // Filter the four row results along y.
				+    FFX_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
				+
				+    // Deringing: restrict the result to the component-wise range of the four central texels.
				+    // TODO: only use 4 by checking jitter
				+    FFX_MIN16_F4 fRangeMin = ffxMin(ffxMin(ffxMin(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+    FFX_MIN16_F4 fRangeMax = ffxMax(ffxMax(ffxMax(Samples.fColor11, Samples.fColor21), Samples.fColor12), Samples.fColor22);
				+
				+    return clamp(fFiltered, fRangeMin, fRangeMax);
				+}
				+#endif
			
 
				+
			
 
				+// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant.
				+// Only the side the offset moves towards is clamped: a negative offset is limited to 0,
				+// a positive offset to iTextureSize - 1. A zero offset returns the coordinate untouched.
				+FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
				+{
				+    FfxInt32x2 iCoord = iPxSample + iPxOffset;
				+
				+    if (iPxOffset.x < 0) {
				+        iCoord.x = ffxMax(iCoord.x, 0);
				+    } else if (iPxOffset.x > 0) {
				+        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - 1);
				+    }
				+
				+    if (iPxOffset.y < 0) {
				+        iCoord.y = ffxMax(iCoord.y, 0);
				+    } else if (iPxOffset.y > 0) {
				+        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - 1);
				+    }
				+
				+    return iCoord;
				+}
			
 
				+#if FFX_HALF
				+// FFX_MIN16 overload of ClampCoord(): clamps only on the side the offset moves towards
				+// (0 for a negative offset, iTextureSize - 1 for a positive one).
				+FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
				+{
				+    FFX_MIN16_I2 iCoord = iPxSample + iPxOffset;
				+
				+    if (iPxOffset.x < FFX_MIN16_I(0)) {
				+        iCoord.x = ffxMax(iCoord.x, FFX_MIN16_I(0));
				+    } else if (iPxOffset.x > FFX_MIN16_I(0)) {
				+        iCoord.x = ffxMin(iCoord.x, iTextureSize.x - FFX_MIN16_I(1));
				+    }
				+
				+    if (iPxOffset.y < FFX_MIN16_I(0)) {
				+        iCoord.y = ffxMax(iCoord.y, FFX_MIN16_I(0));
				+    } else if (iPxOffset.y > FFX_MIN16_I(0)) {
				+        iCoord.y = ffxMin(iCoord.y, iTextureSize.y - FFX_MIN16_I(1));
				+    }
				+
				+    return iCoord;
				+}
				+#endif //FFX_HALF
			
 
				+
			
 
				+
			
 
				+// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which loads the
				+// 4x4 texel block around iPxSample (offsets -1..+2 on both axes) through LoadTexture.
				+// Every coordinate goes through ClampCoord and every texel is converted to TextureType.
				+#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
				+    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
				+    { \
				+        SampleType Samples; \
				+        /* y offset -1 */ \
				+        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
				+        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
				+        Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
				+        Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
				+        /* y offset +0 */ \
				+        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
				+        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
				+        Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
				+        Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
				+        /* y offset +1 */ \
				+        Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
				+        Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
				+        Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
				+        Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
				+        /* y offset +2 */ \
				+        Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
				+        Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
				+        Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
				+        Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
				+        return Samples; \
				+    }
				+
				+// Full-precision instantiation: FetchedBicubicSamples of FfxFloat32x4, FfxInt32x2 addresses.
				+#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \
				+    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
				+
				+// Min16 instantiation: FetchedBicubicSamplesMin16 of FFX_MIN16_F4, still FfxInt32x2 addresses.
				+#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \
				+    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
			
 
				+
			
 
				+// Declares `SampleType Name(AddrType iPxSample, AddrType iTextureSize)`, which loads the
				+// 2x2 texel block starting at iPxSample (offsets +0..+1 on both axes) through LoadTexture.
				+// Every coordinate goes through ClampCoord and every texel is converted to TextureType.
				+#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \
				+    SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
				+    { \
				+        SampleType Samples; \
				+        /* y offset +0 */ \
				+        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
				+        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
				+        /* y offset +1 */ \
				+        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
				+        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
				+        return Samples; \
				+    }
				+
				+// Full-precision instantiation: FetchedBilinearSamples of FfxFloat32x4, FfxInt32x2 addresses.
				+#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \
				+    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
				+
				+// Min16 instantiation: FetchedBilinearSamplesMin16 of FFX_MIN16_F4, still FfxInt32x2 addresses.
				+#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \
				+    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
			
 
				+
			
 
				+// Declares `FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
				+// The generated function scales fUvSample by iTextureSize, subtracts half a texel,
				+// splits the result into a base texel and a fraction, fetches the neighbourhood with
				+// FetchSamples and blends it with InterpolateSamples.
				+//
				+// BE CAREFUL: there are some precision issues and (3253, 125) leading to (3252.9989778, 125.001102)
				+// is common, so iPxSample can "jitter"
				+//
				+// The float clamp allows fPxSample to reach iTextureSize itself, so the integer base
				+// texel is clamped again after floor(). Without that, a UV beyond the last texel centre
				+// by at least half a texel gave iPxSample == iTextureSize, breaking the "already in range"
				+// precondition of ClampCoord and loading outside the texture for offsets <= 0.
				+// Coordinates that were already in range produce exactly the same result as before.
				+#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                           \
				+    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
				+    {                                                                                                                \
				+        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f);                \
				+        /* Clamp base coords */                                                                                      \
				+        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x));                                 \
				+        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y));                                 \
				+        /* */                                                                                                        \
				+        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                         \
				+        /* Keep the base texel inside [0, iTextureSize - 1] as ClampCoord requires */                                \
				+        iPxSample.x = ffxMax(ffxMin(iPxSample.x, iTextureSize.x - 1), 0);                                            \
				+        iPxSample.y = ffxMax(ffxMin(iPxSample.y, iTextureSize.y - 1), 0);                                            \
				+        FfxFloat32x2 fPxFrac = ffxFract(fPxSample);                                                                  \
				+        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
				+        return fColorXY;                                                                                             \
				+    }
			
 
				+
			
 
				+// Declares `FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)`.
				+// Coordinates are computed in full precision; only the fraction and the blended colour
				+// are converted to the FFX_MIN16 types.
				+//
				+// The float clamp allows fPxSample to reach iTextureSize itself, so the integer base
				+// texel is clamped again after floor(). Without that, a UV beyond the last texel centre
				+// by at least half a texel gave iPxSample == iTextureSize, breaking the "already in range"
				+// precondition of ClampCoord and loading outside the texture for offsets <= 0.
				+// Coordinates that were already in range produce exactly the same result as before.
				+#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                      \
				+    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
				+    {                                                                                                                \
				+        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f);                \
				+        /* Clamp base coords */                                                                                      \
				+        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x));                                 \
				+        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y));                                 \
				+        /* */                                                                                                        \
				+        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                         \
				+        /* Keep the base texel inside [0, iTextureSize - 1] as ClampCoord requires */                                \
				+        iPxSample.x = ffxMax(ffxMin(iPxSample.x, iTextureSize.x - 1), 0);                                            \
				+        iPxSample.y = ffxMax(ffxMin(iPxSample.y, iTextureSize.y - 1), 0);                                            \
				+        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample));                                                    \
				+        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
				+        return fColorXY;                                                                                             \
				+    }
			
 
				+
			
 
				+// Token pasting goes through two macros so that the arguments of FFX_FSR2_CONCAT are
			
 
				+// macro-expanded before `##` is applied in FFX_FSR2_CONCAT_ID.
			
 
				+#define FFX_FSR2_CONCAT_ID(x, y) x ## y
			
 
				+#define FFX_FSR2_CONCAT(x, y) FFX_FSR2_CONCAT_ID(x, y)
			
 
				+// Sampler selection table: index 0, 1, 2 -> Lanczos2, Lanczos2LUT, Lanczos2Approx.
			
 
				+#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
			
 
				+#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
			
 
				+#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx
			
 
				+
			
 
				+// Expands to the sampler function name for index x by pasting x onto FFX_FSR2_SAMPLER_1D_;
			
 
				+// x must expand to 0, 1 or 2 for one of the names above to match.
			
 
				+#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x)
			
 
				+
			
 
				+#endif //!defined( FFX_FSR2_SAMPLE_H )
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
@@ -0,0 +1,250 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+#define USE_YCOCG 1
			
 
				+
			
 
				+#define fAutogenEpsilon 0.01f
			
 
				+
			
 
				+// EXPERIMENTAL
			
 
				+
			
 
				+// Binary estimate of the transparency-and-composition value for one pixel.
				+//
				+// uDispatchThreadId: pixel in the current frame (used for LoadOpaqueOnly / LoadInputColor).
				+// iPrevIdx:          pixel in the previous frame (used for LoadPrevPreAlpha / LoadPrevPostAlpha).
				+//
				+// For both frames the per-channel absolute difference between the post-alpha and the
				+// pre-alpha colour is taken. The absolute change of that difference between the two
				+// frames is summed over the channels, saturated, and compared with getTcThreshold().
				+// Returns 0 when the saturated sum is below the threshold and 1 otherwise.
				+FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
				+{
				+    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
				+    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
				+    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
				+    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
				+
				+#if USE_YCOCG
				+    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
				+    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
				+    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
				+    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
				+#endif
				+
				+    // Per-channel magnitude of (post-alpha - pre-alpha) in the current and the previous frame.
				+    FfxFloat32x3 alphaDeltaCurr = abs(colorPostAlpha - colorPreAlpha);
				+    FfxFloat32x3 alphaDeltaPrev = abs(colorPrevPostAlpha - colorPrevPreAlpha);
				+
				+    // dot() with (1, 1, 1) sums the three channels.
				+    FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(alphaDeltaCurr - alphaDeltaPrev), FfxFloat32x3(1, 1, 1))));
				+
				+    // cleanup very small values
				+    retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);
				+
				+    return retVal;
				+}
			
 
				+
			
 
				+// works ok: thin edges
			
 
				+// Continuous estimate of the transparency-and-composition value for one pixel.
				+//
				+// uDispatchThreadId: pixel in the current frame (used for LoadOpaqueOnly / LoadInputColor).
				+// iPrevIdx:          pixel in the previous frame (used for LoadPrevPreAlpha / LoadPrevPostAlpha).
				+//
				+// Returns a value in [0, 1] (the result is passed through ffxSaturate). The value is 0
				+// when neither frame shows a post-alpha/pre-alpha difference above fAutogenEpsilon in
				+// any channel.
				+FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
				+{
				+    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
				+    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
				+    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
				+    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
				+
				+#if USE_YCOCG
				+    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
				+    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
				+    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
				+    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
				+#endif
				+
				+    FfxFloat32x3 fEpsilon3 = FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon);
				+
				+    // A frame "has alpha" when any channel differs between post- and pre-alpha by more than epsilon.
				+    FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha;
				+    FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha;
				+    bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), fEpsilon3));
				+    bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), fEpsilon3));
				+
				+    // Frame-to-frame change of the pre-alpha colour (N) and of the post-alpha colour (NminusNA).
				+    FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha;
				+    FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha;
				+
				+    // The divisor is clamped from below to epsilon per channel, so it is never zero.
				+    FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(fEpsilon3, N) : FfxFloat32x3(0, 0, 0);
				+
				+    FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) );
				+
				+    // weight by the size of the post-alpha colour change between the two frames
				+    retVal = ffxSaturate(retVal * FFX_MIN16_F(length(NminusNA)) );
				+
				+    return retVal;
				+}
			
 
				+
			
 
				+// This function computes the TransparencyAndComposition mask:
			
 
				+// This mask indicates pixels that should discard locks and apply color clamping.
			
 
				+// 
			
 
				+// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of 
			
 
				+// the MVs can not be guaranteed (e.g. procedural movement or vegetation that does not have MVs to reduce the cost during rasterization)
			
 
				+// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting.
			
 
				+//
			
 
				+// This function takes an opaque-only and a final texture and uses internal copies of those textures from the last frame.
			
 
				+// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency.
			
 
				+// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels.
			
 
				+// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation.
			
 
				+// 
			
 
				+// In the final step it stores the current textures in internal textures for the next frame
			
 
				+
			
 
				+// Two-stage evaluation: ComputeAutoTC_02 runs first, and only when its result exceeds
				+// 0.01 is the result of ComputeAutoTC_01 returned instead.
				+FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
				+{
				+    FFX_MIN16_F fFirstStage = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx);
				+
				+    // [branch]
				+    if (fFirstStage > FFX_MIN16_F(0.01f))
				+    {
				+        return ComputeAutoTC_01(uDispatchThreadId, iPrevIdx);
				+    }
				+
				+    return fFirstStage;
				+}
			
 
				+
			
 
				+// Edge measure on the opaque-only image.
				+//
				+// For each tap of the 3x3 window the length of (LoadOpaqueOnly(curPos + tap) -
				+// LoadPrevPreAlpha(prevPos + tap)) is stored row by row. The result combines how much
				+// the left/right and the top/bottom neighbours differ from the centre tap.
				+float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
				+{
				+    float frameDiff[9];
				+    for (int row = 0; row < 3; ++row)
				+    {
				+        for (int col = 0; col < 3; ++col)
				+        {
				+            FFX_MIN16_I2 tapOffset = FFX_MIN16_I2(col - 1, row - 1);
				+            FfxFloat32x3 curCol  = LoadOpaqueOnly(curPos + tapOffset).rgb;
				+            FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + tapOffset).rgb;
				+            frameDiff[row * 3 + col] = length(curCol - prevCol);
				+        }
				+    }
				+
				+    // Indices in the row-major window: 4 = centre, 3/5 = left/right, 1/7 = top/bottom.
				+    float center = frameDiff[4];
				+
				+    // Disabled alternative: add the two differences per axis and return sqrt(gradX * gradX + gradY * gradY).
				+    float gradX = abs(frameDiff[3] - center) * abs(frameDiff[5] - center);
				+    float gradY = abs(frameDiff[1] - center) * abs(frameDiff[7] - center);
				+
				+    return sqrt(sqrt(gradX * gradY));
				+}
			
 
				+
			
 
				+// Edge measure on the alpha contribution.
				+//
				+// For each tap of the 3x3 window the per-channel |LoadInputColor - LoadOpaqueOnly| at
				+// curPos + tap is compared with |LoadPrevPostAlpha - LoadPrevPreAlpha| at prevPos + tap,
				+// and the length of the difference is stored row by row. The result combines how much
				+// the left/right and the top/bottom neighbours differ from the centre tap.
				+float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
				+{
				+    float frameDiff[9];
				+    for (int row = 0; row < 3; ++row)
				+    {
				+        for (int col = 0; col < 3; ++col)
				+        {
				+            FFX_MIN16_I2 tapOffset = FFX_MIN16_I2(col - 1, row - 1);
				+            FfxFloat32x3 curCol  = abs(LoadInputColor(curPos + tapOffset).rgb - LoadOpaqueOnly(curPos + tapOffset).rgb);
				+            FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + tapOffset).rgb - LoadPrevPreAlpha(prevPos + tapOffset).rgb);
				+            frameDiff[row * 3 + col] = length(curCol - prevCol);
				+        }
				+    }
				+
				+    // Indices in the row-major window: 4 = centre, 3/5 = left/right, 1/7 = top/bottom.
				+    float center = frameDiff[4];
				+
				+    // Disabled alternative: add the two differences per axis and return sqrt(gradX * gradX + gradY * gradY).
				+    float gradX = abs(frameDiff[3] - center) * abs(frameDiff[5] - center);
				+    float gradY = abs(frameDiff[1] - center) * abs(frameDiff[7] - center);
				+
				+    return sqrt(sqrt(gradX * gradY));
				+}
			
 
				+
			
 
				+// Fraction of current-frame colour samples that fall outside the previous frame's colour box.
				+//
				+// uDispatchThreadId: centre of the 3x3 window read with LoadInputColor.
				+// iPrevIdx:          centre of the 3x3 window read with LoadPrevPostAlpha.
				+//
				+// The per-channel min/max over the previous-frame window forms a bounding box. Every
				+// channel of every current-frame sample lying outside that box adds one to a counter.
				+// Returns counter / 27 (9 samples x 3 channels), i.e. a value in [0, 1].
				+FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
				+{
				+    // Start with an inverted box so the first sample initialises both bounds.
				+    FfxFloat32x3 minPrev = FfxFloat32x3(+1000.f, +1000.f, +1000.f);
				+    FfxFloat32x3 maxPrev = FfxFloat32x3(-1000.f, -1000.f, -1000.f);
				+    for (int y = -1; y < 2; ++y)
				+    {
				+        for (int x = -1; x < 2; ++x)
				+        {
				+            FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y));
				+
				+#if USE_YCOCG
				+            W = RGBToYCoCg(W);
				+#endif
				+            minPrev = min(minPrev, W);
				+            maxPrev = max(maxPrev, W);
				+        }
				+    }
				+
				+    // instead of computing the overlap: simply count how many samples are outside
				+    // set reactive based on that
				+    FFX_MIN16_F count = FFX_MIN16_F(0.f);
				+    for (int y = -1; y < 2; ++y)
				+    {
				+        for (int x = -1; x < 2; ++x)
				+        {
				+            FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y));
				+
				+#if USE_YCOCG
				+            Y = RGBToYCoCg(Y);
				+#endif
				+            count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
				+            count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
				+            count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
				+        }
				+    }
				+
				+    return count / FFX_MIN16_F(27.f);
				+}
			
 
				+
			
 
				+
			
 
				+// This function computes the Reactive mask:
			
 
				+// We want pixels marked where the alpha portion of the frame changes a lot between neighbours
			
 
				+// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...)
			
 
				+// As a result history would not be trustworthy.
			
 
				+// On the other hand we don't want pixels marked where pre-alpha has a large difference, since those would profit from accumulation
			
 
				+// For mirrors we may assume the pre-alpha is pretty uniform color.
			
 
				+// 
			
 
				+// This works well generally, but also marks edge pixels
			
 
				+// Returns saturate(computeAlphaEdge - computeSolidEdge) for the given pixel pair:
				+// the alpha edge measure minus the opaque edge measure, clamped by ffxSaturate.
				+FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
				+{
				+    // we only get here if alpha has a significant contribution and has changed since last frame.
				+
				+    // mark pixels with huge variance in alpha as reactive
				+    FFX_MIN16_F fAlphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx));
				+    FFX_MIN16_F fSolidEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx));
				+
				+    // the above also marks edge pixels due to jitter, so we need to cancel those out
				+    // (no such cancellation is performed in this function)
				+    return ffxSaturate(fAlphaEdge - fSolidEdge);
				+}
			
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
@@ -0,0 +1,122 @@
 
				+// This file is part of the FidelityFX SDK.
			
 
				+//
			
 
				+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
			
 
				+//
			
 
				+// Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+// of this software and associated documentation files (the "Software"), to deal
			
 
				+// in the Software without restriction, including without limitation the rights
			
 
				+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+// copies of the Software, and to permit persons to whom the Software is
			
 
				+// furnished to do so, subject to the following conditions:
			
 
				+// The above copyright notice and this permission notice shall be included in
			
 
				+// all copies or substantial portions of the Software.
			
 
				+//
			
 
				+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+// THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+
			
 
				+#extension GL_GOOGLE_include_directive : require
			
 
				+#extension GL_EXT_samplerless_texture_functions : require
			
 
				+
			
 
				+// Binding slots: read-only inputs (SRV).
				+#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY                     0
				+#define FSR2_BIND_SRV_INPUT_COLOR                           1
				+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS                  2
				+#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR                  3
				+#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR                 4
				+#define FSR2_BIND_SRV_REACTIVE_MASK                         5
				+#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK     6
				+
				+// Binding slots: writable outputs (UAV).
				+#define FSR2_BIND_UAV_AUTOREACTIVE                          7
				+#define FSR2_BIND_UAV_AUTOCOMPOSITION                       8
				+#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR                  9
				+#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR                 10
				+
				+// Binding slots: constant buffers.
				+#define FSR2_BIND_CB_FSR2                                   11
				+#define FSR2_BIND_CB_REACTIVE                               12
				+
				+// -- GODOT start --
				+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
				+#define FSR2_BIND_SRV_INPUT_DEPTH                           13
				+#endif
				+// -- GODOT end --
			
 
				+
			
 
				+#include "ffx_fsr2_callbacks_glsl.h"
			
 
				+#include "ffx_fsr2_common.h"
			
 
				+
			
 
				+// Parameters of the mask-generation pass. When the constant buffer is bound,
				+// getTcThreshold() reads the threshold from it; otherwise it returns a fixed 0.05.
				+#ifdef FSR2_BIND_CB_REACTIVE
				+layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
				+{
				+    float fTcThreshold;   // 0.1 is a good starting value, lower will result in more TC pixels
				+    float fTcScale;
				+    float fReactiveScale;
				+    float fReactiveMax;
				+} cbGenerateReactive;
				+
				+float getTcThreshold()
				+{
				+    return cbGenerateReactive.fTcThreshold;
				+}
				+
				+#else
				+
				+float getTcThreshold()
				+{
				+    return 0.05f;
				+}
				+
				+#endif
			
 
				+
			
 
				+#include "ffx_fsr2_tcr_autogen.h"
			
 
				+
			
 
				+// Compute work-group size. Each value is only defined here when it has not been
				+// defined earlier, so the defaults are 8 x 8 x 1.
				+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
				+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
				+#endif // FFX_FSR2_THREAD_GROUP_WIDTH
				+
				+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
				+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
				+#endif // FFX_FSR2_THREAD_GROUP_HEIGHT
				+
				+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
				+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
				+#endif // FFX_FSR2_THREAD_GROUP_DEPTH
				+
				+// Layout qualifier statement built from the three dimensions above.
				+#ifndef FFX_FSR2_NUM_THREADS
				+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
				+#endif // FFX_FSR2_NUM_THREADS
			
 
				+
			
 
				+FFX_FSR2_NUM_THREADS
			
 
				+// Entry point: one invocation per pixel (gl_GlobalInvocationID.xy).
				+//
				+// Steps:
				+//   1. Locate the pixel's previous-frame position by adding the input motion vector to its UV.
				+//   2. Compute the transparency-and-composition value; when it exceeds 0.5 also compute the
				+//      reactive value, scale it by fReactiveScale and cap it at fReactiveMax.
				+//   3. Scale the T&C value by fTcScale, then take the maximum of each value and the
				+//      corresponding loaded mask (LoadReactiveMask / LoadTransparencyAndCompositionMask).
				+//   4. Store the (reactive, T&C) pair, then store this frame's pre- and post-alpha colours
				+//      through StorePrevPreAlpha / StorePrevPostAlpha.
				+void main()
				+{
				+    FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(gl_GlobalInvocationID.xy);
				+    FFX_MIN16_F2 fRenderSize = FFX_MIN16_F2(RenderSize());
				+
				+    // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV)
				+    FFX_MIN16_F2 fCurrUv = (FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f)) / fRenderSize;
				+    FFX_MIN16_F2 fReprojectedUv = fCurrUv + FFX_MIN16_F2(LoadInputMotionVector(uDispatchThreadId));
				+    FFX_MIN16_I2 iPrevPxPos = FFX_MIN16_I2(fReprojectedUv * fRenderSize - 0.5f);
				+
				+    // fetch pre- and post-alpha color values
				+    FFX_MIN16_F3 fOpaqueColor = FFX_MIN16_F3(LoadOpaqueOnly(uDispatchThreadId));
				+    FFX_MIN16_F3 fFinalColor  = FFX_MIN16_F3(LoadInputColor(uDispatchThreadId));
				+
				+    FFX_MIN16_F fReactive = FFX_MIN16_F(0.f);
				+    FFX_MIN16_F fTc = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevPxPos);
				+
				+    if (fTc > 0.5f)
				+    {
				+        fReactive = ComputeReactive(uDispatchThreadId, iPrevPxPos);
				+        fReactive *= FFX_MIN16_F(cbGenerateReactive.fReactiveScale);
				+        // cap at fReactiveMax
				+        fReactive = fReactive < cbGenerateReactive.fReactiveMax ? fReactive : FFX_MIN16_F(cbGenerateReactive.fReactiveMax);
				+    }
				+
				+    fTc *= FFX_MIN16_F(cbGenerateReactive.fTcScale);
				+
				+    // never go below the loaded mask values
				+    fReactive = ffxMax(fReactive, FFX_MIN16_F(LoadReactiveMask(uDispatchThreadId)));
				+    fTc = ffxMax(fTc, FFX_MIN16_F(LoadTransparencyAndCompositionMask(uDispatchThreadId)));
				+
				+    StoreAutoReactive(uDispatchThreadId, FFX_MIN16_F2(fReactive, fTc));
				+
				+    StorePrevPreAlpha(uDispatchThreadId, fOpaqueColor);
				+    StorePrevPostAlpha(uDispatchThreadId, fFinalColor);
				+}
		`@@ -0,0 +1 @@`
		`+// This file doesn't exist in this version of FSR.`