1 year ago · 3d1220c81a
--- a/AnKi/Renderer/Dbg.cpp
+++ b/AnKi/Renderer/Dbg.cpp
@@ -222,6 +222,7 @@ void Dbg::run(RenderPassWorkContext& rgraphCtx, const RenderingContext& ctx)
 
															 	// Restore state
														
 
															 	cmdb.setDepthCompareOperation(CompareOperation::kLess);
														
 
															+	cmdb.setDepthWrite(true);
														
 
															 }
														
 
															 void Dbg::populateRenderGraph(RenderingContext& ctx)
														
--- a/AnKi/Shaders/ApplyIrradianceToReflection.ankiprog
+++ b/AnKi/Shaders/ApplyIrradianceToReflection.ankiprog
@@ -10,8 +10,8 @@
 
															 SamplerState g_nearestAnyClampSampler : register(s0);
														
 
															 TextureCube<Vec4> g_gbufferTex[3u] : register(t0);
														
 
															-StructuredBuffer<RVec4> g_irradianceDice : register(t3);
														
 
															-RWTexture2D<RVec4> g_cubeTex[6u] : register(u0); // RWTexture2D because there is no RWTextureCube
														
 
															+StructuredBuffer<Vec4> g_irradianceDice : register(t3);
														
 
															+RWTexture2D<Vec4> g_cubeTex[6u] : register(u0); // RWTexture2D because there is no RWTextureCube
														
 
															 [numthreads(8, 8, 6)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID, UVec3 svGroupThreadId : SV_GROUPTHREADID)
														
 
															 {
														
@@ -27,7 +27,7 @@ RWTexture2D<RVec4> g_cubeTex[6u] : register(u0); // RWTexture2D because there is
 
															 	const Vec3 sampleUv = getCubemapDirection(uv, faceIdx);
														
 
															 	// Read the gbuffer
														
 
															-	GbufferInfo gbuffer = (GbufferInfo)0;
														
 
															+	GbufferInfo<F32> gbuffer = (GbufferInfo<F32>)0;
														
 
															 	unpackGBufferNoVelocity(g_gbufferTex[0u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0),
														
 
															 							g_gbufferTex[1u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0),
														
 
															 							g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), gbuffer);
														
--- a/AnKi/Shaders/Common.hlsl
+++ b/AnKi/Shaders/Common.hlsl
@@ -13,6 +13,79 @@
 
															 #	include <AnKi/Shaders/Include/Common.h>
														
 
															 #endif
														
 
															+// Common constants
														
 
															+constexpr F32 kEpsilonF32 = 0.000001f;
														
 
															+#if ANKI_SUPPORTS_16BIT_TYPES
														
 
															+constexpr F16 kEpsilonF16 = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
														
 
															+#endif
														
 
															+constexpr RF32 kEpsilonRF32 = 0.0001f;
														
 
															+
														
 
															+template<typename T>
														
 
															+T getEpsilon();
														
 
															+
														
 
															+template<>
														
 
															+F32 getEpsilon()
														
 
															+{
														
 
															+	return kEpsilonF32;
														
 
															+}
														
 
															+
														
 
															+#if ANKI_SUPPORTS_16BIT_TYPES
														
 
															+template<>
														
 
															+F16 getEpsilon()
														
 
															+{
														
 
															+	return kEpsilonF16;
														
 
															+}
														
 
															+#endif
														
 
															+
														
 
															+#if !ANKI_FORCE_FULL_FP_PRECISION
														
 
															+template<>
														
 
															+RF32 getEpsilon()
														
 
															+{
														
 
															+	return kEpsilonRF32;
														
 
															+}
														
 
															+#endif
														
 
															+
														
 
															+constexpr U32 kMaxU32 = 0xFFFFFFFFu;
														
 
															+constexpr F32 kMaxF32 = 3.402823e+38;
														
 
															+constexpr RF32 kMaxRF32 = 65504.0f; // Max half float value according to wikipedia
														
 
															+#if ANKI_SUPPORTS_16BIT_TYPES
														
 
															+constexpr F16 kMaxF16 = (F16)65504.0;
														
 
															+#endif
														
 
															+
														
 
															+template<typename T>
														
 
															+T getMaxNumericLimit();
														
 
															+
														
 
															+template<>
														
 
															+F32 getMaxNumericLimit()
														
 
															+{
														
 
															+	return kMaxF32;
														
 
															+}
														
 
															+
														
 
															+#if !ANKI_FORCE_FULL_FP_PRECISION
														
 
															+template<>
														
 
															+RF32 getMaxNumericLimit()
														
 
															+{
														
 
															+	return kMaxRF32;
														
 
															+}
														
 
															+#endif
														
 
															+
														
 
															+#if ANKI_SUPPORTS_16BIT_TYPES
														
 
															+template<>
														
 
															+F16 getMaxNumericLimit()
														
 
															+{
														
 
															+	return kMaxF16;
														
 
															+}
														
 
															+#endif
														
 
															+
														
 
															+template<>
														
 
															+U32 getMaxNumericLimit()
														
 
															+{
														
 
															+	return kMaxU32;
														
 
															+}
														
 
															+
														
 
															+constexpr F32 kPi = 3.14159265358979323846f;
														
 
															+constexpr F32 kNaN = 0.0f / 0.0f;
														
 
															+
														
 
															 #if ANKI_GR_BACKEND_VULKAN
														
 
															 #	define ANKI_FAST_CONSTANTS(type, var) [[vk::push_constant]] ConstantBuffer<type> var;
														
 
															 #else
														
--- a/AnKi/Shaders/GBufferGeneric.ankiprog
+++ b/AnKi/Shaders/GBufferGeneric.ankiprog
@@ -170,14 +170,6 @@ struct MeshPerPrimitiveOut
 
															 	ANKI_PER_PRIMITIVE_MEMBER Bool m_cullPrimitive : SV_CULLPRIMITIVE;
														
 
															 };
														
 
															-struct PixelOut
														
 
															-{
														
 
															-	Vec4 m_color0 : SV_TARGET0;
														
 
															-	Vec4 m_color1 : SV_TARGET1;
														
 
															-	Vec4 m_color2 : SV_TARGET2;
														
 
															-	Vec2 m_color3 : SV_TARGET3;
														
 
															-};
														
 
															-
														
 
															 struct Mat3x4_2
														
 
															 {
														
 
															 	Mat3x4 m_a;
														
@@ -500,7 +492,7 @@ void main(
 
															 #	else // GBUFFER
														
 
															-PixelOut main(
														
 
															+GBufferPixelOut main(
														
 
															 #		if ANKI_TECHNIQUE_GBufferMeshShaders
														
 
															 	MeshPerVertOut vertInput, ANKI_PER_PRIMITIVE_VAR MeshPerPrimitiveOut primInput
														
 
															 #		else
														
@@ -576,7 +568,7 @@ PixelOut main(
 
															 	const Vec2 velocity = Vec2(1.0, 1.0);
														
 
															 #		endif
														
 
															-	GbufferInfo g;
														
 
															+	GbufferInfo<RF32> g;
														
 
															 	g.m_diffuse = diffColor;
														
 
															 	g.m_normal = normal;
														
 
															 	g.m_f0 = specColor;
														
@@ -615,7 +607,7 @@ PixelOut main(
 
															 	}
														
 
															 #		endif
														
 
															-	PixelOut output;
														
 
															+	GBufferPixelOut output;
														
 
															 	packGBuffer(g, output.m_color0, output.m_color1, output.m_color2, output.m_color3);
														
 
															 	return output;
														
 
															 }
														
--- a/AnKi/Shaders/GBufferGpuParticles.ankiprog
+++ b/AnKi/Shaders/GBufferGpuParticles.ankiprog
@@ -30,14 +30,6 @@ struct VertOut
 
															 	Vec4 m_svPosition : SV_POSITION;
														
 
															 };
														
 
															-struct PixelOut
														
 
															-{
														
 
															-	Vec4 m_color0 : SV_TARGET0;
														
 
															-	Vec4 m_color1 : SV_TARGET1;
														
 
															-	Vec4 m_color2 : SV_TARGET2;
														
 
															-	Vec2 m_color3 : SV_TARGET3;
														
 
															-};
														
 
															-
														
 
															 #if ANKI_VERTEX_SHADER
														
 
															 VertOut main(VertIn input)
														
 
															 {
														
@@ -82,12 +74,12 @@ VertOut main(VertIn input)
 
															 #if ANKI_PIXEL_SHADER
														
 
															 #	include <AnKi/Shaders/PackFunctions.hlsl>
														
 
															-PixelOut main(VertOut input)
														
 
															+GBufferPixelOut main(VertOut input)
														
 
															 {
														
 
															-	PixelOut output;
														
 
															+	GBufferPixelOut output;
														
 
															 	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, input.m_constantsOffset);
														
 
															-	GbufferInfo g;
														
 
															+	GbufferInfo<RF32> g;
														
 
															 	g.m_diffuse = localConstants.m_diffColor;
														
 
															 	const Mat3x4 camTrf = g_globalConstants.m_cameraTransform;
														
--- a/AnKi/Shaders/GBufferVisualizeProbe.ankiprog
+++ b/AnKi/Shaders/GBufferVisualizeProbe.ankiprog
@@ -168,7 +168,7 @@ PixelOut main(VertOut input)
 
															 	output.m_svDepth = p.z / p.w;
														
 
															 	// Set the GBuffer
														
 
															-	GbufferInfo g;
														
 
															+	GbufferInfo<F32> g;
														
 
															 	g.m_diffuse = (PROBE_TYPE == 0) ? 0.5 : 1.0;
														
 
															 	g.m_normal = normalize(collisionPoint - input.m_sphereCenter);
														
 
															 	g.m_f0 = 0.04;
														
--- a/AnKi/Shaders/Include/Common.h
+++ b/AnKi/Shaders/Include/Common.h
@@ -409,331 +409,6 @@ typedef min16float3 RVec3;
 
															 typedef min16float4 RVec4;
														
 
															 _ANKI_MAT3(RMat3, RVec3, RF32)
														
 
															 #	endif
														
 
															-
														
 
															-// Common constants
														
 
															-constexpr F32 kEpsilonF32 = 0.000001f;
														
 
															-#	if ANKI_SUPPORTS_16BIT_TYPES
														
 
															-constexpr F16 kEpsilonhF16 = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
														
 
															-#	endif
														
 
															-constexpr RF32 kEpsilonRF32 = 0.0001f;
														
 
															-
														
 
															-constexpr RF32 kMaxRF32 = 65504.0f; // Max half float value according to wikipedia
														
 
															-constexpr U32 kMaxU32 = 0xFFFFFFFFu;
														
 
															-constexpr F32 kMaxF32 = 3.402823e+38;
														
 
															-#	if ANKI_SUPPORTS_16BIT_TYPES
														
 
															-constexpr F16 kMaxF16 = (F16)65504.0;
														
 
															-constexpr F16 kMinF16 = (F16)0.00006104;
														
 
															-#	endif
														
 
															-
														
 
															-constexpr F32 kPi = 3.14159265358979323846f;
														
 
															-constexpr F32 kNaN = 0.0f / 0.0f;
														
 
															-
														
 
															-//! == GLSL ============================================================================================================
														
 
															-#else
														
 
															-#	define ANKI_HLSL 0
														
 
															-#	define ANKI_GLSL 1
														
 
															-#	define ANKI_CPP 0
														
 
															-
														
 
															-#	define ANKI_BEGIN_NAMESPACE
														
 
															-#	define ANKI_END_NAMESPACE
														
 
															-#	define inline
														
 
															-
														
 
															-#	define ANKI_SHADER_STATIC_ASSERT(cond_)
														
 
															-
														
 
															-#	define ScalarVec4 Vec4
														
 
															-#	define ScalarMat3x4 Mat3x4
														
 
															-#	define ScalarMat4 Mat4
														
 
															-
														
 
															-#	define constexpr const
														
 
															-
														
 
															-#	define ANKI_SUPPORTS_64BIT_TYPES !ANKI_PLATFORM_MOBILE
														
 
															-
														
 
															-#	extension GL_EXT_control_flow_attributes : require
														
 
															-#	extension GL_KHR_shader_subgroup_vote : require
														
 
															-#	extension GL_KHR_shader_subgroup_ballot : require
														
 
															-#	extension GL_KHR_shader_subgroup_shuffle : require
														
 
															-#	extension GL_KHR_shader_subgroup_arithmetic : require
														
 
															-
														
 
															-#	extension GL_EXT_samplerless_texture_functions : require
														
 
															-#	extension GL_EXT_shader_image_load_formatted : require
														
 
															-#	extension GL_EXT_nonuniform_qualifier : enable
														
 
															-
														
 
															-#	extension GL_EXT_buffer_reference : enable
														
 
															-#	extension GL_EXT_buffer_reference2 : enable
														
 
															-
														
 
															-#	extension GL_EXT_shader_explicit_arithmetic_types : enable
														
 
															-#	extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
														
 
															-#	extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
														
 
															-#	extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
														
 
															-#	extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
														
 
															-#	extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
														
 
															-
														
 
															-#	if ANKI_SUPPORTS_64BIT_TYPES
														
 
															-#		extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
														
 
															-#		extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
														
 
															-#		extension GL_EXT_shader_atomic_int64 : enable
														
 
															-#		extension GL_EXT_shader_subgroup_extended_types_int64 : enable
														
 
															-#	endif
														
 
															-
														
 
															-#	extension GL_EXT_nonuniform_qualifier : enable
														
 
															-#	extension GL_EXT_scalar_block_layout : enable
														
 
															-
														
 
															-#	if defined(ANKI_RAY_GEN_SHADER) || defined(ANKI_ANY_HIT_SHADER) || defined(ANKI_CLOSEST_HIT_SHADER) || defined(ANKI_MISS_SHADER) \
														
 
															-		|| defined(ANKI_INTERSECTION_SHADER) || defined(ANKI_CALLABLE_SHADER)
														
 
															-#		extension GL_EXT_ray_tracing : enable
														
 
															-#	endif
														
 
															-
														
 
															-#	define unroll [unroll]
														
 
															-#	define branch [branch]
														
 
															-
														
 
															-#	define F32 float
														
 
															-const uint kSizeof_float = 4u;
														
 
															-#	define Vec2 vec2
														
 
															-const uint kSizeof_vec2 = 8u;
														
 
															-#	define Vec3 vec3
														
 
															-const uint kSizeof_vec3 = 12u;
														
 
															-#	define Vec4 vec4
														
 
															-const uint kSizeof_vec4 = 16u;
														
 
															-
														
 
															-#	define F16 float16_t
														
 
															-const uint kSizeof_float16_t = 2u;
														
 
															-#	define HVec2 f16vec2
														
 
															-const uint kSizeof_f16vec2 = 4u;
														
 
															-#	define HVec3 f16vec3
														
 
															-const uint kSizeof_f16vec3 = 6u;
														
 
															-#	define HVec4 f16vec4
														
 
															-const uint kSizeof_f16vec4 = 8u;
														
 
															-
														
 
															-#	define U8 uint8_t
														
 
															-const uint kSizeof_uint8_t = 1u;
														
 
															-#	define U8Vec2 u8vec2
														
 
															-const uint kSizeof_u8vec2 = 2u;
														
 
															-#	define U8Vec3 u8vec3
														
 
															-const uint kSizeof_u8vec3 = 3u;
														
 
															-#	define U8Vec4 u8vec4
														
 
															-const uint kSizeof_u8vec4 = 4u;
														
 
															-
														
 
															-#	define I8 int8_t
														
 
															-const uint kSizeof_int8_t = 1u;
														
 
															-#	define I8Vec2 i8vec2
														
 
															-const uint kSizeof_i8vec2 = 2u;
														
 
															-#	define I8Vec3 i8vec3
														
 
															-const uint kSizeof_i8vec3 = 3u;
														
 
															-#	define I8Vec4 i8vec4
														
 
															-const uint kSizeof_i8vec4 = 4u;
														
 
															-
														
 
															-#	define U16 uint16_t
														
 
															-const uint kSizeof_uint16_t = 2u;
														
 
															-#	define U16Vec2 u16vec2
														
 
															-const uint kSizeof_u16vec2 = 4u;
														
 
															-#	define U16Vec3 u16vec3
														
 
															-const uint kSizeof_u16vec3 = 6u;
														
 
															-#	define U16Vec4 u16vec4
														
 
															-const uint kSizeof_u16vec4 = 8u;
														
 
															-
														
 
															-#	define I16 int16_t
														
 
															-const uint kSizeof_int16_t = 2u;
														
 
															-#	define I16Vec2 i16vec2
														
 
															-const uint kSizeof_i16vec2 = 4u;
														
 
															-#	define I16Vec3 i16vec3
														
 
															-const uint kSizeof_i16vec3 = 6u;
														
 
															-#	define i16Vec4 i16vec4
														
 
															-const uint kSizeof_i16vec4 = 8u;
														
 
															-
														
 
															-#	define U32 uint
														
 
															-const uint kSizeof_uint = 4u;
														
 
															-#	define UVec2 uvec2
														
 
															-const uint kSizeof_uvec2 = 8u;
														
 
															-#	define UVec3 uvec3
														
 
															-const uint kSizeof_uvec3 = 12u;
														
 
															-#	define UVec4 uvec4
														
 
															-const uint kSizeof_uvec4 = 16u;
														
 
															-
														
 
															-#	define I32 int
														
 
															-const uint kSizeof_int = 4u;
														
 
															-#	define IVec2 ivec2
														
 
															-const uint kSizeof_ivec2 = 8u;
														
 
															-#	define IVec3 ivec3
														
 
															-const uint kSizeof_ivec3 = 12u;
														
 
															-#	define IVec4 ivec4
														
 
															-const uint kSizeof_ivec4 = 16u;
														
 
															-
														
 
															-#	if ANKI_SUPPORTS_64BIT_TYPES
														
 
															-#		define U64 uint64_t
														
 
															-const uint kSizeof_uint64_t = 8u;
														
 
															-#		define U64Vec2 u64vec2
														
 
															-const uint kSizeof_u64vec2 = 16u;
														
 
															-#		define U64Vec3 u64vec3
														
 
															-const uint kSizeof_u64vec3 = 24u;
														
 
															-#		define U64Vec4 u64vec4
														
 
															-const uint kSizeof_u64vec4 = 32u;
														
 
															-
														
 
															-#		define I64 int64_t
														
 
															-const uint kSizeof_int64_t = 8u;
														
 
															-#		define I64Vec2 i64vec2
														
 
															-const uint kSizeof_i64vec2 = 16u;
														
 
															-#		define I64Vec3 i64vec3
														
 
															-const uint kSizeof_i64vec3 = 24u;
														
 
															-#		define I64Vec4 i64vec4
														
 
															-const uint kSizeof_i64vec4 = 32u;
														
 
															-#	endif
														
 
															-
														
 
															-#	define Mat3 mat3
														
 
															-const uint kSizeof_mat3 = 36u;
														
 
															-
														
 
															-#	define Mat4 mat4
														
 
															-const uint kSizeof_mat4 = 64u;
														
 
															-
														
 
															-#	define Mat3x4 mat4x3 // GLSL has the column number first and then the rows
														
 
															-const uint kSizeof_mat4x3 = 48u;
														
 
															-
														
 
															-#	define Bool bool
														
 
															-
														
 
															-#	if ANKI_SUPPORTS_64BIT_TYPES
														
 
															-#		define Address U64
														
 
															-#	else
														
 
															-#		define Address UVec2
														
 
															-#	endif
														
 
															-
														
 
															-#	if ANKI_FORCE_FULL_FP_PRECISION
														
 
															-#		define RF32 F32
														
 
															-#		define RVec2 Vec2
														
 
															-#		define RVec3 Vec3
														
 
															-#		define RVec4 Vec4
														
 
															-#		define RMat3 Mat3
														
 
															-#	else
														
 
															-#		define RF32 mediump F32
														
 
															-#		define RVec2 mediump Vec2
														
 
															-#		define RVec3 mediump Vec3
														
 
															-#		define RVec4 mediump Vec4
														
 
															-#		define RMat3 mediump Mat3
														
 
															-#	endif
														
 
															-
														
 
															-#	define _ANKI_CONCATENATE(a, b) a##b
														
 
															-#	define ANKI_CONCATENATE(a, b) _ANKI_CONCATENATE(a, b)
														
 
															-
														
 
															-#	define sizeof(type) _ANKI_CONCATENATE(kSizeof_, type)
														
 
															-#	define alignof(type) _ANKI_CONCATENATE(kAlignof_, type)
														
 
															-
														
 
															-#	define _ANKI_SCONST_X(type, n, id) layout(constant_id = id) const type n = type(1)
														
 
															-
														
 
															-#	define _ANKI_SCONST_X2(type, componentType, n, id, constWorkaround) \
														
 
															-		layout(constant_id = id + 0u) const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = componentType(1); \
														
 
															-		layout(constant_id = id + 1u) const componentType ANKI_CONCATENATE(_anki_const_1_2_, n) = componentType(1); \
														
 
															-		constWorkaround type n = type(ANKI_CONCATENATE(_anki_const_0_2_, n), ANKI_CONCATENATE(_anki_const_1_2_, n))
														
 
															-
														
 
															-#	define _ANKI_SCONST_X3(type, componentType, n, id, constWorkaround) \
														
 
															-		layout(constant_id = id + 0u) const componentType ANKI_CONCATENATE(_anki_const_0_3_, n) = componentType(1); \
														
 
															-		layout(constant_id = id + 1u) const componentType ANKI_CONCATENATE(_anki_const_1_3_, n) = componentType(1); \
														
 
															-		layout(constant_id = id + 2u) const componentType ANKI_CONCATENATE(_anki_const_2_3_, n) = componentType(1); \
														
 
															-		constWorkaround type n = \
														
 
															-			type(ANKI_CONCATENATE(_anki_const_0_3_, n), ANKI_CONCATENATE(_anki_const_1_3_, n), ANKI_CONCATENATE(_anki_const_2_3_, n))
														
 
															-
														
 
															-#	define _ANKI_SCONST_X4(type, componentType, n, id, constWorkaround) \
														
 
															-		layout(constant_id = id + 0u) const componentType ANKI_CONCATENATE(_anki_const_0_4_, n) = componentType(1); \
														
 
															-		layout(constant_id = id + 1u) const componentType ANKI_CONCATENATE(_anki_const_1_4_, n) = componentType(1); \
														
 
															-		layout(constant_id = id + 2u) const componentType ANKI_CONCATENATE(_anki_const_2_4_, n) = componentType(1); \
														
 
															-		layout(constant_id = id + 3u) const componentType ANKI_CONCATENATE(_anki_const_3_4_, n) = componentType(1); \
														
 
															-		constWorkaround type n = type(ANKI_CONCATENATE(_anki_const_0_4_, n), ANKI_CONCATENATE(_anki_const_1_4_, n), \
														
 
															-									  ANKI_CONCATENATE(_anki_const_2_4_, n), ANKI_CONCATENATE(_anki_const_2_4_, n))
														
 
															-
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_I32(n, id) _ANKI_SCONST_X(I32, n, id)
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC2(n, id) _ANKI_SCONST_X2(IVec2, I32, n, id, const)
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC3(n, id) _ANKI_SCONST_X3(IVec3, I32, n, id, const)
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC4(n, id) _ANKI_SCONST_X4(IVec4, I32, n, id, const)
														
 
															-
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_U32(n, id) _ANKI_SCONST_X(U32, n, id)
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC2(n, id) _ANKI_SCONST_X2(UVec2, U32, n, id, const)
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC3(n, id) _ANKI_SCONST_X3(UVec3, U32, n, id, const)
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC4(n, id) _ANKI_SCONST_X4(UVec4, U32, n, id, const)
														
 
															-
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_F32(n, id) _ANKI_SCONST_X(F32, n, id)
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_VEC2(n, id) _ANKI_SCONST_X2(Vec2, F32, n, id, )
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_VEC3(n, id) _ANKI_SCONST_X3(Vec3, F32, n, id, )
														
 
															-#	define ANKI_SPECIALIZATION_CONSTANT_VEC4(n, id) _ANKI_SCONST_X4(Vec4, F32, n, id, )
														
 
															-
														
 
															-#	define ANKI_DEFINE_LOAD_STORE(type, alignment) \
														
 
															-		layout(buffer_reference, scalar, buffer_reference_align = (alignment)) buffer _Ref##type \
														
 
															-		{ \
														
 
															-			type m_value; \
														
 
															-		}; \
														
 
															-		void load(U64 address, out type o) \
														
 
															-		{ \
														
 
															-			o = _Ref##type(address).m_value; \
														
 
															-		} \
														
 
															-		void store(U64 address, type i) \
														
 
															-		{ \
														
 
															-			_Ref##type(address).m_value = i; \
														
 
															-		}
														
 
															-
														
 
															-layout(std140, row_major) uniform;
														
 
															-layout(std140, row_major) buffer;
														
 
															-
														
 
															-#	if ANKI_FORCE_FULL_FP_PRECISION
														
 
															-#		define ANKI_RP
														
 
															-#	else
														
 
															-#		define ANKI_RP mediump
														
 
															-#	endif
														
 
															-
														
 
															-#	define ANKI_FP highp
														
 
															-
														
 
															-precision highp int;
														
 
															-precision highp float;
														
 
															-
														
 
															-#	define ANKI_BINDLESS_SET(s) \
														
 
															-		layout(set = s, binding = 0) uniform utexture2D u_bindlessTextures2dU32[kMaxBindlessTextures]; \
														
 
															-		layout(set = s, binding = 0) uniform itexture2D u_bindlessTextures2dI32[kMaxBindlessTextures]; \
														
 
															-		layout(set = s, binding = 0) uniform texture2D u_bindlessTextures2dF32[kMaxBindlessTextures]; \
														
 
															-		layout(set = s, binding = 0) uniform texture2DArray u_bindlessTextures2dArrayF32[kMaxBindlessTextures]; \
														
 
															-		layout(set = s, binding = 1) uniform textureBuffer u_bindlessTextureBuffers[kMaxBindlessReadonlyTextureBuffers];
														
 
															-
														
 
															-Vec2 pow(Vec2 a, F32 b)
														
 
															-{
														
 
															-	return pow(a, Vec2(b));
														
 
															-}
														
 
															-
														
 
															-Vec3 pow(Vec3 a, F32 b)
														
 
															-{
														
 
															-	return pow(a, Vec3(b));
														
 
															-}
														
 
															-
														
 
															-Vec4 pow(Vec4 a, F32 b)
														
 
															-{
														
 
															-	return pow(a, Vec4(b));
														
 
															-}
														
 
															-
														
 
															-Bool all(Bool b)
														
 
															-{
														
 
															-	return b;
														
 
															-}
														
 
															-
														
 
															-Bool any(Bool b)
														
 
															-{
														
 
															-	return b;
														
 
															-}
														
 
															-
														
 
															-#	define saturate(x_) clamp((x_), 0.0, 1.0)
														
 
															-#	define saturateRp(x) min(x, F32(kMaxF16))
														
 
															-#	define mad(a_, b_, c_) fma((a_), (b_), (c_))
														
 
															-#	define frac(x) fract(x)
														
 
															-#	define lerp(a, b, t) mix(a, b, t)
														
 
															-#	define atan2(x, y) atan(x, y)
														
 
															-
														
 
															-float asfloat(uint u)
														
 
															-{
														
 
															-	return uintBitsToFloat(u);
														
 
															-}
														
 
															-
														
 
															-constexpr F32 kEpsilonf = 0.000001f;
														
 
															-constexpr F16 kEpsilonhf = 0.0001hf; // Divisions by this should be OK according to http://weitz.de/ieee/
														
 
															-constexpr ANKI_RP F32 kEpsilonRp = F32(kEpsilonhf);
														
 
															-
														
 
															-constexpr U32 kMaxU32 = 0xFFFFFFFFu;
														
 
															-constexpr F32 kMaxF32 = 3.402823e+38;
														
 
															-constexpr F16 kMaxF16 = 65504.0hf;
														
 
															-constexpr F16 kMinF16 = 0.00006104hf;
														
 
															-
														
 
															-constexpr F32 kPi = 3.14159265358979323846f;
														
 
															 #endif
														
 
															 //! == Common ==========================================================================================================
														
--- a/AnKi/Shaders/IrradianceDice.ankiprog
+++ b/AnKi/Shaders/IrradianceDice.ankiprog
@@ -129,7 +129,7 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 
															 		// Read the gbuffer
														
 
															 		const Vec3 gbufferUv = getCubemapDirection(faceUv, f);
														
 
															-		GbufferInfo gbuffer = (GbufferInfo)0;
														
 
															+		GbufferInfo<F32> gbuffer = (GbufferInfo<F32>)0;
														
 
															 		unpackGBufferNoVelocity(g_gbufferTex[0u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
														
 
															 								g_gbufferTex[1u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
														
 
															 								g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0), gbuffer);
														
--- a/AnKi/Shaders/LightFunctions.hlsl
+++ b/AnKi/Shaders/LightFunctions.hlsl
@@ -32,28 +32,30 @@ Vec3 F_Unreal(Vec3 specular, F32 VoH)
 
															 // Fresnel Schlick: "An Inexpensive BRDF Model for Physically-Based Rendering"
														
 
															 // It has lower VGRPs than F_Unreal
														
 
															-RVec3 F_Schlick(RVec3 f0, RF32 VoH)
														
 
															+template<typename T>
														
 
															+vector<T, 3> F_Schlick(vector<T, 3> f0, T VoH)
														
 
															 {
														
 
															-	const RF32 f = pow(1.0 - VoH, 5.0);
														
 
															-	return f + f0 * (1.0 - f);
														
 
															+	const T f = pow(max(T(0), T(1) - VoH), T(5.0));
														
 
															+	return f + f0 * (T(1) - f);
														
 
															 }
														
 
															 // D(n,h) aka NDF: GGX Trowbridge-Reitz
														
 
															-RF32 D_GGX(RF32 roughness, RF32 NoH, RVec3 h, RVec3 worldNormal)
														
 
															+template<typename T>
														
 
															+T D_GGX(T roughness, T NoH, vector<T, 3> h, vector<T, 3> worldNormal)
														
 
															 {
														
 
															 #if 0 && ANKI_PLATFORM_MOBILE
														
 
															-	const RVec3 NxH = cross(worldNormal, h);
														
 
															-	const RF32 oneMinusNoHSquared = dot(NxH, NxH);
														
 
															+	const vector<T, 3> NxH = cross(worldNormal, h);
														
 
															+	const T oneMinusNoHSquared = dot(NxH, NxH);
														
 
															 #else
														
 
															-	const RF32 oneMinusNoHSquared = 1.0 - NoH * NoH;
														
 
															+	const T oneMinusNoHSquared = T(1) - NoH * NoH;
														
 
															 	ANKI_MAYBE_UNUSED(h);
														
 
															 	ANKI_MAYBE_UNUSED(worldNormal);
														
 
															 #endif
														
 
															-	const RF32 a = roughness * roughness;
														
 
															-	const RF32 v = NoH * a;
														
 
															-	const RF32 k = a / (oneMinusNoHSquared + v * v);
														
 
															-	const RF32 d = k * k * (1.0 / kPi);
														
 
															+	const T a = roughness * roughness;
														
 
															+	const T v = NoH * a;
														
 
															+	const T k = a / (oneMinusNoHSquared + v * v);
														
 
															+	const T d = k * k * T(1.0 / kPi);
														
 
															 	return saturate(d);
														
 
															 }
														
@@ -67,10 +69,11 @@ RF32 V_Schlick(RF32 roughness, RF32 NoV, RF32 NoL)
 
															 }
														
 
															 // Visibility term: Hammon 2017, "PBR Diffuse Lighting for GGX+Smith Microsurfaces"
														
 
															-RF32 V_SmithGGXCorrelatedFast(RF32 roughness, RF32 NoV, RF32 NoL)
														
 
															+template<typename T>
														
 
															+T V_SmithGGXCorrelatedFast(T roughness, T NoV, T NoL)
														
 
															 {
														
 
															-	const RF32 a = roughness * roughness;
														
 
															-	const RF32 v = 0.5 / lerp(2.0 * NoL * NoV, NoL + NoV, a);
														
 
															+	const T a = roughness * roughness;
														
 
															+	const T v = T(0.5) / lerp(T(2) * NoL * NoV, NoL + NoV, a);
														
 
															 	return saturate(v);
														
 
															 }
														
@@ -85,23 +88,24 @@ RVec3 diffuseLobe(RVec3 diffuse)
 
															 }
														
 
															 // Performs BRDF specular lighting
														
 
															-RVec3 specularIsotropicLobe(GbufferInfo gbuffer, Vec3 viewDir, Vec3 frag2Light)
														
 
															+template<typename T>
														
 
															+vector<T, 3> specularIsotropicLobe(vector<T, 3> normal, vector<T, 3> f0, T roughness, vector<T, 3> viewDir, vector<T, 3> frag2Light)
														
 
															 {
														
 
															-	const RVec3 H = normalize(frag2Light + viewDir);
														
 
															+	const vector<T, 3> H = normalize(frag2Light + viewDir);
														
 
															-	const RF32 NoL = max(0.0, dot(gbuffer.m_normal, frag2Light));
														
 
															-	const RF32 VoH = max(0.0, dot(viewDir, H));
														
 
															-	const RF32 NoH = max(0.0, dot(gbuffer.m_normal, H));
														
 
															-	const RF32 NoV = max(0.05, dot(gbuffer.m_normal, viewDir));
														
 
															+	const T NoL = max(0.0, dot(normal, frag2Light));
														
 
															+	const T VoH = max(0.0, dot(viewDir, H));
														
 
															+	const T NoH = max(0.0, dot(normal, H));
														
 
															+	const T NoV = max(0.05, dot(normal, viewDir));
														
 
															 	// F
														
 
															-	const RVec3 F = F_Schlick(gbuffer.m_f0, VoH);
														
 
															+	const vector<T, 3> F = F_Schlick(f0, VoH);
														
 
															 	// D
														
 
															-	const RF32 D = D_GGX(gbuffer.m_roughness, NoH, H, gbuffer.m_normal);
														
 
															+	const T D = D_GGX(roughness, NoH, H, normal);
														
 
															 	// Vis
														
 
															-	const RF32 V = V_SmithGGXCorrelatedFast(gbuffer.m_roughness, NoV, NoL);
														
 
															+	const T V = V_SmithGGXCorrelatedFast(roughness, NoV, NoL);
														
 
															 	return F * (V * D);
														
 
															 }
														
--- a/AnKi/Shaders/LightShading.ankiprog
+++ b/AnKi/Shaders/LightShading.ankiprog
@@ -36,7 +36,7 @@ Texture2D<RVec4> g_integrationLut : register(t12);
 
															 #	define LIGHTING_COMMON_BRDF() \
														
 
															 		const RVec3 frag2Light = light.m_position - worldPos; \
														
 
															 		const RVec3 l = normalize(frag2Light); \
														
 
															-		const RVec3 specC = specularIsotropicLobe(gbuffer, viewDir, l); \
														
 
															+		const RVec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l); \
														
 
															 		const RVec3 diffC = diffuseLobe(gbuffer.m_diffuse); \
														
 
															 		const RF32 att = computeAttenuationFactor(light.m_radius, frag2Light); \
														
 
															 		RF32 lambert = max(0.0, dot(gbuffer.m_normal, l));
														
@@ -64,10 +64,10 @@ RVec4 main(VertOut input) : SV_TARGET0
 
															 	// return clusterHeatmap(cluster, 1u << (U32)GpuSceneNonRenderableObjectType::kLight, 3);
														
 
															 	// Decode GBuffer
														
 
															-	GbufferInfo gbuffer = (GbufferInfo)0;
														
 
															-	unpackGBufferNoVelocity(g_gbuffer0Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0),
														
 
															-							g_gbuffer1Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0),
														
 
															-							g_gbuffer2Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), gbuffer);
														
 
															+	GbufferInfo<RF32> gbuffer = (GbufferInfo<RF32>)0;
														
 
															+	unpackGBufferNoVelocity<RF32>(g_gbuffer0Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0),
														
 
															+								  g_gbuffer1Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0),
														
 
															+								  g_gbuffer2Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), gbuffer);
														
 
															 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin);
														
 
															 	// Apply SSAO
														
@@ -211,7 +211,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 
															 		const RF32 lambert = max(gbuffer.m_subsurface, dot(l, gbuffer.m_normal));
														
 
															 		const RVec3 diffC = diffuseLobe(gbuffer.m_diffuse);
														
 
															-		const RVec3 specC = specularIsotropicLobe(gbuffer, viewDir, l);
														
 
															+		const RVec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l);
														
 
															 		outColor += (diffC + specC) * dirLight.m_diffuseColor * (shadowFactor * lambert);
														
 
															 	}
														
--- a/AnKi/Shaders/MaterialShadersCommon.hlsl
+++ b/AnKi/Shaders/MaterialShadersCommon.hlsl
@@ -61,6 +61,14 @@ Texture2D<Vec4> g_shadowAtlasTex : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_S
 
															 #undef ANKI_REG
														
 
															+struct GBufferPixelOut
														
 
															+{
														
 
															+	RVec4 m_color0 : SV_TARGET0;
														
 
															+	RVec4 m_color1 : SV_TARGET1;
														
 
															+	RVec4 m_color2 : SV_TARGET2;
														
 
															+	Vec2 m_color3 : SV_TARGET3;
														
 
															+};
														
 
															+
														
 
															 UnpackedMeshVertex loadVertex(GpuSceneMeshLod mlod, U32 svVertexId, Bool bones)
														
 
															 {
														
 
															 	UnpackedMeshVertex v;
														
@@ -99,6 +107,6 @@ UnpackedMeshVertex loadVertex(MeshletGeometryDescriptor meshlet, U32 vertexIndex
 
															 Bool cullBackfaceMeshlet(MeshletBoundingVolume meshlet, Mat3x4 worldTransform, Vec3 cameraWorldPos)
														
 
															 {
														
 
															-	const Vec4 coneDirAndAng = unpackSnorm4x8(meshlet.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
														
 
															+	const Vec4 coneDirAndAng = unpackSnorm4x8<F32>(meshlet.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
														
 
															 	return cullBackfaceMeshlet(coneDirAndAng.xyz, coneDirAndAng.w, meshlet.m_coneApex, worldTransform, cameraWorldPos);
														
 
															 }
														
--- a/AnKi/Shaders/PackFunctions.hlsl
+++ b/AnKi/Shaders/PackFunctions.hlsl
@@ -8,54 +8,58 @@
 
															 #include <AnKi/Shaders/Common.hlsl>
														
 
															 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
														
 
															-constexpr RF32 kMinRoughness = 0.05;
														
 
															+constexpr F32 kMinRoughness = 0.05;
														
 
															 /// Pack 3D normal to 2D vector
														
 
															 /// See the clean code in comments in revision < r467
														
 
															-Vec2 packNormal(const Vec3 normal)
														
 
															+template<typename T>
														
 
															+vector<T, 2> packNormal(vector<T, 3> normal)
														
 
															 {
														
 
															-	const F32 scale = 1.7777;
														
 
															-	const F32 scalar1 = (normal.z + 1.0) * (scale * 2.0);
														
 
															-	return normal.xy / scalar1 + 0.5;
														
 
															+	const T scale = 1.7777;
														
 
															+	const T scalar1 = (normal.z + T(1)) * (scale * T(2));
														
 
															+	return normal.xy / scalar1 + T(0.5);
														
 
															 }
														
 
															 /// Reverse the packNormal
														
 
															-Vec3 unpackNormal(const Vec2 enc)
														
 
															+template<typename T>
														
 
															+vector<T, 3> unpackNormal(const vector<T, 2> enc)
														
 
															 {
														
 
															-	const F32 scale = 1.7777;
														
 
															-	const Vec2 nn = enc * (2.0 * scale) - scale;
														
 
															-	const F32 g = 2.0 / (dot(nn.xy, nn.xy) + 1.0);
														
 
															-	Vec3 normal;
														
 
															+	const T scale = 1.7777;
														
 
															+	const vector<T, 2> nn = enc * (T(2) * scale) - scale;
														
 
															+	const T g = T(2) / (dot(nn.xy, nn.xy) + T(1));
														
 
															+	vector<T, 3> normal;
														
 
															 	normal.xy = g * nn.xy;
														
 
															-	normal.z = g - 1.0;
														
 
															+	normal.z = g - T(1);
														
 
															 	return normalize(normal);
														
 
															 }
														
 
															 // See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
														
 
															 // Result in [0.0, 1.0]
														
 
															-Vec3 signedOctEncode(Vec3 n)
														
 
															+template<typename T>
														
 
															+vector<T, 3> signedOctEncode(vector<T, 3> n)
														
 
															 {
														
 
															-	Vec3 outn;
														
 
															+	vector<T, 3> outn;
														
 
															-	const Vec3 nabs = abs(n);
														
 
															+	const vector<T, 3> nabs = abs(n);
														
 
															 	n /= nabs.x + nabs.y + nabs.z;
														
 
															-	outn.y = n.y * 0.5 + 0.5;
														
 
															-	outn.x = n.x * 0.5 + outn.y;
														
 
															-	outn.y = n.x * -0.5 + outn.y;
														
 
															+	outn.y = n.y * T(0.5) + T(0.5);
														
 
															+	outn.x = n.x * T(0.5) + outn.y;
														
 
															+	outn.y = n.x * -T(0.5) + outn.y;
														
 
															-	outn.z = saturate(n.z * kMaxF32);
														
 
															+	outn.z = saturate(n.z * getMaxNumericLimit<T>());
														
 
															 	return outn;
														
 
															 }
														
 
															 // See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
														
 
															-Vec3 signedOctDecode(const Vec3 n)
														
 
															+template<typename T>
														
 
															+vector<T, 3> signedOctDecode(vector<T, 3> n)
														
 
															 {
														
 
															-	Vec3 outn;
														
 
															+	vector<T, 3> outn;
														
 
															 	outn.x = n.x - n.y;
														
 
															-	outn.y = n.x + n.y - 1.0;
														
 
															-	outn.z = n.z * 2.0 - 1.0;
														
 
															+	outn.y = n.x + n.y - T(1);
														
 
															+	outn.z = n.z * T(2) - T(1);
														
 
															 	outn.z = outn.z * (1.0 - abs(outn.x) - abs(outn.y));
														
 
															 	outn = normalize(outn);
														
@@ -63,146 +67,160 @@ Vec3 signedOctDecode(const Vec3 n)
 
															 }
														
 
															 // Vectorized version. Assumes that v is in [0.0, 1.0]
														
 
															-U32 newPackUnorm4x8(const Vec4 v)
														
 
															+template<typename T>
														
 
															+U32 newPackUnorm4x8(const vector<T, 4> v)
														
 
															 {
														
 
															-	Vec4 a = v * 255.0;
														
 
															-	UVec4 b = UVec4(a) << UVec4(0u, 8u, 16u, 24u);
														
 
															-	UVec2 c = b.xy | b.zw;
														
 
															+	const vector<T, 4> a = v * 255.0;
														
 
															+	const UVec4 b = UVec4(a) << UVec4(0u, 8u, 16u, 24u);
														
 
															+	const UVec2 c = b.xy | b.zw;
														
 
															 	return c.x | c.y;
														
 
															 }
														
 
															 // Vectorized version
														
 
															-Vec4 newUnpackUnorm4x8(const U32 u)
														
 
															+template<typename T>
														
 
															+vector<T, 4> newUnpackUnorm4x8(const U32 u)
														
 
															 {
														
 
															 	const UVec4 a = ((UVec4)u) >> UVec4(0u, 8u, 16u, 24u);
														
 
															 	const UVec4 b = a & ((UVec4)0xFFu);
														
 
															 	const Vec4 c = Vec4(b);
														
 
															-	return c * (1.0 / 255.0);
														
 
															+	return c * T(1.0 / 255.0);
														
 
															 }
														
 
															-U32 packSnorm4x8(Vec4 value)
														
 
															+template<typename T>
														
 
															+U32 packSnorm4x8(vector<T, 4> value)
														
 
															 {
														
 
															-	const IVec4 packed = IVec4(round(clamp(value, -1.0f, 1.0f) * 127.0f)) & 0xFFu;
														
 
															+	const IVec4 packed = IVec4(round(clamp(value, T(-1), T(1)) * T(127))) & 0xFFu;
														
 
															 	return U32(packed.x | (packed.y << 8) | (packed.z << 16) | (packed.w << 24));
														
 
															 }
														
 
															-Vec4 unpackSnorm4x8(U32 value)
														
 
															+template<typename T>
														
 
															+vector<T, 4> unpackSnorm4x8(U32 value)
														
 
															 {
														
 
															 	const I32 signedValue = (I32)value;
														
 
															 	const IVec4 packed = IVec4(signedValue << 24, signedValue << 16, signedValue << 8, signedValue) >> 24;
														
 
															-	return clamp(Vec4(packed) / 127.0f, -1.0f, 1.0f);
														
 
															+	return clamp(vector<T, 4>(packed) / T(127), T(-1), T(1));
														
 
															 }
														
 
															 // Convert from RGB to YCbCr.
														
 
															 // The RGB should be in [0, 1] and the output YCbCr will be in [0, 1] as well.
														
 
															-Vec3 rgbToYCbCr(const Vec3 rgb)
														
 
															+template<typename T>
														
 
															+vector<T, 3> rgbToYCbCr(const vector<T, 3> rgb)
														
 
															 {
														
 
															-	const F32 y = dot(rgb, Vec3(0.299, 0.587, 0.114));
														
 
															-	const F32 cb = 0.5 + dot(rgb, Vec3(-0.168736, -0.331264, 0.5));
														
 
															-	const F32 cr = 0.5 + dot(rgb, Vec3(0.5, -0.418688, -0.081312));
														
 
															-	return Vec3(y, cb, cr);
														
 
															+	const T y = dot(rgb, vector<T, 3>(0.299, 0.587, 0.114));
														
 
															+	const T cb = T(0.5) + dot(rgb, vector<T, 3>(-0.168736, -0.331264, 0.5));
														
 
															+	const T cr = T(0.5) + dot(rgb, vector<T, 3>(0.5, -0.418688, -0.081312));
														
 
															+	return vector<T, 3>(y, cb, cr);
														
 
															 }
														
 
															 // Convert the output of rgbToYCbCr back to RGB.
														
 
															-Vec3 yCbCrToRgb(const Vec3 ycbcr)
														
 
															+template<typename T>
														
 
															+vector<T, 3> yCbCrToRgb(const vector<T, 3> ycbcr)
														
 
															 {
														
 
															-	const F32 cb = ycbcr.y - 0.5;
														
 
															-	const F32 cr = ycbcr.z - 0.5;
														
 
															-	const F32 y = ycbcr.x;
														
 
															-	const F32 r = 1.402 * cr;
														
 
															-	const F32 g = -0.344 * cb - 0.714 * cr;
														
 
															-	const F32 b = 1.772 * cb;
														
 
															-	return Vec3(r, g, b) + y;
														
 
															+	const T cb = ycbcr.y - T(0.5);
														
 
															+	const T cr = ycbcr.z - T(0.5);
														
 
															+	const T y = ycbcr.x;
														
 
															+	const T r = T(1.402) * cr;
														
 
															+	const T g = T(-0.344) * cb - T(0.714) * cr;
														
 
															+	const T b = T(1.772) * cb;
														
 
															+	return vector<T, 3>(r, g, b) + y;
														
 
															 }
														
 
															 // Pack a Vec2 to a single F32.
														
 
															 // comp should be in [0, 1] and the output will be in [0, 1].
														
 
															-F32 packUnorm2ToUnorm1(const Vec2 comp)
														
 
															+template<typename T>
														
 
															+T packUnorm2ToUnorm1(const vector<T, 2> comp)
														
 
															 {
														
 
															-	return dot(round(comp * 15.0), Vec2(1.0 / (255.0 / 16.0), 1.0 / 255.0));
														
 
															+	return dot(round(comp * T(15)), Vec2(T(1) / T(255.0 / 16.0), T(1.0 / 255.0)));
														
 
															 }
														
 
															 // Unpack a single F32 to Vec2. Does the oposite of packUnorm2ToUnorm1.
														
 
															-Vec2 unpackUnorm1ToUnorm2(F32 c)
														
 
															+template<typename T>
														
 
															+vector<T, 2> unpackUnorm1ToUnorm2(T c)
														
 
															 {
														
 
															 #if 1
														
 
															-	const F32 temp = c * (255.0 / 16.0);
														
 
															-	const F32 a = floor(temp);
														
 
															-	const F32 b = temp - a; // b = fract(temp)
														
 
															-	return Vec2(a, b) * Vec2(1.0 / 15.0, 16.0 / 15.0);
														
 
															+	const T temp = c * T(255.0 / 16.0);
														
 
															+	const T a = floor(temp);
														
 
															+	const T b = temp - a; // b = fract(temp)
														
 
															+	return vector<T, 2>(a, b) * vector<T, 2>(1.0 / 15.0, 16.0 / 15.0);
														
 
															 #else
														
 
															 	const U32 temp = U32(c * 255.0);
														
 
															 	const U32 a = temp >> 4;
														
 
															 	const U32 b = temp & 0xF;
														
 
															-	return Vec2(a, b) / 15.0;
														
 
															+	return vector<T, 2>(a, b) / T(15);
														
 
															 #endif
														
 
															 }
														
 
															 // G-Buffer structure
														
 
															+template<typename T>
														
 
															 struct GbufferInfo
														
 
															 {
														
 
															-	RVec3 m_diffuse;
														
 
															-	RVec3 m_f0; ///< Freshnel at zero angles.
														
 
															-	RVec3 m_normal;
														
 
															-	RF32 m_roughness;
														
 
															-	RF32 m_metallic;
														
 
															-	RF32 m_subsurface;
														
 
															-	RVec3 m_emission;
														
 
															+	vector<T, 3> m_diffuse;
														
 
															+	vector<T, 3> m_f0; ///< Freshnel at zero angles.
														
 
															+	vector<T, 3> m_normal;
														
 
															+	vector<T, 3> m_emission;
														
 
															+	T m_roughness;
														
 
															+	T m_metallic;
														
 
															+	T m_subsurface;
														
 
															 	Vec2 m_velocity;
														
 
															 };
														
 
															 // Populate the G buffer
														
 
															-void packGBuffer(GbufferInfo g, out Vec4 rt0, out Vec4 rt1, out Vec4 rt2, out Vec2 rt3)
														
 
															+template<typename T>
														
 
															+void packGBuffer(GbufferInfo<T> g, out vector<T, 4> rt0, out vector<T, 4> rt1, out vector<T, 4> rt2, out Vec2 rt3)
														
 
															 {
														
 
															-	const F32 packedSubsurfaceMetallic = packUnorm2ToUnorm1(Vec2(g.m_subsurface, g.m_metallic));
														
 
															+	const T packedSubsurfaceMetallic = packUnorm2ToUnorm1(vector<T, 2>(g.m_subsurface, g.m_metallic));
														
 
															-	const Vec3 tonemappedEmission = reinhardTonemap(g.m_emission);
														
 
															+	const vector<T, 3> tonemappedEmission = reinhardTonemap(g.m_emission);
														
 
															-	rt0 = Vec4(g.m_diffuse, packedSubsurfaceMetallic);
														
 
															-	rt1 = Vec4(g.m_roughness, g.m_f0.x, tonemappedEmission.rb);
														
 
															+	rt0 = vector<T, 4>(g.m_diffuse, packedSubsurfaceMetallic);
														
 
															+	rt1 = vector<T, 4>(g.m_roughness, g.m_f0.x, tonemappedEmission.rb);
														
 
															-	const Vec3 encNorm = signedOctEncode(g.m_normal);
														
 
															-	rt2 = Vec4(tonemappedEmission.g, encNorm);
														
 
															+	const vector<T, 3> encNorm = signedOctEncode(g.m_normal);
														
 
															+	rt2 = vector<T, 4>(tonemappedEmission.g, encNorm);
														
 
															 	rt3 = g.m_velocity;
														
 
															 }
														
 
															-RVec3 unpackDiffuseFromGBuffer(RVec4 rt0, RF32 metallic)
														
 
															+template<typename T>
														
 
															+vector<T, 3> unpackDiffuseFromGBuffer(vector<T, 4> rt0, T metallic)
														
 
															 {
														
 
															-	return rt0.xyz *= 1.0 - metallic;
														
 
															+	return rt0.xyz *= T(1) - metallic;
														
 
															 }
														
 
															-Vec3 unpackNormalFromGBuffer(Vec4 rt2)
														
 
															+template<typename T>
														
 
															+vector<T, 3> unpackNormalFromGBuffer(vector<T, 4> rt2)
														
 
															 {
														
 
															 	return signedOctDecode(rt2.yzw);
														
 
															 }
														
 
															-RF32 unpackRoughnessFromGBuffer(RVec4 rt1)
														
 
															+template<typename T>
														
 
															+T unpackRoughnessFromGBuffer(vector<T, 4> rt1)
														
 
															 {
														
 
															-	RF32 r = rt1.x;
														
 
															-	r = r * (1.0 - kMinRoughness) + kMinRoughness;
														
 
															+	T r = rt1.x;
														
 
															+	r = r * (T(1) - T(kMinRoughness)) + T(kMinRoughness);
														
 
															 	return r;
														
 
															 }
														
 
															 // Read part of the G-buffer
														
 
															-void unpackGBufferNoVelocity(Vec4 rt0, Vec4 rt1, Vec4 rt2, out GbufferInfo g)
														
 
															+template<typename T>
														
 
															+void unpackGBufferNoVelocity(vector<T, 4> rt0, vector<T, 4> rt1, vector<T, 4> rt2, out GbufferInfo<T> g)
														
 
															 {
														
 
															 	g.m_diffuse = rt0.xyz;
														
 
															-	const Vec2 unpackedSubsurfaceMetallic = unpackUnorm1ToUnorm2(rt0.w);
														
 
															+	const vector<T, 2> unpackedSubsurfaceMetallic = unpackUnorm1ToUnorm2(rt0.w);
														
 
															 	g.m_subsurface = unpackedSubsurfaceMetallic.x;
														
 
															 	g.m_metallic = unpackedSubsurfaceMetallic.y;
														
 
															 	g.m_roughness = unpackRoughnessFromGBuffer(rt1);
														
 
															-	g.m_f0 = Vec3(rt1.y, rt1.y, rt1.y);
														
 
															-	g.m_emission = invertReinhardTonemap(Vec3(rt1.z, rt2.x, rt1.w));
														
 
															+	g.m_f0 = vector<T, 3>(rt1.y, rt1.y, rt1.y);
														
 
															+	g.m_emission = invertReinhardTonemap(vector<T, 3>(rt1.z, rt2.x, rt1.w));
														
 
															 	g.m_normal = signedOctDecode(rt2.yzw);
														
 
															-	g.m_velocity = Vec2(kMaxF32, kMaxF32); // Put something random
														
 
															+	g.m_velocity = getMaxNumericLimit<T>(); // Put something random
														
 
															 	// Compute reflectance
														
 
															 	g.m_f0 = lerp(g.m_f0, g.m_diffuse, g.m_metallic);
														
 
															 	// Compute diffuse
														
 
															-	g.m_diffuse *= 1.0 - g.m_metallic;
														
 
															+	g.m_diffuse *= T(1) - g.m_metallic;
														
 
															 }
														
--- a/AnKi/Shaders/TonemappingFunctions.hlsl
+++ b/AnKi/Shaders/TonemappingFunctions.hlsl
@@ -11,19 +11,19 @@
 
															 template<typename T>
														
 
															 T log10(T x)
														
 
															 {
														
 
															-	return log(x) / log((T)10.0);
														
 
															+	return log(x) / log(T(10));
														
 
															 }
														
 
															 template<typename T>
														
 
															 vector<T, 3> computeLuminance(vector<T, 3> color)
														
 
															 {
														
 
															-	return max(dot(vector<T, 3>(0.30, 0.59, 0.11), color), T(kEpsilonRF32));
														
 
															+	return max(dot(vector<T, 3>(0.30, 0.59, 0.11), color), getEpsilon<T>());
														
 
															 }
														
 
															 template<typename T>
														
 
															 T computeExposure(T avgLum, T threshold)
														
 
															 {
														
 
															-	const T keyValue = T(1.03) - (T(2.0) / (T(2.0) + log10(avgLum + T(1.0))));
														
 
															+	const T keyValue = T(1.03) - (T(2) / (T(2) + log10(avgLum + T(1))));
														
 
															 	const T linearExposure = (keyValue / avgLum);
														
 
															 	T exposure = log2(linearExposure);
														
@@ -41,12 +41,12 @@ vector<T, 3> computeExposedColor(vector<T, 3> color, vector<T, 3> avgLum, vector
 
															 template<typename T>
														
 
															 vector<T, 3> tonemapUncharted2(vector<T, 3> color)
														
 
															 {
														
 
															-	const T A = 0.15;
														
 
															-	const T B = 0.50;
														
 
															-	const T C = 0.10;
														
 
															-	const T D = 0.20;
														
 
															-	const T E = 0.02;
														
 
															-	const T F = 0.30;
														
 
															+	constexpr T A = 0.15;
														
 
															+	constexpr T B = 0.50;
														
 
															+	constexpr T C = 0.10;
														
 
															+	constexpr T D = 0.20;
														
 
															+	constexpr T E = 0.02;
														
 
															+	constexpr T F = 0.30;
														
 
															 	return ((color * (A * color + C * B) + D * E) / (color * (A * color + B) + D * F)) - E / F;
														
 
															 }
														
@@ -75,8 +75,8 @@ vector<T, 3> invertTonemapACESFilm(vector<T, 3> x)
 
															 	constexpr T kAcesE = 0.14;
														
 
															 	vector<T, 3> res = kAcesD * x - kAcesB;
														
 
															-	res += sqrt(x * x * (kAcesD * kAcesD - T(4.0) * kAcesE * kAcesC) + x * (T(4.0) * kAcesE * kAcesA - T(2.0) * kAcesB * kAcesD) + kAcesB * kAcesB);
														
 
															-	res /= T(2.0) * kAcesA - T(2.0) * kAcesC * x;
														
 
															+	res += sqrt(x * x * (kAcesD * kAcesD - T(4) * kAcesE * kAcesC) + x * (T(4) * kAcesE * kAcesA - T(2) * kAcesB * kAcesD) + kAcesB * kAcesB);
														
 
															+	res /= T(2) * kAcesA - T(2) * kAcesC * x;
														
 
															 	return res;
														
 
															 }
														
@@ -92,7 +92,7 @@ template<typename T>
 
															 vector<T, 3> invertTonemap(vector<T, 3> color, T exposure)
														
 
															 {
														
 
															 	color = invertTonemapACESFilm(color);
														
 
															-	color /= max(T(kEpsilonRF32), exposure);
														
 
															+	color /= max(getEpsilon<T>(), exposure);
														
 
															 	return color;
														
 
															 }
														
@@ -108,12 +108,12 @@ template<typename T>
 
															 vector<T, 3> reinhardTonemap(vector<T, 3> colour)
														
 
															 {
														
 
															 	// rgb / (1 + max(rgb))
														
 
															-	return colour / (T(1.0) + max(max(colour.r, colour.g), colour.b));
														
 
															+	return colour / (T(1) + max(max(colour.r, colour.g), colour.b));
														
 
															 }
														
 
															 template<typename T>
														
 
															 vector<T, 3> invertReinhardTonemap(vector<T, 3> colour)
														
 
															 {
														
 
															 	// rgb / (1 - max(rgb))
														
 
															-	return colour / max(T(1.0 / 32768.0), T(1.0) - max(max(colour.r, colour.g), colour.b));
														
 
															+	return colour / max(T(1.0 / 32768.0), T(1) - max(max(colour.r, colour.g), colour.b));
														
 
															 }
														
--- a/AnKi/Shaders/TraditionalDeferredShading.ankiprog
+++ b/AnKi/Shaders/TraditionalDeferredShading.ankiprog
@@ -44,7 +44,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 
															 	}
														
 
															 	// Decode and process gbuffer
														
 
															-	GbufferInfo gbuffer = (GbufferInfo)0;
														
 
															+	GbufferInfo<F32> gbuffer = (GbufferInfo<F32>)0;
														
 
															 	unpackGBufferNoVelocity(g_gbufferTex0.SampleLevel(g_gbufferSampler, uv, 0.0), g_gbufferTex1.SampleLevel(g_gbufferSampler, uv, 0.0),
														
 
															 							g_gbufferTex2.SampleLevel(g_gbufferSampler, uv, 0.0), gbuffer);
														
 
															 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin * 8.0);
														
@@ -80,7 +80,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 
															 		const RF32 factor = shadowFactor * max(gbuffer.m_subsurface, lambert);
														
 
															 #	if SPECULAR == 1
														
 
															-		const Vec3 specC = specularIsotropicLobe(gbuffer, viewDir, l);
														
 
															+		const Vec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l);
														
 
															 #	else
														
 
															 		const Vec3 specC = Vec3(0.0, 0.0, 0.0);
														
 
															 #	endif
														
@@ -105,7 +105,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 
															 		const F32 factor = att * spot * max(lambert, gbuffer.m_subsurface);
														
 
															 #	if SPECULAR == 1
														
 
															-		const Vec3 specC = specularIsotropicLobe(gbuffer, viewDir, l);
														
 
															+		const Vec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l);
														
 
															 #	else
														
 
															 		const Vec3 specC = Vec3(0.0, 0.0, 0.0);
														
 
															 #	endif
														
--- a/Tools/Shader/ShaderProgramBinaryDumpMain.cpp
+++ b/Tools/Shader/ShaderProgramBinaryDumpMain.cpp
@@ -7,8 +7,10 @@
 
															 #include <AnKi/ShaderCompiler/ShaderDump.h>
														
 
															 #include <AnKi/ShaderCompiler/MaliOfflineCompiler.h>
														
 
															 #include <AnKi/ShaderCompiler/RadeonGpuAnalyzer.h>
														
 
															+#include <AnKi/ShaderCompiler/Dxc.h>
														
 
															 #include <AnKi/Util/ThreadHive.h>
														
 
															 #include <AnKi/Util/System.h>
														
 
															+#include <ThirdParty/SpirvCross/spirv.hpp>
														
 
															 using namespace anki;
														
@@ -171,13 +173,33 @@ Error dumpStats(const ShaderBinary& bin)
 
															 					const ShaderBinaryCodeBlock& codeBlock = ctx.m_bin->m_codeBlocks[codeblockIdx];
														
 
															+					// Rewrite spir-v because of the decorations we ask DXC to put
														
 
															+					Bool bRequiresMeshShaders = false;
														
 
															+					DynamicArray<U8> newSpirv;
														
 
															+					newSpirv.resize(codeBlock.m_binary.getSize());
														
 
															+					memcpy(newSpirv.getBegin(), codeBlock.m_binary.getBegin(), codeBlock.m_binary.getSizeInBytes());
														
 
															+					visitSpirv(WeakArray<U32>(reinterpret_cast<U32*>(newSpirv.getBegin()), newSpirv.getSizeInBytes() / sizeof(U32)),
														
 
															+							   [&](U32 cmd, WeakArray<U32> instructions) {
														
 
															+								   if(cmd == spv::OpDecorate && instructions[1] == spv::DecorationDescriptorSet
														
 
															+									  && instructions[2] == kDxcVkBindlessRegisterSpace)
														
 
															+								   {
														
 
															+									   // Bindless set, rewrite its set
														
 
															+									   instructions[2] = kMaxRegisterSpaces;
														
 
															+								   }
														
 
															+								   else if(cmd == spv::OpCapability && instructions[0] == spv::CapabilityMeshShadingEXT)
														
 
															+								   {
														
 
															+									   bRequiresMeshShaders = true;
														
 
															+								   }
														
 
															+							   });
														
 
															+
														
 
															 					// Arm stats
														
 
															 					MaliOfflineCompilerOut maliocOut;
														
 
															 					Error err = Error::kNone;
														
 
															-					if(shaderType == ShaderType::kVertex || shaderType == ShaderType::kPixel || shaderType == ShaderType::kCompute)
														
 
															+					if((shaderType == ShaderType::kVertex || shaderType == ShaderType::kPixel || shaderType == ShaderType::kCompute)
														
 
															+					   && !bRequiresMeshShaders)
														
 
															 					{
														
 
															-						err = runMaliOfflineCompiler(codeBlock.m_binary, shaderType, maliocOut);
														
 
															+						err = runMaliOfflineCompiler(newSpirv, shaderType, maliocOut);
														
 
															 						if(err)
														
 
															 						{
														
@@ -190,7 +212,7 @@ Error dumpStats(const ShaderBinary& bin)
 
															 					// AMD
														
 
															 					RgaOutput rgaOut = {};
														
 
															 #if 0
														
 
															-					err = runRadeonGpuAnalyzer(codeBlock.m_binary, shaderType, rgaOut);
														
 
															+					err = runRadeonGpuAnalyzer(newSpirv, shaderType, rgaOut);
														
 
															 					if(err)
														
 
															 					{
														
 
															 						ANKI_LOGE("Radeon GPU Analyzer compiler failed");