Browse Source

Make some HLSL functions templates

Panagiotis Christopoulos Charitos 1 year ago
parent
commit
3d1220c81a

+ 1 - 0
AnKi/Renderer/Dbg.cpp

@@ -222,6 +222,7 @@ void Dbg::run(RenderPassWorkContext& rgraphCtx, const RenderingContext& ctx)
 
 
 	// Restore state
 	// Restore state
 	cmdb.setDepthCompareOperation(CompareOperation::kLess);
 	cmdb.setDepthCompareOperation(CompareOperation::kLess);
+	cmdb.setDepthWrite(true);
 }
 }
 
 
 void Dbg::populateRenderGraph(RenderingContext& ctx)
 void Dbg::populateRenderGraph(RenderingContext& ctx)

+ 3 - 3
AnKi/Shaders/ApplyIrradianceToReflection.ankiprog

@@ -10,8 +10,8 @@
 
 
 SamplerState g_nearestAnyClampSampler : register(s0);
 SamplerState g_nearestAnyClampSampler : register(s0);
 TextureCube<Vec4> g_gbufferTex[3u] : register(t0);
 TextureCube<Vec4> g_gbufferTex[3u] : register(t0);
-StructuredBuffer<RVec4> g_irradianceDice : register(t3);
-RWTexture2D<RVec4> g_cubeTex[6u] : register(u0); // RWTexture2D because there is no RWTextureCube
+StructuredBuffer<Vec4> g_irradianceDice : register(t3);
+RWTexture2D<Vec4> g_cubeTex[6u] : register(u0); // RWTexture2D because there is no RWTextureCube
 
 
 [numthreads(8, 8, 6)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID, UVec3 svGroupThreadId : SV_GROUPTHREADID)
 [numthreads(8, 8, 6)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID, UVec3 svGroupThreadId : SV_GROUPTHREADID)
 {
 {
@@ -27,7 +27,7 @@ RWTexture2D<RVec4> g_cubeTex[6u] : register(u0); // RWTexture2D because there is
 	const Vec3 sampleUv = getCubemapDirection(uv, faceIdx);
 	const Vec3 sampleUv = getCubemapDirection(uv, faceIdx);
 
 
 	// Read the gbuffer
 	// Read the gbuffer
-	GbufferInfo gbuffer = (GbufferInfo)0;
+	GbufferInfo<F32> gbuffer = (GbufferInfo<F32>)0;
 	unpackGBufferNoVelocity(g_gbufferTex[0u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0),
 	unpackGBufferNoVelocity(g_gbufferTex[0u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0),
 							g_gbufferTex[1u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0),
 							g_gbufferTex[1u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0),
 							g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), gbuffer);
 							g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, sampleUv, 0.0), gbuffer);

+ 73 - 0
AnKi/Shaders/Common.hlsl

@@ -13,6 +13,79 @@
 #	include <AnKi/Shaders/Include/Common.h>
 #	include <AnKi/Shaders/Include/Common.h>
 #endif
 #endif
 
 
+// Common constants: per-type epsilons (kEpsilonF32 is 1e-6; the F16 and RF32 variants use the coarser 1e-4)
+constexpr F32 kEpsilonF32 = 0.000001f;
+#if ANKI_SUPPORTS_16BIT_TYPES
+constexpr F16 kEpsilonF16 = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
+#endif
+constexpr RF32 kEpsilonRF32 = 0.0001f;
+
+template<typename T>
+T getEpsilon(); // Primary template is declared but not defined here; only the explicit specializations below return a value
+
+template<>
+F32 getEpsilon() // F32 specialization: returns kEpsilonF32
+{
+	return kEpsilonF32;
+}
+
+#if ANKI_SUPPORTS_16BIT_TYPES // Same guard as the kEpsilonF16 declaration above
+template<>
+F16 getEpsilon() // F16 specialization: returns kEpsilonF16
+{
+	return kEpsilonF16;
+}
+#endif
+
+#if !ANKI_FORCE_FULL_FP_PRECISION // NOTE(review): presumably RF32 is the same type as F32 when full precision is forced, so this would clash with the F32 specialization - confirm
+template<>
+RF32 getEpsilon() // RF32 specialization: returns kEpsilonRF32
+{
+	return kEpsilonRF32;
+}
+#endif
+
+constexpr U32 kMaxU32 = 0xFFFFFFFFu; // All 32 bits set
+constexpr F32 kMaxF32 = 3.402823e+38; // Written with 7 significant digits
+constexpr RF32 kMaxRF32 = 65504.0f; // Max half float value according to wikipedia
+#if ANKI_SUPPORTS_16BIT_TYPES
+constexpr F16 kMaxF16 = (F16)65504.0; // NOTE(review): the block removed from Include/Common.h also defined kMinF16; it is not re-declared here
+#endif
+
+template<typename T>
+T getMaxNumericLimit(); // Primary template is declared but not defined here; only the explicit specializations below return a value
+
+template<>
+F32 getMaxNumericLimit() // F32 specialization: returns kMaxF32
+{
+	return kMaxF32;
+}
+
+#if !ANKI_FORCE_FULL_FP_PRECISION // NOTE(review): presumably guards against RF32 being the same type as F32 - confirm
+template<>
+RF32 getMaxNumericLimit() // RF32 specialization: returns kMaxRF32
+{
+	return kMaxRF32;
+}
+#endif
+
+#if ANKI_SUPPORTS_16BIT_TYPES // Same guard as the kMaxF16 declaration above
+template<>
+F16 getMaxNumericLimit() // F16 specialization: returns kMaxF16
+{
+	return kMaxF16;
+}
+#endif
+
+template<>
+U32 getMaxNumericLimit() // U32 specialization: returns kMaxU32
+{
+	return kMaxU32;
+}
+
+constexpr F32 kPi = 3.14159265358979323846f;
+constexpr F32 kNaN = 0.0f / 0.0f; // Zero divided by zero; NOTE(review): relies on the compiler folding this to a NaN - confirm
+
 #if ANKI_GR_BACKEND_VULKAN
 #if ANKI_GR_BACKEND_VULKAN
 #	define ANKI_FAST_CONSTANTS(type, var) [[vk::push_constant]] ConstantBuffer<type> var;
 #	define ANKI_FAST_CONSTANTS(type, var) [[vk::push_constant]] ConstantBuffer<type> var;
 #else
 #else

+ 3 - 11
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -170,14 +170,6 @@ struct MeshPerPrimitiveOut
 	ANKI_PER_PRIMITIVE_MEMBER Bool m_cullPrimitive : SV_CULLPRIMITIVE;
 	ANKI_PER_PRIMITIVE_MEMBER Bool m_cullPrimitive : SV_CULLPRIMITIVE;
 };
 };
 
 
-struct PixelOut
-{
-	Vec4 m_color0 : SV_TARGET0;
-	Vec4 m_color1 : SV_TARGET1;
-	Vec4 m_color2 : SV_TARGET2;
-	Vec2 m_color3 : SV_TARGET3;
-};
-
 struct Mat3x4_2
 struct Mat3x4_2
 {
 {
 	Mat3x4 m_a;
 	Mat3x4 m_a;
@@ -500,7 +492,7 @@ void main(
 
 
 #	else // GBUFFER
 #	else // GBUFFER
 
 
-PixelOut main(
+GBufferPixelOut main(
 #		if ANKI_TECHNIQUE_GBufferMeshShaders
 #		if ANKI_TECHNIQUE_GBufferMeshShaders
 	MeshPerVertOut vertInput, ANKI_PER_PRIMITIVE_VAR MeshPerPrimitiveOut primInput
 	MeshPerVertOut vertInput, ANKI_PER_PRIMITIVE_VAR MeshPerPrimitiveOut primInput
 #		else
 #		else
@@ -576,7 +568,7 @@ PixelOut main(
 	const Vec2 velocity = Vec2(1.0, 1.0);
 	const Vec2 velocity = Vec2(1.0, 1.0);
 #		endif
 #		endif
 
 
-	GbufferInfo g;
+	GbufferInfo<RF32> g;
 	g.m_diffuse = diffColor;
 	g.m_diffuse = diffColor;
 	g.m_normal = normal;
 	g.m_normal = normal;
 	g.m_f0 = specColor;
 	g.m_f0 = specColor;
@@ -615,7 +607,7 @@ PixelOut main(
 	}
 	}
 #		endif
 #		endif
 
 
-	PixelOut output;
+	GBufferPixelOut output;
 	packGBuffer(g, output.m_color0, output.m_color1, output.m_color2, output.m_color3);
 	packGBuffer(g, output.m_color0, output.m_color1, output.m_color2, output.m_color3);
 	return output;
 	return output;
 }
 }

+ 3 - 11
AnKi/Shaders/GBufferGpuParticles.ankiprog

@@ -30,14 +30,6 @@ struct VertOut
 	Vec4 m_svPosition : SV_POSITION;
 	Vec4 m_svPosition : SV_POSITION;
 };
 };
 
 
-struct PixelOut
-{
-	Vec4 m_color0 : SV_TARGET0;
-	Vec4 m_color1 : SV_TARGET1;
-	Vec4 m_color2 : SV_TARGET2;
-	Vec2 m_color3 : SV_TARGET3;
-};
-
 #if ANKI_VERTEX_SHADER
 #if ANKI_VERTEX_SHADER
 VertOut main(VertIn input)
 VertOut main(VertIn input)
 {
 {
@@ -82,12 +74,12 @@ VertOut main(VertIn input)
 #if ANKI_PIXEL_SHADER
 #if ANKI_PIXEL_SHADER
 #	include <AnKi/Shaders/PackFunctions.hlsl>
 #	include <AnKi/Shaders/PackFunctions.hlsl>
 
 
-PixelOut main(VertOut input)
+GBufferPixelOut main(VertOut input)
 {
 {
-	PixelOut output;
+	GBufferPixelOut output;
 	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, input.m_constantsOffset);
 	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, input.m_constantsOffset);
 
 
-	GbufferInfo g;
+	GbufferInfo<RF32> g;
 	g.m_diffuse = localConstants.m_diffColor;
 	g.m_diffuse = localConstants.m_diffColor;
 
 
 	const Mat3x4 camTrf = g_globalConstants.m_cameraTransform;
 	const Mat3x4 camTrf = g_globalConstants.m_cameraTransform;

+ 1 - 1
AnKi/Shaders/GBufferVisualizeProbe.ankiprog

@@ -168,7 +168,7 @@ PixelOut main(VertOut input)
 	output.m_svDepth = p.z / p.w;
 	output.m_svDepth = p.z / p.w;
 
 
 	// Set the GBuffer
 	// Set the GBuffer
-	GbufferInfo g;
+	GbufferInfo<F32> g;
 	g.m_diffuse = (PROBE_TYPE == 0) ? 0.5 : 1.0;
 	g.m_diffuse = (PROBE_TYPE == 0) ? 0.5 : 1.0;
 	g.m_normal = normalize(collisionPoint - input.m_sphereCenter);
 	g.m_normal = normalize(collisionPoint - input.m_sphereCenter);
 	g.m_f0 = 0.04;
 	g.m_f0 = 0.04;

+ 0 - 325
AnKi/Shaders/Include/Common.h

@@ -409,331 +409,6 @@ typedef min16float3 RVec3;
 typedef min16float4 RVec4;
 typedef min16float4 RVec4;
 _ANKI_MAT3(RMat3, RVec3, RF32)
 _ANKI_MAT3(RMat3, RVec3, RF32)
 #	endif
 #	endif
-
-// Common constants
-constexpr F32 kEpsilonF32 = 0.000001f;
-#	if ANKI_SUPPORTS_16BIT_TYPES
-constexpr F16 kEpsilonhF16 = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
-#	endif
-constexpr RF32 kEpsilonRF32 = 0.0001f;
-
-constexpr RF32 kMaxRF32 = 65504.0f; // Max half float value according to wikipedia
-constexpr U32 kMaxU32 = 0xFFFFFFFFu;
-constexpr F32 kMaxF32 = 3.402823e+38;
-#	if ANKI_SUPPORTS_16BIT_TYPES
-constexpr F16 kMaxF16 = (F16)65504.0;
-constexpr F16 kMinF16 = (F16)0.00006104;
-#	endif
-
-constexpr F32 kPi = 3.14159265358979323846f;
-constexpr F32 kNaN = 0.0f / 0.0f;
-
-//! == GLSL ============================================================================================================
-#else
-#	define ANKI_HLSL 0
-#	define ANKI_GLSL 1
-#	define ANKI_CPP 0
-
-#	define ANKI_BEGIN_NAMESPACE
-#	define ANKI_END_NAMESPACE
-#	define inline
-
-#	define ANKI_SHADER_STATIC_ASSERT(cond_)
-
-#	define ScalarVec4 Vec4
-#	define ScalarMat3x4 Mat3x4
-#	define ScalarMat4 Mat4
-
-#	define constexpr const
-
-#	define ANKI_SUPPORTS_64BIT_TYPES !ANKI_PLATFORM_MOBILE
-
-#	extension GL_EXT_control_flow_attributes : require
-#	extension GL_KHR_shader_subgroup_vote : require
-#	extension GL_KHR_shader_subgroup_ballot : require
-#	extension GL_KHR_shader_subgroup_shuffle : require
-#	extension GL_KHR_shader_subgroup_arithmetic : require
-
-#	extension GL_EXT_samplerless_texture_functions : require
-#	extension GL_EXT_shader_image_load_formatted : require
-#	extension GL_EXT_nonuniform_qualifier : enable
-
-#	extension GL_EXT_buffer_reference : enable
-#	extension GL_EXT_buffer_reference2 : enable
-
-#	extension GL_EXT_shader_explicit_arithmetic_types : enable
-#	extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
-#	extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
-#	extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
-#	extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
-#	extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
-
-#	if ANKI_SUPPORTS_64BIT_TYPES
-#		extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
-#		extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
-#		extension GL_EXT_shader_atomic_int64 : enable
-#		extension GL_EXT_shader_subgroup_extended_types_int64 : enable
-#	endif
-
-#	extension GL_EXT_nonuniform_qualifier : enable
-#	extension GL_EXT_scalar_block_layout : enable
-
-#	if defined(ANKI_RAY_GEN_SHADER) || defined(ANKI_ANY_HIT_SHADER) || defined(ANKI_CLOSEST_HIT_SHADER) || defined(ANKI_MISS_SHADER) \
-		|| defined(ANKI_INTERSECTION_SHADER) || defined(ANKI_CALLABLE_SHADER)
-#		extension GL_EXT_ray_tracing : enable
-#	endif
-
-#	define unroll [unroll]
-#	define branch [branch]
-
-#	define F32 float
-const uint kSizeof_float = 4u;
-#	define Vec2 vec2
-const uint kSizeof_vec2 = 8u;
-#	define Vec3 vec3
-const uint kSizeof_vec3 = 12u;
-#	define Vec4 vec4
-const uint kSizeof_vec4 = 16u;
-
-#	define F16 float16_t
-const uint kSizeof_float16_t = 2u;
-#	define HVec2 f16vec2
-const uint kSizeof_f16vec2 = 4u;
-#	define HVec3 f16vec3
-const uint kSizeof_f16vec3 = 6u;
-#	define HVec4 f16vec4
-const uint kSizeof_f16vec4 = 8u;
-
-#	define U8 uint8_t
-const uint kSizeof_uint8_t = 1u;
-#	define U8Vec2 u8vec2
-const uint kSizeof_u8vec2 = 2u;
-#	define U8Vec3 u8vec3
-const uint kSizeof_u8vec3 = 3u;
-#	define U8Vec4 u8vec4
-const uint kSizeof_u8vec4 = 4u;
-
-#	define I8 int8_t
-const uint kSizeof_int8_t = 1u;
-#	define I8Vec2 i8vec2
-const uint kSizeof_i8vec2 = 2u;
-#	define I8Vec3 i8vec3
-const uint kSizeof_i8vec3 = 3u;
-#	define I8Vec4 i8vec4
-const uint kSizeof_i8vec4 = 4u;
-
-#	define U16 uint16_t
-const uint kSizeof_uint16_t = 2u;
-#	define U16Vec2 u16vec2
-const uint kSizeof_u16vec2 = 4u;
-#	define U16Vec3 u16vec3
-const uint kSizeof_u16vec3 = 6u;
-#	define U16Vec4 u16vec4
-const uint kSizeof_u16vec4 = 8u;
-
-#	define I16 int16_t
-const uint kSizeof_int16_t = 2u;
-#	define I16Vec2 i16vec2
-const uint kSizeof_i16vec2 = 4u;
-#	define I16Vec3 i16vec3
-const uint kSizeof_i16vec3 = 6u;
-#	define i16Vec4 i16vec4
-const uint kSizeof_i16vec4 = 8u;
-
-#	define U32 uint
-const uint kSizeof_uint = 4u;
-#	define UVec2 uvec2
-const uint kSizeof_uvec2 = 8u;
-#	define UVec3 uvec3
-const uint kSizeof_uvec3 = 12u;
-#	define UVec4 uvec4
-const uint kSizeof_uvec4 = 16u;
-
-#	define I32 int
-const uint kSizeof_int = 4u;
-#	define IVec2 ivec2
-const uint kSizeof_ivec2 = 8u;
-#	define IVec3 ivec3
-const uint kSizeof_ivec3 = 12u;
-#	define IVec4 ivec4
-const uint kSizeof_ivec4 = 16u;
-
-#	if ANKI_SUPPORTS_64BIT_TYPES
-#		define U64 uint64_t
-const uint kSizeof_uint64_t = 8u;
-#		define U64Vec2 u64vec2
-const uint kSizeof_u64vec2 = 16u;
-#		define U64Vec3 u64vec3
-const uint kSizeof_u64vec3 = 24u;
-#		define U64Vec4 u64vec4
-const uint kSizeof_u64vec4 = 32u;
-
-#		define I64 int64_t
-const uint kSizeof_int64_t = 8u;
-#		define I64Vec2 i64vec2
-const uint kSizeof_i64vec2 = 16u;
-#		define I64Vec3 i64vec3
-const uint kSizeof_i64vec3 = 24u;
-#		define I64Vec4 i64vec4
-const uint kSizeof_i64vec4 = 32u;
-#	endif
-
-#	define Mat3 mat3
-const uint kSizeof_mat3 = 36u;
-
-#	define Mat4 mat4
-const uint kSizeof_mat4 = 64u;
-
-#	define Mat3x4 mat4x3 // GLSL has the column number first and then the rows
-const uint kSizeof_mat4x3 = 48u;
-
-#	define Bool bool
-
-#	if ANKI_SUPPORTS_64BIT_TYPES
-#		define Address U64
-#	else
-#		define Address UVec2
-#	endif
-
-#	if ANKI_FORCE_FULL_FP_PRECISION
-#		define RF32 F32
-#		define RVec2 Vec2
-#		define RVec3 Vec3
-#		define RVec4 Vec4
-#		define RMat3 Mat3
-#	else
-#		define RF32 mediump F32
-#		define RVec2 mediump Vec2
-#		define RVec3 mediump Vec3
-#		define RVec4 mediump Vec4
-#		define RMat3 mediump Mat3
-#	endif
-
-#	define _ANKI_CONCATENATE(a, b) a##b
-#	define ANKI_CONCATENATE(a, b) _ANKI_CONCATENATE(a, b)
-
-#	define sizeof(type) _ANKI_CONCATENATE(kSizeof_, type)
-#	define alignof(type) _ANKI_CONCATENATE(kAlignof_, type)
-
-#	define _ANKI_SCONST_X(type, n, id) layout(constant_id = id) const type n = type(1)
-
-#	define _ANKI_SCONST_X2(type, componentType, n, id, constWorkaround) \
-		layout(constant_id = id + 0u) const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = componentType(1); \
-		layout(constant_id = id + 1u) const componentType ANKI_CONCATENATE(_anki_const_1_2_, n) = componentType(1); \
-		constWorkaround type n = type(ANKI_CONCATENATE(_anki_const_0_2_, n), ANKI_CONCATENATE(_anki_const_1_2_, n))
-
-#	define _ANKI_SCONST_X3(type, componentType, n, id, constWorkaround) \
-		layout(constant_id = id + 0u) const componentType ANKI_CONCATENATE(_anki_const_0_3_, n) = componentType(1); \
-		layout(constant_id = id + 1u) const componentType ANKI_CONCATENATE(_anki_const_1_3_, n) = componentType(1); \
-		layout(constant_id = id + 2u) const componentType ANKI_CONCATENATE(_anki_const_2_3_, n) = componentType(1); \
-		constWorkaround type n = \
-			type(ANKI_CONCATENATE(_anki_const_0_3_, n), ANKI_CONCATENATE(_anki_const_1_3_, n), ANKI_CONCATENATE(_anki_const_2_3_, n))
-
-#	define _ANKI_SCONST_X4(type, componentType, n, id, constWorkaround) \
-		layout(constant_id = id + 0u) const componentType ANKI_CONCATENATE(_anki_const_0_4_, n) = componentType(1); \
-		layout(constant_id = id + 1u) const componentType ANKI_CONCATENATE(_anki_const_1_4_, n) = componentType(1); \
-		layout(constant_id = id + 2u) const componentType ANKI_CONCATENATE(_anki_const_2_4_, n) = componentType(1); \
-		layout(constant_id = id + 3u) const componentType ANKI_CONCATENATE(_anki_const_3_4_, n) = componentType(1); \
-		constWorkaround type n = type(ANKI_CONCATENATE(_anki_const_0_4_, n), ANKI_CONCATENATE(_anki_const_1_4_, n), \
-									  ANKI_CONCATENATE(_anki_const_2_4_, n), ANKI_CONCATENATE(_anki_const_2_4_, n))
-
-#	define ANKI_SPECIALIZATION_CONSTANT_I32(n, id) _ANKI_SCONST_X(I32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC2(n, id) _ANKI_SCONST_X2(IVec2, I32, n, id, const)
-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC3(n, id) _ANKI_SCONST_X3(IVec3, I32, n, id, const)
-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC4(n, id) _ANKI_SCONST_X4(IVec4, I32, n, id, const)
-
-#	define ANKI_SPECIALIZATION_CONSTANT_U32(n, id) _ANKI_SCONST_X(U32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC2(n, id) _ANKI_SCONST_X2(UVec2, U32, n, id, const)
-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC3(n, id) _ANKI_SCONST_X3(UVec3, U32, n, id, const)
-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC4(n, id) _ANKI_SCONST_X4(UVec4, U32, n, id, const)
-
-#	define ANKI_SPECIALIZATION_CONSTANT_F32(n, id) _ANKI_SCONST_X(F32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_VEC2(n, id) _ANKI_SCONST_X2(Vec2, F32, n, id, )
-#	define ANKI_SPECIALIZATION_CONSTANT_VEC3(n, id) _ANKI_SCONST_X3(Vec3, F32, n, id, )
-#	define ANKI_SPECIALIZATION_CONSTANT_VEC4(n, id) _ANKI_SCONST_X4(Vec4, F32, n, id, )
-
-#	define ANKI_DEFINE_LOAD_STORE(type, alignment) \
-		layout(buffer_reference, scalar, buffer_reference_align = (alignment)) buffer _Ref##type \
-		{ \
-			type m_value; \
-		}; \
-		void load(U64 address, out type o) \
-		{ \
-			o = _Ref##type(address).m_value; \
-		} \
-		void store(U64 address, type i) \
-		{ \
-			_Ref##type(address).m_value = i; \
-		}
-
-layout(std140, row_major) uniform;
-layout(std140, row_major) buffer;
-
-#	if ANKI_FORCE_FULL_FP_PRECISION
-#		define ANKI_RP
-#	else
-#		define ANKI_RP mediump
-#	endif
-
-#	define ANKI_FP highp
-
-precision highp int;
-precision highp float;
-
-#	define ANKI_BINDLESS_SET(s) \
-		layout(set = s, binding = 0) uniform utexture2D u_bindlessTextures2dU32[kMaxBindlessTextures]; \
-		layout(set = s, binding = 0) uniform itexture2D u_bindlessTextures2dI32[kMaxBindlessTextures]; \
-		layout(set = s, binding = 0) uniform texture2D u_bindlessTextures2dF32[kMaxBindlessTextures]; \
-		layout(set = s, binding = 0) uniform texture2DArray u_bindlessTextures2dArrayF32[kMaxBindlessTextures]; \
-		layout(set = s, binding = 1) uniform textureBuffer u_bindlessTextureBuffers[kMaxBindlessReadonlyTextureBuffers];
-
-Vec2 pow(Vec2 a, F32 b)
-{
-	return pow(a, Vec2(b));
-}
-
-Vec3 pow(Vec3 a, F32 b)
-{
-	return pow(a, Vec3(b));
-}
-
-Vec4 pow(Vec4 a, F32 b)
-{
-	return pow(a, Vec4(b));
-}
-
-Bool all(Bool b)
-{
-	return b;
-}
-
-Bool any(Bool b)
-{
-	return b;
-}
-
-#	define saturate(x_) clamp((x_), 0.0, 1.0)
-#	define saturateRp(x) min(x, F32(kMaxF16))
-#	define mad(a_, b_, c_) fma((a_), (b_), (c_))
-#	define frac(x) fract(x)
-#	define lerp(a, b, t) mix(a, b, t)
-#	define atan2(x, y) atan(x, y)
-
-float asfloat(uint u)
-{
-	return uintBitsToFloat(u);
-}
-
-constexpr F32 kEpsilonf = 0.000001f;
-constexpr F16 kEpsilonhf = 0.0001hf; // Divisions by this should be OK according to http://weitz.de/ieee/
-constexpr ANKI_RP F32 kEpsilonRp = F32(kEpsilonhf);
-
-constexpr U32 kMaxU32 = 0xFFFFFFFFu;
-constexpr F32 kMaxF32 = 3.402823e+38;
-constexpr F16 kMaxF16 = 65504.0hf;
-constexpr F16 kMinF16 = 0.00006104hf;
-
-constexpr F32 kPi = 3.14159265358979323846f;
 #endif
 #endif
 
 
 //! == Common ==========================================================================================================
 //! == Common ==========================================================================================================

+ 1 - 1
AnKi/Shaders/IrradianceDice.ankiprog

@@ -129,7 +129,7 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 
 
 		// Read the gbuffer
 		// Read the gbuffer
 		const Vec3 gbufferUv = getCubemapDirection(faceUv, f);
 		const Vec3 gbufferUv = getCubemapDirection(faceUv, f);
-		GbufferInfo gbuffer = (GbufferInfo)0;
+		GbufferInfo<F32> gbuffer = (GbufferInfo<F32>)0;
 		unpackGBufferNoVelocity(g_gbufferTex[0u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
 		unpackGBufferNoVelocity(g_gbufferTex[0u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
 								g_gbufferTex[1u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
 								g_gbufferTex[1u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0),
 								g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0), gbuffer);
 								g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0), gbuffer);

+ 27 - 23
AnKi/Shaders/LightFunctions.hlsl

@@ -32,28 +32,30 @@ Vec3 F_Unreal(Vec3 specular, F32 VoH)
 
 
 // Fresnel Schlick: "An Inexpensive BRDF Model for Physically-Based Rendering"
 // Fresnel Schlick: "An Inexpensive BRDF Model for Physically-Based Rendering"
 // It has lower VGRPs than F_Unreal
 // It has lower VGRPs than F_Unreal
-RVec3 F_Schlick(RVec3 f0, RF32 VoH)
+template<typename T> // T is the scalar type; the removed version was fixed to RF32/RVec3
+vector<T, 3> F_Schlick(vector<T, 3> f0, T VoH) // Returns f + f0 * (1 - f), where f = (1 - VoH)^5
 {
 {
-	const RF32 f = pow(1.0 - VoH, 5.0);
-	return f + f0 * (1.0 - f);
+	const T f = pow(max(T(0), T(1) - VoH), T(5.0)); // max() keeps the pow base at or above 0; the removed line passed 1.0 - VoH unclamped
+	return f + f0 * (T(1) - f); // Scalar f is combined with every component of f0
 }
 }
 
 
 // D(n,h) aka NDF: GGX Trowbridge-Reitz
 // D(n,h) aka NDF: GGX Trowbridge-Reitz
-RF32 D_GGX(RF32 roughness, RF32 NoH, RVec3 h, RVec3 worldNormal)
+template<typename T>
+T D_GGX(T roughness, T NoH, vector<T, 3> h, vector<T, 3> worldNormal)
 {
 {
 #if 0 && ANKI_PLATFORM_MOBILE
 #if 0 && ANKI_PLATFORM_MOBILE
-	const RVec3 NxH = cross(worldNormal, h);
-	const RF32 oneMinusNoHSquared = dot(NxH, NxH);
+	const vector<T, 3> NxH = cross(worldNormal, h);
+	const T oneMinusNoHSquared = dot(NxH, NxH);
 #else
 #else
-	const RF32 oneMinusNoHSquared = 1.0 - NoH * NoH;
+	const T oneMinusNoHSquared = T(1) - NoH * NoH;
 	ANKI_MAYBE_UNUSED(h);
 	ANKI_MAYBE_UNUSED(h);
 	ANKI_MAYBE_UNUSED(worldNormal);
 	ANKI_MAYBE_UNUSED(worldNormal);
 #endif
 #endif
 
 
-	const RF32 a = roughness * roughness;
-	const RF32 v = NoH * a;
-	const RF32 k = a / (oneMinusNoHSquared + v * v);
-	const RF32 d = k * k * (1.0 / kPi);
+	const T a = roughness * roughness;
+	const T v = NoH * a;
+	const T k = a / (oneMinusNoHSquared + v * v);
+	const T d = k * k * T(1.0 / kPi);
 	return saturate(d);
 	return saturate(d);
 }
 }
 
 
@@ -67,10 +69,11 @@ RF32 V_Schlick(RF32 roughness, RF32 NoV, RF32 NoL)
 }
 }
 
 
 // Visibility term: Hammon 2017, "PBR Diffuse Lighting for GGX+Smith Microsurfaces"
 // Visibility term: Hammon 2017, "PBR Diffuse Lighting for GGX+Smith Microsurfaces"
-RF32 V_SmithGGXCorrelatedFast(RF32 roughness, RF32 NoV, RF32 NoL)
+template<typename T>
+T V_SmithGGXCorrelatedFast(T roughness, T NoV, T NoL)
 {
 {
-	const RF32 a = roughness * roughness;
-	const RF32 v = 0.5 / lerp(2.0 * NoL * NoV, NoL + NoV, a);
+	const T a = roughness * roughness;
+	const T v = T(0.5) / lerp(T(2) * NoL * NoV, NoL + NoV, a);
 	return saturate(v);
 	return saturate(v);
 }
 }
 
 
@@ -85,23 +88,24 @@ RVec3 diffuseLobe(RVec3 diffuse)
 }
 }
 
 
 // Performs BRDF specular lighting
 // Performs BRDF specular lighting
-RVec3 specularIsotropicLobe(GbufferInfo gbuffer, Vec3 viewDir, Vec3 frag2Light)
+template<typename T>
+vector<T, 3> specularIsotropicLobe(vector<T, 3> normal, vector<T, 3> f0, T roughness, vector<T, 3> viewDir, vector<T, 3> frag2Light)
 {
 {
-	const RVec3 H = normalize(frag2Light + viewDir);
+	const vector<T, 3> H = normalize(frag2Light + viewDir);
 
 
-	const RF32 NoL = max(0.0, dot(gbuffer.m_normal, frag2Light));
-	const RF32 VoH = max(0.0, dot(viewDir, H));
-	const RF32 NoH = max(0.0, dot(gbuffer.m_normal, H));
-	const RF32 NoV = max(0.05, dot(gbuffer.m_normal, viewDir));
+	const T NoL = max(0.0, dot(normal, frag2Light));
+	const T VoH = max(0.0, dot(viewDir, H));
+	const T NoH = max(0.0, dot(normal, H));
+	const T NoV = max(0.05, dot(normal, viewDir));
 
 
 	// F
 	// F
-	const RVec3 F = F_Schlick(gbuffer.m_f0, VoH);
+	const vector<T, 3> F = F_Schlick(f0, VoH);
 
 
 	// D
 	// D
-	const RF32 D = D_GGX(gbuffer.m_roughness, NoH, H, gbuffer.m_normal);
+	const T D = D_GGX(roughness, NoH, H, normal);
 
 
 	// Vis
 	// Vis
-	const RF32 V = V_SmithGGXCorrelatedFast(gbuffer.m_roughness, NoV, NoL);
+	const T V = V_SmithGGXCorrelatedFast(roughness, NoV, NoL);
 
 
 	return F * (V * D);
 	return F * (V * D);
 }
 }

+ 6 - 6
AnKi/Shaders/LightShading.ankiprog

@@ -36,7 +36,7 @@ Texture2D<RVec4> g_integrationLut : register(t12);
 #	define LIGHTING_COMMON_BRDF() \
 #	define LIGHTING_COMMON_BRDF() \
 		const RVec3 frag2Light = light.m_position - worldPos; \
 		const RVec3 frag2Light = light.m_position - worldPos; \
 		const RVec3 l = normalize(frag2Light); \
 		const RVec3 l = normalize(frag2Light); \
-		const RVec3 specC = specularIsotropicLobe(gbuffer, viewDir, l); \
+		const RVec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l); \
 		const RVec3 diffC = diffuseLobe(gbuffer.m_diffuse); \
 		const RVec3 diffC = diffuseLobe(gbuffer.m_diffuse); \
 		const RF32 att = computeAttenuationFactor(light.m_radius, frag2Light); \
 		const RF32 att = computeAttenuationFactor(light.m_radius, frag2Light); \
 		RF32 lambert = max(0.0, dot(gbuffer.m_normal, l));
 		RF32 lambert = max(0.0, dot(gbuffer.m_normal, l));
@@ -64,10 +64,10 @@ RVec4 main(VertOut input) : SV_TARGET0
 	// return clusterHeatmap(cluster, 1u << (U32)GpuSceneNonRenderableObjectType::kLight, 3);
 	// return clusterHeatmap(cluster, 1u << (U32)GpuSceneNonRenderableObjectType::kLight, 3);
 
 
 	// Decode GBuffer
 	// Decode GBuffer
-	GbufferInfo gbuffer = (GbufferInfo)0;
-	unpackGBufferNoVelocity(g_gbuffer0Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0),
-							g_gbuffer1Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0),
-							g_gbuffer2Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), gbuffer);
+	GbufferInfo<RF32> gbuffer = (GbufferInfo<RF32>)0;
+	unpackGBufferNoVelocity<RF32>(g_gbuffer0Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0),
+								  g_gbuffer1Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0),
+								  g_gbuffer2Tex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0), gbuffer);
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin);
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin);
 
 
 	// Apply SSAO
 	// Apply SSAO
@@ -211,7 +211,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 		const RF32 lambert = max(gbuffer.m_subsurface, dot(l, gbuffer.m_normal));
 		const RF32 lambert = max(gbuffer.m_subsurface, dot(l, gbuffer.m_normal));
 
 
 		const RVec3 diffC = diffuseLobe(gbuffer.m_diffuse);
 		const RVec3 diffC = diffuseLobe(gbuffer.m_diffuse);
-		const RVec3 specC = specularIsotropicLobe(gbuffer, viewDir, l);
+		const RVec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l);
 
 
 		outColor += (diffC + specC) * dirLight.m_diffuseColor * (shadowFactor * lambert);
 		outColor += (diffC + specC) * dirLight.m_diffuseColor * (shadowFactor * lambert);
 	}
 	}

+ 9 - 1
AnKi/Shaders/MaterialShadersCommon.hlsl

@@ -61,6 +61,14 @@ Texture2D<Vec4> g_shadowAtlasTex : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_S
 
 
 #undef ANKI_REG
 #undef ANKI_REG
 
 
+struct GBufferPixelOut // Pixel shader output with four render targets; replaces the PixelOut structs removed from GBufferGeneric.ankiprog and GBufferGpuParticles.ankiprog
+{
+	RVec4 m_color0 : SV_TARGET0; // RVec4 here; the removed PixelOut structs declared m_color0..2 as Vec4
+	RVec4 m_color1 : SV_TARGET1;
+	RVec4 m_color2 : SV_TARGET2;
+	Vec2 m_color3 : SV_TARGET3; // Stays a two-component Vec2, as in the removed structs
+};
+
 UnpackedMeshVertex loadVertex(GpuSceneMeshLod mlod, U32 svVertexId, Bool bones)
 UnpackedMeshVertex loadVertex(GpuSceneMeshLod mlod, U32 svVertexId, Bool bones)
 {
 {
 	UnpackedMeshVertex v;
 	UnpackedMeshVertex v;
@@ -99,6 +107,6 @@ UnpackedMeshVertex loadVertex(MeshletGeometryDescriptor meshlet, U32 vertexIndex
 
 
 Bool cullBackfaceMeshlet(MeshletBoundingVolume meshlet, Mat3x4 worldTransform, Vec3 cameraWorldPos)
 Bool cullBackfaceMeshlet(MeshletBoundingVolume meshlet, Mat3x4 worldTransform, Vec3 cameraWorldPos)
 {
 {
-	const Vec4 coneDirAndAng = unpackSnorm4x8(meshlet.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
+	const Vec4 coneDirAndAng = unpackSnorm4x8<F32>(meshlet.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
 	return cullBackfaceMeshlet(coneDirAndAng.xyz, coneDirAndAng.w, meshlet.m_coneApex, worldTransform, cameraWorldPos);
 	return cullBackfaceMeshlet(coneDirAndAng.xyz, coneDirAndAng.w, meshlet.m_coneApex, worldTransform, cameraWorldPos);
 }
 }

+ 97 - 79
AnKi/Shaders/PackFunctions.hlsl

@@ -8,54 +8,58 @@
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 #include <AnKi/Shaders/TonemappingFunctions.hlsl>
 
 
-constexpr RF32 kMinRoughness = 0.05;
+constexpr F32 kMinRoughness = 0.05;
 
 
 /// Pack 3D normal to 2D vector
 /// Pack 3D normal to 2D vector
 /// See the clean code in comments in revision < r467
 /// See the clean code in comments in revision < r467
-Vec2 packNormal(const Vec3 normal)
+template<typename T>
+vector<T, 2> packNormal(vector<T, 3> normal)
 {
 {
-	const F32 scale = 1.7777;
-	const F32 scalar1 = (normal.z + 1.0) * (scale * 2.0);
-	return normal.xy / scalar1 + 0.5;
+	const T scale = 1.7777;
+	const T scalar1 = (normal.z + T(1)) * (scale * T(2));
+	return normal.xy / scalar1 + T(0.5);
 }
 }
 
 
 /// Reverse the packNormal
 /// Reverse the packNormal
-Vec3 unpackNormal(const Vec2 enc)
+template<typename T>
+vector<T, 3> unpackNormal(const vector<T, 2> enc)
 {
 {
-	const F32 scale = 1.7777;
-	const Vec2 nn = enc * (2.0 * scale) - scale;
-	const F32 g = 2.0 / (dot(nn.xy, nn.xy) + 1.0);
-	Vec3 normal;
+	const T scale = 1.7777;
+	const vector<T, 2> nn = enc * (T(2) * scale) - scale;
+	const T g = T(2) / (dot(nn.xy, nn.xy) + T(1));
+	vector<T, 3> normal;
 	normal.xy = g * nn.xy;
 	normal.xy = g * nn.xy;
-	normal.z = g - 1.0;
+	normal.z = g - T(1);
 	return normalize(normal);
 	return normalize(normal);
 }
 }
 
 
 // See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
 // See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
 // Result in [0.0, 1.0]
 // Result in [0.0, 1.0]
-Vec3 signedOctEncode(Vec3 n)
+template<typename T>
+vector<T, 3> signedOctEncode(vector<T, 3> n)
 {
 {
-	Vec3 outn;
+	vector<T, 3> outn;
 
 
-	const Vec3 nabs = abs(n);
+	const vector<T, 3> nabs = abs(n);
 	n /= nabs.x + nabs.y + nabs.z;
 	n /= nabs.x + nabs.y + nabs.z;
 
 
-	outn.y = n.y * 0.5 + 0.5;
-	outn.x = n.x * 0.5 + outn.y;
-	outn.y = n.x * -0.5 + outn.y;
+	outn.y = n.y * T(0.5) + T(0.5);
+	outn.x = n.x * T(0.5) + outn.y;
+	outn.y = n.x * -T(0.5) + outn.y;
 
 
-	outn.z = saturate(n.z * kMaxF32);
+	outn.z = saturate(n.z * getMaxNumericLimit<T>());
 	return outn;
 	return outn;
 }
 }
 
 
 // See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
 // See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
-Vec3 signedOctDecode(const Vec3 n)
+template<typename T>
+vector<T, 3> signedOctDecode(vector<T, 3> n)
 {
 {
-	Vec3 outn;
+	vector<T, 3> outn;
 
 
 	outn.x = n.x - n.y;
 	outn.x = n.x - n.y;
-	outn.y = n.x + n.y - 1.0;
-	outn.z = n.z * 2.0 - 1.0;
+	outn.y = n.x + n.y - T(1);
+	outn.z = n.z * T(2) - T(1);
 	outn.z = outn.z * (1.0 - abs(outn.x) - abs(outn.y));
 	outn.z = outn.z * (1.0 - abs(outn.x) - abs(outn.y));
 
 
 	outn = normalize(outn);
 	outn = normalize(outn);
@@ -63,146 +67,160 @@ Vec3 signedOctDecode(const Vec3 n)
 }
 }
 
 
 // Vectorized version. Assumes that v is in [0.0, 1.0]
 // Vectorized version. Assumes that v is in [0.0, 1.0]
-U32 newPackUnorm4x8(const Vec4 v)
+template<typename T>
+U32 newPackUnorm4x8(const vector<T, 4> v)
 {
 {
-	Vec4 a = v * 255.0;
-	UVec4 b = UVec4(a) << UVec4(0u, 8u, 16u, 24u);
-	UVec2 c = b.xy | b.zw;
+	const vector<T, 4> a = v * 255.0;
+	const UVec4 b = UVec4(a) << UVec4(0u, 8u, 16u, 24u);
+	const UVec2 c = b.xy | b.zw;
 	return c.x | c.y;
 	return c.x | c.y;
 }
 }
 
 
 // Vectorized version
 // Vectorized version
-Vec4 newUnpackUnorm4x8(const U32 u)
+template<typename T>
+vector<T, 4> newUnpackUnorm4x8(const U32 u)
 {
 {
 	const UVec4 a = ((UVec4)u) >> UVec4(0u, 8u, 16u, 24u);
 	const UVec4 a = ((UVec4)u) >> UVec4(0u, 8u, 16u, 24u);
 	const UVec4 b = a & ((UVec4)0xFFu);
 	const UVec4 b = a & ((UVec4)0xFFu);
 	const Vec4 c = Vec4(b);
 	const Vec4 c = Vec4(b);
-	return c * (1.0 / 255.0);
+	return c * T(1.0 / 255.0);
 }
 }
 
 
-U32 packSnorm4x8(Vec4 value)
+template<typename T>
+U32 packSnorm4x8(vector<T, 4> value)
 {
 {
-	const IVec4 packed = IVec4(round(clamp(value, -1.0f, 1.0f) * 127.0f)) & 0xFFu;
+	const IVec4 packed = IVec4(round(clamp(value, T(-1), T(1)) * T(127))) & 0xFFu;
 	return U32(packed.x | (packed.y << 8) | (packed.z << 16) | (packed.w << 24));
 	return U32(packed.x | (packed.y << 8) | (packed.z << 16) | (packed.w << 24));
 }
 }
 
 
-Vec4 unpackSnorm4x8(U32 value)
+template<typename T>
+vector<T, 4> unpackSnorm4x8(U32 value)
 {
 {
 	const I32 signedValue = (I32)value;
 	const I32 signedValue = (I32)value;
 	const IVec4 packed = IVec4(signedValue << 24, signedValue << 16, signedValue << 8, signedValue) >> 24;
 	const IVec4 packed = IVec4(signedValue << 24, signedValue << 16, signedValue << 8, signedValue) >> 24;
-	return clamp(Vec4(packed) / 127.0f, -1.0f, 1.0f);
+	return clamp(vector<T, 4>(packed) / T(127), T(-1), T(1));
 }
 }
 
 
 // Convert from RGB to YCbCr.
 // Convert from RGB to YCbCr.
 // The RGB should be in [0, 1] and the output YCbCr will be in [0, 1] as well.
 // The RGB should be in [0, 1] and the output YCbCr will be in [0, 1] as well.
-Vec3 rgbToYCbCr(const Vec3 rgb)
+template<typename T>
+vector<T, 3> rgbToYCbCr(const vector<T, 3> rgb)
 {
 {
-	const F32 y = dot(rgb, Vec3(0.299, 0.587, 0.114));
-	const F32 cb = 0.5 + dot(rgb, Vec3(-0.168736, -0.331264, 0.5));
-	const F32 cr = 0.5 + dot(rgb, Vec3(0.5, -0.418688, -0.081312));
-	return Vec3(y, cb, cr);
+	const T y = dot(rgb, vector<T, 3>(0.299, 0.587, 0.114));
+	const T cb = T(0.5) + dot(rgb, vector<T, 3>(-0.168736, -0.331264, 0.5));
+	const T cr = T(0.5) + dot(rgb, vector<T, 3>(0.5, -0.418688, -0.081312));
+	return vector<T, 3>(y, cb, cr);
 }
 }
 
 
 // Convert the output of rgbToYCbCr back to RGB.
 // Convert the output of rgbToYCbCr back to RGB.
-Vec3 yCbCrToRgb(const Vec3 ycbcr)
+template<typename T>
+vector<T, 3> yCbCrToRgb(const vector<T, 3> ycbcr)
 {
 {
-	const F32 cb = ycbcr.y - 0.5;
-	const F32 cr = ycbcr.z - 0.5;
-	const F32 y = ycbcr.x;
-	const F32 r = 1.402 * cr;
-	const F32 g = -0.344 * cb - 0.714 * cr;
-	const F32 b = 1.772 * cb;
-	return Vec3(r, g, b) + y;
+	const T cb = ycbcr.y - T(0.5);
+	const T cr = ycbcr.z - T(0.5);
+	const T y = ycbcr.x;
+	const T r = T(1.402) * cr;
+	const T g = T(-0.344) * cb - T(0.714) * cr;
+	const T b = T(1.772) * cb;
+	return vector<T, 3>(r, g, b) + y;
 }
 }
 
 
 // Pack a Vec2 to a single F32.
 // Pack a Vec2 to a single F32.
 // comp should be in [0, 1] and the output will be in [0, 1].
 // comp should be in [0, 1] and the output will be in [0, 1].
-F32 packUnorm2ToUnorm1(const Vec2 comp)
+template<typename T>
+T packUnorm2ToUnorm1(const vector<T, 2> comp)
 {
 {
-	return dot(round(comp * 15.0), Vec2(1.0 / (255.0 / 16.0), 1.0 / 255.0));
+	return dot(round(comp * T(15)), Vec2(T(1) / T(255.0 / 16.0), T(1.0 / 255.0)));
 }
 }
 
 
 // Unpack a single F32 to Vec2. Does the opposite of packUnorm2ToUnorm1.
 // Unpack a single F32 to Vec2. Does the opposite of packUnorm2ToUnorm1.
-Vec2 unpackUnorm1ToUnorm2(F32 c)
+template<typename T>
+vector<T, 2> unpackUnorm1ToUnorm2(T c)
 {
 {
 #if 1
 #if 1
-	const F32 temp = c * (255.0 / 16.0);
-	const F32 a = floor(temp);
-	const F32 b = temp - a; // b = fract(temp)
-	return Vec2(a, b) * Vec2(1.0 / 15.0, 16.0 / 15.0);
+	const T temp = c * T(255.0 / 16.0);
+	const T a = floor(temp);
+	const T b = temp - a; // b = fract(temp)
+	return vector<T, 2>(a, b) * vector<T, 2>(1.0 / 15.0, 16.0 / 15.0);
 #else
 #else
 	const U32 temp = U32(c * 255.0);
 	const U32 temp = U32(c * 255.0);
 	const U32 a = temp >> 4;
 	const U32 a = temp >> 4;
 	const U32 b = temp & 0xF;
 	const U32 b = temp & 0xF;
-	return Vec2(a, b) / 15.0;
+	return vector<T, 2>(a, b) / T(15);
 #endif
 #endif
 }
 }
 
 
 // G-Buffer structure
 // G-Buffer structure
+template<typename T>
 struct GbufferInfo
 struct GbufferInfo
 {
 {
-	RVec3 m_diffuse;
-	RVec3 m_f0; ///< Fresnel at zero angles.
-	RVec3 m_normal;
-	RF32 m_roughness;
-	RF32 m_metallic;
-	RF32 m_subsurface;
-	RVec3 m_emission;
+	vector<T, 3> m_diffuse;
+	vector<T, 3> m_f0; ///< Fresnel at zero angles.
+	vector<T, 3> m_normal;
+	vector<T, 3> m_emission;
+	T m_roughness;
+	T m_metallic;
+	T m_subsurface;
 	Vec2 m_velocity;
 	Vec2 m_velocity;
 };
 };
 
 
 // Populate the G buffer
 // Populate the G buffer
-void packGBuffer(GbufferInfo g, out Vec4 rt0, out Vec4 rt1, out Vec4 rt2, out Vec2 rt3)
+template<typename T>
+void packGBuffer(GbufferInfo<T> g, out vector<T, 4> rt0, out vector<T, 4> rt1, out vector<T, 4> rt2, out Vec2 rt3)
 {
 {
-	const F32 packedSubsurfaceMetallic = packUnorm2ToUnorm1(Vec2(g.m_subsurface, g.m_metallic));
+	const T packedSubsurfaceMetallic = packUnorm2ToUnorm1(vector<T, 2>(g.m_subsurface, g.m_metallic));
 
 
-	const Vec3 tonemappedEmission = reinhardTonemap(g.m_emission);
+	const vector<T, 3> tonemappedEmission = reinhardTonemap(g.m_emission);
 
 
-	rt0 = Vec4(g.m_diffuse, packedSubsurfaceMetallic);
-	rt1 = Vec4(g.m_roughness, g.m_f0.x, tonemappedEmission.rb);
+	rt0 = vector<T, 4>(g.m_diffuse, packedSubsurfaceMetallic);
+	rt1 = vector<T, 4>(g.m_roughness, g.m_f0.x, tonemappedEmission.rb);
 
 
-	const Vec3 encNorm = signedOctEncode(g.m_normal);
-	rt2 = Vec4(tonemappedEmission.g, encNorm);
+	const vector<T, 3> encNorm = signedOctEncode(g.m_normal);
+	rt2 = vector<T, 4>(tonemappedEmission.g, encNorm);
 
 
 	rt3 = g.m_velocity;
 	rt3 = g.m_velocity;
 }
 }
 
 
-RVec3 unpackDiffuseFromGBuffer(RVec4 rt0, RF32 metallic)
+template<typename T>
+vector<T, 3> unpackDiffuseFromGBuffer(vector<T, 4> rt0, T metallic)
 {
 {
-	return rt0.xyz *= 1.0 - metallic;
+	return rt0.xyz *= T(1) - metallic;
 }
 }
 
 
-Vec3 unpackNormalFromGBuffer(Vec4 rt2)
+template<typename T>
+vector<T, 3> unpackNormalFromGBuffer(vector<T, 4> rt2)
 {
 {
 	return signedOctDecode(rt2.yzw);
 	return signedOctDecode(rt2.yzw);
 }
 }
 
 
-RF32 unpackRoughnessFromGBuffer(RVec4 rt1)
+template<typename T>
+T unpackRoughnessFromGBuffer(vector<T, 4> rt1)
 {
 {
-	RF32 r = rt1.x;
-	r = r * (1.0 - kMinRoughness) + kMinRoughness;
+	T r = rt1.x;
+	r = r * (T(1) - T(kMinRoughness)) + T(kMinRoughness);
 	return r;
 	return r;
 }
 }
 
 
 // Read part of the G-buffer
 // Read part of the G-buffer
-void unpackGBufferNoVelocity(Vec4 rt0, Vec4 rt1, Vec4 rt2, out GbufferInfo g)
+template<typename T>
+void unpackGBufferNoVelocity(vector<T, 4> rt0, vector<T, 4> rt1, vector<T, 4> rt2, out GbufferInfo<T> g)
 {
 {
 	g.m_diffuse = rt0.xyz;
 	g.m_diffuse = rt0.xyz;
-	const Vec2 unpackedSubsurfaceMetallic = unpackUnorm1ToUnorm2(rt0.w);
+	const vector<T, 2> unpackedSubsurfaceMetallic = unpackUnorm1ToUnorm2(rt0.w);
 	g.m_subsurface = unpackedSubsurfaceMetallic.x;
 	g.m_subsurface = unpackedSubsurfaceMetallic.x;
 	g.m_metallic = unpackedSubsurfaceMetallic.y;
 	g.m_metallic = unpackedSubsurfaceMetallic.y;
 
 
 	g.m_roughness = unpackRoughnessFromGBuffer(rt1);
 	g.m_roughness = unpackRoughnessFromGBuffer(rt1);
-	g.m_f0 = Vec3(rt1.y, rt1.y, rt1.y);
-	g.m_emission = invertReinhardTonemap(Vec3(rt1.z, rt2.x, rt1.w));
+	g.m_f0 = vector<T, 3>(rt1.y, rt1.y, rt1.y);
+	g.m_emission = invertReinhardTonemap(vector<T, 3>(rt1.z, rt2.x, rt1.w));
 
 
 	g.m_normal = signedOctDecode(rt2.yzw);
 	g.m_normal = signedOctDecode(rt2.yzw);
 
 
-	g.m_velocity = Vec2(kMaxF32, kMaxF32); // Put something random
+	g.m_velocity = getMaxNumericLimit<T>(); // Put something random
 
 
 	// Compute reflectance
 	// Compute reflectance
 	g.m_f0 = lerp(g.m_f0, g.m_diffuse, g.m_metallic);
 	g.m_f0 = lerp(g.m_f0, g.m_diffuse, g.m_metallic);
 
 
 	// Compute diffuse
 	// Compute diffuse
-	g.m_diffuse *= 1.0 - g.m_metallic;
+	g.m_diffuse *= T(1) - g.m_metallic;
 }
 }

+ 14 - 14
AnKi/Shaders/TonemappingFunctions.hlsl

@@ -11,19 +11,19 @@
 template<typename T>
 template<typename T>
 T log10(T x)
 T log10(T x)
 {
 {
-	return log(x) / log((T)10.0);
+	return log(x) / log(T(10));
 }
 }
 
 
 template<typename T>
 template<typename T>
 vector<T, 3> computeLuminance(vector<T, 3> color)
 vector<T, 3> computeLuminance(vector<T, 3> color)
 {
 {
-	return max(dot(vector<T, 3>(0.30, 0.59, 0.11), color), T(kEpsilonRF32));
+	return max(dot(vector<T, 3>(0.30, 0.59, 0.11), color), getEpsilon<T>());
 }
 }
 
 
 template<typename T>
 template<typename T>
 T computeExposure(T avgLum, T threshold)
 T computeExposure(T avgLum, T threshold)
 {
 {
-	const T keyValue = T(1.03) - (T(2.0) / (T(2.0) + log10(avgLum + T(1.0))));
+	const T keyValue = T(1.03) - (T(2) / (T(2) + log10(avgLum + T(1))));
 	const T linearExposure = (keyValue / avgLum);
 	const T linearExposure = (keyValue / avgLum);
 	T exposure = log2(linearExposure);
 	T exposure = log2(linearExposure);
 
 
@@ -41,12 +41,12 @@ vector<T, 3> computeExposedColor(vector<T, 3> color, vector<T, 3> avgLum, vector
 template<typename T>
 template<typename T>
 vector<T, 3> tonemapUncharted2(vector<T, 3> color)
 vector<T, 3> tonemapUncharted2(vector<T, 3> color)
 {
 {
-	const T A = 0.15;
-	const T B = 0.50;
-	const T C = 0.10;
-	const T D = 0.20;
-	const T E = 0.02;
-	const T F = 0.30;
+	constexpr T A = 0.15;
+	constexpr T B = 0.50;
+	constexpr T C = 0.10;
+	constexpr T D = 0.20;
+	constexpr T E = 0.02;
+	constexpr T F = 0.30;
 
 
 	return ((color * (A * color + C * B) + D * E) / (color * (A * color + B) + D * F)) - E / F;
 	return ((color * (A * color + C * B) + D * E) / (color * (A * color + B) + D * F)) - E / F;
 }
 }
@@ -75,8 +75,8 @@ vector<T, 3> invertTonemapACESFilm(vector<T, 3> x)
 	constexpr T kAcesE = 0.14;
 	constexpr T kAcesE = 0.14;
 
 
 	vector<T, 3> res = kAcesD * x - kAcesB;
 	vector<T, 3> res = kAcesD * x - kAcesB;
-	res += sqrt(x * x * (kAcesD * kAcesD - T(4.0) * kAcesE * kAcesC) + x * (T(4.0) * kAcesE * kAcesA - T(2.0) * kAcesB * kAcesD) + kAcesB * kAcesB);
-	res /= T(2.0) * kAcesA - T(2.0) * kAcesC * x;
+	res += sqrt(x * x * (kAcesD * kAcesD - T(4) * kAcesE * kAcesC) + x * (T(4) * kAcesE * kAcesA - T(2) * kAcesB * kAcesD) + kAcesB * kAcesB);
+	res /= T(2) * kAcesA - T(2) * kAcesC * x;
 
 
 	return res;
 	return res;
 }
 }
@@ -92,7 +92,7 @@ template<typename T>
 vector<T, 3> invertTonemap(vector<T, 3> color, T exposure)
 vector<T, 3> invertTonemap(vector<T, 3> color, T exposure)
 {
 {
 	color = invertTonemapACESFilm(color);
 	color = invertTonemapACESFilm(color);
-	color /= max(T(kEpsilonRF32), exposure);
+	color /= max(getEpsilon<T>(), exposure);
 	return color;
 	return color;
 }
 }
 
 
@@ -108,12 +108,12 @@ template<typename T>
 vector<T, 3> reinhardTonemap(vector<T, 3> colour)
 vector<T, 3> reinhardTonemap(vector<T, 3> colour)
 {
 {
 	// rgb / (1 + max(rgb))
 	// rgb / (1 + max(rgb))
-	return colour / (T(1.0) + max(max(colour.r, colour.g), colour.b));
+	return colour / (T(1) + max(max(colour.r, colour.g), colour.b));
 }
 }
 
 
 template<typename T>
 template<typename T>
 vector<T, 3> invertReinhardTonemap(vector<T, 3> colour)
 vector<T, 3> invertReinhardTonemap(vector<T, 3> colour)
 {
 {
 	// rgb / (1 - max(rgb))
 	// rgb / (1 - max(rgb))
-	return colour / max(T(1.0 / 32768.0), T(1.0) - max(max(colour.r, colour.g), colour.b));
+	return colour / max(T(1.0 / 32768.0), T(1) - max(max(colour.r, colour.g), colour.b));
 }
 }

+ 3 - 3
AnKi/Shaders/TraditionalDeferredShading.ankiprog

@@ -44,7 +44,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 	}
 	}
 
 
 	// Decode and process gbuffer
 	// Decode and process gbuffer
-	GbufferInfo gbuffer = (GbufferInfo)0;
+	GbufferInfo<F32> gbuffer = (GbufferInfo<F32>)0;
 	unpackGBufferNoVelocity(g_gbufferTex0.SampleLevel(g_gbufferSampler, uv, 0.0), g_gbufferTex1.SampleLevel(g_gbufferSampler, uv, 0.0),
 	unpackGBufferNoVelocity(g_gbufferTex0.SampleLevel(g_gbufferSampler, uv, 0.0), g_gbufferTex1.SampleLevel(g_gbufferSampler, uv, 0.0),
 							g_gbufferTex2.SampleLevel(g_gbufferSampler, uv, 0.0), gbuffer);
 							g_gbufferTex2.SampleLevel(g_gbufferSampler, uv, 0.0), gbuffer);
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin * 8.0);
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, kSubsurfaceMin * 8.0);
@@ -80,7 +80,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 		const RF32 factor = shadowFactor * max(gbuffer.m_subsurface, lambert);
 		const RF32 factor = shadowFactor * max(gbuffer.m_subsurface, lambert);
 
 
 #	if SPECULAR == 1
 #	if SPECULAR == 1
-		const Vec3 specC = specularIsotropicLobe(gbuffer, viewDir, l);
+		const Vec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l);
 #	else
 #	else
 		const Vec3 specC = Vec3(0.0, 0.0, 0.0);
 		const Vec3 specC = Vec3(0.0, 0.0, 0.0);
 #	endif
 #	endif
@@ -105,7 +105,7 @@ Vec4 main(VertOut input) : SV_TARGET0
 		const F32 factor = att * spot * max(lambert, gbuffer.m_subsurface);
 		const F32 factor = att * spot * max(lambert, gbuffer.m_subsurface);
 
 
 #	if SPECULAR == 1
 #	if SPECULAR == 1
-		const Vec3 specC = specularIsotropicLobe(gbuffer, viewDir, l);
+		const Vec3 specC = specularIsotropicLobe(gbuffer.m_normal, gbuffer.m_f0, gbuffer.m_roughness, viewDir, l);
 #	else
 #	else
 		const Vec3 specC = Vec3(0.0, 0.0, 0.0);
 		const Vec3 specC = Vec3(0.0, 0.0, 0.0);
 #	endif
 #	endif

+ 25 - 3
Tools/Shader/ShaderProgramBinaryDumpMain.cpp

@@ -7,8 +7,10 @@
 #include <AnKi/ShaderCompiler/ShaderDump.h>
 #include <AnKi/ShaderCompiler/ShaderDump.h>
 #include <AnKi/ShaderCompiler/MaliOfflineCompiler.h>
 #include <AnKi/ShaderCompiler/MaliOfflineCompiler.h>
 #include <AnKi/ShaderCompiler/RadeonGpuAnalyzer.h>
 #include <AnKi/ShaderCompiler/RadeonGpuAnalyzer.h>
+#include <AnKi/ShaderCompiler/Dxc.h>
 #include <AnKi/Util/ThreadHive.h>
 #include <AnKi/Util/ThreadHive.h>
 #include <AnKi/Util/System.h>
 #include <AnKi/Util/System.h>
+#include <ThirdParty/SpirvCross/spirv.hpp>
 
 
 using namespace anki;
 using namespace anki;
 
 
@@ -171,13 +173,33 @@ Error dumpStats(const ShaderBinary& bin)
 
 
 					const ShaderBinaryCodeBlock& codeBlock = ctx.m_bin->m_codeBlocks[codeblockIdx];
 					const ShaderBinaryCodeBlock& codeBlock = ctx.m_bin->m_codeBlocks[codeblockIdx];
 
 
+					// Rewrite spir-v because of the decorations we ask DXC to put
+					Bool bRequiresMeshShaders = false;
+					DynamicArray<U8> newSpirv;
+					newSpirv.resize(codeBlock.m_binary.getSize());
+					memcpy(newSpirv.getBegin(), codeBlock.m_binary.getBegin(), codeBlock.m_binary.getSizeInBytes());
+					visitSpirv(WeakArray<U32>(reinterpret_cast<U32*>(newSpirv.getBegin()), newSpirv.getSizeInBytes() / sizeof(U32)),
+							   [&](U32 cmd, WeakArray<U32> instructions) {
+								   if(cmd == spv::OpDecorate && instructions[1] == spv::DecorationDescriptorSet
+									  && instructions[2] == kDxcVkBindlessRegisterSpace)
+								   {
+									   // Bindless set, rewrite its set
+									   instructions[2] = kMaxRegisterSpaces;
+								   }
+								   else if(cmd == spv::OpCapability && instructions[0] == spv::CapabilityMeshShadingEXT)
+								   {
+									   bRequiresMeshShaders = true;
+								   }
+							   });
+
 					// Arm stats
 					// Arm stats
 					MaliOfflineCompilerOut maliocOut;
 					MaliOfflineCompilerOut maliocOut;
 					Error err = Error::kNone;
 					Error err = Error::kNone;
 
 
-					if(shaderType == ShaderType::kVertex || shaderType == ShaderType::kPixel || shaderType == ShaderType::kCompute)
+					if((shaderType == ShaderType::kVertex || shaderType == ShaderType::kPixel || shaderType == ShaderType::kCompute)
+					   && !bRequiresMeshShaders)
 					{
 					{
-						err = runMaliOfflineCompiler(codeBlock.m_binary, shaderType, maliocOut);
+						err = runMaliOfflineCompiler(newSpirv, shaderType, maliocOut);
 
 
 						if(err)
 						if(err)
 						{
 						{
@@ -190,7 +212,7 @@ Error dumpStats(const ShaderBinary& bin)
 					// AMD
 					// AMD
 					RgaOutput rgaOut = {};
 					RgaOutput rgaOut = {};
 #if 0
 #if 0
-					err = runRadeonGpuAnalyzer(codeBlock.m_binary, shaderType, rgaOut);
+					err = runRadeonGpuAnalyzer(newSpirv, shaderType, rgaOut);
 					if(err)
 					if(err)
 					{
 					{
 						ANKI_LOGE("Radeon GPU Analyzer compiler failed");
 						ANKI_LOGE("Radeon GPU Analyzer compiler failed");