1 month ago · dce6e8c3c1
--- a/AnKi/Editor/EditorUi.cpp
+++ b/AnKi/Editor/EditorUi.cpp
@@ -1084,16 +1084,6 @@ void EditorUi::consoleWindow()
 
				 
			
 
				 	if(ImGui::Begin("Console", &m_showConsoleWindow, ImGuiWindowFlags_NoCollapse))
			
 
				 	{
			
 
				-		// Log controls
			
 
				-		{
			
 
				-			if(ImGui::Button(ICON_MDI_DELETE))
			
 
				-			{
			
 
				-				state.m_log.destroy();
			
 
				-			}
			
 
				-			ImGui::SetItemTooltip("Clear log");
			
 
				-			ImGui::SameLine();
			
 
				-		}
			
 
				-
			
 
				 		// Lua input
			
 
				 		{
			
 
				 			Char consoleTxt[kMaxTextInputLen] = "";
			
@@ -1106,6 +1096,19 @@ void EditorUi::consoleWindow()
 
				 			}
			
 
				 		}
			
 
				 
			
 
				+		// Clear Log
			
 
				+		{
			
 
				+			if(ImGui::Button(ICON_MDI_DELETE))
			
 
				+			{
			
 
				+				state.m_log.destroy();
			
 
				+			}
			
 
				+			ImGui::SetItemTooltip("Clear log");
			
 
				+			ImGui::SameLine();
			
 
				+		}
			
 
				+
			
 
				+		// Search log
			
 
				+		filter(state.m_logFilter);
			
 
				+
			
 
				 		// Log
			
 
				 		{
			
 
				 			if(ImGui::BeginChild("Log", Vec2(0.0f), ImGuiChildFlags_Borders | ImGuiChildFlags_NavFlattened))
			
@@ -1118,14 +1121,17 @@ void EditorUi::consoleWindow()
 
				 
			
 
				 					for(const auto& logEntry : state.m_log)
			
 
				 					{
			
 
				-						ImGui::TableNextRow();
			
 
				-						ImGui::TableNextColumn();
			
 
				-						constexpr Array<Vec3, U(LoggerMessageType::kCount)> colors = {Vec3(0.074f, 0.631f, 0.054f), Vec3(0.074f, 0.354f, 0.631f),
			
 
				-																					  Vec3(1.0f, 0.0f, 0.0f), Vec3(0.756f, 0.611f, 0.0f),
			
 
				-																					  Vec3(1.0f, 0.0f, 0.0f)};
			
 
				-						ImGui::PushStyleColor(ImGuiCol_Text, colors[logEntry.first].xyz1());
			
 
				-						ImGui::TextUnformatted(logEntry.second.cstr());
			
 
				-						ImGui::PopStyleColor();
			
 
				+						if(state.m_logFilter.PassFilter(logEntry.second.cstr()))
			
 
				+						{
			
 
				+							ImGui::TableNextRow();
			
 
				+							ImGui::TableNextColumn();
			
 
				+							constexpr Array<Vec3, U(LoggerMessageType::kCount)> colors = {Vec3(0.074f, 0.631f, 0.054f), Vec3(0.074f, 0.354f, 0.631f),
			
 
				+																						  Vec3(1.0f, 0.0f, 0.0f), Vec3(0.756f, 0.611f, 0.0f),
			
 
				+																						  Vec3(1.0f, 0.0f, 0.0f)};
			
 
				+							ImGui::PushStyleColor(ImGuiCol_Text, colors[logEntry.first].xyz1());
			
 
				+							ImGui::TextUnformatted(logEntry.second.cstr());
			
 
				+							ImGui::PopStyleColor();
			
 
				+						}
			
 
				 					}
			
 
				 
			
 
				 					if(state.m_forceLogScrollDown)
			
--- a/AnKi/Editor/EditorUi.h
+++ b/AnKi/Editor/EditorUi.h
@@ -113,6 +113,7 @@ private:
 
				 		List<std::pair<LoggerMessageType, String>> m_log;
			
 
				 		Bool m_forceLogScrollDown = true;
			
 
				 		SpinLock m_logMtx;
			
 
				+		ImGuiTextFilter m_logFilter;
			
 
				 	} m_consoleWindow;
			
 
				 
			
 
				 	class
			
--- a/AnKi/Math/Mat.h
+++ b/AnKi/Math/Mat.h
@@ -894,6 +894,27 @@ public:
 
				 		return getColumn(3);
			
 
				 	}
			
 
				 
			
 
				+	/// Overwrite the main diagonal with the elements of @a diag. Every other element is left untouched.
				+	/// @return A reference to this matrix.
				+	TMat& setDiagonal(ColumnVec diag)
				+	{
				+		for(U32 i = 0; i < kRowCount; ++i)
				+		{
				+			(*this)(i, i) = diag[i];
				+		}
				+
				+		return *this;
				+	}
			
 
				+
			
 
				+	/// Return the main diagonal as a vector: element r of the result is the (r, r) element of this matrix.
				+	ColumnVec getDiagonal() const
				+	{
				+		// This method is const, so *this may only bind to a const reference.
				+		const TMat& m = *this;
				+		ColumnVec diag;
				+		for(U32 r = 0; r < kRowCount; ++r)
				+		{
				+			diag[r] = m(r, r);
				+		}
				+		return diag;
				+	}
			
 
				+
			
 
				 	[[nodiscard]] TMat reorthogonalize() const requires(kTRowCount == 3)
			
 
				 	{
			
 
				 		// There are 2 methods, the standard and the Gram-Schmidt method with a twist for zAxis. This uses the 2nd. For the first see < r664
			
@@ -1337,21 +1358,19 @@ public:
 
				 	}
			
 
				 
			
 
				 	/// If we suppose this matrix represents a transformation, return the inverted transformation
			
 
				-	[[nodiscard]] TMat invertTransformation() const requires(kSize == 16)
			
 
				+	[[nodiscard]] TMat invertTransformation() const requires(kSize == 16 || kSize == 12)
			
 
				 	{
			
 
				-		const TMat<T, 3, 3> invertedRot = getRotationPart().transpose();
			
 
				-		TVec<T, 3> invertedTsl = getTranslationPart().xyz();
			
 
				-		invertedTsl = -(invertedRot * invertedTsl);
			
 
				-		return TMat(invertedTsl.xyz0(), invertedRot);
			
 
				-	}
			
 
				+		const TVec<T, 3> scale = extractScale();
			
 
				+		const TVec<T, 3> invScale = T(1) / scale;
			
 
				 
			
 
				-	/// If we suppose this matrix represents a transformation, return the inverted transformation
			
 
				-	[[nodiscard]] TMat invertTransformation() const requires(kSize == 12)
			
 
				-	{
			
 
				-		const TMat<T, 3, 3> invertedRot = getRotationPart().transpose();
			
 
				-		TVec<T, 3> invertedTsl = getTranslationPart().xyz();
			
 
				-		invertedTsl = -(invertedRot * invertedTsl);
			
 
				-		return TMat(invertedTsl.xyz(), invertedRot);
			
 
				+		TMat<T, 3, 3> rot;
			
 
				+		rot.setRows(getRow(0).xyz() * invScale, getRow(1).xyz() * invScale, getRow(2).xyz() * invScale);
			
 
				+
			
 
				+		const TMat<T, 3, 3> invRot = rot.transpose();
			
 
				+
			
 
				+		const TVec<T, 3> invTsl = -(invRot * (getTranslationPart().xyz() * invScale));
			
 
				+
			
 
				+		return TMat(invTsl, invRot, invScale);
			
 
				 	}
			
 
				 
			
 
				 	/// @note 9 muls, 9 adds
			
@@ -1410,6 +1429,12 @@ public:
 
				 		return ((*this) * (T(1) - t)) + (b * t);
			
 
				 	}
			
 
				 
			
 
				+	/// Treating this matrix as a transformation, return the lengths of the xyz parts of its first three columns.
				+	[[nodiscard]] TVec<T, 3> extractScale() const
				+	{
				+		const auto scaleX = getColumn(0).xyz().length();
				+		const auto scaleY = getColumn(1).xyz().length();
				+		const auto scaleZ = getColumn(2).xyz().length();
				+		return TVec<T, 3>(scaleX, scaleY, scaleZ);
				+	}
			
 
				+
			
 
				 	static TMat getZero()
			
 
				 	{
			
 
				 		return TMat(T(0));
			
--- a/AnKi/Math/Transform.h
+++ b/AnKi/Math/Transform.h
@@ -36,13 +36,13 @@ public:
 
				 
			
 
				 	explicit TTransform(const TMat<T, 4, 4>& m4)
			
 
				 	{
			
 
				+		m_scale = m4.extractScale().xyz0();
			
 
				+
			
 
				 		const TVec<T, 3> s0 = m4.getColumn(0).xyz();
			
 
				 		const TVec<T, 3> s1 = m4.getColumn(1).xyz();
			
 
				 		const TVec<T, 3> s2 = m4.getColumn(2).xyz();
			
 
				+		m_rotation.setColumns(s0 / m_scale.x(), s1 / m_scale.y(), s2 / m_scale.z(), TVec<T, 3>(T(0)));
			
 
				 
			
 
				-		m_scale = TVec<T, 4>(s0.length(), s1.length(), s2.length(), T(0));
			
 
				-
			
 
				-		m_rotation.setColumns(s0 / m_scale.x(), s1 / m_scale.x(), s2 / m_scale.x(), TVec<T, 3>(T(0)));
			
 
				 		m_origin = m4.getTranslationPart().xyz0();
			
 
				 		check();
			
 
				 	}
			
--- a/AnKi/ShaderCompiler/ShaderCompiler.cpp
+++ b/AnKi/ShaderCompiler/ShaderCompiler.cpp
@@ -195,13 +195,13 @@ static void compileVariantAsync(const ShaderParser& parser, Bool spirv, Bool deb
 
				 				ShaderCompilerDynamicArray<U8> il;
			
 
				 				if(ctx.m_spirv)
			
 
				 				{
			
 
				-					err = compileHlslToSpirv(source, shaderType, ctx.m_parser->compileWith16bitTypes(), ctx.m_debugInfo, ctx.m_sm,
			
 
				-											 ctx.m_parser->getExtraCompilerArgs(), il, compilerErrorLog);
			
 
				+					err = compileHlslToSpirv(source, shaderType, true, ctx.m_debugInfo, ctx.m_sm, ctx.m_parser->getExtraCompilerArgs(), il,
			
 
				+											 compilerErrorLog);
			
 
				 				}
			
 
				 				else
			
 
				 				{
			
 
				-					err = compileHlslToDxil(source, shaderType, ctx.m_parser->compileWith16bitTypes(), ctx.m_debugInfo, ctx.m_sm,
			
 
				-											ctx.m_parser->getExtraCompilerArgs(), il, compilerErrorLog);
			
 
				+					err = compileHlslToDxil(source, shaderType, true, ctx.m_debugInfo, ctx.m_sm, ctx.m_parser->getExtraCompilerArgs(), il,
			
 
				+											compilerErrorLog);
			
 
				 				}
			
 
				 
			
 
				 				if(err)
			
--- a/AnKi/ShaderCompiler/ShaderParser.cpp
+++ b/AnKi/ShaderCompiler/ShaderParser.cpp
@@ -532,10 +532,6 @@ Error ShaderParser::parseLine(CString line, CString fname, Bool& foundPragmaOnce
 
				 				ANKI_CHECK(checkActiveStruct());
			
 
				 				ANKI_CHECK(parsePragmaMember(token + 1, end, line, fname));
			
 
				 			}
			
 
				-			else if(*token == "16bit")
			
 
				-			{
			
 
				-				ANKI_CHECK(parsePragma16bit(token + 1, end, line, fname));
			
 
				-			}
			
 
				 			else if(*token == "extra_compiler_args")
			
 
				 			{
			
 
				 				ANKI_CHECK(parseExtraCompilerArgs(token + 1, end, line, fname));
			
@@ -722,21 +718,6 @@ Error ShaderParser::parsePragmaStructEnd(const ShaderCompilerString* begin, cons
 
				 	return Error::kNone;
			
 
				 }
			
 
				 
			
 
				-Error ShaderParser::parsePragma16bit(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname)
			
 
				-{
			
 
				-	ANKI_ASSERT(begin && end);
			
 
				-
			
 
				-	// Check tokens
			
 
				-	if(begin != end)
			
 
				-	{
			
 
				-		ANKI_PP_ERROR_MALFORMED();
			
 
				-	}
			
 
				-
			
 
				-	m_16bitTypes = true;
			
 
				-
			
 
				-	return Error::kNone;
			
 
				-}
			
 
				-
			
 
				 Error ShaderParser::parseExtraCompilerArgs(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname)
			
 
				 {
			
 
				 	ANKI_ASSERT(begin && end);
			
@@ -904,16 +885,6 @@ void ShaderParser::generateVariant(ConstWeakArray<MutatorValue> mutation, const
 
				 	ShaderCompilerString header;
			
 
				 	generateAnkiShaderHeader(shaderType, header);
			
 
				 	source += header;
			
 
				-
			
 
				-	if(m_16bitTypes)
			
 
				-	{
			
 
				-		source += "#define ANKI_SUPPORTS_16BIT_TYPES 1\n";
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		source += "#define ANKI_SUPPORTS_16BIT_TYPES 0\n";
			
 
				-	}
			
 
				-
			
 
				 	source += m_source;
			
 
				 }
			
 
				 
			
--- a/AnKi/ShaderCompiler/ShaderParser.h
+++ b/AnKi/ShaderCompiler/ShaderParser.h
@@ -61,7 +61,6 @@ public:
 
				 /// #pragma once
			
 
				 /// #pragma anki mutator NAME VALUE0 [VALUE1 [VALUE2 ...]]
			
 
				 /// #pragma anki skip_mutation MUTATOR0 VALUE0 [MUTATOR1 VALUE1 [MUTATOR2 VALUE2 ...]]
			
 
				-/// #pragma anki 16bit // Works only in HLSL. Gain 16bit types but loose min16xxx types
			
 
				 /// #pragma anki technique [NAME] STAGE0 [STAGE1 ...] [mutators [MUTATOR0 [MUTATOR1 ...]]]
			
 
				 /// #pragma anki extra_compiler_args ARG0 [ARG1 [ARG2...]]
			
 
				 ///
			
@@ -113,11 +112,6 @@ public:
 
				 		return m_techniques;
			
 
				 	}
			
 
				 
			
 
				-	Bool compileWith16bitTypes() const
			
 
				-	{
			
 
				-		return m_16bitTypes;
			
 
				-	}
			
 
				-
			
 
				 	ConstWeakArray<CString> getExtraCompilerArgs() const
			
 
				 	{
			
 
				 		return m_extraCompilerArgsCString;
			
@@ -159,8 +153,6 @@ private:
 
				 	ShaderCompilerDynamicArray<GhostStruct> m_ghostStructs;
			
 
				 	Bool m_insideStruct = false;
			
 
				 
			
 
				-	Bool m_16bitTypes = false;
			
 
				-
			
 
				 	ShaderCompilerDynamicArray<ShaderCompilerString> m_extraCompilerArgs;
			
 
				 	ShaderCompilerDynamicArray<CString> m_extraCompilerArgsCString;
			
 
				 
			
@@ -173,7 +165,6 @@ private:
 
				 	Error parsePragmaStructBegin(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
			
 
				 	Error parsePragmaStructEnd(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
			
 
				 	Error parsePragmaMember(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
			
 
				-	Error parsePragma16bit(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
			
 
				 	Error parseExtraCompilerArgs(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
			
 
				 
			
 
				 	void tokenizeLine(CString line, ShaderCompilerDynamicArray<ShaderCompilerString>& tokens) const;
			
--- a/AnKi/Shaders/Blit.ankiprog
+++ b/AnKi/Shaders/Blit.ankiprog
@@ -11,7 +11,7 @@
 
				 #	include <AnKi/Shaders/Functions.hlsl>
			
 
				 
			
 
				 SamplerState g_linearAnyClampSampler : register(s0);
			
 
				-Texture2D<RVec4> g_inputTex : register(t0);
			
 
				+Texture2D<Vec4> g_inputTex : register(t0);
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				 #		define USE_COMPUTE 1
			
@@ -20,7 +20,7 @@ Texture2D<RVec4> g_inputTex : register(t0);
 
				 #	endif
			
 
				 
			
 
				 #	if USE_COMPUTE
			
 
				-RWTexture2D<RVec4> g_storageTex : register(u0);
			
 
				+RWTexture2D<Vec4> g_storageTex : register(u0);
			
 
				 
			
 
				 struct Consts
			
 
				 {
			
@@ -33,7 +33,7 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 
				 #	if USE_COMPUTE
			
 
				 [numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 #	else
			
 
				-RVec3 main(VertOut input) : SV_TARGET0
			
 
				+Vec3 main(VertOut input) : SV_TARGET0
			
 
				 #	endif
			
 
				 {
			
 
				 #	if USE_COMPUTE
			
@@ -47,10 +47,10 @@ RVec3 main(VertOut input) : SV_TARGET0
 
				 	const Vec2 uv = input.m_uv;
			
 
				 #	endif
			
 
				 
			
 
				-	const RVec3 color = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb;
			
 
				+	const Vec3 color = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb;
			
 
				 
			
 
				 #	if USE_COMPUTE
			
 
				-	g_storageTex[svDispatchThreadId.xy] = RVec4(color, 0.0);
			
 
				+	g_storageTex[svDispatchThreadId.xy] = Vec4(color, 0.0);
			
 
				 #	else
			
 
				 	return color;
			
 
				 #	endif
			
--- a/AnKi/Shaders/Bloom.ankiprog
+++ b/AnKi/Shaders/Bloom.ankiprog
@@ -16,7 +16,7 @@
 
				 #	include <AnKi/Shaders/Functions.hlsl>
			
 
				 
			
 
				 SamplerState g_linearAnyClampSampler : register(s0);
			
 
				-Texture2D<RVec3> g_tex : register(t0);
			
 
				+Texture2D<Vec3> g_tex : register(t0);
			
 
				 
			
 
				 struct Constants
			
 
				 {
			
@@ -26,7 +26,7 @@ struct Constants
 
				 ANKI_FAST_CONSTANTS(Constants, g_consts)
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				-RWTexture2D<RVec4> g_storageTex : register(u1);
			
 
				+RWTexture2D<Vec4> g_storageTex : register(u1);
			
 
				 #	endif
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
@@ -34,13 +34,13 @@ RWTexture2D<RVec4> g_storageTex : register(u1);
 
				 {
			
 
				 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / g_consts.m_fbSize;
			
 
				 #	else
			
 
				-RVec3 main(VertOut input) : SV_TARGET0
			
 
				+Vec3 main(VertOut input) : SV_TARGET0
			
 
				 {
			
 
				 	const Vec2 uv = input.m_uv;
			
 
				 #	endif
			
 
				 
			
 
				-	RVec3 output;
			
 
				-	const RF32 weight = 1.0 / 5.0;
			
 
				+	Vec3 output;
			
 
				+	const F32 weight = 1.0 / 5.0;
			
 
				 	output = g_tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0) * weight;
			
 
				 	output += g_tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(+1, +1)) * weight;
			
 
				 	output += g_tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, -1)) * weight;
			
@@ -48,7 +48,7 @@ RVec3 main(VertOut input) : SV_TARGET0
 
				 	output += g_tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, +1)) * weight;
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				-	g_storageTex[svDispatchThreadId] = RVec4(output, 1.0);
			
 
				+	g_storageTex[svDispatchThreadId] = Vec4(output, 1.0);
			
 
				 #	else
			
 
				 	return output;
			
 
				 #	endif
			
@@ -63,7 +63,7 @@ RVec3 main(VertOut input) : SV_TARGET0
 
				 #	include <AnKi/Shaders/Functions.hlsl>
			
 
				 
			
 
				 SamplerState g_linearAnyClampSampler : register(s0);
			
 
				-Texture2D<RVec4> g_inTex : register(t0);
			
 
				+Texture2D<Vec4> g_inTex : register(t0);
			
 
				 
			
 
				 #	define TONEMAPPING_REGISTER u0
			
 
				 #	include <AnKi/Shaders/TonemappingResources.hlsl>
			
@@ -78,13 +78,13 @@ struct Consts
 
				 ANKI_FAST_CONSTANTS(Consts, g_consts)
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				-RWTexture2D<RVec4> g_storageTex : register(u1);
			
 
				+RWTexture2D<Vec4> g_storageTex : register(u1);
			
 
				 #	endif
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 #	else
			
 
				-RVec3 main(VertOut input) : SV_TARGET0
			
 
				+Vec3 main(VertOut input) : SV_TARGET0
			
 
				 #	endif
			
 
				 {
			
 
				 #	if ANKI_COMPUTE_SHADER
			
@@ -96,8 +96,8 @@ RVec3 main(VertOut input) : SV_TARGET0
 
				 	const Vec2 uv = input.m_uv;
			
 
				 #	endif
			
 
				 
			
 
				-	const RF32 weight = 1.0 / 5.0;
			
 
				-	RVec3 color = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb * weight;
			
 
				+	const F32 weight = 1.0 / 5.0;
			
 
				+	Vec3 color = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb * weight;
			
 
				 	color += g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(+1, +1)).rgb * weight;
			
 
				 	color += g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, -1)).rgb * weight;
			
 
				 	color += g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, +1)).rgb * weight;
			
@@ -106,7 +106,7 @@ RVec3 main(VertOut input) : SV_TARGET0
 
				 	color = tonemap<F32>(color, readExposureAndAverageLuminance<F32>().y, g_consts.m_threshold) * g_consts.m_scale;
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				-	g_storageTex[svDispatchThreadId] = RVec4(color, 0.0);
			
 
				+	g_storageTex[svDispatchThreadId] = Vec4(color, 0.0);
			
 
				 #	else
			
 
				 	return color;
			
 
				 #	endif
			
@@ -129,26 +129,26 @@ constexpr F32 kChromaticDistortion = 3.0;
 
				 constexpr F32 kHaloOpacity = 0.5;
			
 
				 
			
 
				 SamplerState g_linearAnyClampSampler : register(s0);
			
 
				-Texture2D<RVec4> g_inputTex : register(t0);
			
 
				-Texture2D<RVec3> g_lensDirtTex : register(t1);
			
 
				+Texture2D<Vec4> g_inputTex : register(t0);
			
 
				+Texture2D<Vec3> g_lensDirtTex : register(t1);
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				-RWTexture2D<RVec4> g_storageTex : register(u0);
			
 
				+RWTexture2D<Vec4> g_storageTex : register(u0);
			
 
				 #	endif
			
 
				 
			
 
				-RVec3 textureDistorted(Texture2D<RVec4> tex, SamplerState sampl, Vec2 uv,
			
 
				-					   Vec2 direction, // direction of distortion
			
 
				-					   Vec3 distortion) // per-channel distortion factor
			
 
				+Vec3 textureDistorted(Texture2D<Vec4> tex, SamplerState sampl, Vec2 uv,
			
 
				+					  Vec2 direction, // direction of distortion
			
 
				+					  Vec3 distortion) // per-channel distortion factor
			
 
				 {
			
 
				 #	if ENABLE_CHROMATIC_DISTORTION
			
 
				-	return RVec3(tex.SampleLevel(sampl, uv + direction * distortion.r, 0.0).r, tex.SampleLevel(sampl, uv + direction * distortion.g, 0.0).g,
			
 
				-				 tex.SampleLevel(sampl, uv + direction * distortion.b, 0.0).b);
			
 
				+	return Vec3(tex.SampleLevel(sampl, uv + direction * distortion.r, 0.0).r, tex.SampleLevel(sampl, uv + direction * distortion.g, 0.0).g,
			
 
				+				tex.SampleLevel(sampl, uv + direction * distortion.b, 0.0).b);
			
 
				 #	else
			
 
				 	return tex.SampleLevel(uv, 0.0).rgb;
			
 
				 #	endif
			
 
				 }
			
 
				 
			
 
				-RVec3 ssLensFlare(Vec2 uv)
			
 
				+Vec3 ssLensFlare(Vec2 uv)
			
 
				 {
			
 
				 	Vec2 textureSize;
			
 
				 	g_inputTex.GetDimensions(textureSize.x, textureSize.y);
			
@@ -162,14 +162,14 @@ RVec3 ssLensFlare(Vec2 uv)
 
				 	const Vec2 ghostVec = (Vec2(0.5, 0.5) - flipUv) * kGhostDispersal;
			
 
				 
			
 
				 	const Vec2 direction = normalize(ghostVec);
			
 
				-	RVec3 result = Vec3(0.0, 0.0, 0.0);
			
 
				+	Vec3 result = Vec3(0.0, 0.0, 0.0);
			
 
				 
			
 
				 	// Sample ghosts
			
 
				 	[unroll] for(U32 i = 0u; i < kMaxGhosts; ++i)
			
 
				 	{
			
 
				 		const Vec2 offset = frac(flipUv + ghostVec * F32(i));
			
 
				 
			
 
				-		RF32 weight = length(Vec2(0.5, 0.5) - offset) / lensOfHalf;
			
 
				+		F32 weight = length(Vec2(0.5, 0.5) - offset) / lensOfHalf;
			
 
				 		weight = pow(1.0 - weight, 10.0);
			
 
				 
			
 
				 		result += textureDistorted(g_inputTex, g_linearAnyClampSampler, offset, direction, distortion) * weight;
			
@@ -178,7 +178,7 @@ RVec3 ssLensFlare(Vec2 uv)
 
				 	// Sample halo
			
 
				 #	if ENABLE_HALO
			
 
				 	const Vec2 haloVec = normalize(ghostVec) * kHaloWidth;
			
 
				-	RF32 weight = length(Vec2(0.5, 0.5) - frac(flipUv + haloVec)) / lensOfHalf;
			
 
				+	F32 weight = length(Vec2(0.5, 0.5) - frac(flipUv + haloVec)) / lensOfHalf;
			
 
				 	weight = pow(1.0 - weight, 20.0);
			
 
				 	result += textureDistorted(g_inputTex, g_linearAnyClampSampler, flipUv + haloVec, direction, distortion) * (weight * kHaloOpacity);
			
 
				 #	endif
			
@@ -189,10 +189,10 @@ RVec3 ssLensFlare(Vec2 uv)
 
				 	return result;
			
 
				 }
			
 
				 
			
 
				-RVec3 upscale(Vec2 uv)
			
 
				+Vec3 upscale(Vec2 uv)
			
 
				 {
			
 
				-	const RF32 weight = 1.0 / 5.0;
			
 
				-	RVec3 result = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb * weight;
			
 
				+	const F32 weight = 1.0 / 5.0;
			
 
				+	Vec3 result = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb * weight;
			
 
				 	result += g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(+1, +1)).rgb * weight;
			
 
				 	result += g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(+1, -1)).rgb * weight;
			
 
				 	result += g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, -1)).rgb * weight;
			
@@ -204,7 +204,7 @@ RVec3 upscale(Vec2 uv)
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 #	else
			
 
				-RVec3 main(VertOut input) : SV_TARGET0
			
 
				+Vec3 main(VertOut input) : SV_TARGET0
			
 
				 #	endif
			
 
				 {
			
 
				 #	if ANKI_COMPUTE_SHADER
			
@@ -216,10 +216,10 @@ RVec3 main(VertOut input) : SV_TARGET0
 
				 	const Vec2 uv = input.m_uv;
			
 
				 #	endif
			
 
				 
			
 
				-	const RVec3 outColor = ssLensFlare(uv) + upscale(uv);
			
 
				+	const Vec3 outColor = ssLensFlare(uv) + upscale(uv);
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				-	g_storageTex[svDispatchThreadId] = RVec4(outColor, 0.0);
			
 
				+	g_storageTex[svDispatchThreadId] = Vec4(outColor, 0.0);
			
 
				 #	else
			
 
				 	return outColor;
			
 
				 #	endif
			
--- a/AnKi/Shaders/Common.hlsl
+++ b/AnKi/Shaders/Common.hlsl
@@ -15,11 +15,7 @@
 
				 
			
 
				 // Common constants
			
 
				 constexpr F32 kEpsilonF32 = 0.000001f;
			
 
				-#if ANKI_SUPPORTS_16BIT_TYPES
			
 
				 constexpr F16 kEpsilonF16 = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
			
 
				-#else
			
 
				-constexpr RF32 kEpsilonRF32 = 0.0001f;
			
 
				-#endif
			
 
				 
			
 
				 template<typename T>
			
 
				 T getEpsilon();
			
@@ -30,33 +26,18 @@ F32 getEpsilon()
 
				 	return kEpsilonF32;
			
 
				 }
			
 
				 
			
 
				-#if ANKI_SUPPORTS_16BIT_TYPES
			
 
				 template<>
			
 
				 F16 getEpsilon()
			
 
				 {
			
 
				 	return kEpsilonF16;
			
 
				 }
			
 
				-#endif
			
 
				-
			
 
				-#if !ANKI_FORCE_FULL_FP_PRECISION && !ANKI_SUPPORTS_16BIT_TYPES
			
 
				-template<>
			
 
				-RF32 getEpsilon()
			
 
				-{
			
 
				-	return kEpsilonRF32;
			
 
				-}
			
 
				-#endif
			
 
				 
			
 
				 constexpr U32 kMaxU32 = 0xFFFFFFFFu;
			
 
				 constexpr I32 kMinI32 = -2147483648;
			
 
				 constexpr I32 kMaxI32 = 2147483647;
			
 
				 constexpr F32 kMaxF32 = 3.402823e+38;
			
 
				 constexpr F32 kMinF32 = -3.402823e+38;
			
 
				-#if !ANKI_SUPPORTS_16BIT_TYPES
			
 
				-constexpr RF32 kMaxRF32 = 65504.0f; // Max half float value according to wikipedia
			
 
				-#endif
			
 
				-#if ANKI_SUPPORTS_16BIT_TYPES
			
 
				 constexpr F16 kMaxF16 = (F16)65504.0;
			
 
				-#endif
			
 
				 
			
 
				 template<typename T>
			
 
				 T getMaxNumericLimit();
			
@@ -67,21 +48,11 @@ F32 getMaxNumericLimit()
 
				 	return kMaxF32;
			
 
				 }
			
 
				 
			
 
				-#if !ANKI_FORCE_FULL_FP_PRECISION && !ANKI_SUPPORTS_16BIT_TYPES
			
 
				-template<>
			
 
				-RF32 getMaxNumericLimit()
			
 
				-{
			
 
				-	return kMaxRF32;
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				-#if ANKI_SUPPORTS_16BIT_TYPES
			
 
				 template<>
			
 
				 F16 getMaxNumericLimit()
			
 
				 {
			
 
				 	return kMaxF16;
			
 
				 }
			
 
				-#endif
			
 
				 
			
 
				 template<>
			
 
				 U32 getMaxNumericLimit()
			
--- a/AnKi/Shaders/FinalComposite.ankiprog
+++ b/AnKi/Shaders/FinalComposite.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator FILM_GRAIN 0 1
			
 
				 #pragma anki mutator BLOOM 0 1
			
 
				 #pragma anki mutator DBG 0 1
			
--- a/AnKi/Shaders/ForwardShadingFog.ankiprog
+++ b/AnKi/Shaders/ForwardShadingFog.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki technique ForwardLegacy vert pixel
			
 
				 
			
 
				 #include <AnKi/Shaders/ForwardShadingCommon.hlsl>
			
--- a/AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog
+++ b/AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator TEXTURE 0 1
			
 
				 #pragma anki mutator LIGHT 0 1
			
 
				 
			
--- a/AnKi/Shaders/ForwardShadingParticles.ankiprog
+++ b/AnKi/Shaders/ForwardShadingParticles.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator ANIMATED_TEXTURE 0 1
			
 
				 #pragma anki mutator LIGHT 0 1
			
 
				 
			
--- a/AnKi/Shaders/ForwardShadingParticlesGass.ankiprog
+++ b/AnKi/Shaders/ForwardShadingParticlesGass.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator ANIMATED_TEXTURE 0 1
			
 
				 #pragma anki mutator LIGHT 0 1
			
 
				 
			
--- a/AnKi/Shaders/Fsr.ankiprog
+++ b/AnKi/Shaders/Fsr.ankiprog
@@ -3,7 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				 #pragma anki mutator SHARPEN 0 1
			
 
				 #pragma anki mutator FSR_QUALITY 0 1
			
 
				 
			
--- a/AnKi/Shaders/GBufferGeneric.ankiprog
+++ b/AnKi/Shaders/GBufferGeneric.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator ANKI_VELOCITY 0 1
			
 
				 #pragma anki mutator ANKI_BONES 0 1
			
 
				 #pragma anki mutator DIFFUSE_TEX 0 1
			
--- a/AnKi/Shaders/GBufferGpuParticles.ankiprog
+++ b/AnKi/Shaders/GBufferGpuParticles.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki technique GBuffer vert pixel
			
 
				 
			
 
				 #include <AnKi/Shaders/MaterialShadersCommon.hlsl>
			
--- a/AnKi/Shaders/GBufferPost.ankiprog
+++ b/AnKi/Shaders/GBufferPost.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki technique comp
			
 
				 
			
 
				 #include <AnKi/Shaders/PackFunctions.hlsl>
			
--- a/AnKi/Shaders/GpuParticlesGass.ankiprog
+++ b/AnKi/Shaders/GpuParticlesGass.ankiprog
@@ -5,8 +5,6 @@
 
				 
			
 
				 // This shader does a particle simulation for gasses
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator ANKI_WAVE_SIZE 16 32 64
			
 
				 
			
 
				 #pragma anki technique comp
			
--- a/AnKi/Shaders/GpuParticlesSimulation.ankiprog
+++ b/AnKi/Shaders/GpuParticlesSimulation.ankiprog
@@ -1,143 +0,0 @@
 
				-// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
			
 
				-// All rights reserved.
			
 
				-// Code licensed under the BSD License.
			
 
				-// http://www.anki3d.org/LICENSE
			
 
				-
			
 
				-// This shader does a particle simulation
			
 
				-
			
 
				-#pragma anki technique comp
			
 
				-
			
 
				-#include <AnKi/Shaders/Include/ParticleTypes.h>
			
 
				-#include <AnKi/Shaders/Common.hlsl>
			
 
				-
			
 
				-#if 0
			
 
				-Texture2D g_depthTex : register(t0);
			
 
				-RWStructuredBuffer<GpuParticle> g_particles : register(u0);
			
 
				-ConstantBuffer<GpuParticleEmitterProperties> g_props : register(b0);
			
 
				-StructuredBuffer<F32> g_randomFactors : register(t1); // The 1st element is a uint with the count of the numbers
			
 
				-SamplerState g_nearestAnyClampSampler : register(s0);
			
 
				-ConstantBuffer<GpuParticleSimulationState> g_state : register(b1);
			
 
				-
			
 
				-F32 smallerDelta(F32 left, F32 mid, F32 right)
			
 
				-{
			
 
				-	const F32 a = mid - left;
			
 
				-	const F32 b = right - mid;
			
 
				-
			
 
				-	return (abs(a) < abs(b)) ? a : b;
			
 
				-}
			
 
				-
			
 
				-Vec3 unproject(Vec2 ndc, F32 depth)
			
 
				-{
			
 
				-	const F32 z = g_state.m_unprojectionParams.z / (g_state.m_unprojectionParams.w + depth);
			
 
				-	const Vec2 xy = ndc * g_state.m_unprojectionParams.xy * z;
			
 
				-	return Vec3(xy, z);
			
 
				-}
			
 
				-
			
 
				-// Compute the normal using the depth buffer
			
 
				-Vec3 computeNormal(const Vec2 uv, const F32 depth)
			
 
				-{
			
 
				-	const F32 depthLeft = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0, IVec2(-2, 0)).r;
			
 
				-	const F32 depthRight = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0, IVec2(2, 0)).r;
			
 
				-	const F32 depthTop = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0, IVec2(0, 2)).r;
			
 
				-	const F32 depthBottom = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0, IVec2(0, -2)).r;
			
 
				-
			
 
				-	const F32 ddx = smallerDelta(depthLeft, depth, depthRight);
			
 
				-	const F32 ddy = smallerDelta(depthBottom, depth, depthTop);
			
 
				-
			
 
				-	const Vec2 ndc = uvToNdc(uv);
			
 
				-	Vec3 textureSize;
			
 
				-	g_depthTex.GetDimensions(0, textureSize.x, textureSize.y, textureSize.z);
			
 
				-	const Vec2 texelSize = 1.0 / textureSize.xy;
			
 
				-	const Vec2 ndcTexelSize = 2.0 * texelSize;
			
 
				-	const Vec3 right = unproject(ndc + Vec2(ndcTexelSize.x, 0.0), depth + ddx);
			
 
				-	const Vec3 top = unproject(ndc + Vec2(0.0, ndcTexelSize.y), depth + ddy);
			
 
				-
			
 
				-	const Vec3 origin = unproject(ndc, depth);
			
 
				-	Vec3 normalVSpace = cross(origin - top, right - origin);
			
 
				-	normalVSpace = normalize(normalVSpace);
			
 
				-
			
 
				-	return mul(g_state.m_invViewRotation, Vec4(normalVSpace, 0.0));
			
 
				-}
			
 
				-
			
 
				-void initParticle(UVec3 svDispatchThreadId, out GpuParticle p)
			
 
				-{
			
 
				-	const U32 randomFactorCount = asuint(g_randomFactors[0]);
			
 
				-	const F32 randFactor = g_randomFactors[((svDispatchThreadId.x + g_state.m_randomIndex) % randomFactorCount) + 1];
			
 
				-
			
 
				-	p.m_newWorldPosition = lerp(g_props.m_minStartingPosition, g_props.m_maxStartingPosition, randFactor) + g_state.m_emitterPosition;
			
 
				-	p.m_oldWorldPosition = p.m_newWorldPosition;
			
 
				-
			
 
				-	p.m_mass = lerp(g_props.m_minMass, g_props.m_maxMass, randFactor);
			
 
				-	p.m_startingLife = lerp(g_props.m_minLife, g_props.m_maxLife, randFactor);
			
 
				-	p.m_life = p.m_startingLife;
			
 
				-	p.m_acceleration = lerp(g_props.m_minGravity, g_props.m_maxGravity, randFactor);
			
 
				-
			
 
				-	// Calculate the initial velocity
			
 
				-	const Vec3 initialForce = mul(g_state.m_emitterRotation, Vec4(lerp(g_props.m_minForce, g_props.m_maxForce, randFactor), 0.0));
			
 
				-	const Vec3 totalForce = (p.m_acceleration * p.m_mass) + initialForce;
			
 
				-	const Vec3 acceleration = totalForce / p.m_mass;
			
 
				-	p.m_velocity = acceleration * g_state.m_dt;
			
 
				-}
			
 
				-#endif
			
 
				-
			
 
				-[numthreads(64, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID) {
			
 
				-#if 0
			
 
				-	const U32 particleIdx = svDispatchThreadId.x;
			
 
				-	if(particleIdx >= g_props.m_particleCount)
			
 
				-	{
			
 
				-		return;
			
 
				-	}
			
 
				-
			
 
				-	GpuParticle particle = g_particles[particleIdx];
			
 
				-	const F32 dt = g_state.m_dt;
			
 
				-
			
 
				-	// Check if it's dead
			
 
				-	if(particle.m_life - dt <= 0.0)
			
 
				-	{
			
 
				-		// Dead, revive
			
 
				-		initParticle(svDispatchThreadId, particle);
			
 
				-	}
			
 
				-	else
			
 
				-	{
			
 
				-		// Simulate
			
 
				-
			
 
				-		particle.m_life -= dt;
			
 
				-
			
 
				-		const Vec3 xp = particle.m_oldWorldPosition;
			
 
				-		const Vec3 xc = particle.m_acceleration * (dt * dt) + g_particles[particleIdx].m_velocity * dt + xp;
			
 
				-
			
 
				-		// Project the point
			
 
				-		const Vec4 proj4 = mul(g_state.m_viewProjMat, Vec4(xc, 1.0));
			
 
				-		const Vec3 proj3 = proj4.xyz / proj4.w;
			
 
				-		if(all(proj3.xy >= Vec2(-1.0, -1.0)) && all(proj3.xy <= Vec2(1.0, 1.0)))
			
 
				-		{
			
 
				-			// It's visible, test against the depth buffer
			
 
				-
			
 
				-			const F32 refDepth = g_depthTex.SampleLevel(g_nearestAnyClampSampler, ndcToUv(proj3.xy), 0.0).r;
			
 
				-			const F32 testDepth = proj3.z;
			
 
				-
			
 
				-			if(testDepth >= refDepth)
			
 
				-			{
			
 
				-				// Collides, change its direction
			
 
				-				const Vec3 normal = computeNormal(ndcToUv(proj3.xy), refDepth);
			
 
				-				particle.m_velocity = reflect(particle.m_velocity, normal);
			
 
				-
			
 
				-				particle.m_oldWorldPosition = particle.m_newWorldPosition;
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				particle.m_oldWorldPosition = particle.m_newWorldPosition;
			
 
				-				particle.m_newWorldPosition = xc;
			
 
				-			}
			
 
				-		}
			
 
				-		else
			
 
				-		{
			
 
				-			particle.m_oldWorldPosition = particle.m_newWorldPosition;
			
 
				-			particle.m_newWorldPosition = xc;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	// Write back the particle
			
 
				-	g_particles[particleIdx] = particle;
			
 
				-#endif
			
 
				-}
			
--- a/AnKi/Shaders/GpuVisibilityStage2And3.ankiprog
+++ b/AnKi/Shaders/GpuVisibilityStage2And3.ankiprog
@@ -17,6 +17,7 @@
 
				 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
			
 
				 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
			
 
				 #include <AnKi/Shaders/PackFunctions.hlsl>
			
 
				+#include <AnKi/Shaders/Functions.hlsl>
			
 
				 
			
 
				 #define NUMTHREADS 64u
			
 
				 
			
@@ -134,7 +135,7 @@ RWStructuredBuffer<U32> g_outOfMemoryBuffer : register(u3);
 
				 // ===========================================================================
			
 
				 #if ANKI_TECHNIQUE_Meshlets
			
 
				 
			
 
				-#	define MESHLET_BACKFACE_CULLING 0 // Doesn't work correctly for some reason
			
 
				+#	define MESHLET_BACKFACE_CULLING 1 // Doesn't cull much geometry
			
 
				 #	define MESHLET_OUTSIDE_OF_SCREEN_CULLING 1
			
 
				 #	define MESHLET_NO_SAMPLING_POINT_CULLING 1
			
 
				 #	define MESHLET_HZB_CULLING HZB_TEST
			
@@ -187,14 +188,6 @@ Bool cullMeshlet(GpuSceneRenderable renderable, const MeshletBoundingVolume mesh
 
				 
			
 
				 	const Mat3x4 worldTransform = SBUFF(g_transforms, renderable.m_worldTransformsIndex);
			
 
				 
			
 
				-#	if MESHLET_BACKFACE_CULLING
			
 
				-	const Vec4 coneDirAndAng = unpackSnorm4x8(meshletBoundingVol.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
			
 
				-	if(cullBackfaceMeshlet(coneDirAndAng.xyz, coneDirAndAng.w, meshletBoundingVol.m_coneApex, worldTransform, g_consts.m_cameraPos))
			
 
				-	{
			
 
				-		return true;
			
 
				-	}
			
 
				-#	endif
			
 
				-
			
 
				 	const Mat4 wordTransform4 = {worldTransform.m_row0, worldTransform.m_row1, worldTransform.m_row2, Vec4(0.0f, 0.0f, 0.0f, 1.0f)};
			
 
				 	const Mat4 mvp = mul(g_consts.m_viewProjectionMatrix, wordTransform4);
			
 
				 
			
@@ -225,6 +218,28 @@ Bool cullMeshlet(GpuSceneRenderable renderable, const MeshletBoundingVolume mesh
 
				 	return meshletCulledByHzb;
			
 
				 #	endif
			
 
				 
			
 
				+#	if MESHLET_BACKFACE_CULLING
			
 
				+	{
			
 
				+		Vec3 center = (meshletBoundingVol.m_aabbMin + meshletBoundingVol.m_aabbMax) * 0.5; // Midpoint of the meshlet AABB, before the world transform
			
 
				+		center = mul(worldTransform, Vec4(center, 1.0)); // w=1: the translation is applied
			
 
				+		// Normal cone of the meshlet: xyz is the packed cone axis, w is the packed cutoff that is compared against the view direction
			
 
				+		const Vec4 coneDirAndAng = unpackSnorm4x8<F32>(meshletBoundingVol.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
			
 
				+		Vec3 coneDir = mul(worldTransform, Vec4(coneDirAndAng.xyz, 0.0)); // w=0: the translation is ignored
			
 
				+		coneDir /= max(length(coneDir), 1e-6); // The test below needs a unit axis: remove the transform's scale. A zero axis stays zero instead of becoming NaN
			
 
				+		// NOTE(review): under non-uniform scale the axis should be transformed like a normal (inverse-transpose) - confirm if such transforms are expected
			
 
				+		const Vec3 scale = extractScale(worldTransform);
			
 
				+		const F32 radius = meshletBoundingVol.m_sphereRadius * max3(scale); // Scale the radius by the largest axis scale to stay conservative
			
 
				+
			
 
				+		// Using from meshoptimizer: dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius
			
 
				+		const Vec3 camPos = g_consts.m_cameraPos;
			
 
				+		const Vec3 cam2center = center - camPos;
			
 
				+		if(dot(cam2center, coneDir) >= coneDirAndAng.w * length(cam2center) + radius)
			
 
				+		{
			
 
				+			return true;
			
 
				+		}
			
 
				+	}
			
 
				+#	endif
			
 
				+
			
 
				 	return false;
			
 
				 }
			
 
				 
			
--- a/AnKi/Shaders/HistoryLength.ankiprog
+++ b/AnKi/Shaders/HistoryLength.ankiprog
@@ -5,8 +5,6 @@
 
				 
			
 
				 // Calculates the disocclusion length. The longer the length the better the pixel is for temporal accumulation
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki technique comp vert pixel
			
 
				 
			
 
				 #include <AnKi/Shaders/Functions.hlsl>
			
--- a/AnKi/Shaders/Include/Common.h
+++ b/AnKi/Shaders/Include/Common.h
@@ -83,135 +83,64 @@ void maybeUnused(T a)
 
				 
			
 
				 #	define static_assert(x)
			
 
				 
			
 
				-#	define _ANKI_SCONST_X(type, n, id) [[vk::constant_id(id)]] const type n = (type)1;
			
 
				-
			
 
				-#	define _ANKI_SCONST_X2(type, componentType, n, id) \
			
 
				-		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = (componentType)1; \
			
 
				-		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_2_, n) = (componentType)1; \
			
 
				-		static const type n = type(ANKI_CONCATENATE(_anki_const_0_2_, n), ANKI_CONCATENATE(_anki_const_1_2_, n))
			
 
				-
			
 
				-#	define _ANKI_SCONST_X3(type, componentType, n, id) \
			
 
				-		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_3_, n) = (componentType)1; \
			
 
				-		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_3_, n) = (componentType)1; \
			
 
				-		[[vk::constant_id(id + 2u)]] const componentType ANKI_CONCATENATE(_anki_const_2_3_, n) = (componentType)1; \
			
 
				-		static const type n = \
			
 
				-			type(ANKI_CONCATENATE(_anki_const_0_3_, n), ANKI_CONCATENATE(_anki_const_1_3_, n), ANKI_CONCATENATE(_anki_const_2_3_, n))
			
 
				-
			
 
				-#	define _ANKI_SCONST_X4(type, componentType, n, id) \
			
 
				-		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_4_, n) = (componentType)1; \
			
 
				-		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_4_, n) = (componentType)1; \
			
 
				-		[[vk::constant_id(id + 2u)]] const componentType ANKI_CONCATENATE(_anki_const_2_4_, n) = (componentType)1; \
			
 
				-		[[vk::constant_id(id + 3u)]] const componentType ANKI_CONCATENATE(_anki_const_3_4_, n) = (componentType)1; \
			
 
				-		static const type n = type(ANKI_CONCATENATE(_anki_const_0_4_, n), ANKI_CONCATENATE(_anki_const_1_4_, n), \
			
 
				-								   ANKI_CONCATENATE(_anki_const_2_4_, n), ANKI_CONCATENATE(_anki_const_2_4_, n))
			
 
				-
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_I32(n, id) _ANKI_SCONST_X(I32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC2(n, id) _ANKI_SCONST_X2(IVec2, I32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC3(n, id) _ANKI_SCONST_X3(IVec3, I32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC4(n, id) _ANKI_SCONST_X4(IVec4, I32, n, id)
			
 
				-
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_U32(n, id) _ANKI_SCONST_X(U32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC2(n, id) _ANKI_SCONST_X2(UVec2, U32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC3(n, id) _ANKI_SCONST_X3(UVec3, U32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC4(n, id) _ANKI_SCONST_X4(UVec4, U32, n, id)
			
 
				-
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_F32(n, id) _ANKI_SCONST_X(F32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_VEC2(n, id) _ANKI_SCONST_X2(Vec2, F32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_VEC3(n, id) _ANKI_SCONST_X3(Vec3, F32, n, id)
			
 
				-#	define ANKI_SPECIALIZATION_CONSTANT_VEC4(n, id) _ANKI_SCONST_X4(Vec4, F32, n, id)
			
 
				-
			
 
				 #	pragma pack_matrix(row_major)
			
 
				 
			
 
				 typedef float F32;
			
 
				-constexpr uint kSizeof_F32 = 4u;
			
 
				 typedef float2 Vec2;
			
 
				-constexpr uint kSizeof_Vec2 = 8u;
			
 
				 typedef float3 Vec3;
			
 
				-constexpr uint kSizeof_Vec3 = 12u;
			
 
				 typedef float4 Vec4;
			
 
				-constexpr uint kSizeof_Vec4 = 16u;
			
 
				 
			
 
				-#	if ANKI_SUPPORTS_16BIT_TYPES
			
 
				 typedef float16_t F16;
			
 
				-constexpr uint kSizeof_F16 = 2u;
			
 
				 typedef float16_t2 HVec2;
			
 
				-constexpr uint kSizeof_HVec2 = 4u;
			
 
				 typedef float16_t3 HVec3;
			
 
				-constexpr uint kSizeof_HVec3 = 6u;
			
 
				 typedef float16_t4 HVec4;
			
 
				-constexpr uint kSizeof_HVec4 = 8u;
			
 
				 
			
 
				 typedef uint16_t U16;
			
 
				-constexpr uint kSizeof_U16 = 2u;
			
 
				 typedef uint16_t2 U16Vec2;
			
 
				-constexpr uint kSizeof_U16Vec2 = 4u;
			
 
				 typedef uint16_t3 U16Vec3;
			
 
				-constexpr uint kSizeof_U16Vec3 = 6u;
			
 
				 typedef uint16_t4 U16Vec4;
			
 
				-constexpr uint kSizeof_U16Vec4 = 8u;
			
 
				 
			
 
				 typedef int16_t I16;
			
 
				-constexpr uint kSizeof_I16 = 2u;
			
 
				 typedef int16_t2 I16Vec2;
			
 
				-constexpr uint kSizeof_I16Vec2 = 4u;
			
 
				 typedef int16_t3 I16Vec3;
			
 
				-constexpr uint kSizeof_I16Vec3 = 6u;
			
 
				 typedef int16_t4 I16Vec4;
			
 
				-constexpr uint kSizeof_I16Vec4 = 8u;
			
 
				-#	endif
			
 
				 
			
 
				 typedef uint U32;
			
 
				-constexpr uint kSizeof_U32 = 4u;
			
 
				 typedef uint32_t2 UVec2;
			
 
				-constexpr uint kSizeof_UVec2 = 8u;
			
 
				 typedef uint32_t3 UVec3;
			
 
				-constexpr uint kSizeof_UVec3 = 12u;
			
 
				 typedef uint32_t4 UVec4;
			
 
				-constexpr uint kSizeof_UVec4 = 16u;
			
 
				 
			
 
				 typedef int I32;
			
 
				-constexpr uint kSizeof_I32 = 4u;
			
 
				 typedef int32_t2 IVec2;
			
 
				-constexpr uint kSizeof_IVec2 = 8u;
			
 
				 typedef int32_t3 IVec3;
			
 
				-constexpr uint kSizeof_IVec3 = 12u;
			
 
				 typedef int32_t4 IVec4;
			
 
				-constexpr uint kSizeof_IVec4 = 16u;
			
 
				 
			
 
				 typedef uint64_t U64;
			
 
				-constexpr uint kSizeof_U64 = 8u;
			
 
				 typedef uint64_t2 U64Vec2;
			
 
				-constexpr uint kSizeof_U64Vec2 = 16u;
			
 
				 typedef uint64_t3 U64Vec3;
			
 
				-constexpr uint kSizeof_U64Vec3 = 24u;
			
 
				 typedef uint64_t4 U64Vec4;
			
 
				-constexpr uint kSizeof_U64Vec4 = 32u;
			
 
				 
			
 
				 typedef int64_t I64;
			
 
				-constexpr uint kSizeof_I64 = 8u;
			
 
				 typedef int64_t2 I64Vec2;
			
 
				-constexpr uint kSizeof_I64Vec2 = 16u;
			
 
				 typedef int64_t3 I64Vec3;
			
 
				-constexpr uint kSizeof_I64Vec3 = 24u;
			
 
				 typedef int64_t4 I64Vec4;
			
 
				-constexpr uint kSizeof_I64Vec4 = 32u;
			
 
				 
			
 
				 typedef bool Bool;
			
 
				 
			
 
				-#	define _ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, op) \
			
 
				-		mat operator op(fl f) \
			
 
				+#	define _ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, op) \
			
 
				+		matType operator op(scalarType f) \
			
 
				 		{ \
			
 
				-			mat o; \
			
 
				+			matType o; \
			
 
				 			o.m_row0 = m_row0 op f; \
			
 
				 			o.m_row1 = m_row1 op f; \
			
 
				 			o.m_row2 = m_row2 op f; \
			
 
				 			return o; \
			
 
				 		}
			
 
				 
			
 
				-#	define _ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, op) \
			
 
				-		mat operator op(fl f) \
			
 
				+#	define _ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, op) \
			
 
				+		matType operator op(scalarType f) \
			
 
				 		{ \
			
 
				-			mat o; \
			
 
				+			matType o; \
			
 
				 			o.m_row0 = m_row0 op f; \
			
 
				 			o.m_row1 = m_row1 op f; \
			
 
				 			o.m_row2 = m_row2 op f; \
			
@@ -219,20 +148,20 @@ typedef bool Bool;
 
				 			return o; \
			
 
				 		}
			
 
				 
			
 
				-#	define _ANKI_DEFINE_OPERATOR_SELF_ROWS3(mat, op) \
			
 
				-		mat operator op(mat b) \
			
 
				+#	define _ANKI_DEFINE_OPERATOR_SELF_ROWS3(matType, op) \
			
 
				+		matType operator op(matType b) \
			
 
				 		{ \
			
 
				-			mat o; \
			
 
				+			matType o; \
			
 
				 			o.m_row0 = m_row0 op b.m_row0; \
			
 
				 			o.m_row1 = m_row1 op b.m_row1; \
			
 
				 			o.m_row2 = m_row2 op b.m_row2; \
			
 
				 			return o; \
			
 
				 		}
			
 
				 
			
 
				-#	define _ANKI_DEFINE_OPERATOR_SELF_ROWS4(mat, op) \
			
 
				-		mat operator op(mat b) \
			
 
				+#	define _ANKI_DEFINE_OPERATOR_SELF_ROWS4(matType, op) \
			
 
				+		matType operator op(matType b) \
			
 
				 		{ \
			
 
				-			mat o; \
			
 
				+			matType o; \
			
 
				 			o.m_row0 = m_row0 op b.m_row0; \
			
 
				 			o.m_row1 = m_row1 op b.m_row1; \
			
 
				 			o.m_row2 = m_row2 op b.m_row2; \
			
@@ -240,181 +169,181 @@ typedef bool Bool;
 
				 			return o; \
			
 
				 		}
			
 
				 
			
 
				-#	define _ANKI_DEFINE_ALL_OPERATORS_ROWS3(mat, fl) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, +) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, -) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, *) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, /) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SELF_ROWS3(mat, +) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SELF_ROWS3(mat, -)
			
 
				-
			
 
				-#	define _ANKI_DEFINE_ALL_OPERATORS_ROWS4(mat, fl) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, +) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, -) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, *) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, /) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SELF_ROWS4(mat, +) \
			
 
				-		_ANKI_DEFINE_OPERATOR_SELF_ROWS4(mat, -)
			
 
				-
			
 
				-// Mat3 "template". Not an actual template because of bugs
			
 
				-#	define _ANKI_MAT3(mat, vec, scalar) \
			
 
				-		struct mat \
			
 
				-		{ \
			
 
				-			vec m_row0; \
			
 
				-			vec m_row1; \
			
 
				-			vec m_row2; \
			
 
				-			_ANKI_DEFINE_ALL_OPERATORS_ROWS3(mat, scalar) \
			
 
				-			void setColumns(vec c0, vec c1, vec c2) \
			
 
				-			{ \
			
 
				-				m_row0 = vec(c0.x, c1.x, c2.x); \
			
 
				-				m_row1 = vec(c0.y, c1.y, c2.y); \
			
 
				-				m_row2 = vec(c0.z, c1.z, c2.z); \
			
 
				-			} \
			
 
				-		}; \
			
 
				-		vec mul(mat m, vec v) \
			
 
				-		{ \
			
 
				-			const scalar a = dot(m.m_row0, v); \
			
 
				-			const scalar b = dot(m.m_row1, v); \
			
 
				-			const scalar c = dot(m.m_row2, v); \
			
 
				-			return vec(a, b, c); \
			
 
				-		} \
			
 
				-		mat transpose(mat m) \
			
 
				-		{ \
			
 
				-			mat o; \
			
 
				-			o.setColumns(m.m_row0, m.m_row1, m.m_row2); \
			
 
				-			return o; \
			
 
				-		}
			
 
				+#	define _ANKI_DEFINE_ALL_OPERATORS_ROWS3(matType, scalarType) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, +) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, -) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, *) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, /) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SELF_ROWS3(matType, +) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SELF_ROWS3(matType, -)
			
 
				+
			
 
				+#	define _ANKI_DEFINE_ALL_OPERATORS_ROWS4(matType, scalarType) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, +) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, -) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, *) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, /) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SELF_ROWS4(matType, +) \
			
 
				+		_ANKI_DEFINE_OPERATOR_SELF_ROWS4(matType, -)
			
 
				+
			
 
				+struct Mat3
			
 
				+{
			
 
				+	Vec3 m_row0;
			
 
				+	Vec3 m_row1;
			
 
				+	Vec3 m_row2;
			
 
				 
			
 
				-// Mat4 "template". Not an actual template because of bugs
			
 
				-#	define _ANKI_MAT4(mat, vec, scalar) \
			
 
				-		struct mat \
			
 
				-		{ \
			
 
				-			vec m_row0; \
			
 
				-			vec m_row1; \
			
 
				-			vec m_row2; \
			
 
				-			vec m_row3; \
			
 
				-			_ANKI_DEFINE_ALL_OPERATORS_ROWS4(mat, scalar) \
			
 
				-			vec getTranslationPart() \
			
 
				-			{ \
			
 
				-				return vec(m_row0.w, m_row1.w, m_row2.w, m_row3.w); \
			
 
				-			} \
			
 
				-			void setColumns(vec c0, vec c1, vec c2, vec c3) \
			
 
				-			{ \
			
 
				-				m_row0 = vec(c0.x, c1.x, c2.x, c3.x); \
			
 
				-				m_row1 = vec(c0.y, c1.y, c2.y, c3.y); \
			
 
				-				m_row2 = vec(c0.z, c1.z, c2.z, c3.z); \
			
 
				-				m_row3 = vec(c0.w, c1.w, c2.w, c3.w); \
			
 
				-			} \
			
 
				-		}; \
			
 
				-		vec mul(mat m, vec v) \
			
 
				-		{ \
			
 
				-			const scalar a = dot(m.m_row0, v); \
			
 
				-			const scalar b = dot(m.m_row1, v); \
			
 
				-			const scalar c = dot(m.m_row2, v); \
			
 
				-			const scalar d = dot(m.m_row3, v); \
			
 
				-			return vec(a, b, c, d); \
			
 
				-		} \
			
 
				-		mat mul(mat a_, mat b_) \
			
 
				-		{ \
			
 
				-			const vec a[4] = {a_.m_row0, a_.m_row1, a_.m_row2, a_.m_row3}; \
			
 
				-			const vec b[4] = {b_.m_row0, b_.m_row1, b_.m_row2, b_.m_row3}; \
			
 
				-			vec c[4]; \
			
 
				-			[unroll] for(U32 i = 0; i < 4; i++) \
			
 
				-			{ \
			
 
				-				vec t1, t2; \
			
 
				-				t1 = a[i][0]; \
			
 
				-				t2 = b[0] * t1; \
			
 
				-				t1 = a[i][1]; \
			
 
				-				t2 += b[1] * t1; \
			
 
				-				t1 = a[i][2]; \
			
 
				-				t2 += b[2] * t1; \
			
 
				-				t1 = a[i][3]; \
			
 
				-				t2 += b[3] * t1; \
			
 
				-				c[i] = t2; \
			
 
				-			} \
			
 
				-			mat o; \
			
 
				-			o.m_row0 = c[0]; \
			
 
				-			o.m_row1 = c[1]; \
			
 
				-			o.m_row2 = c[2]; \
			
 
				-			o.m_row3 = c[3]; \
			
 
				-			return o; \
			
 
				-		}
			
 
				+	_ANKI_DEFINE_ALL_OPERATORS_ROWS3(Mat3, F32)
			
 
				 
			
 
				-// Mat3x4 "template". Not an actual template because of bugs
			
 
				-#	define _ANKI_MAT3x4(mat, row, column, scalar) \
			
 
				-		struct mat \
			
 
				-		{ \
			
 
				-			row m_row0; \
			
 
				-			row m_row1; \
			
 
				-			row m_row2; \
			
 
				-			_ANKI_DEFINE_ALL_OPERATORS_ROWS3(mat, scalar) \
			
 
				-			column getTranslationPart() \
			
 
				-			{ \
			
 
				-				return column(m_row0.w, m_row1.w, m_row2.w); \
			
 
				-			} \
			
 
				-			void setColumns(column c0, column c1, column c2, column c3) \
			
 
				-			{ \
			
 
				-				m_row0 = row(c0.x, c1.x, c2.x, c3.x); \
			
 
				-				m_row1 = row(c0.y, c1.y, c2.y, c3.y); \
			
 
				-				m_row2 = row(c0.z, c1.z, c2.z, c3.z); \
			
 
				-			} \
			
 
				-			void setColumn(U32 i, column c) \
			
 
				-			{ \
			
 
				-				m_row0[i] = c.x; \
			
 
				-				m_row1[i] = c.y; \
			
 
				-				m_row2[i] = c.z; \
			
 
				-			} \
			
 
				-		}; \
			
 
				-		column mul(mat m, row v) \
			
 
				-		{ \
			
 
				-			const scalar a = dot(m.m_row0, v); \
			
 
				-			const scalar b = dot(m.m_row1, v); \
			
 
				-			const scalar c = dot(m.m_row2, v); \
			
 
				-			return column(a, b, c); \
			
 
				-		} \
			
 
				-		mat combineTransformations(mat a_, mat b_) \
			
 
				-		{ \
			
 
				-			const row a[3] = {a_.m_row0, a_.m_row1, a_.m_row2}; \
			
 
				-			const row b[3] = {b_.m_row0, b_.m_row1, b_.m_row2}; \
			
 
				-			row c[3]; \
			
 
				-			[unroll] for(U32 i = 0; i < 3; i++) \
			
 
				-			{ \
			
 
				-				row t2; \
			
 
				-				t2 = b[0] * a[i][0]; \
			
 
				-				t2 += b[1] * a[i][1]; \
			
 
				-				t2 += b[2] * a[i][2]; \
			
 
				-				const row v4 = row(0.0f, 0.0f, 0.0f, a[i][3]); \
			
 
				-				t2 += v4; \
			
 
				-				c[i] = t2; \
			
 
				-			} \
			
 
				-			mat o; \
			
 
				-			o.m_row0 = c[0]; \
			
 
				-			o.m_row1 = c[1]; \
			
 
				-			o.m_row2 = c[2]; \
			
 
				-			return o; \
			
 
				-		}
			
 
				+	void setColumns(Vec3 c0, Vec3 c1, Vec3 c2)
			
 
				+	{
			
 
				+		m_row0 = Vec3(c0.x, c1.x, c2.x);
			
 
				+		m_row1 = Vec3(c0.y, c1.y, c2.y);
			
 
				+		m_row2 = Vec3(c0.z, c1.z, c2.z);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+// Matrix * column-vector product for Mat3.
			
 
				+// Each component of the result is the dot product of the matching matrix row with v.
			
 
				+Vec3 mul(Mat3 m, Vec3 v)
			
 
				+{
			
 
				+	const Vec3 product = Vec3(dot(m.m_row0, v), dot(m.m_row1, v), dot(m.m_row2, v));
			
 
				+	return product;
			
 
				+}
			
 
				+
			
 
				+Mat3 transpose(Mat3 m) // Returns the transpose of m
			
 
				+{
			
 
				+	Mat3 transposed;
			
 
				+	transposed.setColumns(m.m_row0, m.m_row1, m.m_row2); // The rows of m become the columns of the result
			
 
				+	return transposed;
			
 
				+}
			
 
				 
			
 
				-_ANKI_MAT3(Mat3, Vec3, F32)
			
 
				-_ANKI_MAT4(Mat4, Vec4, F32)
			
 
				-_ANKI_MAT3x4(Mat3x4, Vec4, Vec3, F32)
			
 
				-
			
 
				-#	if ANKI_SUPPORTS_16BIT_TYPES == 0
			
 
				-#		if ANKI_FORCE_FULL_FP_PRECISION
			
 
				-	typedef float RF32;
			
 
				-typedef float2 RVec2;
			
 
				-typedef float3 RVec3;
			
 
				-typedef float4 RVec4;
			
 
				-_ANKI_MAT3(RMat3, Vec3, F32)
			
 
				-#		else
			
 
				-	typedef min16float RF32;
			
 
				-typedef min16float2 RVec2;
			
 
				-typedef min16float3 RVec3;
			
 
				-typedef min16float4 RVec4;
			
 
				-_ANKI_MAT3(RMat3, RVec3, RF32)
			
 
				-#		endif
			
 
				-#	else // ANKI_SUPPORTS_16BIT_TYPES == 0
			
 
				-	_ANKI_MAT3(HMat3, HVec3, F16)
			
 
				-#	endif // ANKI_SUPPORTS_16BIT_TYPES == 0
			
 
				+struct Mat4 // 4x4 F32 matrix stored as four Vec4 rows
			
 
				+{
			
 
				+	Vec4 m_row0; // Row 0
			
 
				+	Vec4 m_row1; // Row 1
			
 
				+	Vec4 m_row2; // Row 2
			
 
				+	Vec4 m_row3; // Row 3
			
 
				+
			
 
				+	_ANKI_DEFINE_ALL_OPERATORS_ROWS4(Mat4, F32) // Per-row operators: +, -, *, / with an F32 and +, - with another Mat4
			
 
				+
			
 
				+	void setColumns(Vec4 c0, Vec4 c1, Vec4 c2, Vec4 c3) // Fills the matrix from four column vectors
			
 
				+	{
			
 
				+		m_row0 = Vec4(c0.x, c1.x, c2.x, c3.x); // Row i gathers component i of every column
			
 
				+		m_row1 = Vec4(c0.y, c1.y, c2.y, c3.y);
			
 
				+		m_row2 = Vec4(c0.z, c1.z, c2.z, c3.z);
			
 
				+		m_row3 = Vec4(c0.w, c1.w, c2.w, c3.w);
			
 
				+	}
			
 
				+
			
 
				+	Vec4 getTranslationPart() // Returns the last column (the w component of every row)
			
 
				+	{
			
 
				+		return Vec4(m_row0.w, m_row1.w, m_row2.w, m_row3.w);
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+// Matrix * column-vector product for Mat4.
			
 
				+// Each component of the result is the dot product of the matching matrix row with v.
			
 
				+Vec4 mul(Mat4 m, Vec4 v)
			
 
				+{
			
 
				+	const Vec4 product = Vec4(dot(m.m_row0, v), dot(m.m_row1, v), // x, y
			
 
				+							  dot(m.m_row2, v), dot(m.m_row3, v)); // z, w
			
 
				+	return product;
			
 
				+}
			
 
				+
			
 
				+Mat4 mul(Mat4 a_, Mat4 b_) // Matrix product a_ * b_, computed one output row at a time
			
 
				+{
			
 
				+	const Vec4 a[4] = {a_.m_row0, a_.m_row1, a_.m_row2, a_.m_row3}; // Rows of the left operand, indexable by the loop
			
 
				+	const Vec4 b[4] = {b_.m_row0, b_.m_row1, b_.m_row2, b_.m_row3}; // Rows of the right operand
			
 
				+	Vec4 c[4]; // Rows of the result
			
 
				+	[unroll] for(U32 i = 0; i < 4; i++) // One iteration per output row
			
 
				+	{
			
 
				+		Vec4 t1, t2; // t1: current element of a's row i replicated to all components, t2: running sum
			
 
				+		t1 = a[i][0];
			
 
				+		t2 = b[0] * t1; // Row i of the result is the sum over k of a[i][k] * (row k of b)
			
 
				+		t1 = a[i][1];
			
 
				+		t2 += b[1] * t1;
			
 
				+		t1 = a[i][2];
			
 
				+		t2 += b[2] * t1;
			
 
				+		t1 = a[i][3];
			
 
				+		t2 += b[3] * t1;
			
 
				+		c[i] = t2;
			
 
				+	}
			
 
				+	Mat4 o; // Pack the computed rows into the returned matrix
			
 
				+	o.m_row0 = c[0];
			
 
				+	o.m_row1 = c[1];
			
 
				+	o.m_row2 = c[2];
			
 
				+	o.m_row3 = c[3];
			
 
				+	return o;
			
 
				+}
			
 
				+
			
 
				+struct Mat3x4 // 3 rows by 4 columns F32 matrix stored as three Vec4 rows
			
 
				+{
			
 
				+	Vec4 m_row0; // Row 0
			
 
				+	Vec4 m_row1; // Row 1
			
 
				+	Vec4 m_row2; // Row 2
			
 
				+
			
 
				+	_ANKI_DEFINE_ALL_OPERATORS_ROWS3(Mat3x4, F32) // Per-row operators: +, -, *, / with an F32 and +, - with another Mat3x4
			
 
				+
			
 
				+	Vec3 getTranslationPart() // Returns the 4th column (the w component of every row)
			
 
				+	{
			
 
				+		return Vec3(m_row0.w, m_row1.w, m_row2.w);
			
 
				+	}
			
 
				+
			
 
				+	void setColumns(Vec3 c0, Vec3 c1, Vec3 c2, Vec3 c3) // Fills the matrix from four 3-component columns
			
 
				+	{
			
 
				+		m_row0 = Vec4(c0.x, c1.x, c2.x, c3.x); // Row i gathers component i of every column
			
 
				+		m_row1 = Vec4(c0.y, c1.y, c2.y, c3.y);
			
 
				+		m_row2 = Vec4(c0.z, c1.z, c2.z, c3.z);
			
 
				+	}
			
 
				+
			
 
				+	void setColumn(U32 i, Vec3 c) // Overwrites column i; i indexes a Vec4 so it presumably must be in [0, 3] - no check is done here
			
 
				+	{
			
 
				+		m_row0[i] = c.x;
			
 
				+		m_row1[i] = c.y;
			
 
				+		m_row2[i] = c.z;
			
 
				+	}
			
 
				+};
			
 
				+
			
 
				+// Matrix * column-vector product for Mat3x4.
			
 
				+// The matrix has three rows so the result has three components, each the dot product of a row with v.
			
 
				+Vec3 mul(Mat3x4 m, Vec4 v)
			
 
				+{
			
 
				+	const Vec3 product = Vec3(dot(m.m_row0, v), dot(m.m_row1, v), dot(m.m_row2, v));
			
 
				+	return product;
			
 
				+}
			
 
				+
			
 
				+Mat3x4 combineTransformations(Mat3x4 a_, Mat3x4 b_) // Product a_ * b_ as if both had an extra (0, 0, 0, 1) bottom row
			
 
				+{
			
 
				+	const Vec4 a[3] = {a_.m_row0, a_.m_row1, a_.m_row2}; // Rows of the left operand, indexable by the loop
			
 
				+	const Vec4 b[3] = {b_.m_row0, b_.m_row1, b_.m_row2}; // Rows of the right operand
			
 
				+	Vec4 c[3]; // Rows of the result
			
 
				+	[unroll] for(U32 i = 0; i < 3; i++) // One iteration per output row
			
 
				+	{
			
 
				+		Vec4 t2; // Running sum for row i
			
 
				+		t2 = b[0] * a[i][0];
			
 
				+		t2 += b[1] * a[i][1];
			
 
				+		t2 += b[2] * a[i][2];
			
 
				+		const Vec4 v4 = Vec4(0.0f, 0.0f, 0.0f, a[i][3]); // a[i][3] times the implicit 4th row (0, 0, 0, 1) of b_
			
 
				+		t2 += v4;
			
 
				+		c[i] = t2;
			
 
				+	}
			
 
				+	Mat3x4 o; // Pack the computed rows into the returned matrix
			
 
				+	o.m_row0 = c[0];
			
 
				+	o.m_row1 = c[1];
			
 
				+	o.m_row2 = c[2];
			
 
				+	return o;
			
 
				+}
			
 
				+
			
 
				+// Returns the lengths of the first three columns of trf, using only its first three rows.
			
 
				+// TMat can be any type that exposes m_row0, m_row1 and m_row2 with at least three components each.
			
 
				+template<typename TMat>
			
 
				+Vec3 extractScale(TMat trf)
			
 
				+{
			
 
				+	// Gather every column from the x/y/z components of the three rows
			
 
				+	const Vec3 columnX = Vec3(trf.m_row0.x, trf.m_row1.x, trf.m_row2.x);
			
 
				+	const Vec3 columnY = Vec3(trf.m_row0.y, trf.m_row1.y, trf.m_row2.y);
			
 
				+	const Vec3 columnZ = Vec3(trf.m_row0.z, trf.m_row1.z, trf.m_row2.z);
			
 
				+
			
 
				+	return Vec3(length(columnX), length(columnY), length(columnZ));
			
 
				+}
			
 
				 
			
 
				 #endif // defined(__HLSL_VERSION)
			
 
				 
			
--- a/AnKi/Shaders/IndirectDiffuse.ankiprog
+++ b/AnKi/Shaders/IndirectDiffuse.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki technique RtMaterialFetch rgen
			
 
				 
			
 
				 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
			
--- a/AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog
+++ b/AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator GPU_WAVE_SIZE 16 32 64
			
 
				 #pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 8 10 12 14 16 18 20
			
 
				 #pragma anki mutator IRRADIANCE_OCTAHEDRON_MAP_SIZE 4 5 6
			
--- a/AnKi/Shaders/IrradianceDice.ankiprog
+++ b/AnKi/Shaders/IrradianceDice.ankiprog
@@ -20,7 +20,7 @@
 
				 constexpr U32 kThreadgroupSize = U32(THREDGROUP_SIZE_SQRT) * U32(THREDGROUP_SIZE_SQRT);
			
 
				 
			
 
				 SamplerState g_nearestAnyClampSampler : register(s0);
			
 
				-TextureCube<RVec4> g_lightShadingTexCube : register(t0);
			
 
				+TextureCube<Vec4> g_lightShadingTexCube : register(t0);
			
 
				 
			
 
				 #if SECOND_BOUNCE == 1
			
 
				 TextureCube<Vec4> g_gbufferTex[3u] : register(t1);
			
@@ -39,7 +39,7 @@ ANKI_FAST_CONSTANTS(Constants, g_consts)
 
				 #else
			
 
				 struct BufferOut
			
 
				 {
			
 
				-	RVec4 m_val[6u];
			
 
				+	Vec4 m_val[6u];
			
 
				 };
			
 
				 
			
 
				 RWStructuredBuffer<BufferOut> g_irradianceDisceResults : register(u0);
			
@@ -49,7 +49,7 @@ constexpr U32 kMinWaveSize = 8u;
 
				 groupshared Vec3 s_integrationResults[6u][kThreadgroupSize / kMinWaveSize]; // In cube coords
			
 
				 groupshared U32 s_waveIndexInsideThreadGroup;
			
 
				 
			
 
				-RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
			
 
				+Vec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
			
 
				 {
			
 
				 	const Vec2 uv = (Vec2(svGroupThreadId.x, svGroupThreadId.y) + 0.5) / F32(THREDGROUP_SIZE_SQRT);
			
 
				 	const Vec3 cubeUvw = getCubemapDirection(uv, face);
			
@@ -77,9 +77,9 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 
				 		const Vec3 r = getCubemapDirection(faceUv, f) * Vec3(1.0, 1.0, -1.0);
			
 
				 
			
 
				 		// Compute integral part
			
 
				-		const RF32 lambert = max(0.0, dot(r, diceDir));
			
 
				-		const RVec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
			
 
				-		const RVec3 irradiance = lightShading * lambert * cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);
			
 
				+		const F32 lambert = max(0.0, dot(r, diceDir));
			
 
				+		const Vec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
			
 
				+		const Vec3 irradiance = lightShading * lambert * cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);
			
 
				 
			
 
				 		// Store
			
 
				 		resultFaces[f] = irradiance;
			
@@ -125,7 +125,7 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 
				 		const Vec3 r = getCubemapDirection(faceUv, f) * Vec3(1.0, 1.0, -1.0);
			
 
				 
			
 
				 		// Compute integral part
			
 
				-		const RF32 lambert = max(0.0, dot(r, diceDir));
			
 
				+		const F32 lambert = max(0.0, dot(r, diceDir));
			
 
				 
			
 
				 		// Read the gbuffer
			
 
				 		const Vec3 gbufferUv = getCubemapDirection(faceUv, f);
			
@@ -135,14 +135,14 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 
				 								g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0), gbuffer);
			
 
				 
			
 
				 		// Sample irradiance
			
 
				-		RVec3 firstBounceIrradiance =
			
 
				-			sampleAmbientDice<RF32>(s_integrationResults[0][0], s_integrationResults[1][0], s_integrationResults[2][0], s_integrationResults[3][0],
			
 
				-									s_integrationResults[4][0], s_integrationResults[5][0], gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
			
 
				+		Vec3 firstBounceIrradiance =
			
 
				+			sampleAmbientDice<F32>(s_integrationResults[0][0], s_integrationResults[1][0], s_integrationResults[2][0], s_integrationResults[3][0],
			
 
				+								   s_integrationResults[4][0], s_integrationResults[5][0], gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
			
 
				 		firstBounceIrradiance = gbuffer.m_diffuse * firstBounceIrradiance;
			
 
				 
			
 
				 		// Compute 2nd bounce
			
 
				-		const RVec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
			
 
				-		const RVec3 irradiance = (firstBounceIrradiance + lightShading * lambert) * cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);
			
 
				+		const Vec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
			
 
				+		const Vec3 irradiance = (firstBounceIrradiance + lightShading * lambert) * cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);
			
 
				 
			
 
				 		// Store
			
 
				 		resultFaces[f] = irradiance;
			
@@ -185,10 +185,10 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 
				 		const U32 f = svGroupIndex;
			
 
				 
			
 
				 #if DEBUG_MODE == 0
			
 
				-		RVec3 irradiance = s_integrationResults[f][0];
			
 
				-		const RVec3 toStoreValue = irradiance;
			
 
				+		Vec3 irradiance = s_integrationResults[f][0];
			
 
				+		const Vec3 toStoreValue = irradiance;
			
 
				 #elif DEBUG_MODE == 1
			
 
				-		const RVec3 toStoreValue = colorPerCubeFace(f);
			
 
				+		const Vec3 toStoreValue = colorPerCubeFace(f);
			
 
				 #else
			
 
				 		UVec3 volumeSize;
			
 
				 		g_irradianceVolume.GetDimensions(volumeSize.x, volumeSize.y, volumeSize.z);
			
@@ -196,8 +196,8 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 
				 		const UVec3 subvolumeSize = UVec3(volumeSize.x / 6u, volumeSize.y, volumeSize.z);
			
 
				 		const U32 cellIdx =
			
 
				 			g_consts.m_volumeTexel.z * subvolumeSize.x * subvolumeSize.y + g_consts.m_volumeTexel.y * subvolumeSize.x + g_consts.m_volumeTexel.x;
			
 
				-		const RF32 headmapFactor = F32(cellIdx) / F32(subvolumeSize.x * subvolumeSize.y * subvolumeSize.z);
			
 
				-		const RVec3 toStoreValue = heatmap(headmapFactor);
			
 
				+		const F32 headmapFactor = F32(cellIdx) / F32(subvolumeSize.x * subvolumeSize.y * subvolumeSize.z);
			
 
				+		const Vec3 toStoreValue = heatmap(headmapFactor);
			
 
				 #endif
			
 
				 
			
 
				 #if STORE_LOCATION == 0
			
--- a/AnKi/Shaders/LensFlareSprite.ankiprog
+++ b/AnKi/Shaders/LensFlareSprite.ankiprog
@@ -11,7 +11,7 @@
 
				 struct VertOut
			
 
				 {
			
 
				 	Vec3 m_uv : TEXCOORD;
			
 
				-	RVec4 m_color : COLOR;
			
 
				+	Vec4 m_color : COLOR;
			
 
				 	Vec4 m_svPosition : SV_POSITION;
			
 
				 };
			
 
				 
			
@@ -41,11 +41,11 @@ VertOut main(U32 svVertexId : SV_VERTEXID, U32 svInstanceId : SV_INSTANCEID)
 
				 
			
 
				 #if ANKI_PIXEL_SHADER
			
 
				 SamplerState g_trilinearRepeatSampler : register(s0);
			
 
				-Texture2DArray<RVec4> g_tex : register(t1);
			
 
				+Texture2DArray<Vec4> g_tex : register(t1);
			
 
				 
			
 
				-RVec4 main(VertOut input) : SV_TARGET0
			
 
				+Vec4 main(VertOut input) : SV_TARGET0
			
 
				 {
			
 
				-	const RVec4 col = g_tex.Sample(g_trilinearRepeatSampler, input.m_uv);
			
 
				+	const Vec4 col = g_tex.Sample(g_trilinearRepeatSampler, input.m_uv);
			
 
				 	return col * input.m_color;
			
 
				 }
			
 
				 #endif
			
--- a/AnKi/Shaders/LightShading.ankiprog
+++ b/AnKi/Shaders/LightShading.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator INDIRECT_DIFFUSE_TEX 0 1
			
 
				 
			
 
				 #pragma anki technique vert pixel
			
--- a/AnKi/Shaders/LightShadingApplyFog.ankiprog
+++ b/AnKi/Shaders/LightShadingApplyFog.ankiprog
@@ -13,7 +13,7 @@
 
				 SamplerState g_nearestAnyClampSampler : register(s0);
			
 
				 SamplerState g_linearAnyClampSampler : register(s1);
			
 
				 Texture2D g_depthRt : register(t0);
			
 
				-Texture3D<RVec4> g_fogVolume : register(t1);
			
 
				+Texture3D<Vec4> g_fogVolume : register(t1);
			
 
				 
			
 
				 struct Constants
			
 
				 {
			
@@ -24,14 +24,7 @@ struct Constants
 
				 };
			
 
				 ANKI_FAST_CONSTANTS(Constants, g_consts)
			
 
				 
			
 
				-// Workaround validation errors
			
 
				-#	if ANKI_GR_BACKEND_DIRECT3D
			
 
				-#		define OUT_TYPE Vec4
			
 
				-#	else
			
 
				-#		define OUT_TYPE RVec4
			
 
				-#	endif
			
 
				-
			
 
				-OUT_TYPE main(VertOut input) : SV_TARGET0
			
 
				+Vec4 main(VertOut input) : SV_TARGET0
			
 
				 {
			
 
				 	const Vec2 uv = input.m_uv;
			
 
				 	Vec3 uvw;
			
@@ -45,11 +38,11 @@ OUT_TYPE main(VertOut input) : SV_TARGET0
 
				 	uvw.xy = uv;
			
 
				 
			
 
				 	// Read the volume
			
 
				-	const RVec4 fogVals = g_fogVolume.SampleLevel(g_linearAnyClampSampler, uvw, 0.0);
			
 
				-	const RVec3 inScattering = fogVals.rgb;
			
 
				-	const RF32 transmittance = fogVals.a;
			
 
				+	const Vec4 fogVals = g_fogVolume.SampleLevel(g_linearAnyClampSampler, uvw, 0.0);
			
 
				+	const Vec3 inScattering = fogVals.rgb;
			
 
				+	const F32 transmittance = fogVals.a;
			
 
				 
			
 
				 	// Apply the fog
			
 
				-	return OUT_TYPE(inScattering, transmittance);
			
 
				+	return Vec4(inScattering, transmittance);
			
 
				 }
			
 
				 #endif // ANKI_PIXEL_SHADER
			
--- a/AnKi/Shaders/LightShadingSkybox.ankiprog
+++ b/AnKi/Shaders/LightShadingSkybox.ankiprog
@@ -19,14 +19,14 @@
 
				 #	if METHOD == 0
			
 
				 struct Constants
			
 
				 {
			
 
				-	RVec3 m_solidColor;
			
 
				+	Vec3 m_solidColor;
			
 
				 	F32 m_padding;
			
 
				 };
			
 
				 
			
 
				 ANKI_FAST_CONSTANTS(Constants, g_consts)
			
 
				 #	elif METHOD == 1
			
 
				 SamplerState g_trilinearAnySampler : register(s0);
			
 
				-Texture2D<RVec4> g_envMapTex : register(t0);
			
 
				+Texture2D<Vec4> g_envMapTex : register(t0);
			
 
				 
			
 
				 struct Constants
			
 
				 {
			
@@ -49,12 +49,12 @@ Texture2D<Vec4> g_skyLut : register(t0);
 
				 ConstantBuffer<GlobalRendererConstants> g_consts : register(b0);
			
 
				 #	endif
			
 
				 
			
 
				-RVec4 main(VertOut input) : SV_TARGET0
			
 
				+Vec4 main(VertOut input) : SV_TARGET0
			
 
				 {
			
 
				 	const Vec2 uv = input.m_uv;
			
 
				 #	if METHOD == 0
			
 
				 	ANKI_MAYBE_UNUSED(uv);
			
 
				-	const RVec3 output = g_consts.m_solidColor;
			
 
				+	const Vec3 output = g_consts.m_solidColor;
			
 
				 #	elif METHOD == 1
			
 
				 	const F32 depth = 1.0;
			
 
				 	const Vec2 ndc = uvToNdc(uv);
			
@@ -73,7 +73,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 
			
 
				 	const F32 bias = (maxD > 0.9) ? -100.0f : 0.0f;
			
 
				 
			
 
				-	const RVec3 output = g_envMapTex.SampleBias(g_trilinearAnySampler, uv3, bias).rgb * g_consts.m_scale + g_consts.m_bias;
			
 
				+	const Vec3 output = g_envMapTex.SampleBias(g_trilinearAnySampler, uv3, bias).rgb * g_consts.m_scale + g_consts.m_bias;
			
 
				 #	else
			
 
				 	const F32 depth = 1.0;
			
 
				 	const Vec2 ndc = uvToNdc(uv);
			
@@ -82,10 +82,10 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 
			
 
				 	const Vec3 eyeToFrag = normalize(worldPos - g_consts.m_cameraPosition);
			
 
				 
			
 
				-	const RVec3 output = computeSkyColor(g_skyLut, g_linearAnyClampSampler, eyeToFrag, -g_consts.m_directionalLight.m_direction,
			
 
				-										 g_consts.m_directionalLight.m_power, true);
			
 
				+	const Vec3 output = computeSkyColor(g_skyLut, g_linearAnyClampSampler, eyeToFrag, -g_consts.m_directionalLight.m_direction,
			
 
				+										g_consts.m_directionalLight.m_power, true);
			
 
				 #	endif
			
 
				 
			
 
				-	return RVec4(output, 0.0);
			
 
				+	return Vec4(output, 0.0);
			
 
				 }
			
 
				 #endif // ANKI_PIXEL_SHADER
			
--- a/AnKi/Shaders/Reflections.ankiprog
+++ b/AnKi/Shaders/Reflections.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator SSR_SAMPLE_GBUFFER 0 1
			
 
				 #pragma anki mutator INDIRECT_DIFFUSE_CLIPMAPS 0 1
			
 
				 
			
--- a/AnKi/Shaders/RtMaterialFetchDbg.ankiprog
+++ b/AnKi/Shaders/RtMaterialFetchDbg.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki technique RtMaterialFetch rgen
			
 
				 
			
 
				 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
			
--- a/AnKi/Shaders/RtMaterialFetchMiss.ankiprog
+++ b/AnKi/Shaders/RtMaterialFetchMiss.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki technique RtMaterialFetch miss
			
 
				 
			
 
				 #include <AnKi/Shaders/RtMaterialFetch.hlsl>
			
--- a/AnKi/Shaders/RtShadows.ankiprog
+++ b/AnKi/Shaders/RtShadows.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator RAYS_PER_PIXEL 1 2 4 8
			
 
				 
			
 
				 #pragma anki technique RtShadows rgen miss
			
--- a/AnKi/Shaders/ShadowmapsResolve.ankiprog
+++ b/AnKi/Shaders/ShadowmapsResolve.ankiprog
@@ -32,7 +32,7 @@ Texture2D<Vec4> g_dirLightResolvedShadowsTex : register(t6);
 
				 #	endif
			
 
				 
			
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				-RWTexture2D<RVec4> g_storageTex : register(u0);
			
 
				+RWTexture2D<Vec4> g_storageTex : register(u0);
			
 
				 #	endif
			
 
				 
			
 
				 struct Constants
			
@@ -67,7 +67,7 @@ Vec3 computeDebugShadowCascadeColor(U32 cascade)
 
				 #	if ANKI_COMPUTE_SHADER
			
 
				 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
			
 
				 #	else
			
 
				-RVec4 main(VertOut input) : SV_TARGET0
			
 
				+Vec4 main(VertOut input) : SV_TARGET0
			
 
				 #	endif
			
 
				 {
			
 
				 #	if ANKI_COMPUTE_SHADER
			
@@ -86,12 +86,12 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 	g_noiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
			
 
				 
			
 
				 	const Vec2 noiseUv = g_consts.m_framebufferSize / noiseTexSize * uv;
			
 
				-	RVec3 noise = g_noiseTex.SampleLevel(g_trilinearRepeatSampler, noiseUv, 0.0).rgb;
			
 
				+	Vec3 noise = g_noiseTex.SampleLevel(g_trilinearRepeatSampler, noiseUv, 0.0).rgb;
			
 
				 	noise = animateBlueNoise(noise, g_globalConstants.m_frame % 16u);
			
 
				-	const RF32 randFactor = noise.x;
			
 
				+	const F32 randFactor = noise.x;
			
 
				 #		else
			
 
				-	const RVec2 noise2 = spatioTemporalNoise(svDispatchThreadId, g_globalConstants.m_frame);
			
 
				-	const RF32 randFactor = noise2.x;
			
 
				+	const Vec2 noise2 = spatioTemporalNoise(svDispatchThreadId, g_globalConstants.m_frame);
			
 
				+	const F32 randFactor = noise2.x;
			
 
				 #		endif
			
 
				 #	endif
			
 
				 
			
@@ -108,7 +108,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 	// Layers
			
 
				 	U32 shadowCasterCountPerFragment = 0u;
			
 
				 	const U32 kMaxShadowCastersPerFragment = 4u;
			
 
				-	RVec4 shadowFactors = 0.0f;
			
 
				+	Vec4 shadowFactors = 0.0f;
			
 
				 
			
 
				 	// Dir light
			
 
				 #	if DIRECTIONAL_LIGHT_SHADOW_RESOLVED
			
@@ -120,14 +120,14 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 	{
			
 
				 		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount;
			
 
				 
			
 
				-		const RF32 positiveZViewSpace = testPlanePoint(g_globalConstants.m_nearPlaneWSpace.xyz, g_globalConstants.m_nearPlaneWSpace.w, worldPos)
			
 
				-										+ g_globalConstants.m_matrices.m_near;
			
 
				+		const F32 positiveZViewSpace = testPlanePoint(g_globalConstants.m_nearPlaneWSpace.xyz, g_globalConstants.m_nearPlaneWSpace.w, worldPos)
			
 
				+									   + g_globalConstants.m_matrices.m_near;
			
 
				 
			
 
				 		const F32 lastCascadeDistance = dirLight.m_shadowCascadeDistances[shadowCascadeCount - 1u];
			
 
				-		RF32 shadowFactor;
			
 
				+		F32 shadowFactor;
			
 
				 		if(positiveZViewSpace < lastCascadeDistance)
			
 
				 		{
			
 
				-			RF32 cascadeBlendFactor;
			
 
				+			F32 cascadeBlendFactor;
			
 
				 			const UVec2 cascadeIndices =
			
 
				 				computeShadowCascadeIndex2(positiveZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount, cascadeBlendFactor);
			
 
				 
			
@@ -144,14 +144,14 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 #		endif
			
 
				 
			
 
				 #		if QUALITY == 2
			
 
				-			const RF32 shadowFactorCascadeA = computeShadowFactorDirLightPcss<RF32>(
			
 
				-				dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
			
 
				+			const F32 shadowFactorCascadeA = computeShadowFactorDirLightPcss<F32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex,
			
 
				+																				  g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
			
 
				 #		elif QUALITY == 1
			
 
				-			const RF32 shadowFactorCascadeA = computeShadowFactorDirLightPcf<RF32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex,
			
 
				-																				   g_linearAnyClampShadowSampler, randFactor);
			
 
				+			const F32 shadowFactorCascadeA = computeShadowFactorDirLightPcf<F32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex,
			
 
				+																				 g_linearAnyClampShadowSampler, randFactor);
			
 
				 #		else
			
 
				-			const RF32 shadowFactorCascadeA =
			
 
				-				computeShadowFactorDirLight<RF32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				+			const F32 shadowFactorCascadeA =
			
 
				+				computeShadowFactorDirLight<F32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				 #		endif
			
 
				 
			
 
				 			if(cascadeBlendFactor < 0.01 || cascadeIndices.x == cascadeIndices.y)
			
@@ -163,21 +163,21 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 			{
			
 
				 #		if QUALITY == 2
			
 
				 				// Blend cascades
			
 
				-				const RF32 shadowFactorCascadeB = computeShadowFactorDirLightPcss<RF32>(
			
 
				+				const F32 shadowFactorCascadeB = computeShadowFactorDirLightPcss<F32>(
			
 
				 					dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
			
 
				 #		elif QUALITY == 1
			
 
				 				// Blend cascades
			
 
				-				const RF32 shadowFactorCascadeB = computeShadowFactorDirLightPcf<RF32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex,
			
 
				-																					   g_linearAnyClampShadowSampler, randFactor);
			
 
				+				const F32 shadowFactorCascadeB = computeShadowFactorDirLightPcf<F32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex,
			
 
				+																					 g_linearAnyClampShadowSampler, randFactor);
			
 
				 #		else
			
 
				 				// Blend cascades
			
 
				-				const RF32 shadowFactorCascadeB =
			
 
				-					computeShadowFactorDirLight<RF32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				+				const F32 shadowFactorCascadeB =
			
 
				+					computeShadowFactorDirLight<F32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				 #		endif
			
 
				 				shadowFactor = lerp(shadowFactorCascadeA, shadowFactorCascadeB, cascadeBlendFactor);
			
 
				 			}
			
 
				 
			
 
				-			RF32 distanceFadeFactor = saturate(positiveZViewSpace / lastCascadeDistance);
			
 
				+			F32 distanceFadeFactor = saturate(positiveZViewSpace / lastCascadeDistance);
			
 
				 			distanceFadeFactor = pow(distanceFadeFactor, 8.0);
			
 
				 			shadowFactor += distanceFadeFactor;
			
 
				 		}
			
@@ -202,10 +202,9 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 			const Vec3 frag2Light = light.m_position - worldPos;
			
 
				 
			
 
				 #	if QUALITY > 0
			
 
				-			const RF32 shadowFactor =
			
 
				-				computeShadowFactorPointLightPcf(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
			
 
				+			const F32 shadowFactor = computeShadowFactorPointLightPcf(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
			
 
				 #	else
			
 
				-			const RF32 shadowFactor = computeShadowFactorPointLight<F32>(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				+			const F32 shadowFactor = computeShadowFactorPointLight<F32>(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				 #	endif
			
 
				 			shadowFactors[min(kMaxShadowCastersPerFragment - 1u, shadowCasterCountPerFragment++)] = shadowFactor;
			
 
				 		}
			
@@ -219,13 +218,13 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 		[branch] if(light.m_shadow)
			
 
				 		{
			
 
				 #	if QUALITY == 2
			
 
				-			const RF32 shadowFactor = computeShadowFactorSpotLightPcss<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler,
			
 
				-																			 randFactor, g_linearAnyClampSampler);
			
 
				+			const F32 shadowFactor = computeShadowFactorSpotLightPcss<F32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler,
			
 
				+																		   randFactor, g_linearAnyClampSampler);
			
 
				 #	elif QUALITY == 1
			
 
				-			const RF32 shadowFactor =
			
 
				-				computeShadowFactorSpotLightPcf<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
			
 
				+			const F32 shadowFactor =
			
 
				+				computeShadowFactorSpotLightPcf<F32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
			
 
				 #	else
			
 
				-			const RF32 shadowFactor = computeShadowFactorSpotLight<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				+			const F32 shadowFactor = computeShadowFactorSpotLight<F32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
			
 
				 #	endif
			
 
				 			shadowFactors[min(kMaxShadowCastersPerFragment - 1u, shadowCasterCountPerFragment++)] = shadowFactor;
			
 
				 		}
			
--- a/AnKi/Shaders/Ssao.ankiprog
+++ b/AnKi/Shaders/Ssao.ankiprog
@@ -5,8 +5,6 @@
 
				 
			
 
				 // Ground truth ambiend occlusion
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator SPATIAL_DENOISE_SAMPLE_COUNT 3 5 7 9
			
 
				 #pragma anki mutator DENOISING_QUARTER_RESOLUTION 0 1
			
 
				 
			
--- a/AnKi/Shaders/TemporalAA.ankiprog
+++ b/AnKi/Shaders/TemporalAA.ankiprog
@@ -3,8 +3,6 @@
 
				 // Code licensed under the BSD License.
			
 
				 // http://www.anki3d.org/LICENSE
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator VARIANCE_CLIPPING 0 1
			
 
				 #pragma anki mutator YCBCR 0 1
			
 
				 
			
--- a/AnKi/Shaders/Tonemap.ankiprog
+++ b/AnKi/Shaders/Tonemap.ankiprog
@@ -5,8 +5,6 @@
 
				 
			
 
				 // Does tonemapping
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki technique vert pixel comp
			
 
				 
			
 
				 #include <AnKi/Shaders/QuadVert.hlsl>
			
--- a/AnKi/Shaders/TonemappingAverageLuminance.ankiprog
+++ b/AnKi/Shaders/TonemappingAverageLuminance.ankiprog
@@ -12,7 +12,7 @@
 
				 #define THREAD_COUNT_Y 16u
			
 
				 #define THREAD_COUNT UVec2(THREAD_COUNT_X, THREAD_COUNT_Y)
			
 
				 
			
 
				-Texture2D<RVec4> g_tex : register(t0);
			
 
				+Texture2D<Vec4> g_tex : register(t0);
			
 
				 
			
 
				 #define TONEMAPPING_REGISTER u0
			
 
				 #include <AnKi/Shaders/TonemappingResources.hlsl>
			
--- a/AnKi/Shaders/TraditionalDeferredShading.ankiprog
+++ b/AnKi/Shaders/TraditionalDeferredShading.ankiprog
@@ -5,8 +5,6 @@
 
				 
			
 
				 // Classic deferred lighting shader
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator SPECULAR 0 1
			
 
				 #pragma anki mutator INDIRECT_DIFFUSE 0 1 2
			
 
				 
			
--- a/AnKi/Shaders/TraditionalDeferredShadingSkybox.ankiprog
+++ b/AnKi/Shaders/TraditionalDeferredShadingSkybox.ankiprog
@@ -23,7 +23,7 @@ Texture2D g_depthTex : register(t0);
 
				 
			
 
				 #	if METHOD == 1
			
 
				 SamplerState g_trilinearAnySampler : register(s1);
			
 
				-Texture2D<RVec4> g_envMapTex : register(t1);
			
 
				+Texture2D<Vec4> g_envMapTex : register(t1);
			
 
				 #	elif METHOD == 2
			
 
				 SamplerState g_linearAnyClampSampler : register(s1);
			
 
				 Texture2D<Vec4> g_skyLut : register(t1);
			
--- a/AnKi/Shaders/Ui.ankiprog
+++ b/AnKi/Shaders/Ui.ankiprog
@@ -9,17 +9,10 @@
 
				 
			
 
				 #include <AnKi/Shaders/Common.hlsl>
			
 
				 
			
 
				-// Workaround validation errors
			
 
				-#if ANKI_GR_BACKEND_DIRECT3D
			
 
				-#	define OUT_TYPE Vec4
			
 
				-#else
			
 
				-#	define OUT_TYPE RVec4
			
 
				-#endif
			
 
				-
			
 
				 struct VertIn
			
 
				 {
			
 
				 	Vec2 m_position : POSITION;
			
 
				-	OUT_TYPE m_color : COLOR;
			
 
				+	ANKI_RELAXED_PRECISION Vec4 m_color : COLOR;
			
 
				 #if TEXTURE_TYPE > 0
			
 
				 	Vec2 m_uv : TEXCOORD;
			
 
				 #endif
			
@@ -30,7 +23,7 @@ struct VertOut
 
				 #if TEXTURE_TYPE > 0
			
 
				 	Vec2 m_uv : TEXCOORD;
			
 
				 #endif
			
 
				-	RVec4 m_color : COLOR;
			
 
				+	ANKI_RELAXED_PRECISION Vec4 m_color : COLOR;
			
 
				 	Vec4 m_svPosition : SV_POSITION;
			
 
				 };
			
 
				 
			
@@ -64,7 +57,7 @@ SamplerState g_trilinearRepeatSampler : register(s0);
 
				 Texture2D g_tex : register(t0);
			
 
				 #	endif
			
 
				 
			
 
				-OUT_TYPE main(VertOut input) : SV_TARGET0
			
 
				+Vec4 main(VertOut input) : SV_TARGET0
			
 
				 {
			
 
				 #	if TEXTURE_TYPE == 0
			
 
				 	return input.m_color;
			
--- a/AnKi/Shaders/UiVisualizeImage.ankiprog
+++ b/AnKi/Shaders/UiVisualizeImage.ankiprog
@@ -21,14 +21,14 @@ ANKI_FAST_CONSTANTS(Constants, g_consts)
 
				 struct VertIn
			
 
				 {
			
 
				 	Vec2 m_position : POSITION;
			
 
				-	RVec4 m_color : COLOR;
			
 
				+	Vec4 m_color : COLOR;
			
 
				 	Vec2 m_uv : TEXCOORD;
			
 
				 };
			
 
				 
			
 
				 struct VertOut
			
 
				 {
			
 
				 	Vec2 m_uv : TEXCOORD;
			
 
				-	RVec4 m_color : COLOR;
			
 
				+	Vec4 m_color : COLOR;
			
 
				 	Vec4 m_svPosition : SV_POSITION;
			
 
				 };
			
 
				 
			
@@ -51,19 +51,19 @@ VertOut main(VertIn input)
 
				 #if ANKI_PIXEL_SHADER
			
 
				 SamplerState g_trilinearRepeatSampler : register(s0);
			
 
				 #	if TEXTURE_TYPE == 0
			
 
				-Texture2D<RVec4> g_tex2d : register(t0);
			
 
				+Texture2D<Vec4> g_tex2d : register(t0);
			
 
				 #	else
			
 
				-Texture3D<RVec4> g_tex3d : register(t0);
			
 
				+Texture3D<Vec4> g_tex3d : register(t0);
			
 
				 #	endif
			
 
				 
			
 
				-RVec4 main(VertOut input) : SV_TARGET0
			
 
				+Vec4 main(VertOut input) : SV_TARGET0
			
 
				 {
			
 
				 #	if TEXTURE_TYPE == 0
			
 
				-	const RVec4 rgba = g_tex2d.Sample(g_trilinearRepeatSampler, input.m_uv);
			
 
				+	const Vec4 rgba = g_tex2d.Sample(g_trilinearRepeatSampler, input.m_uv);
			
 
				 #	else
			
 
				-	const RVec4 rgba = g_tex3d.Sample(g_trilinearRepeatSampler, Vec3(input.m_uv, g_consts.m_depth.x));
			
 
				+	const Vec4 rgba = g_tex3d.Sample(g_trilinearRepeatSampler, Vec3(input.m_uv, g_consts.m_depth.x));
			
 
				 #	endif
			
 
				-	RVec3 outColor = input.m_color.rgb * rgba.rgb * g_consts.m_colorScale.rgb;
			
 
				+	Vec3 outColor = input.m_color.rgb * rgba.rgb * g_consts.m_colorScale.rgb;
			
 
				 
			
 
				 	if(g_consts.m_colorScale.a == 1.0)
			
 
				 	{
			
@@ -74,6 +74,6 @@ RVec4 main(VertOut input) : SV_TARGET0
 
				 		outColor = lerp(Vec3(alphaPattern, alphaPattern, alphaPattern), outColor, rgba.a);
			
 
				 	}
			
 
				 
			
 
				-	return RVec4(outColor, 1.0);
			
 
				+	return Vec4(outColor, 1.0);
			
 
				 }
			
 
				 #endif // ANKI_PIXEL_SHADER
			
--- a/AnKi/Shaders/VolumetricFogAccumulation.ankiprog
+++ b/AnKi/Shaders/VolumetricFogAccumulation.ankiprog
@@ -9,8 +9,8 @@
 
				 #include <AnKi/Shaders/Common.hlsl>
			
 
				 
			
 
				 SamplerState g_linearAnyClampSampler : register(s0);
			
 
				-Texture3D<RVec4> g_lightVolume : register(t0);
			
 
				-RWTexture3D<RVec4> g_fogVolume : register(u0);
			
 
				+Texture3D<Vec4> g_lightVolume : register(t0);
			
 
				+RWTexture3D<Vec4> g_fogVolume : register(u0);
			
 
				 
			
 
				 ANKI_FAST_CONSTANTS(VolumetricFogConstants, g_consts)
			
 
				 
			
@@ -23,41 +23,41 @@ ANKI_FAST_CONSTANTS(VolumetricFogConstants, g_consts)
 
				 
			
 
				 	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(g_consts.m_volumeSize.xy);
			
 
				 
			
 
				-	RVec4 colorAndDensityFront = 0.0;
			
 
				+	Vec4 colorAndDensityFront = 0.0;
			
 
				 	[loop] for(U32 i = 0u; i < g_consts.m_volumeSize.z; ++i)
			
 
				 	{
			
 
				-		const RF32 fi = F32(i);
			
 
				+		const F32 fi = F32(i);
			
 
				 
			
 
				 		// Compute the linear depth
			
 
				-		const RF32 maxLinearDepth = g_consts.m_maxZSplitsToProcessf / g_consts.m_zSplitCountf;
			
 
				-		const RF32 linearDepthFraction = maxLinearDepth / F32(g_consts.m_volumeSize.z);
			
 
				-		const RF32 linearDepthNear = fi * linearDepthFraction;
			
 
				-		const RF32 linearDepthFar = (fi + 1.0) * linearDepthFraction;
			
 
				+		const F32 maxLinearDepth = g_consts.m_maxZSplitsToProcessf / g_consts.m_zSplitCountf;
			
 
				+		const F32 linearDepthFraction = maxLinearDepth / F32(g_consts.m_volumeSize.z);
			
 
				+		const F32 linearDepthNear = fi * linearDepthFraction;
			
 
				+		const F32 linearDepthFar = (fi + 1.0) * linearDepthFraction;
			
 
				 
			
 
				 		// Compute the min and max Z in view space if this cluster fragment
			
 
				-		const RF32 zVSpaceNear = -linearDepthNear * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
			
 
				-		const RF32 zVSpaceFar = -linearDepthFar * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
			
 
				+		const F32 zVSpaceNear = -linearDepthNear * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
			
 
				+		const F32 zVSpaceFar = -linearDepthFar * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
			
 
				 
			
 
				 		// Compute the thikness of this fragment
			
 
				-		const RF32 layerThinkness = abs(zVSpaceNear - zVSpaceFar);
			
 
				+		const F32 layerThinkness = abs(zVSpaceNear - zVSpaceFar);
			
 
				 
			
 
				 		// Read the light value and the fog density from the fog volumes
			
 
				-		const RF32 w = (fi + 0.5) / F32(g_consts.m_volumeSize.z);
			
 
				-		RVec4 lightAndFogDensity = g_lightVolume.SampleLevel(g_linearAnyClampSampler, Vec3(uv, w), 0.0);
			
 
				+		const F32 w = (fi + 0.5) / F32(g_consts.m_volumeSize.z);
			
 
				+		Vec4 lightAndFogDensity = g_lightVolume.SampleLevel(g_linearAnyClampSampler, Vec3(uv, w), 0.0);
			
 
				 		lightAndFogDensity.xyz *= g_consts.m_fogDiffuse / kPi;
			
 
				 
			
 
				 		// Scattering & absorption
			
 
				-		const RF32 scattering = lightAndFogDensity.w * g_consts.m_fogScatteringCoeff * layerThinkness;
			
 
				-		const RF32 absorption = lightAndFogDensity.w * g_consts.m_fogAbsorptionCoeff * layerThinkness;
			
 
				+		const F32 scattering = lightAndFogDensity.w * g_consts.m_fogScatteringCoeff * layerThinkness;
			
 
				+		const F32 absorption = lightAndFogDensity.w * g_consts.m_fogAbsorptionCoeff * layerThinkness;
			
 
				 
			
 
				 		// Integrate
			
 
				-		const RVec4 colorAndDensityBack = Vec4(lightAndFogDensity.xyz * scattering, scattering + absorption);
			
 
				+		const Vec4 colorAndDensityBack = Vec4(lightAndFogDensity.xyz * scattering, scattering + absorption);
			
 
				 
			
 
				-		const RVec3 l = colorAndDensityFront.rgb + saturate(exp(-colorAndDensityFront.a)) * colorAndDensityBack.rgb;
			
 
				+		const Vec3 l = colorAndDensityFront.rgb + saturate(exp(-colorAndDensityFront.a)) * colorAndDensityBack.rgb;
			
 
				 		colorAndDensityFront = Vec4(l.rgb, colorAndDensityFront.a + colorAndDensityBack.a);
			
 
				 
			
 
				 		// Write the value
			
 
				-		const RVec4 valToWrite = Vec4(colorAndDensityFront.rgb, saturate(exp(-colorAndDensityFront.a)));
			
 
				+		const Vec4 valToWrite = Vec4(colorAndDensityFront.rgb, saturate(exp(-colorAndDensityFront.a)));
			
 
				 		g_fogVolume[UVec3(svDispatchThreadId.xy, i)] = valToWrite;
			
 
				 	}
			
 
				 }
			
--- a/AnKi/Shaders/VolumetricLightingAccumulation.ankiprog
+++ b/AnKi/Shaders/VolumetricLightingAccumulation.ankiprog
@@ -5,8 +5,6 @@
 
				 
			
 
				 // This shader accumulates the lighting for every cluster fraction
			
 
				 
			
 
				-#pragma anki 16bit
			
 
				-
			
 
				 #pragma anki mutator ENABLE_SHADOWS 0 1
			
 
				 #pragma anki mutator CLIPMAP_DIFFUSE_INDIRECT 0 1
			
 
				 
			
--- a/AnKi/Shaders/VrsSriGenerationCompute.ankiprog
+++ b/AnKi/Shaders/VrsSriGenerationCompute.ankiprog
@@ -15,7 +15,7 @@
 
				 // Find the maximum luma derivative in x and y, relative to the average luma of the block.
			
 
				 // Each thread handles a 2x2 region when using 8x8 VRS tiles and a 2x4 region when using 16x16 VRS tiles.
			
 
				 
			
 
				-Texture2D<RVec4> g_inputTex : register(t0);
			
 
				+Texture2D<Vec4> g_inputTex : register(t0);
			
 
				 SamplerState g_nearestClampSampler : register(s0);
			
 
				 
			
 
				 #if SRI_TEXEL_DIMENSION == 8
			
@@ -45,14 +45,14 @@ ANKI_FAST_CONSTANTS(Constants, g_consts)
 
				 // subgroupSize is large enough so we need shared memory as a fallback. We need gl_NumSubgroups entries, but it is not a
			
 
				 // constant, so estimate it assuming a subgroupSize of at least 8.
			
 
				 constexpr U32 kSharedMemoryEntries = THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y / 8u;
			
 
				-groupshared RF32 s_averageLuma[kSharedMemoryEntries];
			
 
				-groupshared RVec2 s_maxDerivative[kSharedMemoryEntries];
			
 
				+groupshared F32 s_averageLuma[kSharedMemoryEntries];
			
 
				+groupshared Vec2 s_maxDerivative[kSharedMemoryEntries];
			
 
				 groupshared U32 s_waveIndexInsideThreadGroup;
			
 
				 #endif
			
 
				 
			
 
				-RF32 computeLuma(RVec3 color)
			
 
				+F32 computeLuma(Vec3 color)
			
 
				 {
			
 
				-	const RF32 l = computeLuminance(color);
			
 
				+	const F32 l = computeLuminance(color);
			
 
				 	return l / (1.0f + l);
			
 
				 }
			
 
				 
			
@@ -74,30 +74,30 @@ RF32 computeLuma(RVec3 color)
 
				 	//       l1.y
			
 
				 	// l0.z  l0.w  l1.x
			
 
				 	// l0.x  l0.y
			
 
				-	RVec4 l0;
			
 
				+	Vec4 l0;
			
 
				 	l0.x = sampleLuma(0, 0);
			
 
				 	l0.y = sampleLuma(1, 0);
			
 
				 	l0.z = sampleLuma(0, 1);
			
 
				 	l0.w = sampleLuma(1, 1);
			
 
				 
			
 
				-	RVec2 l1;
			
 
				+	Vec2 l1;
			
 
				 	l1.x = sampleLuma(2, 1);
			
 
				 	l1.y = sampleLuma(1, 2);
			
 
				 
			
 
				 	// Calculate derivatives.
			
 
				-	RVec2 a = RVec2(l0.y, l1.x);
			
 
				-	RVec2 b = RVec2(l0.x, l0.w);
			
 
				-	const RVec2 dx = abs(a - b);
			
 
				+	Vec2 a = Vec2(l0.y, l1.x);
			
 
				+	Vec2 b = Vec2(l0.x, l0.w);
			
 
				+	const Vec2 dx = abs(a - b);
			
 
				 
			
 
				-	a = RVec2(l0.z, l1.y);
			
 
				-	b = RVec2(l0.x, l0.w);
			
 
				-	const RVec2 dy = abs(a - b);
			
 
				+	a = Vec2(l0.z, l1.y);
			
 
				+	b = Vec2(l0.x, l0.w);
			
 
				+	const Vec2 dy = abs(a - b);
			
 
				 
			
 
				-	RF32 maxDerivativeX = max(dx.x, dx.y);
			
 
				-	RF32 maxDerivativeY = max(dy.x, dy.y);
			
 
				+	F32 maxDerivativeX = max(dx.x, dx.y);
			
 
				+	F32 maxDerivativeY = max(dy.x, dy.y);
			
 
				 
			
 
				 	// Calculate average luma.
			
 
				-	RF32 averageLuma = (l0.x + l0.y + l0.z + l0.w) / 4.0;
			
 
				+	F32 averageLuma = (l0.x + l0.y + l0.z + l0.w) / 4.0;
			
 
				 #else
			
 
				 	// Get luminance.
			
 
				 	//             l2.z
			
@@ -105,38 +105,38 @@ RF32 computeLuma(RVec3 color)
 
				 	//       l1.x  l1.y
			
 
				 	//       l0.z  l0.w  l2.x
			
 
				 	//       l0.x  l0.y
			
 
				-	RVec4 l0;
			
 
				+	Vec4 l0;
			
 
				 	l0.x = sampleLuma(0, 0);
			
 
				 	l0.y = sampleLuma(1, 0);
			
 
				 	l0.z = sampleLuma(0, 1);
			
 
				 	l0.w = sampleLuma(1, 1);
			
 
				 
			
 
				-	RVec4 l1;
			
 
				+	Vec4 l1;
			
 
				 	l1.x = sampleLuma(0, 2);
			
 
				 	l1.y = sampleLuma(1, 2);
			
 
				 	l1.z = sampleLuma(0, 3);
			
 
				 	l1.w = sampleLuma(1, 3);
			
 
				 
			
 
				-	RVec3 l2;
			
 
				+	Vec3 l2;
			
 
				 	l2.x = sampleLuma(2, 1);
			
 
				 	l2.y = sampleLuma(-1, 3);
			
 
				 	l2.z = sampleLuma(1, 4);
			
 
				 
			
 
				 	// Calculate derivatives.
			
 
				-	RVec4 a = RVec4(l0.y, l2.x, l1.y, l2.y);
			
 
				-	RVec4 b = RVec4(l0.x, l0.w, l1.x, l1.z);
			
 
				-	const RVec4 dx = abs(a - b);
			
 
				+	Vec4 a = Vec4(l0.y, l2.x, l1.y, l2.y);
			
 
				+	Vec4 b = Vec4(l0.x, l0.w, l1.x, l1.z);
			
 
				+	const Vec4 dx = abs(a - b);
			
 
				 
			
 
				-	a = RVec4(l0.z, l0.w, l1.z, l2.z);
			
 
				-	b = RVec4(l0.x, l0.y, l1.x, l1.w);
			
 
				-	const RVec4 dy = abs(a - b);
			
 
				+	a = Vec4(l0.z, l0.w, l1.z, l2.z);
			
 
				+	b = Vec4(l0.x, l0.y, l1.x, l1.w);
			
 
				+	const Vec4 dy = abs(a - b);
			
 
				 
			
 
				-	RF32 maxDerivativeX = max(max(dx.x, dx.y), max(dx.z, dx.w));
			
 
				-	RF32 maxDerivativeY = max(max(dy.x, dy.y), max(dy.z, dy.w));
			
 
				+	F32 maxDerivativeX = max(max(dx.x, dx.y), max(dx.z, dx.w));
			
 
				+	F32 maxDerivativeY = max(max(dy.x, dy.y), max(dy.z, dy.w));
			
 
				 
			
 
				 	// Calculate average luma.
			
 
				-	const RVec4 sumL0L1 = l0 + l1;
			
 
				-	RF32 averageLuma = (sumL0L1.x + sumL0L1.y + sumL0L1.z + sumL0L1.w) / 8.0;
			
 
				+	const Vec4 sumL0L1 = l0 + l1;
			
 
				+	F32 averageLuma = (sumL0L1.x + sumL0L1.y + sumL0L1.z + sumL0L1.w) / 8.0;
			
 
				 #endif
			
 
				 
			
 
				 	// Share values in subgroup.
			
@@ -149,7 +149,7 @@ RF32 computeLuma(RVec3 color)
 
				 	[branch] if(WaveIsFirstLane())
			
 
				 	{
			
 
				 		s_averageLuma[waveIndexInsideThreadGroup] = averageLuma;
			
 
				-		s_maxDerivative[waveIndexInsideThreadGroup] = RVec2(maxDerivativeX, maxDerivativeY);
			
 
				+		s_maxDerivative[waveIndexInsideThreadGroup] = Vec2(maxDerivativeX, maxDerivativeY);
			
 
				 	}
			
 
				 
			
 
				 	GroupMemoryBarrierWithGroupSync();
			
@@ -161,7 +161,7 @@ RF32 computeLuma(RVec3 color)
 
				 		// Get max across all subgroups.
			
 
				 #if SHARED_MEMORY
			
 
				 		averageLuma = s_averageLuma[0];
			
 
				-		RVec2 maxDerivative = s_maxDerivative[0];
			
 
				+		Vec2 maxDerivative = s_maxDerivative[0];
			
 
				 
			
 
				 		for(U32 i = 1u; i < wavesPerThreadGroup; ++i)
			
 
				 		{
			
@@ -169,14 +169,14 @@ RF32 computeLuma(RVec3 color)
 
				 			maxDerivative = max(maxDerivative, s_maxDerivative[i]);
			
 
				 		}
			
 
				 #else
			
 
				-		const RVec2 maxDerivative = RVec2(maxDerivativeX, maxDerivativeY);
			
 
				+		const Vec2 maxDerivative = Vec2(maxDerivativeX, maxDerivativeY);
			
 
				 #endif
			
 
				 
			
 
				 		// Determine shading rate.
			
 
				-		const RF32 avgLuma = averageLuma / RF32(THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y);
			
 
				-		const RVec2 lumaDiff = maxDerivative / avgLuma;
			
 
				-		const RF32 threshold1 = g_consts.m_threshold;
			
 
				-		const RF32 threshold2 = threshold1 * 0.4;
			
 
				+		const F32 avgLuma = averageLuma / F32(THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y);
			
 
				+		const Vec2 lumaDiff = maxDerivative / avgLuma;
			
 
				+		const F32 threshold1 = g_consts.m_threshold;
			
 
				+		const F32 threshold2 = threshold1 * 0.4;
			
 
				 
			
 
				 		UVec2 rate;
			
 
				 		rate.x = (lumaDiff.x > threshold1) ? 1u : ((lumaDiff.x > threshold2) ? 2u : 4u);