Browse Source

GPU visibility: Enable meshlet backfacing culling

Panagiotis Christopoulos Charitos 1 month ago
parent
commit
dce6e8c3c1
47 changed files with 446 additions and 740 deletions
  1. 24 18
      AnKi/Editor/EditorUi.cpp
  2. 1 0
      AnKi/Editor/EditorUi.h
  3. 38 13
      AnKi/Math/Mat.h
  4. 3 3
      AnKi/Math/Transform.h
  5. 4 4
      AnKi/ShaderCompiler/ShaderCompiler.cpp
  6. 0 29
      AnKi/ShaderCompiler/ShaderParser.cpp
  7. 0 9
      AnKi/ShaderCompiler/ShaderParser.h
  8. 5 5
      AnKi/Shaders/Blit.ankiprog
  9. 30 30
      AnKi/Shaders/Bloom.ankiprog
  10. 0 29
      AnKi/Shaders/Common.hlsl
  11. 0 2
      AnKi/Shaders/FinalComposite.ankiprog
  12. 0 2
      AnKi/Shaders/ForwardShadingFog.ankiprog
  13. 0 2
      AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog
  14. 0 2
      AnKi/Shaders/ForwardShadingParticles.ankiprog
  15. 0 2
      AnKi/Shaders/ForwardShadingParticlesGass.ankiprog
  16. 0 1
      AnKi/Shaders/Fsr.ankiprog
  17. 0 2
      AnKi/Shaders/GBufferGeneric.ankiprog
  18. 0 2
      AnKi/Shaders/GBufferGpuParticles.ankiprog
  19. 0 2
      AnKi/Shaders/GBufferPost.ankiprog
  20. 0 2
      AnKi/Shaders/GpuParticlesGass.ankiprog
  21. 0 143
      AnKi/Shaders/GpuParticlesSimulation.ankiprog
  22. 24 9
      AnKi/Shaders/GpuVisibilityStage2And3.ankiprog
  23. 0 2
      AnKi/Shaders/HistoryLength.ankiprog
  24. 184 255
      AnKi/Shaders/Include/Common.h
  25. 0 2
      AnKi/Shaders/IndirectDiffuse.ankiprog
  26. 0 2
      AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog
  27. 17 17
      AnKi/Shaders/IrradianceDice.ankiprog
  28. 4 4
      AnKi/Shaders/LensFlareSprite.ankiprog
  29. 0 2
      AnKi/Shaders/LightShading.ankiprog
  30. 6 13
      AnKi/Shaders/LightShadingApplyFog.ankiprog
  31. 8 8
      AnKi/Shaders/LightShadingSkybox.ankiprog
  32. 0 2
      AnKi/Shaders/Reflections.ankiprog
  33. 0 2
      AnKi/Shaders/RtMaterialFetchDbg.ankiprog
  34. 0 2
      AnKi/Shaders/RtMaterialFetchMiss.ankiprog
  35. 0 2
      AnKi/Shaders/RtShadows.ankiprog
  36. 30 31
      AnKi/Shaders/ShadowmapsResolve.ankiprog
  37. 0 2
      AnKi/Shaders/Ssao.ankiprog
  38. 0 2
      AnKi/Shaders/TemporalAA.ankiprog
  39. 0 2
      AnKi/Shaders/Tonemap.ankiprog
  40. 1 1
      AnKi/Shaders/TonemappingAverageLuminance.ankiprog
  41. 0 2
      AnKi/Shaders/TraditionalDeferredShading.ankiprog
  42. 1 1
      AnKi/Shaders/TraditionalDeferredShadingSkybox.ankiprog
  43. 3 10
      AnKi/Shaders/Ui.ankiprog
  44. 9 9
      AnKi/Shaders/UiVisualizeImage.ankiprog
  45. 18 18
      AnKi/Shaders/VolumetricFogAccumulation.ankiprog
  46. 0 2
      AnKi/Shaders/VolumetricLightingAccumulation.ankiprog
  47. 36 36
      AnKi/Shaders/VrsSriGenerationCompute.ankiprog

+ 24 - 18
AnKi/Editor/EditorUi.cpp

@@ -1084,16 +1084,6 @@ void EditorUi::consoleWindow()
 
 	if(ImGui::Begin("Console", &m_showConsoleWindow, ImGuiWindowFlags_NoCollapse))
 	{
-		// Log controls
-		{
-			if(ImGui::Button(ICON_MDI_DELETE))
-			{
-				state.m_log.destroy();
-			}
-			ImGui::SetItemTooltip("Clear log");
-			ImGui::SameLine();
-		}
-
 		// Lua input
 		{
 			Char consoleTxt[kMaxTextInputLen] = "";
@@ -1106,6 +1096,19 @@ void EditorUi::consoleWindow()
 			}
 		}
 
+		// Clear Log
+		{
+			if(ImGui::Button(ICON_MDI_DELETE))
+			{
+				state.m_log.destroy();
+			}
+			ImGui::SetItemTooltip("Clear log");
+			ImGui::SameLine();
+		}
+
+		// Search log
+		filter(state.m_logFilter);
+
 		// Log
 		{
 			if(ImGui::BeginChild("Log", Vec2(0.0f), ImGuiChildFlags_Borders | ImGuiChildFlags_NavFlattened))
@@ -1118,14 +1121,17 @@ void EditorUi::consoleWindow()
 
 					for(const auto& logEntry : state.m_log)
 					{
-						ImGui::TableNextRow();
-						ImGui::TableNextColumn();
-						constexpr Array<Vec3, U(LoggerMessageType::kCount)> colors = {Vec3(0.074f, 0.631f, 0.054f), Vec3(0.074f, 0.354f, 0.631f),
-																					  Vec3(1.0f, 0.0f, 0.0f), Vec3(0.756f, 0.611f, 0.0f),
-																					  Vec3(1.0f, 0.0f, 0.0f)};
-						ImGui::PushStyleColor(ImGuiCol_Text, colors[logEntry.first].xyz1());
-						ImGui::TextUnformatted(logEntry.second.cstr());
-						ImGui::PopStyleColor();
+						if(state.m_logFilter.PassFilter(logEntry.second.cstr()))
+						{
+							ImGui::TableNextRow();
+							ImGui::TableNextColumn();
+							constexpr Array<Vec3, U(LoggerMessageType::kCount)> colors = {Vec3(0.074f, 0.631f, 0.054f), Vec3(0.074f, 0.354f, 0.631f),
+																						  Vec3(1.0f, 0.0f, 0.0f), Vec3(0.756f, 0.611f, 0.0f),
+																						  Vec3(1.0f, 0.0f, 0.0f)};
+							ImGui::PushStyleColor(ImGuiCol_Text, colors[logEntry.first].xyz1());
+							ImGui::TextUnformatted(logEntry.second.cstr());
+							ImGui::PopStyleColor();
+						}
 					}
 
 					if(state.m_forceLogScrollDown)

+ 1 - 0
AnKi/Editor/EditorUi.h

@@ -113,6 +113,7 @@ private:
 		List<std::pair<LoggerMessageType, String>> m_log;
 		Bool m_forceLogScrollDown = true;
 		SpinLock m_logMtx;
+		ImGuiTextFilter m_logFilter;
 	} m_consoleWindow;
 
 	class

+ 38 - 13
AnKi/Math/Mat.h

@@ -894,6 +894,27 @@ public:
 		return getColumn(3);
 	}
 
+	/// Overwrite the main diagonal with the components of @a diag. All other elements are left untouched.
+	/// @return A reference to this matrix.
+	TMat& setDiagonal(ColumnVec diag)
+	{
+		U32 idx = 0;
+		while(idx < kRowCount)
+		{
+			(*this)(idx, idx) = diag[idx];
+			++idx;
+		}
+		return *this;
+	}
+
+	/// Read the main diagonal of the matrix.
+	/// @return A vector whose r-th component is the element at (r, r).
+	ColumnVec getDiagonal() const
+	{
+		// This is a const method, so *this is const and the alias must be const too. A non-const TMat& cannot bind to it.
+		const TMat& m = *this;
+		ColumnVec diag;
+		for(U32 r = 0; r < kRowCount; ++r)
+		{
+			diag[r] = m(r, r);
+		}
+		return diag;
+	}
+
 	[[nodiscard]] TMat reorthogonalize() const requires(kTRowCount == 3)
 	{
 		// There are 2 methods, the standard and the Gram-Schmidt method with a twist for zAxis. This uses the 2nd. For the first see < r664
@@ -1337,21 +1358,19 @@ public:
 	}
 
 	/// If we suppose this matrix represents a transformation (translation, rotation and scale), return the inverted transformation
-	[[nodiscard]] TMat invertTransformation() const requires(kSize == 16)
+	[[nodiscard]] TMat invertTransformation() const requires(kSize == 16 || kSize == 12)
 	{
-		const TMat<T, 3, 3> invertedRot = getRotationPart().transpose();
-		TVec<T, 3> invertedTsl = getTranslationPart().xyz();
-		invertedTsl = -(invertedRot * invertedTsl);
-		return TMat(invertedTsl.xyz0(), invertedRot);
-	}
+		const TVec<T, 3> scale = extractScale(); // Per-axis scale of the transformation
+		const TVec<T, 3> invScale = T(1) / scale; // NOTE(review): a zero scale component divides by zero here; confirm callers never pass degenerate matrices
 
-	/// If we suppose this matrix represents a transformation, return the inverted transformation
-	[[nodiscard]] TMat invertTransformation() const requires(kSize == 12)
-	{
-		const TMat<T, 3, 3> invertedRot = getRotationPart().transpose();
-		TVec<T, 3> invertedTsl = getTranslationPart().xyz();
-		invertedTsl = -(invertedRot * invertedTsl);
-		return TMat(invertedTsl.xyz(), invertedRot);
+		TMat<T, 3, 3> rot;
+		rot.setRows(getRow(0).xyz() * invScale, getRow(1).xyz() * invScale, getRow(2).xyz() * invScale); // Presumably a component-wise multiply that strips the scale from the 3x3 part; verify
+
+		const TMat<T, 3, 3> invRot = rot.transpose(); // Inverse of the scale-free 3x3 part, assuming it is a pure rotation
+
+		const TVec<T, 3> invTsl = -(invRot * (getTranslationPart().xyz() * invScale)); // NOTE(review): scaling before rotating matches the exact inverse only for uniform scale; confirm non-uniform scale is not expected
+
+		return TMat(invTsl, invRot, invScale); // NOTE(review): relies on a (translation, rotation, scale) constructor that is not visible here
 	}
 
 	/// @note 9 muls, 9 adds
@@ -1410,6 +1429,12 @@ public:
 		return ((*this) * (T(1) - t)) + (b * t);
 	}
 
+	/// Treat this matrix as a transformation and return its per-axis scale: the length of the xyz part of each of the first 3 columns.
+	[[nodiscard]] TVec<T, 3> extractScale() const
+	{
+		const auto scaleX = getColumn(0).xyz().length();
+		const auto scaleY = getColumn(1).xyz().length();
+		const auto scaleZ = getColumn(2).xyz().length();
+		return TVec<T, 3>(scaleX, scaleY, scaleZ);
+	}
+
 	static TMat getZero()
 	{
 		return TMat(T(0));

+ 3 - 3
AnKi/Math/Transform.h

@@ -36,13 +36,13 @@ public:
 
 	explicit TTransform(const TMat<T, 4, 4>& m4)
 	{
+		m_scale = m4.extractScale().xyz0(); // Per-axis scale taken from the matrix
+
 		const TVec<T, 3> s0 = m4.getColumn(0).xyz();
 		const TVec<T, 3> s1 = m4.getColumn(1).xyz();
 		const TVec<T, 3> s2 = m4.getColumn(2).xyz();
+		m_rotation.setColumns(s0 / m_scale.x(), s1 / m_scale.y(), s2 / m_scale.z(), TVec<T, 3>(T(0))); // Every axis is divided by its own scale component; the last column is zero
 
-		m_scale = TVec<T, 4>(s0.length(), s1.length(), s2.length(), T(0));
-
-		m_rotation.setColumns(s0 / m_scale.x(), s1 / m_scale.x(), s2 / m_scale.x(), TVec<T, 3>(T(0)));
 		m_origin = m4.getTranslationPart().xyz0();
 		check();
 	}

+ 4 - 4
AnKi/ShaderCompiler/ShaderCompiler.cpp

@@ -195,13 +195,13 @@ static void compileVariantAsync(const ShaderParser& parser, Bool spirv, Bool deb
 				ShaderCompilerDynamicArray<U8> il;
 				if(ctx.m_spirv)
 				{
-					err = compileHlslToSpirv(source, shaderType, ctx.m_parser->compileWith16bitTypes(), ctx.m_debugInfo, ctx.m_sm,
-											 ctx.m_parser->getExtraCompilerArgs(), il, compilerErrorLog);
+					err = compileHlslToSpirv(source, shaderType, true, ctx.m_debugInfo, ctx.m_sm, ctx.m_parser->getExtraCompilerArgs(), il,
+											 compilerErrorLog);
 				}
 				else
 				{
-					err = compileHlslToDxil(source, shaderType, ctx.m_parser->compileWith16bitTypes(), ctx.m_debugInfo, ctx.m_sm,
-											ctx.m_parser->getExtraCompilerArgs(), il, compilerErrorLog);
+					err = compileHlslToDxil(source, shaderType, true, ctx.m_debugInfo, ctx.m_sm, ctx.m_parser->getExtraCompilerArgs(), il,
+											compilerErrorLog);
 				}
 
 				if(err)

+ 0 - 29
AnKi/ShaderCompiler/ShaderParser.cpp

@@ -532,10 +532,6 @@ Error ShaderParser::parseLine(CString line, CString fname, Bool& foundPragmaOnce
 				ANKI_CHECK(checkActiveStruct());
 				ANKI_CHECK(parsePragmaMember(token + 1, end, line, fname));
 			}
-			else if(*token == "16bit")
-			{
-				ANKI_CHECK(parsePragma16bit(token + 1, end, line, fname));
-			}
 			else if(*token == "extra_compiler_args")
 			{
 				ANKI_CHECK(parseExtraCompilerArgs(token + 1, end, line, fname));
@@ -722,21 +718,6 @@ Error ShaderParser::parsePragmaStructEnd(const ShaderCompilerString* begin, cons
 	return Error::kNone;
 }
 
-Error ShaderParser::parsePragma16bit(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname)
-{
-	ANKI_ASSERT(begin && end);
-
-	// Check tokens
-	if(begin != end)
-	{
-		ANKI_PP_ERROR_MALFORMED();
-	}
-
-	m_16bitTypes = true;
-
-	return Error::kNone;
-}
-
 Error ShaderParser::parseExtraCompilerArgs(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname)
 {
 	ANKI_ASSERT(begin && end);
@@ -904,16 +885,6 @@ void ShaderParser::generateVariant(ConstWeakArray<MutatorValue> mutation, const
 	ShaderCompilerString header;
 	generateAnkiShaderHeader(shaderType, header);
 	source += header;
-
-	if(m_16bitTypes)
-	{
-		source += "#define ANKI_SUPPORTS_16BIT_TYPES 1\n";
-	}
-	else
-	{
-		source += "#define ANKI_SUPPORTS_16BIT_TYPES 0\n";
-	}
-
 	source += m_source;
 }
 

+ 0 - 9
AnKi/ShaderCompiler/ShaderParser.h

@@ -61,7 +61,6 @@ public:
 /// #pragma once
 /// #pragma anki mutator NAME VALUE0 [VALUE1 [VALUE2 ...]]
 /// #pragma anki skip_mutation MUTATOR0 VALUE0 [MUTATOR1 VALUE1 [MUTATOR2 VALUE2 ...]]
-/// #pragma anki 16bit // Works only in HLSL. Gain 16bit types but loose min16xxx types
 /// #pragma anki technique [NAME] STAGE0 [STAGE1 ...] [mutators [MUTATOR0 [MUTATOR1 ...]]]
 /// #pragma anki extra_compiler_args ARG0 [ARG1 [ARG2...]]
 ///
@@ -113,11 +112,6 @@ public:
 		return m_techniques;
 	}
 
-	Bool compileWith16bitTypes() const
-	{
-		return m_16bitTypes;
-	}
-
 	ConstWeakArray<CString> getExtraCompilerArgs() const
 	{
 		return m_extraCompilerArgsCString;
@@ -159,8 +153,6 @@ private:
 	ShaderCompilerDynamicArray<GhostStruct> m_ghostStructs;
 	Bool m_insideStruct = false;
 
-	Bool m_16bitTypes = false;
-
 	ShaderCompilerDynamicArray<ShaderCompilerString> m_extraCompilerArgs;
 	ShaderCompilerDynamicArray<CString> m_extraCompilerArgsCString;
 
@@ -173,7 +165,6 @@ private:
 	Error parsePragmaStructBegin(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
 	Error parsePragmaStructEnd(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
 	Error parsePragmaMember(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
-	Error parsePragma16bit(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
 	Error parseExtraCompilerArgs(const ShaderCompilerString* begin, const ShaderCompilerString* end, CString line, CString fname);
 
 	void tokenizeLine(CString line, ShaderCompilerDynamicArray<ShaderCompilerString>& tokens) const;

+ 5 - 5
AnKi/Shaders/Blit.ankiprog

@@ -11,7 +11,7 @@
 #	include <AnKi/Shaders/Functions.hlsl>
 
 SamplerState g_linearAnyClampSampler : register(s0);
-Texture2D<RVec4> g_inputTex : register(t0);
+Texture2D<Vec4> g_inputTex : register(t0);
 
 #	if ANKI_COMPUTE_SHADER
 #		define USE_COMPUTE 1
@@ -20,7 +20,7 @@ Texture2D<RVec4> g_inputTex : register(t0);
 #	endif
 
 #	if USE_COMPUTE
-RWTexture2D<RVec4> g_storageTex : register(u0);
+RWTexture2D<Vec4> g_storageTex : register(u0);
 
 struct Consts
 {
@@ -33,7 +33,7 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 #	if USE_COMPUTE
 [numthreads(8, 8, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec3 main(VertOut input) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if USE_COMPUTE
@@ -47,10 +47,10 @@ RVec3 main(VertOut input) : SV_TARGET0
 	const Vec2 uv = input.m_uv;
 #	endif
 
-	const RVec3 color = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb;
+	const Vec3 color = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb;
 
 #	if USE_COMPUTE
-	g_storageTex[svDispatchThreadId.xy] = RVec4(color, 0.0);
+	g_storageTex[svDispatchThreadId.xy] = Vec4(color, 0.0);
 #	else
 	return color;
 #	endif

+ 30 - 30
AnKi/Shaders/Bloom.ankiprog

@@ -16,7 +16,7 @@
 #	include <AnKi/Shaders/Functions.hlsl>
 
 SamplerState g_linearAnyClampSampler : register(s0);
-Texture2D<RVec3> g_tex : register(t0);
+Texture2D<Vec3> g_tex : register(t0);
 
 struct Constants
 {
@@ -26,7 +26,7 @@ struct Constants
 ANKI_FAST_CONSTANTS(Constants, g_consts)
 
 #	if ANKI_COMPUTE_SHADER
-RWTexture2D<RVec4> g_storageTex : register(u1);
+RWTexture2D<Vec4> g_storageTex : register(u1);
 #	endif
 
 #	if ANKI_COMPUTE_SHADER
@@ -34,13 +34,13 @@ RWTexture2D<RVec4> g_storageTex : register(u1);
 {
 	const Vec2 uv = (Vec2(svDispatchThreadId) + 0.5) / g_consts.m_fbSize;
 #	else
-RVec3 main(VertOut input) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 {
 	const Vec2 uv = input.m_uv;
 #	endif
 
-	RVec3 output;
-	const RF32 weight = 1.0 / 5.0;
+	Vec3 output;
+	const F32 weight = 1.0 / 5.0;
 	output = g_tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0) * weight;
 	output += g_tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(+1, +1)) * weight;
 	output += g_tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, -1)) * weight;
@@ -48,7 +48,7 @@ RVec3 main(VertOut input) : SV_TARGET0
 	output += g_tex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, +1)) * weight;
 
 #	if ANKI_COMPUTE_SHADER
-	g_storageTex[svDispatchThreadId] = RVec4(output, 1.0);
+	g_storageTex[svDispatchThreadId] = Vec4(output, 1.0);
 #	else
 	return output;
 #	endif
@@ -63,7 +63,7 @@ RVec3 main(VertOut input) : SV_TARGET0
 #	include <AnKi/Shaders/Functions.hlsl>
 
 SamplerState g_linearAnyClampSampler : register(s0);
-Texture2D<RVec4> g_inTex : register(t0);
+Texture2D<Vec4> g_inTex : register(t0);
 
 #	define TONEMAPPING_REGISTER u0
 #	include <AnKi/Shaders/TonemappingResources.hlsl>
@@ -78,13 +78,13 @@ struct Consts
 ANKI_FAST_CONSTANTS(Consts, g_consts)
 
 #	if ANKI_COMPUTE_SHADER
-RWTexture2D<RVec4> g_storageTex : register(u1);
+RWTexture2D<Vec4> g_storageTex : register(u1);
 #	endif
 
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec3 main(VertOut input) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -96,8 +96,8 @@ RVec3 main(VertOut input) : SV_TARGET0
 	const Vec2 uv = input.m_uv;
 #	endif
 
-	const RF32 weight = 1.0 / 5.0;
-	RVec3 color = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb * weight;
+	const F32 weight = 1.0 / 5.0;
+	Vec3 color = g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb * weight;
 	color += g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(+1, +1)).rgb * weight;
 	color += g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, -1)).rgb * weight;
 	color += g_inTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, +1)).rgb * weight;
@@ -106,7 +106,7 @@ RVec3 main(VertOut input) : SV_TARGET0
 	color = tonemap<F32>(color, readExposureAndAverageLuminance<F32>().y, g_consts.m_threshold) * g_consts.m_scale;
 
 #	if ANKI_COMPUTE_SHADER
-	g_storageTex[svDispatchThreadId] = RVec4(color, 0.0);
+	g_storageTex[svDispatchThreadId] = Vec4(color, 0.0);
 #	else
 	return color;
 #	endif
@@ -129,26 +129,26 @@ constexpr F32 kChromaticDistortion = 3.0;
 constexpr F32 kHaloOpacity = 0.5;
 
 SamplerState g_linearAnyClampSampler : register(s0);
-Texture2D<RVec4> g_inputTex : register(t0);
-Texture2D<RVec3> g_lensDirtTex : register(t1);
+Texture2D<Vec4> g_inputTex : register(t0);
+Texture2D<Vec3> g_lensDirtTex : register(t1);
 
 #	if ANKI_COMPUTE_SHADER
-RWTexture2D<RVec4> g_storageTex : register(u0);
+RWTexture2D<Vec4> g_storageTex : register(u0);
 #	endif
 
-RVec3 textureDistorted(Texture2D<RVec4> tex, SamplerState sampl, Vec2 uv,
-					   Vec2 direction, // direction of distortion
-					   Vec3 distortion) // per-channel distortion factor
+// Sample tex at uv. With ENABLE_CHROMATIC_DISTORTION every color channel is fetched from uv shifted along direction by its own distortion
+// factor, otherwise a single plain sample is returned.
+Vec3 textureDistorted(Texture2D<Vec4> tex, SamplerState sampl, Vec2 uv,
+					  Vec2 direction, // direction of distortion
+					  Vec3 distortion) // per-channel distortion factor
 {
 #	if ENABLE_CHROMATIC_DISTORTION
-	return RVec3(tex.SampleLevel(sampl, uv + direction * distortion.r, 0.0).r, tex.SampleLevel(sampl, uv + direction * distortion.g, 0.0).g,
-				 tex.SampleLevel(sampl, uv + direction * distortion.b, 0.0).b);
+	return Vec3(tex.SampleLevel(sampl, uv + direction * distortion.r, 0.0).r, tex.SampleLevel(sampl, uv + direction * distortion.g, 0.0).g,
+				tex.SampleLevel(sampl, uv + direction * distortion.b, 0.0).b);
 #	else
 	// SampleLevel needs the sampler as its first argument. Without it this branch can't compile when the distortion is disabled
 	return tex.SampleLevel(sampl, uv, 0.0).rgb;
 #	endif
 }
 
-RVec3 ssLensFlare(Vec2 uv)
+Vec3 ssLensFlare(Vec2 uv)
 {
 	Vec2 textureSize;
 	g_inputTex.GetDimensions(textureSize.x, textureSize.y);
@@ -162,14 +162,14 @@ RVec3 ssLensFlare(Vec2 uv)
 	const Vec2 ghostVec = (Vec2(0.5, 0.5) - flipUv) * kGhostDispersal;
 
 	const Vec2 direction = normalize(ghostVec);
-	RVec3 result = Vec3(0.0, 0.0, 0.0);
+	Vec3 result = Vec3(0.0, 0.0, 0.0);
 
 	// Sample ghosts
 	[unroll] for(U32 i = 0u; i < kMaxGhosts; ++i)
 	{
 		const Vec2 offset = frac(flipUv + ghostVec * F32(i));
 
-		RF32 weight = length(Vec2(0.5, 0.5) - offset) / lensOfHalf;
+		F32 weight = length(Vec2(0.5, 0.5) - offset) / lensOfHalf;
 		weight = pow(1.0 - weight, 10.0);
 
 		result += textureDistorted(g_inputTex, g_linearAnyClampSampler, offset, direction, distortion) * weight;
@@ -178,7 +178,7 @@ RVec3 ssLensFlare(Vec2 uv)
 	// Sample halo
 #	if ENABLE_HALO
 	const Vec2 haloVec = normalize(ghostVec) * kHaloWidth;
-	RF32 weight = length(Vec2(0.5, 0.5) - frac(flipUv + haloVec)) / lensOfHalf;
+	F32 weight = length(Vec2(0.5, 0.5) - frac(flipUv + haloVec)) / lensOfHalf;
 	weight = pow(1.0 - weight, 20.0);
 	result += textureDistorted(g_inputTex, g_linearAnyClampSampler, flipUv + haloVec, direction, distortion) * (weight * kHaloOpacity);
 #	endif
@@ -189,10 +189,10 @@ RVec3 ssLensFlare(Vec2 uv)
 	return result;
 }
 
-RVec3 upscale(Vec2 uv)
+Vec3 upscale(Vec2 uv)
 {
-	const RF32 weight = 1.0 / 5.0;
-	RVec3 result = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb * weight;
+	const F32 weight = 1.0 / 5.0;
+	Vec3 result = g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0).rgb * weight;
 	result += g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(+1, +1)).rgb * weight;
 	result += g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(+1, -1)).rgb * weight;
 	result += g_inputTex.SampleLevel(g_linearAnyClampSampler, uv, 0.0, IVec2(-1, -1)).rgb * weight;
@@ -204,7 +204,7 @@ RVec3 upscale(Vec2 uv)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec3 main(VertOut input) : SV_TARGET0
+Vec3 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -216,10 +216,10 @@ RVec3 main(VertOut input) : SV_TARGET0
 	const Vec2 uv = input.m_uv;
 #	endif
 
-	const RVec3 outColor = ssLensFlare(uv) + upscale(uv);
+	const Vec3 outColor = ssLensFlare(uv) + upscale(uv);
 
 #	if ANKI_COMPUTE_SHADER
-	g_storageTex[svDispatchThreadId] = RVec4(outColor, 0.0);
+	g_storageTex[svDispatchThreadId] = Vec4(outColor, 0.0);
 #	else
 	return outColor;
 #	endif

+ 0 - 29
AnKi/Shaders/Common.hlsl

@@ -15,11 +15,7 @@
 
 // Common constants
 constexpr F32 kEpsilonF32 = 0.000001f;
-#if ANKI_SUPPORTS_16BIT_TYPES
 constexpr F16 kEpsilonF16 = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
-#else
-constexpr RF32 kEpsilonRF32 = 0.0001f;
-#endif
 
 template<typename T>
 T getEpsilon();
@@ -30,33 +26,18 @@ F32 getEpsilon()
 	return kEpsilonF32;
 }
 
-#if ANKI_SUPPORTS_16BIT_TYPES
 template<>
 F16 getEpsilon()
 {
 	return kEpsilonF16;
 }
-#endif
-
-#if !ANKI_FORCE_FULL_FP_PRECISION && !ANKI_SUPPORTS_16BIT_TYPES
-template<>
-RF32 getEpsilon()
-{
-	return kEpsilonRF32;
-}
-#endif
 
 constexpr U32 kMaxU32 = 0xFFFFFFFFu;
 constexpr I32 kMinI32 = -2147483648;
 constexpr I32 kMaxI32 = 2147483647;
 constexpr F32 kMaxF32 = 3.402823e+38;
 constexpr F32 kMinF32 = -3.402823e+38;
-#if !ANKI_SUPPORTS_16BIT_TYPES
-constexpr RF32 kMaxRF32 = 65504.0f; // Max half float value according to wikipedia
-#endif
-#if ANKI_SUPPORTS_16BIT_TYPES
 constexpr F16 kMaxF16 = (F16)65504.0;
-#endif
 
 template<typename T>
 T getMaxNumericLimit();
@@ -67,21 +48,11 @@ F32 getMaxNumericLimit()
 	return kMaxF32;
 }
 
-#if !ANKI_FORCE_FULL_FP_PRECISION && !ANKI_SUPPORTS_16BIT_TYPES
-template<>
-RF32 getMaxNumericLimit()
-{
-	return kMaxRF32;
-}
-#endif
-
-#if ANKI_SUPPORTS_16BIT_TYPES
 template<>
 F16 getMaxNumericLimit()
 {
 	return kMaxF16;
 }
-#endif
 
 template<>
 U32 getMaxNumericLimit()

+ 0 - 2
AnKi/Shaders/FinalComposite.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator FILM_GRAIN 0 1
 #pragma anki mutator BLOOM 0 1
 #pragma anki mutator DBG 0 1

+ 0 - 2
AnKi/Shaders/ForwardShadingFog.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki technique ForwardLegacy vert pixel
 
 #include <AnKi/Shaders/ForwardShadingCommon.hlsl>

+ 0 - 2
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator TEXTURE 0 1
 #pragma anki mutator LIGHT 0 1
 

+ 0 - 2
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator ANIMATED_TEXTURE 0 1
 #pragma anki mutator LIGHT 0 1
 

+ 0 - 2
AnKi/Shaders/ForwardShadingParticlesGass.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator ANIMATED_TEXTURE 0 1
 #pragma anki mutator LIGHT 0 1
 

+ 0 - 1
AnKi/Shaders/Fsr.ankiprog

@@ -3,7 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
 #pragma anki mutator SHARPEN 0 1
 #pragma anki mutator FSR_QUALITY 0 1
 

+ 0 - 2
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator ANKI_VELOCITY 0 1
 #pragma anki mutator ANKI_BONES 0 1
 #pragma anki mutator DIFFUSE_TEX 0 1

+ 0 - 2
AnKi/Shaders/GBufferGpuParticles.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki technique GBuffer vert pixel
 
 #include <AnKi/Shaders/MaterialShadersCommon.hlsl>

+ 0 - 2
AnKi/Shaders/GBufferPost.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki technique comp
 
 #include <AnKi/Shaders/PackFunctions.hlsl>

+ 0 - 2
AnKi/Shaders/GpuParticlesGass.ankiprog

@@ -5,8 +5,6 @@
 
 // This shader does a particle simulation for gasses
 
-#pragma anki 16bit
-
 #pragma anki mutator ANKI_WAVE_SIZE 16 32 64
 
 #pragma anki technique comp

+ 0 - 143
AnKi/Shaders/GpuParticlesSimulation.ankiprog

@@ -1,143 +0,0 @@
-// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-// This shader does a particle simulation
-
-#pragma anki technique comp
-
-#include <AnKi/Shaders/Include/ParticleTypes.h>
-#include <AnKi/Shaders/Common.hlsl>
-
-#if 0
-Texture2D g_depthTex : register(t0);
-RWStructuredBuffer<GpuParticle> g_particles : register(u0);
-ConstantBuffer<GpuParticleEmitterProperties> g_props : register(b0);
-StructuredBuffer<F32> g_randomFactors : register(t1); // The 1st element is a uint with the count of the numbers
-SamplerState g_nearestAnyClampSampler : register(s0);
-ConstantBuffer<GpuParticleSimulationState> g_state : register(b1);
-
-F32 smallerDelta(F32 left, F32 mid, F32 right)
-{
-	const F32 a = mid - left;
-	const F32 b = right - mid;
-
-	return (abs(a) < abs(b)) ? a : b;
-}
-
-Vec3 unproject(Vec2 ndc, F32 depth)
-{
-	const F32 z = g_state.m_unprojectionParams.z / (g_state.m_unprojectionParams.w + depth);
-	const Vec2 xy = ndc * g_state.m_unprojectionParams.xy * z;
-	return Vec3(xy, z);
-}
-
-// Compute the normal using the depth buffer
-Vec3 computeNormal(const Vec2 uv, const F32 depth)
-{
-	const F32 depthLeft = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0, IVec2(-2, 0)).r;
-	const F32 depthRight = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0, IVec2(2, 0)).r;
-	const F32 depthTop = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0, IVec2(0, 2)).r;
-	const F32 depthBottom = g_depthTex.SampleLevel(g_nearestAnyClampSampler, uv, 0.0, IVec2(0, -2)).r;
-
-	const F32 ddx = smallerDelta(depthLeft, depth, depthRight);
-	const F32 ddy = smallerDelta(depthBottom, depth, depthTop);
-
-	const Vec2 ndc = uvToNdc(uv);
-	Vec3 textureSize;
-	g_depthTex.GetDimensions(0, textureSize.x, textureSize.y, textureSize.z);
-	const Vec2 texelSize = 1.0 / textureSize.xy;
-	const Vec2 ndcTexelSize = 2.0 * texelSize;
-	const Vec3 right = unproject(ndc + Vec2(ndcTexelSize.x, 0.0), depth + ddx);
-	const Vec3 top = unproject(ndc + Vec2(0.0, ndcTexelSize.y), depth + ddy);
-
-	const Vec3 origin = unproject(ndc, depth);
-	Vec3 normalVSpace = cross(origin - top, right - origin);
-	normalVSpace = normalize(normalVSpace);
-
-	return mul(g_state.m_invViewRotation, Vec4(normalVSpace, 0.0));
-}
-
-void initParticle(UVec3 svDispatchThreadId, out GpuParticle p)
-{
-	const U32 randomFactorCount = asuint(g_randomFactors[0]);
-	const F32 randFactor = g_randomFactors[((svDispatchThreadId.x + g_state.m_randomIndex) % randomFactorCount) + 1];
-
-	p.m_newWorldPosition = lerp(g_props.m_minStartingPosition, g_props.m_maxStartingPosition, randFactor) + g_state.m_emitterPosition;
-	p.m_oldWorldPosition = p.m_newWorldPosition;
-
-	p.m_mass = lerp(g_props.m_minMass, g_props.m_maxMass, randFactor);
-	p.m_startingLife = lerp(g_props.m_minLife, g_props.m_maxLife, randFactor);
-	p.m_life = p.m_startingLife;
-	p.m_acceleration = lerp(g_props.m_minGravity, g_props.m_maxGravity, randFactor);
-
-	// Calculate the initial velocity
-	const Vec3 initialForce = mul(g_state.m_emitterRotation, Vec4(lerp(g_props.m_minForce, g_props.m_maxForce, randFactor), 0.0));
-	const Vec3 totalForce = (p.m_acceleration * p.m_mass) + initialForce;
-	const Vec3 acceleration = totalForce / p.m_mass;
-	p.m_velocity = acceleration * g_state.m_dt;
-}
-#endif
-
-[numthreads(64, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID) {
-#if 0
-	const U32 particleIdx = svDispatchThreadId.x;
-	if(particleIdx >= g_props.m_particleCount)
-	{
-		return;
-	}
-
-	GpuParticle particle = g_particles[particleIdx];
-	const F32 dt = g_state.m_dt;
-
-	// Check if it's dead
-	if(particle.m_life - dt <= 0.0)
-	{
-		// Dead, revive
-		initParticle(svDispatchThreadId, particle);
-	}
-	else
-	{
-		// Simulate
-
-		particle.m_life -= dt;
-
-		const Vec3 xp = particle.m_oldWorldPosition;
-		const Vec3 xc = particle.m_acceleration * (dt * dt) + g_particles[particleIdx].m_velocity * dt + xp;
-
-		// Project the point
-		const Vec4 proj4 = mul(g_state.m_viewProjMat, Vec4(xc, 1.0));
-		const Vec3 proj3 = proj4.xyz / proj4.w;
-		if(all(proj3.xy >= Vec2(-1.0, -1.0)) && all(proj3.xy <= Vec2(1.0, 1.0)))
-		{
-			// It's visible, test against the depth buffer
-
-			const F32 refDepth = g_depthTex.SampleLevel(g_nearestAnyClampSampler, ndcToUv(proj3.xy), 0.0).r;
-			const F32 testDepth = proj3.z;
-
-			if(testDepth >= refDepth)
-			{
-				// Collides, change its direction
-				const Vec3 normal = computeNormal(ndcToUv(proj3.xy), refDepth);
-				particle.m_velocity = reflect(particle.m_velocity, normal);
-
-				particle.m_oldWorldPosition = particle.m_newWorldPosition;
-			}
-			else
-			{
-				particle.m_oldWorldPosition = particle.m_newWorldPosition;
-				particle.m_newWorldPosition = xc;
-			}
-		}
-		else
-		{
-			particle.m_oldWorldPosition = particle.m_newWorldPosition;
-			particle.m_newWorldPosition = xc;
-		}
-	}
-
-	// Write back the particle
-	g_particles[particleIdx] = particle;
-#endif
-}

+ 24 - 9
AnKi/Shaders/GpuVisibilityStage2And3.ankiprog

@@ -17,6 +17,7 @@
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
 #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 #include <AnKi/Shaders/PackFunctions.hlsl>
+#include <AnKi/Shaders/Functions.hlsl>
 
 #define NUMTHREADS 64u
 
@@ -134,7 +135,7 @@ RWStructuredBuffer<U32> g_outOfMemoryBuffer : register(u3);
 // ===========================================================================
 #if ANKI_TECHNIQUE_Meshlets
 
-#	define MESHLET_BACKFACE_CULLING 0 // Doesn't work correctly for some reason
+#	define MESHLET_BACKFACE_CULLING 1 // Doesn't cull much geometry
 #	define MESHLET_OUTSIDE_OF_SCREEN_CULLING 1
 #	define MESHLET_NO_SAMPLING_POINT_CULLING 1
 #	define MESHLET_HZB_CULLING HZB_TEST
@@ -187,14 +188,6 @@ Bool cullMeshlet(GpuSceneRenderable renderable, const MeshletBoundingVolume mesh
 
 	const Mat3x4 worldTransform = SBUFF(g_transforms, renderable.m_worldTransformsIndex);
 
-#	if MESHLET_BACKFACE_CULLING
-	const Vec4 coneDirAndAng = unpackSnorm4x8(meshletBoundingVol.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
-	if(cullBackfaceMeshlet(coneDirAndAng.xyz, coneDirAndAng.w, meshletBoundingVol.m_coneApex, worldTransform, g_consts.m_cameraPos))
-	{
-		return true;
-	}
-#	endif
-
 	const Mat4 wordTransform4 = {worldTransform.m_row0, worldTransform.m_row1, worldTransform.m_row2, Vec4(0.0f, 0.0f, 0.0f, 1.0f)};
 	const Mat4 mvp = mul(g_consts.m_viewProjectionMatrix, wordTransform4);
 
@@ -225,6 +218,28 @@ Bool cullMeshlet(GpuSceneRenderable renderable, const MeshletBoundingVolume mesh
 	return meshletCulledByHzb;
 #	endif
 
+#	if MESHLET_BACKFACE_CULLING
+	{
+		// The center of the meshlet's AABB, moved to world space, is used as the center of the bounding sphere
+		Vec3 center = (meshletBoundingVol.m_aabbMin + meshletBoundingVol.m_aabbMax) * 0.5;
+		center = mul(worldTransform, Vec4(center, 1.0));
+
+		const Vec4 coneDirAndAng = unpackSnorm4x8<F32>(meshletBoundingVol.m_coneDirection_R8G8B8_Snorm_cosHalfAngle_R8_Snorm);
+
+		// Move the cone axis to world space (w=0 ignores the translation). The transform may contain scale, so the result is not
+		// guaranteed to be unit length
+		const Vec3 coneDir = mul(worldTransform, Vec4(coneDirAndAng.xyz, 0.0));
+		const F32 coneDirLen = length(coneDir);
+
+		const Vec3 scale = extractScale(worldTransform);
+		const F32 radius = meshletBoundingVol.m_sphereRadius * max3(scale);
+
+		// Test taken from meshoptimizer, which expects a normalized cone_axis:
+		// dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius
+		// Instead of normalizing the axis, the right-hand side is multiplied by the axis length. That is the same inequality without a
+		// division. A zero length axis never culls
+		const Vec3 camPos = g_consts.m_cameraPos;
+		const Vec3 cam2center = center - camPos;
+		if(coneDirLen > 0.0 && dot(cam2center, coneDir) >= (coneDirAndAng.w * length(cam2center) + radius) * coneDirLen)
+		{
+			return true;
+		}
+	}
+#	endif
+
 	return false;
 }
 

+ 0 - 2
AnKi/Shaders/HistoryLength.ankiprog

@@ -5,8 +5,6 @@
 
 // Calculates the disocclusion length. The longer the length the better the pixel is for temporal accumulation
 
-#pragma anki 16bit
-
 #pragma anki technique comp vert pixel
 
 #include <AnKi/Shaders/Functions.hlsl>

+ 184 - 255
AnKi/Shaders/Include/Common.h

@@ -83,135 +83,64 @@ void maybeUnused(T a)
 
 #	define static_assert(x)
 
-#	define _ANKI_SCONST_X(type, n, id) [[vk::constant_id(id)]] const type n = (type)1;
-
-#	define _ANKI_SCONST_X2(type, componentType, n, id) \
-		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_2_, n) = (componentType)1; \
-		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_2_, n) = (componentType)1; \
-		static const type n = type(ANKI_CONCATENATE(_anki_const_0_2_, n), ANKI_CONCATENATE(_anki_const_1_2_, n))
-
-#	define _ANKI_SCONST_X3(type, componentType, n, id) \
-		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_3_, n) = (componentType)1; \
-		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_3_, n) = (componentType)1; \
-		[[vk::constant_id(id + 2u)]] const componentType ANKI_CONCATENATE(_anki_const_2_3_, n) = (componentType)1; \
-		static const type n = \
-			type(ANKI_CONCATENATE(_anki_const_0_3_, n), ANKI_CONCATENATE(_anki_const_1_3_, n), ANKI_CONCATENATE(_anki_const_2_3_, n))
-
-#	define _ANKI_SCONST_X4(type, componentType, n, id) \
-		[[vk::constant_id(id + 0u)]] const componentType ANKI_CONCATENATE(_anki_const_0_4_, n) = (componentType)1; \
-		[[vk::constant_id(id + 1u)]] const componentType ANKI_CONCATENATE(_anki_const_1_4_, n) = (componentType)1; \
-		[[vk::constant_id(id + 2u)]] const componentType ANKI_CONCATENATE(_anki_const_2_4_, n) = (componentType)1; \
-		[[vk::constant_id(id + 3u)]] const componentType ANKI_CONCATENATE(_anki_const_3_4_, n) = (componentType)1; \
-		static const type n = type(ANKI_CONCATENATE(_anki_const_0_4_, n), ANKI_CONCATENATE(_anki_const_1_4_, n), \
-								   ANKI_CONCATENATE(_anki_const_2_4_, n), ANKI_CONCATENATE(_anki_const_2_4_, n))
-
-#	define ANKI_SPECIALIZATION_CONSTANT_I32(n, id) _ANKI_SCONST_X(I32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC2(n, id) _ANKI_SCONST_X2(IVec2, I32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC3(n, id) _ANKI_SCONST_X3(IVec3, I32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_IVEC4(n, id) _ANKI_SCONST_X4(IVec4, I32, n, id)
-
-#	define ANKI_SPECIALIZATION_CONSTANT_U32(n, id) _ANKI_SCONST_X(U32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC2(n, id) _ANKI_SCONST_X2(UVec2, U32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC3(n, id) _ANKI_SCONST_X3(UVec3, U32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_UVEC4(n, id) _ANKI_SCONST_X4(UVec4, U32, n, id)
-
-#	define ANKI_SPECIALIZATION_CONSTANT_F32(n, id) _ANKI_SCONST_X(F32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_VEC2(n, id) _ANKI_SCONST_X2(Vec2, F32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_VEC3(n, id) _ANKI_SCONST_X3(Vec3, F32, n, id)
-#	define ANKI_SPECIALIZATION_CONSTANT_VEC4(n, id) _ANKI_SCONST_X4(Vec4, F32, n, id)
-
 #	pragma pack_matrix(row_major)
 
 typedef float F32;
-constexpr uint kSizeof_F32 = 4u;
 typedef float2 Vec2;
-constexpr uint kSizeof_Vec2 = 8u;
 typedef float3 Vec3;
-constexpr uint kSizeof_Vec3 = 12u;
 typedef float4 Vec4;
-constexpr uint kSizeof_Vec4 = 16u;
 
-#	if ANKI_SUPPORTS_16BIT_TYPES
 typedef float16_t F16;
-constexpr uint kSizeof_F16 = 2u;
 typedef float16_t2 HVec2;
-constexpr uint kSizeof_HVec2 = 4u;
 typedef float16_t3 HVec3;
-constexpr uint kSizeof_HVec3 = 6u;
 typedef float16_t4 HVec4;
-constexpr uint kSizeof_HVec4 = 8u;
 
 typedef uint16_t U16;
-constexpr uint kSizeof_U16 = 2u;
 typedef uint16_t2 U16Vec2;
-constexpr uint kSizeof_U16Vec2 = 4u;
 typedef uint16_t3 U16Vec3;
-constexpr uint kSizeof_U16Vec3 = 6u;
 typedef uint16_t4 U16Vec4;
-constexpr uint kSizeof_U16Vec4 = 8u;
 
 typedef int16_t I16;
-constexpr uint kSizeof_I16 = 2u;
 typedef int16_t2 I16Vec2;
-constexpr uint kSizeof_I16Vec2 = 4u;
 typedef int16_t3 I16Vec3;
-constexpr uint kSizeof_I16Vec3 = 6u;
 typedef int16_t4 I16Vec4;
-constexpr uint kSizeof_I16Vec4 = 8u;
-#	endif
 
 typedef uint U32;
-constexpr uint kSizeof_U32 = 4u;
 typedef uint32_t2 UVec2;
-constexpr uint kSizeof_UVec2 = 8u;
 typedef uint32_t3 UVec3;
-constexpr uint kSizeof_UVec3 = 12u;
 typedef uint32_t4 UVec4;
-constexpr uint kSizeof_UVec4 = 16u;
 
 typedef int I32;
-constexpr uint kSizeof_I32 = 4u;
 typedef int32_t2 IVec2;
-constexpr uint kSizeof_IVec2 = 8u;
 typedef int32_t3 IVec3;
-constexpr uint kSizeof_IVec3 = 12u;
 typedef int32_t4 IVec4;
-constexpr uint kSizeof_IVec4 = 16u;
 
 typedef uint64_t U64;
-constexpr uint kSizeof_U64 = 8u;
 typedef uint64_t2 U64Vec2;
-constexpr uint kSizeof_U64Vec2 = 16u;
 typedef uint64_t3 U64Vec3;
-constexpr uint kSizeof_U64Vec3 = 24u;
 typedef uint64_t4 U64Vec4;
-constexpr uint kSizeof_U64Vec4 = 32u;
 
 typedef int64_t I64;
-constexpr uint kSizeof_I64 = 8u;
 typedef int64_t2 I64Vec2;
-constexpr uint kSizeof_I64Vec2 = 16u;
 typedef int64_t3 I64Vec3;
-constexpr uint kSizeof_I64Vec3 = 24u;
 typedef int64_t4 I64Vec4;
-constexpr uint kSizeof_I64Vec4 = 32u;
 
 typedef bool Bool;
 
-#	define _ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, op) \
-		mat operator op(fl f) \
+#	define _ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, op) \
+		matType operator op(scalarType f) \
 		{ \
-			mat o; \
+			matType o; \
 			o.m_row0 = m_row0 op f; \
 			o.m_row1 = m_row1 op f; \
 			o.m_row2 = m_row2 op f; \
 			return o; \
 		}
 
-#	define _ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, op) \
-		mat operator op(fl f) \
+#	define _ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, op) \
+		matType operator op(scalarType f) \
 		{ \
-			mat o; \
+			matType o; \
 			o.m_row0 = m_row0 op f; \
 			o.m_row1 = m_row1 op f; \
 			o.m_row2 = m_row2 op f; \
@@ -219,20 +148,20 @@ typedef bool Bool;
 			return o; \
 		}
 
-#	define _ANKI_DEFINE_OPERATOR_SELF_ROWS3(mat, op) \
-		mat operator op(mat b) \
+#	define _ANKI_DEFINE_OPERATOR_SELF_ROWS3(matType, op) \
+		matType operator op(matType b) \
 		{ \
-			mat o; \
+			matType o; \
 			o.m_row0 = m_row0 op b.m_row0; \
 			o.m_row1 = m_row1 op b.m_row1; \
 			o.m_row2 = m_row2 op b.m_row2; \
 			return o; \
 		}
 
-#	define _ANKI_DEFINE_OPERATOR_SELF_ROWS4(mat, op) \
-		mat operator op(mat b) \
+#	define _ANKI_DEFINE_OPERATOR_SELF_ROWS4(matType, op) \
+		matType operator op(matType b) \
 		{ \
-			mat o; \
+			matType o; \
 			o.m_row0 = m_row0 op b.m_row0; \
 			o.m_row1 = m_row1 op b.m_row1; \
 			o.m_row2 = m_row2 op b.m_row2; \
@@ -240,181 +169,181 @@ typedef bool Bool;
 			return o; \
 		}
 
-#	define _ANKI_DEFINE_ALL_OPERATORS_ROWS3(mat, fl) \
-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, +) \
-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, -) \
-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, *) \
-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(mat, fl, /) \
-		_ANKI_DEFINE_OPERATOR_SELF_ROWS3(mat, +) \
-		_ANKI_DEFINE_OPERATOR_SELF_ROWS3(mat, -)
-
-#	define _ANKI_DEFINE_ALL_OPERATORS_ROWS4(mat, fl) \
-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, +) \
-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, -) \
-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, *) \
-		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(mat, fl, /) \
-		_ANKI_DEFINE_OPERATOR_SELF_ROWS4(mat, +) \
-		_ANKI_DEFINE_OPERATOR_SELF_ROWS4(mat, -)
-
-// Mat3 "template". Not an actual template because of bugs
-#	define _ANKI_MAT3(mat, vec, scalar) \
-		struct mat \
-		{ \
-			vec m_row0; \
-			vec m_row1; \
-			vec m_row2; \
-			_ANKI_DEFINE_ALL_OPERATORS_ROWS3(mat, scalar) \
-			void setColumns(vec c0, vec c1, vec c2) \
-			{ \
-				m_row0 = vec(c0.x, c1.x, c2.x); \
-				m_row1 = vec(c0.y, c1.y, c2.y); \
-				m_row2 = vec(c0.z, c1.z, c2.z); \
-			} \
-		}; \
-		vec mul(mat m, vec v) \
-		{ \
-			const scalar a = dot(m.m_row0, v); \
-			const scalar b = dot(m.m_row1, v); \
-			const scalar c = dot(m.m_row2, v); \
-			return vec(a, b, c); \
-		} \
-		mat transpose(mat m) \
-		{ \
-			mat o; \
-			o.setColumns(m.m_row0, m.m_row1, m.m_row2); \
-			return o; \
-		}
+#	define _ANKI_DEFINE_ALL_OPERATORS_ROWS3(matType, scalarType) \
+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, +) \
+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, -) \
+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, *) \
+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS3(matType, scalarType, /) \
+		_ANKI_DEFINE_OPERATOR_SELF_ROWS3(matType, +) \
+		_ANKI_DEFINE_OPERATOR_SELF_ROWS3(matType, -)
+
+#	define _ANKI_DEFINE_ALL_OPERATORS_ROWS4(matType, scalarType) \
+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, +) \
+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, -) \
+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, *) \
+		_ANKI_DEFINE_OPERATOR_SCALAR_ROWS4(matType, scalarType, /) \
+		_ANKI_DEFINE_OPERATOR_SELF_ROWS4(matType, +) \
+		_ANKI_DEFINE_OPERATOR_SELF_ROWS4(matType, -)
+
+struct Mat3
+{
+	Vec3 m_row0;
+	Vec3 m_row1;
+	Vec3 m_row2;
 
-// Mat4 "template". Not an actual template because of bugs
-#	define _ANKI_MAT4(mat, vec, scalar) \
-		struct mat \
-		{ \
-			vec m_row0; \
-			vec m_row1; \
-			vec m_row2; \
-			vec m_row3; \
-			_ANKI_DEFINE_ALL_OPERATORS_ROWS4(mat, scalar) \
-			vec getTranslationPart() \
-			{ \
-				return vec(m_row0.w, m_row1.w, m_row2.w, m_row3.w); \
-			} \
-			void setColumns(vec c0, vec c1, vec c2, vec c3) \
-			{ \
-				m_row0 = vec(c0.x, c1.x, c2.x, c3.x); \
-				m_row1 = vec(c0.y, c1.y, c2.y, c3.y); \
-				m_row2 = vec(c0.z, c1.z, c2.z, c3.z); \
-				m_row3 = vec(c0.w, c1.w, c2.w, c3.w); \
-			} \
-		}; \
-		vec mul(mat m, vec v) \
-		{ \
-			const scalar a = dot(m.m_row0, v); \
-			const scalar b = dot(m.m_row1, v); \
-			const scalar c = dot(m.m_row2, v); \
-			const scalar d = dot(m.m_row3, v); \
-			return vec(a, b, c, d); \
-		} \
-		mat mul(mat a_, mat b_) \
-		{ \
-			const vec a[4] = {a_.m_row0, a_.m_row1, a_.m_row2, a_.m_row3}; \
-			const vec b[4] = {b_.m_row0, b_.m_row1, b_.m_row2, b_.m_row3}; \
-			vec c[4]; \
-			[unroll] for(U32 i = 0; i < 4; i++) \
-			{ \
-				vec t1, t2; \
-				t1 = a[i][0]; \
-				t2 = b[0] * t1; \
-				t1 = a[i][1]; \
-				t2 += b[1] * t1; \
-				t1 = a[i][2]; \
-				t2 += b[2] * t1; \
-				t1 = a[i][3]; \
-				t2 += b[3] * t1; \
-				c[i] = t2; \
-			} \
-			mat o; \
-			o.m_row0 = c[0]; \
-			o.m_row1 = c[1]; \
-			o.m_row2 = c[2]; \
-			o.m_row3 = c[3]; \
-			return o; \
-		}
+	_ANKI_DEFINE_ALL_OPERATORS_ROWS3(Mat3, F32)
 
-// Mat3x4 "template". Not an actual template because of bugs
-#	define _ANKI_MAT3x4(mat, row, column, scalar) \
-		struct mat \
-		{ \
-			row m_row0; \
-			row m_row1; \
-			row m_row2; \
-			_ANKI_DEFINE_ALL_OPERATORS_ROWS3(mat, scalar) \
-			column getTranslationPart() \
-			{ \
-				return column(m_row0.w, m_row1.w, m_row2.w); \
-			} \
-			void setColumns(column c0, column c1, column c2, column c3) \
-			{ \
-				m_row0 = row(c0.x, c1.x, c2.x, c3.x); \
-				m_row1 = row(c0.y, c1.y, c2.y, c3.y); \
-				m_row2 = row(c0.z, c1.z, c2.z, c3.z); \
-			} \
-			void setColumn(U32 i, column c) \
-			{ \
-				m_row0[i] = c.x; \
-				m_row1[i] = c.y; \
-				m_row2[i] = c.z; \
-			} \
-		}; \
-		column mul(mat m, row v) \
-		{ \
-			const scalar a = dot(m.m_row0, v); \
-			const scalar b = dot(m.m_row1, v); \
-			const scalar c = dot(m.m_row2, v); \
-			return column(a, b, c); \
-		} \
-		mat combineTransformations(mat a_, mat b_) \
-		{ \
-			const row a[3] = {a_.m_row0, a_.m_row1, a_.m_row2}; \
-			const row b[3] = {b_.m_row0, b_.m_row1, b_.m_row2}; \
-			row c[3]; \
-			[unroll] for(U32 i = 0; i < 3; i++) \
-			{ \
-				row t2; \
-				t2 = b[0] * a[i][0]; \
-				t2 += b[1] * a[i][1]; \
-				t2 += b[2] * a[i][2]; \
-				const row v4 = row(0.0f, 0.0f, 0.0f, a[i][3]); \
-				t2 += v4; \
-				c[i] = t2; \
-			} \
-			mat o; \
-			o.m_row0 = c[0]; \
-			o.m_row1 = c[1]; \
-			o.m_row2 = c[2]; \
-			return o; \
-		}
+	void setColumns(Vec3 c0, Vec3 c1, Vec3 c2)
+	{
+		m_row0 = Vec3(c0.x, c1.x, c2.x);
+		m_row1 = Vec3(c0.y, c1.y, c2.y);
+		m_row2 = Vec3(c0.z, c1.z, c2.z);
+	}
+};
+
+Vec3 mul(Mat3 m, Vec3 v)
+{
+	// Matrix * column vector: every output component is one matrix row dotted with v
+	return Vec3(dot(m.m_row0, v), // x
+				dot(m.m_row1, v), // y
+				dot(m.m_row2, v)); // z
+}
+
+Mat3 transpose(Mat3 m)
+{
+	Mat3 transposed; // The rows of m become the columns of the result
+	transposed.setColumns(m.m_row0, m.m_row1, m.m_row2);
+	return transposed;
+}
 
-_ANKI_MAT3(Mat3, Vec3, F32)
-_ANKI_MAT4(Mat4, Vec4, F32)
-_ANKI_MAT3x4(Mat3x4, Vec4, Vec3, F32)
-
-#	if ANKI_SUPPORTS_16BIT_TYPES == 0
-#		if ANKI_FORCE_FULL_FP_PRECISION
-	typedef float RF32;
-typedef float2 RVec2;
-typedef float3 RVec3;
-typedef float4 RVec4;
-_ANKI_MAT3(RMat3, Vec3, F32)
-#		else
-	typedef min16float RF32;
-typedef min16float2 RVec2;
-typedef min16float3 RVec3;
-typedef min16float4 RVec4;
-_ANKI_MAT3(RMat3, RVec3, RF32)
-#		endif
-#	else // ANKI_SUPPORTS_16BIT_TYPES == 0
-	_ANKI_MAT3(HMat3, HVec3, F16)
-#	endif // ANKI_SUPPORTS_16BIT_TYPES == 0
+struct Mat4 // 4x4 F32 matrix stored as four Vec4 rows
+{
+	Vec4 m_row0;
+	Vec4 m_row1;
+	Vec4 m_row2;
+	Vec4 m_row3;
+	// Member operators: +, -, *, / with an F32 scalar and +, - with another Mat4, applied to the rows
+	_ANKI_DEFINE_ALL_OPERATORS_ROWS4(Mat4, F32)
+	// Builds the matrix from four columns: component k of every column ends up in row k
+	void setColumns(Vec4 c0, Vec4 c1, Vec4 c2, Vec4 c3)
+	{
+		m_row0 = Vec4(c0.x, c1.x, c2.x, c3.x);
+		m_row1 = Vec4(c0.y, c1.y, c2.y, c3.y);
+		m_row2 = Vec4(c0.z, c1.z, c2.z, c3.z);
+		m_row3 = Vec4(c0.w, c1.w, c2.w, c3.w);
+	}
+	// Returns the fourth column, i.e. the w component of every row
+	Vec4 getTranslationPart()
+	{
+		return Vec4(m_row0.w, m_row1.w, m_row2.w, m_row3.w);
+	}
+};
+
+Vec4 mul(Mat4 m, Vec4 v)
+{
+	// Matrix * column vector: every output component is one matrix row dotted with v
+	return Vec4(dot(m.m_row0, v), // x
+				dot(m.m_row1, v), // y
+				dot(m.m_row2, v), // z
+				dot(m.m_row3, v)); // w
+}
+
+Mat4 mul(Mat4 a_, Mat4 b_) // Matrix product a_ * b_
+{
+	const Vec4 a[4] = {a_.m_row0, a_.m_row1, a_.m_row2, a_.m_row3}; // Rows of the left operand
+	const Vec4 b[4] = {b_.m_row0, b_.m_row1, b_.m_row2, b_.m_row3}; // Rows of the right operand
+	Vec4 c[4]; // Rows of the product
+	[unroll] for(U32 i = 0; i < 4; i++)
+	{
+		Vec4 t1, t2; // t1: the scalar a[i][k] assigned to a Vec4, t2: running sum for row i
+		t1 = a[i][0];
+		t2 = b[0] * t1;
+		t1 = a[i][1];
+		t2 += b[1] * t1;
+		t1 = a[i][2];
+		t2 += b[2] * t1;
+		t1 = a[i][3];
+		t2 += b[3] * t1;
+		c[i] = t2; // Row i of the product = sum over k of a[i][k] * (row k of b)
+	}
+	Mat4 o;
+	o.m_row0 = c[0];
+	o.m_row1 = c[1];
+	o.m_row2 = c[2];
+	o.m_row3 = c[3];
+	return o;
+}
+
+struct Mat3x4 // 3 rows by 4 columns F32 matrix stored as three Vec4 rows
+{
+	Vec4 m_row0;
+	Vec4 m_row1;
+	Vec4 m_row2;
+	// Member operators: +, -, *, / with an F32 scalar and +, - with another Mat3x4, applied to the rows
+	_ANKI_DEFINE_ALL_OPERATORS_ROWS3(Mat3x4, F32)
+	// Returns the fourth column, i.e. the w component of every row
+	Vec3 getTranslationPart()
+	{
+		return Vec3(m_row0.w, m_row1.w, m_row2.w);
+	}
+	// Builds the matrix from four columns: component k of every column ends up in row k
+	void setColumns(Vec3 c0, Vec3 c1, Vec3 c2, Vec3 c3)
+	{
+		m_row0 = Vec4(c0.x, c1.x, c2.x, c3.x);
+		m_row1 = Vec4(c0.y, c1.y, c2.y, c3.y);
+		m_row2 = Vec4(c0.z, c1.z, c2.z, c3.z);
+	}
+	// Overwrites column i with c; i indexes into the Vec4 rows
+	void setColumn(U32 i, Vec3 c)
+	{
+		m_row0[i] = c.x;
+		m_row1[i] = c.y;
+		m_row2[i] = c.z;
+	}
+};
+
+Vec3 mul(Mat3x4 m, Vec4 v)
+{
+	// Matrix * column vector: every output component is one 4-wide matrix row dotted with v
+	return Vec3(dot(m.m_row0, v), // x
+				dot(m.m_row1, v), // y
+				dot(m.m_row2, v)); // z
+}
+
+Mat3x4 combineTransformations(Mat3x4 a_, Mat3x4 b_) // Equals a_ * b_ when both are extended with a (0, 0, 0, 1) fourth row
+{
+	const Vec4 a[3] = {a_.m_row0, a_.m_row1, a_.m_row2}; // Rows of the left operand
+	const Vec4 b[3] = {b_.m_row0, b_.m_row1, b_.m_row2}; // Rows of the right operand
+	Vec4 c[3]; // Rows of the result
+	[unroll] for(U32 i = 0; i < 3; i++)
+	{
+		Vec4 t2; // Running sum for row i
+		t2 = b[0] * a[i][0];
+		t2 += b[1] * a[i][1];
+		t2 += b[2] * a[i][2];
+		const Vec4 v4 = Vec4(0.0f, 0.0f, 0.0f, a[i][3]); // a[i][3] times the implicit (0, 0, 0, 1) fourth row of b
+		t2 += v4;
+		c[i] = t2;
+	}
+	Mat3x4 o;
+	o.m_row0 = c[0];
+	o.m_row1 = c[1];
+	o.m_row2 = c[2];
+	return o;
+}
+
+template<typename TMat>
+Vec3 extractScale(TMat trf)
+{
+	// Component i of the result is the length of column i of trf, measured over its first three rows.
+	// Only m_row0, m_row1 and m_row2 are read, so any type that has these three indexable rows works
+	const Vec3 column0 = Vec3(trf.m_row0[0], trf.m_row1[0], trf.m_row2[0]);
+	const Vec3 column1 = Vec3(trf.m_row0[1], trf.m_row1[1], trf.m_row2[1]);
+	const Vec3 column2 = Vec3(trf.m_row0[2], trf.m_row1[2], trf.m_row2[2]);
+
+	return Vec3(length(column0), length(column1), length(column2));
+}
 
 #endif // defined(__HLSL_VERSION)
 

+ 0 - 2
AnKi/Shaders/IndirectDiffuse.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki technique RtMaterialFetch rgen
 
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>

+ 0 - 2
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator GPU_WAVE_SIZE 16 32 64
 #pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 8 10 12 14 16 18 20
 #pragma anki mutator IRRADIANCE_OCTAHEDRON_MAP_SIZE 4 5 6

+ 17 - 17
AnKi/Shaders/IrradianceDice.ankiprog

@@ -20,7 +20,7 @@
 constexpr U32 kThreadgroupSize = U32(THREDGROUP_SIZE_SQRT) * U32(THREDGROUP_SIZE_SQRT);
 
 SamplerState g_nearestAnyClampSampler : register(s0);
-TextureCube<RVec4> g_lightShadingTexCube : register(t0);
+TextureCube<Vec4> g_lightShadingTexCube : register(t0);
 
 #if SECOND_BOUNCE == 1
 TextureCube<Vec4> g_gbufferTex[3u] : register(t1);
@@ -39,7 +39,7 @@ ANKI_FAST_CONSTANTS(Constants, g_consts)
 #else
 struct BufferOut
 {
-	RVec4 m_val[6u];
+	Vec4 m_val[6u];
 };
 
 RWStructuredBuffer<BufferOut> g_irradianceDisceResults : register(u0);
@@ -49,7 +49,7 @@ constexpr U32 kMinWaveSize = 8u;
 groupshared Vec3 s_integrationResults[6u][kThreadgroupSize / kMinWaveSize]; // In cube coords
 groupshared U32 s_waveIndexInsideThreadGroup;
 
-RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
+Vec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 {
 	const Vec2 uv = (Vec2(svGroupThreadId.x, svGroupThreadId.y) + 0.5) / F32(THREDGROUP_SIZE_SQRT);
 	const Vec3 cubeUvw = getCubemapDirection(uv, face);
@@ -77,9 +77,9 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 		const Vec3 r = getCubemapDirection(faceUv, f) * Vec3(1.0, 1.0, -1.0);
 
 		// Compute integral part
-		const RF32 lambert = max(0.0, dot(r, diceDir));
-		const RVec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
-		const RVec3 irradiance = lightShading * lambert * cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);
+		const F32 lambert = max(0.0, dot(r, diceDir));
+		const Vec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
+		const Vec3 irradiance = lightShading * lambert * cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);
 
 		// Store
 		resultFaces[f] = irradiance;
@@ -125,7 +125,7 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 		const Vec3 r = getCubemapDirection(faceUv, f) * Vec3(1.0, 1.0, -1.0);
 
 		// Compute integral part
-		const RF32 lambert = max(0.0, dot(r, diceDir));
+		const F32 lambert = max(0.0, dot(r, diceDir));
 
 		// Read the gbuffer
 		const Vec3 gbufferUv = getCubemapDirection(faceUv, f);
@@ -135,14 +135,14 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 								g_gbufferTex[2u].SampleLevel(g_nearestAnyClampSampler, gbufferUv, 0.0), gbuffer);
 
 		// Sample irradiance
-		RVec3 firstBounceIrradiance =
-			sampleAmbientDice<RF32>(s_integrationResults[0][0], s_integrationResults[1][0], s_integrationResults[2][0], s_integrationResults[3][0],
-									s_integrationResults[4][0], s_integrationResults[5][0], gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
+		Vec3 firstBounceIrradiance =
+			sampleAmbientDice<F32>(s_integrationResults[0][0], s_integrationResults[1][0], s_integrationResults[2][0], s_integrationResults[3][0],
+								   s_integrationResults[4][0], s_integrationResults[5][0], gbuffer.m_normal * Vec3(1.0, 1.0, -1.0));
 		firstBounceIrradiance = gbuffer.m_diffuse * firstBounceIrradiance;
 
 		// Compute 2nd bounce
-		const RVec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
-		const RVec3 irradiance = (firstBounceIrradiance + lightShading * lambert) * cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);
+		const Vec3 lightShading = sampleLightShadingTexture(f, svGroupThreadId);
+		const Vec3 irradiance = (firstBounceIrradiance + lightShading * lambert) * cubeCoordSolidAngle(uvToNdc(faceUv), threadgroupSizeSqrtf);
 
 		// Store
 		resultFaces[f] = irradiance;
@@ -185,10 +185,10 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 		const U32 f = svGroupIndex;
 
 #if DEBUG_MODE == 0
-		RVec3 irradiance = s_integrationResults[f][0];
-		const RVec3 toStoreValue = irradiance;
+		Vec3 irradiance = s_integrationResults[f][0];
+		const Vec3 toStoreValue = irradiance;
 #elif DEBUG_MODE == 1
-		const RVec3 toStoreValue = colorPerCubeFace(f);
+		const Vec3 toStoreValue = colorPerCubeFace(f);
 #else
 		UVec3 volumeSize;
 		g_irradianceVolume.GetDimensions(volumeSize.x, volumeSize.y, volumeSize.z);
@@ -196,8 +196,8 @@ RVec3 sampleLightShadingTexture(const U32 face, UVec3 svGroupThreadId)
 		const UVec3 subvolumeSize = UVec3(volumeSize.x / 6u, volumeSize.y, volumeSize.z);
 		const U32 cellIdx =
 			g_consts.m_volumeTexel.z * subvolumeSize.x * subvolumeSize.y + g_consts.m_volumeTexel.y * subvolumeSize.x + g_consts.m_volumeTexel.x;
-		const RF32 headmapFactor = F32(cellIdx) / F32(subvolumeSize.x * subvolumeSize.y * subvolumeSize.z);
-		const RVec3 toStoreValue = heatmap(headmapFactor);
+		const F32 headmapFactor = F32(cellIdx) / F32(subvolumeSize.x * subvolumeSize.y * subvolumeSize.z);
+		const Vec3 toStoreValue = heatmap(headmapFactor);
 #endif
 
 #if STORE_LOCATION == 0

+ 4 - 4
AnKi/Shaders/LensFlareSprite.ankiprog

@@ -11,7 +11,7 @@
 struct VertOut
 {
 	Vec3 m_uv : TEXCOORD;
-	RVec4 m_color : COLOR;
+	Vec4 m_color : COLOR;
 	Vec4 m_svPosition : SV_POSITION;
 };
 
@@ -41,11 +41,11 @@ VertOut main(U32 svVertexId : SV_VERTEXID, U32 svInstanceId : SV_INSTANCEID)
 
 #if ANKI_PIXEL_SHADER
 SamplerState g_trilinearRepeatSampler : register(s0);
-Texture2DArray<RVec4> g_tex : register(t1);
+Texture2DArray<Vec4> g_tex : register(t1);
 
-RVec4 main(VertOut input) : SV_TARGET0
+Vec4 main(VertOut input) : SV_TARGET0
 {
-	const RVec4 col = g_tex.Sample(g_trilinearRepeatSampler, input.m_uv);
+	const Vec4 col = g_tex.Sample(g_trilinearRepeatSampler, input.m_uv);
 	return col * input.m_color;
 }
 #endif

+ 0 - 2
AnKi/Shaders/LightShading.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator INDIRECT_DIFFUSE_TEX 0 1
 
 #pragma anki technique vert pixel

+ 6 - 13
AnKi/Shaders/LightShadingApplyFog.ankiprog

@@ -13,7 +13,7 @@
 SamplerState g_nearestAnyClampSampler : register(s0);
 SamplerState g_linearAnyClampSampler : register(s1);
 Texture2D g_depthRt : register(t0);
-Texture3D<RVec4> g_fogVolume : register(t1);
+Texture3D<Vec4> g_fogVolume : register(t1);
 
 struct Constants
 {
@@ -24,14 +24,7 @@ struct Constants
 };
 ANKI_FAST_CONSTANTS(Constants, g_consts)
 
-// Workaround validation errors
-#	if ANKI_GR_BACKEND_DIRECT3D
-#		define OUT_TYPE Vec4
-#	else
-#		define OUT_TYPE RVec4
-#	endif
-
-OUT_TYPE main(VertOut input) : SV_TARGET0
+Vec4 main(VertOut input) : SV_TARGET0
 {
 	const Vec2 uv = input.m_uv;
 	Vec3 uvw;
@@ -45,11 +38,11 @@ OUT_TYPE main(VertOut input) : SV_TARGET0
 	uvw.xy = uv;
 
 	// Read the volume
-	const RVec4 fogVals = g_fogVolume.SampleLevel(g_linearAnyClampSampler, uvw, 0.0);
-	const RVec3 inScattering = fogVals.rgb;
-	const RF32 transmittance = fogVals.a;
+	const Vec4 fogVals = g_fogVolume.SampleLevel(g_linearAnyClampSampler, uvw, 0.0);
+	const Vec3 inScattering = fogVals.rgb;
+	const F32 transmittance = fogVals.a;
 
 	// Apply the fog
-	return OUT_TYPE(inScattering, transmittance);
+	return Vec4(inScattering, transmittance);
 }
 #endif // ANKI_PIXEL_SHADER

+ 8 - 8
AnKi/Shaders/LightShadingSkybox.ankiprog

@@ -19,14 +19,14 @@
 #	if METHOD == 0
 struct Constants
 {
-	RVec3 m_solidColor;
+	Vec3 m_solidColor;
 	F32 m_padding;
 };
 
 ANKI_FAST_CONSTANTS(Constants, g_consts)
 #	elif METHOD == 1
 SamplerState g_trilinearAnySampler : register(s0);
-Texture2D<RVec4> g_envMapTex : register(t0);
+Texture2D<Vec4> g_envMapTex : register(t0);
 
 struct Constants
 {
@@ -49,12 +49,12 @@ Texture2D<Vec4> g_skyLut : register(t0);
 ConstantBuffer<GlobalRendererConstants> g_consts : register(b0);
 #	endif
 
-RVec4 main(VertOut input) : SV_TARGET0
+Vec4 main(VertOut input) : SV_TARGET0
 {
 	const Vec2 uv = input.m_uv;
 #	if METHOD == 0
 	ANKI_MAYBE_UNUSED(uv);
-	const RVec3 output = g_consts.m_solidColor;
+	const Vec3 output = g_consts.m_solidColor;
 #	elif METHOD == 1
 	const F32 depth = 1.0;
 	const Vec2 ndc = uvToNdc(uv);
@@ -73,7 +73,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 
 	const F32 bias = (maxD > 0.9) ? -100.0f : 0.0f;
 
-	const RVec3 output = g_envMapTex.SampleBias(g_trilinearAnySampler, uv3, bias).rgb * g_consts.m_scale + g_consts.m_bias;
+	const Vec3 output = g_envMapTex.SampleBias(g_trilinearAnySampler, uv3, bias).rgb * g_consts.m_scale + g_consts.m_bias;
 #	else
 	const F32 depth = 1.0;
 	const Vec2 ndc = uvToNdc(uv);
@@ -82,10 +82,10 @@ RVec4 main(VertOut input) : SV_TARGET0
 
 	const Vec3 eyeToFrag = normalize(worldPos - g_consts.m_cameraPosition);
 
-	const RVec3 output = computeSkyColor(g_skyLut, g_linearAnyClampSampler, eyeToFrag, -g_consts.m_directionalLight.m_direction,
-										 g_consts.m_directionalLight.m_power, true);
+	const Vec3 output = computeSkyColor(g_skyLut, g_linearAnyClampSampler, eyeToFrag, -g_consts.m_directionalLight.m_direction,
+										g_consts.m_directionalLight.m_power, true);
 #	endif
 
-	return RVec4(output, 0.0);
+	return Vec4(output, 0.0);
 }
 #endif // ANKI_PIXEL_SHADER

+ 0 - 2
AnKi/Shaders/Reflections.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator SSR_SAMPLE_GBUFFER 0 1
 #pragma anki mutator INDIRECT_DIFFUSE_CLIPMAPS 0 1
 

+ 0 - 2
AnKi/Shaders/RtMaterialFetchDbg.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki technique RtMaterialFetch rgen
 
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>

+ 0 - 2
AnKi/Shaders/RtMaterialFetchMiss.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki technique RtMaterialFetch miss
 
 #include <AnKi/Shaders/RtMaterialFetch.hlsl>

+ 0 - 2
AnKi/Shaders/RtShadows.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator RAYS_PER_PIXEL 1 2 4 8
 
 #pragma anki technique RtShadows rgen miss

+ 30 - 31
AnKi/Shaders/ShadowmapsResolve.ankiprog

@@ -32,7 +32,7 @@ Texture2D<Vec4> g_dirLightResolvedShadowsTex : register(t6);
 #	endif
 
 #	if ANKI_COMPUTE_SHADER
-RWTexture2D<RVec4> g_storageTex : register(u0);
+RWTexture2D<Vec4> g_storageTex : register(u0);
 #	endif
 
 struct Constants
@@ -67,7 +67,7 @@ Vec3 computeDebugShadowCascadeColor(U32 cascade)
 #	if ANKI_COMPUTE_SHADER
 [numthreads(8, 8, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
 #	else
-RVec4 main(VertOut input) : SV_TARGET0
+Vec4 main(VertOut input) : SV_TARGET0
 #	endif
 {
 #	if ANKI_COMPUTE_SHADER
@@ -86,12 +86,12 @@ RVec4 main(VertOut input) : SV_TARGET0
 	g_noiseTex.GetDimensions(noiseTexSize.x, noiseTexSize.y);
 
 	const Vec2 noiseUv = g_consts.m_framebufferSize / noiseTexSize * uv;
-	RVec3 noise = g_noiseTex.SampleLevel(g_trilinearRepeatSampler, noiseUv, 0.0).rgb;
+	Vec3 noise = g_noiseTex.SampleLevel(g_trilinearRepeatSampler, noiseUv, 0.0).rgb;
 	noise = animateBlueNoise(noise, g_globalConstants.m_frame % 16u);
-	const RF32 randFactor = noise.x;
+	const F32 randFactor = noise.x;
 #		else
-	const RVec2 noise2 = spatioTemporalNoise(svDispatchThreadId, g_globalConstants.m_frame);
-	const RF32 randFactor = noise2.x;
+	const Vec2 noise2 = spatioTemporalNoise(svDispatchThreadId, g_globalConstants.m_frame);
+	const F32 randFactor = noise2.x;
 #		endif
 #	endif
 
@@ -108,7 +108,7 @@ RVec4 main(VertOut input) : SV_TARGET0
 	// Layers
 	U32 shadowCasterCountPerFragment = 0u;
 	const U32 kMaxShadowCastersPerFragment = 4u;
-	RVec4 shadowFactors = 0.0f;
+	Vec4 shadowFactors = 0.0f;
 
 	// Dir light
 #	if DIRECTIONAL_LIGHT_SHADOW_RESOLVED
@@ -120,14 +120,14 @@ RVec4 main(VertOut input) : SV_TARGET0
 	{
 		const U32 shadowCascadeCount = dirLight.m_shadowCascadeCount;
 
-		const RF32 positiveZViewSpace = testPlanePoint(g_globalConstants.m_nearPlaneWSpace.xyz, g_globalConstants.m_nearPlaneWSpace.w, worldPos)
-										+ g_globalConstants.m_matrices.m_near;
+		const F32 positiveZViewSpace = testPlanePoint(g_globalConstants.m_nearPlaneWSpace.xyz, g_globalConstants.m_nearPlaneWSpace.w, worldPos)
+									   + g_globalConstants.m_matrices.m_near;
 
 		const F32 lastCascadeDistance = dirLight.m_shadowCascadeDistances[shadowCascadeCount - 1u];
-		RF32 shadowFactor;
+		F32 shadowFactor;
 		if(positiveZViewSpace < lastCascadeDistance)
 		{
-			RF32 cascadeBlendFactor;
+			F32 cascadeBlendFactor;
 			const UVec2 cascadeIndices =
 				computeShadowCascadeIndex2(positiveZViewSpace, dirLight.m_shadowCascadeDistances, shadowCascadeCount, cascadeBlendFactor);
 
@@ -144,14 +144,14 @@ RVec4 main(VertOut input) : SV_TARGET0
 #		endif
 
 #		if QUALITY == 2
-			const RF32 shadowFactorCascadeA = computeShadowFactorDirLightPcss<RF32>(
-				dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
+			const F32 shadowFactorCascadeA = computeShadowFactorDirLightPcss<F32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex,
+																				  g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
 #		elif QUALITY == 1
-			const RF32 shadowFactorCascadeA = computeShadowFactorDirLightPcf<RF32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex,
-																				   g_linearAnyClampShadowSampler, randFactor);
+			const F32 shadowFactorCascadeA = computeShadowFactorDirLightPcf<F32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex,
+																				 g_linearAnyClampShadowSampler, randFactor);
 #		else
-			const RF32 shadowFactorCascadeA =
-				computeShadowFactorDirLight<RF32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			const F32 shadowFactorCascadeA =
+				computeShadowFactorDirLight<F32>(dirLight, cascadeIndices.x, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 #		endif
 
 			if(cascadeBlendFactor < 0.01 || cascadeIndices.x == cascadeIndices.y)
@@ -163,21 +163,21 @@ RVec4 main(VertOut input) : SV_TARGET0
 			{
 #		if QUALITY == 2
 				// Blend cascades
-				const RF32 shadowFactorCascadeB = computeShadowFactorDirLightPcss<RF32>(
+				const F32 shadowFactorCascadeB = computeShadowFactorDirLightPcss<F32>(
 					dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor, g_linearAnyClampSampler);
 #		elif QUALITY == 1
 				// Blend cascades
-				const RF32 shadowFactorCascadeB = computeShadowFactorDirLightPcf<RF32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex,
-																					   g_linearAnyClampShadowSampler, randFactor);
+				const F32 shadowFactorCascadeB = computeShadowFactorDirLightPcf<F32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex,
+																					 g_linearAnyClampShadowSampler, randFactor);
 #		else
 				// Blend cascades
-				const RF32 shadowFactorCascadeB =
-					computeShadowFactorDirLight<RF32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+				const F32 shadowFactorCascadeB =
+					computeShadowFactorDirLight<F32>(dirLight, cascadeIndices.y, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 #		endif
 				shadowFactor = lerp(shadowFactorCascadeA, shadowFactorCascadeB, cascadeBlendFactor);
 			}
 
-			RF32 distanceFadeFactor = saturate(positiveZViewSpace / lastCascadeDistance);
+			F32 distanceFadeFactor = saturate(positiveZViewSpace / lastCascadeDistance);
 			distanceFadeFactor = pow(distanceFadeFactor, 8.0);
 			shadowFactor += distanceFadeFactor;
 		}
@@ -202,10 +202,9 @@ RVec4 main(VertOut input) : SV_TARGET0
 			const Vec3 frag2Light = light.m_position - worldPos;
 
 #	if QUALITY > 0
-			const RF32 shadowFactor =
-				computeShadowFactorPointLightPcf(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
+			const F32 shadowFactor = computeShadowFactorPointLightPcf(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
 #	else
-			const RF32 shadowFactor = computeShadowFactorPointLight<F32>(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			const F32 shadowFactor = computeShadowFactorPointLight<F32>(light, frag2Light, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 #	endif
 			shadowFactors[min(kMaxShadowCastersPerFragment - 1u, shadowCasterCountPerFragment++)] = shadowFactor;
 		}
@@ -219,13 +218,13 @@ RVec4 main(VertOut input) : SV_TARGET0
 		[branch] if(light.m_shadow)
 		{
 #	if QUALITY == 2
-			const RF32 shadowFactor = computeShadowFactorSpotLightPcss<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler,
-																			 randFactor, g_linearAnyClampSampler);
+			const F32 shadowFactor = computeShadowFactorSpotLightPcss<F32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler,
+																		   randFactor, g_linearAnyClampSampler);
 #	elif QUALITY == 1
-			const RF32 shadowFactor =
-				computeShadowFactorSpotLightPcf<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
+			const F32 shadowFactor =
+				computeShadowFactorSpotLightPcf<F32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler, randFactor);
 #	else
-			const RF32 shadowFactor = computeShadowFactorSpotLight<RF32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
+			const F32 shadowFactor = computeShadowFactorSpotLight<F32>(light, worldPos, g_shadowAtlasTex, g_linearAnyClampShadowSampler);
 #	endif
 			shadowFactors[min(kMaxShadowCastersPerFragment - 1u, shadowCasterCountPerFragment++)] = shadowFactor;
 		}

+ 0 - 2
AnKi/Shaders/Ssao.ankiprog

@@ -5,8 +5,6 @@
 
 // Ground truth ambient occlusion
 
-#pragma anki 16bit
-
 #pragma anki mutator SPATIAL_DENOISE_SAMPLE_COUNT 3 5 7 9
 #pragma anki mutator DENOISING_QUARTER_RESOLUTION 0 1
 

+ 0 - 2
AnKi/Shaders/TemporalAA.ankiprog

@@ -3,8 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki 16bit
-
 #pragma anki mutator VARIANCE_CLIPPING 0 1
 #pragma anki mutator YCBCR 0 1
 

+ 0 - 2
AnKi/Shaders/Tonemap.ankiprog

@@ -5,8 +5,6 @@
 
 // Does tonemapping
 
-#pragma anki 16bit
-
 #pragma anki technique vert pixel comp
 
 #include <AnKi/Shaders/QuadVert.hlsl>

+ 1 - 1
AnKi/Shaders/TonemappingAverageLuminance.ankiprog

@@ -12,7 +12,7 @@
 #define THREAD_COUNT_Y 16u
 #define THREAD_COUNT UVec2(THREAD_COUNT_X, THREAD_COUNT_Y)
 
-Texture2D<RVec4> g_tex : register(t0);
+Texture2D<Vec4> g_tex : register(t0);
 
 #define TONEMAPPING_REGISTER u0
 #include <AnKi/Shaders/TonemappingResources.hlsl>

+ 0 - 2
AnKi/Shaders/TraditionalDeferredShading.ankiprog

@@ -5,8 +5,6 @@
 
 // Classic deferred lighting shader
 
-#pragma anki 16bit
-
 #pragma anki mutator SPECULAR 0 1
 #pragma anki mutator INDIRECT_DIFFUSE 0 1 2
 

+ 1 - 1
AnKi/Shaders/TraditionalDeferredShadingSkybox.ankiprog

@@ -23,7 +23,7 @@ Texture2D g_depthTex : register(t0);
 
 #	if METHOD == 1
 SamplerState g_trilinearAnySampler : register(s1);
-Texture2D<RVec4> g_envMapTex : register(t1);
+Texture2D<Vec4> g_envMapTex : register(t1);
 #	elif METHOD == 2
 SamplerState g_linearAnyClampSampler : register(s1);
 Texture2D<Vec4> g_skyLut : register(t1);

+ 3 - 10
AnKi/Shaders/Ui.ankiprog

@@ -9,17 +9,10 @@
 
 #include <AnKi/Shaders/Common.hlsl>
 
-// Workaround validation errors
-#if ANKI_GR_BACKEND_DIRECT3D
-#	define OUT_TYPE Vec4
-#else
-#	define OUT_TYPE RVec4
-#endif
-
 struct VertIn
 {
 	Vec2 m_position : POSITION;
-	OUT_TYPE m_color : COLOR;
+	ANKI_RELAXED_PRECISION Vec4 m_color : COLOR;
 #if TEXTURE_TYPE > 0
 	Vec2 m_uv : TEXCOORD;
 #endif
@@ -30,7 +23,7 @@ struct VertOut
 #if TEXTURE_TYPE > 0
 	Vec2 m_uv : TEXCOORD;
 #endif
-	RVec4 m_color : COLOR;
+	ANKI_RELAXED_PRECISION Vec4 m_color : COLOR;
 	Vec4 m_svPosition : SV_POSITION;
 };
 
@@ -64,7 +57,7 @@ SamplerState g_trilinearRepeatSampler : register(s0);
 Texture2D g_tex : register(t0);
 #	endif
 
-OUT_TYPE main(VertOut input) : SV_TARGET0
+Vec4 main(VertOut input) : SV_TARGET0
 {
 #	if TEXTURE_TYPE == 0
 	return input.m_color;

+ 9 - 9
AnKi/Shaders/UiVisualizeImage.ankiprog

@@ -21,14 +21,14 @@ ANKI_FAST_CONSTANTS(Constants, g_consts)
 struct VertIn
 {
 	Vec2 m_position : POSITION;
-	RVec4 m_color : COLOR;
+	Vec4 m_color : COLOR;
 	Vec2 m_uv : TEXCOORD;
 };
 
 struct VertOut
 {
 	Vec2 m_uv : TEXCOORD;
-	RVec4 m_color : COLOR;
+	Vec4 m_color : COLOR;
 	Vec4 m_svPosition : SV_POSITION;
 };
 
@@ -51,19 +51,19 @@ VertOut main(VertIn input)
 #if ANKI_PIXEL_SHADER
 SamplerState g_trilinearRepeatSampler : register(s0);
 #	if TEXTURE_TYPE == 0
-Texture2D<RVec4> g_tex2d : register(t0);
+Texture2D<Vec4> g_tex2d : register(t0);
 #	else
-Texture3D<RVec4> g_tex3d : register(t0);
+Texture3D<Vec4> g_tex3d : register(t0);
 #	endif
 
-RVec4 main(VertOut input) : SV_TARGET0
+Vec4 main(VertOut input) : SV_TARGET0
 {
 #	if TEXTURE_TYPE == 0
-	const RVec4 rgba = g_tex2d.Sample(g_trilinearRepeatSampler, input.m_uv);
+	const Vec4 rgba = g_tex2d.Sample(g_trilinearRepeatSampler, input.m_uv);
 #	else
-	const RVec4 rgba = g_tex3d.Sample(g_trilinearRepeatSampler, Vec3(input.m_uv, g_consts.m_depth.x));
+	const Vec4 rgba = g_tex3d.Sample(g_trilinearRepeatSampler, Vec3(input.m_uv, g_consts.m_depth.x));
 #	endif
-	RVec3 outColor = input.m_color.rgb * rgba.rgb * g_consts.m_colorScale.rgb;
+	Vec3 outColor = input.m_color.rgb * rgba.rgb * g_consts.m_colorScale.rgb;
 
 	if(g_consts.m_colorScale.a == 1.0)
 	{
@@ -74,6 +74,6 @@ RVec4 main(VertOut input) : SV_TARGET0
 		outColor = lerp(Vec3(alphaPattern, alphaPattern, alphaPattern), outColor, rgba.a);
 	}
 
-	return RVec4(outColor, 1.0);
+	return Vec4(outColor, 1.0);
 }
 #endif // ANKI_PIXEL_SHADER

+ 18 - 18
AnKi/Shaders/VolumetricFogAccumulation.ankiprog

@@ -9,8 +9,8 @@
 #include <AnKi/Shaders/Common.hlsl>
 
 SamplerState g_linearAnyClampSampler : register(s0);
-Texture3D<RVec4> g_lightVolume : register(t0);
-RWTexture3D<RVec4> g_fogVolume : register(u0);
+Texture3D<Vec4> g_lightVolume : register(t0);
+RWTexture3D<Vec4> g_fogVolume : register(u0);
 
 ANKI_FAST_CONSTANTS(VolumetricFogConstants, g_consts)
 
@@ -23,41 +23,41 @@ ANKI_FAST_CONSTANTS(VolumetricFogConstants, g_consts)
 
 	const Vec2 uv = (Vec2(svDispatchThreadId.xy) + 0.5) / Vec2(g_consts.m_volumeSize.xy);
 
-	RVec4 colorAndDensityFront = 0.0;
+	Vec4 colorAndDensityFront = 0.0;
 	[loop] for(U32 i = 0u; i < g_consts.m_volumeSize.z; ++i)
 	{
-		const RF32 fi = F32(i);
+		const F32 fi = F32(i);
 
 		// Compute the linear depth
-		const RF32 maxLinearDepth = g_consts.m_maxZSplitsToProcessf / g_consts.m_zSplitCountf;
-		const RF32 linearDepthFraction = maxLinearDepth / F32(g_consts.m_volumeSize.z);
-		const RF32 linearDepthNear = fi * linearDepthFraction;
-		const RF32 linearDepthFar = (fi + 1.0) * linearDepthFraction;
+		const F32 maxLinearDepth = g_consts.m_maxZSplitsToProcessf / g_consts.m_zSplitCountf;
+		const F32 linearDepthFraction = maxLinearDepth / F32(g_consts.m_volumeSize.z);
+		const F32 linearDepthNear = fi * linearDepthFraction;
+		const F32 linearDepthFar = (fi + 1.0) * linearDepthFraction;
 
 		// Compute the min and max Z in view space of this cluster fragment
-		const RF32 zVSpaceNear = -linearDepthNear * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
-		const RF32 zVSpaceFar = -linearDepthFar * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
+		const F32 zVSpaceNear = -linearDepthNear * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
+		const F32 zVSpaceFar = -linearDepthFar * (g_consts.m_far - g_consts.m_near) + g_consts.m_near;
 
 		// Compute the thickness of this fragment
-		const RF32 layerThinkness = abs(zVSpaceNear - zVSpaceFar);
+		const F32 layerThinkness = abs(zVSpaceNear - zVSpaceFar);
 
 		// Read the light value and the fog density from the fog volumes
-		const RF32 w = (fi + 0.5) / F32(g_consts.m_volumeSize.z);
-		RVec4 lightAndFogDensity = g_lightVolume.SampleLevel(g_linearAnyClampSampler, Vec3(uv, w), 0.0);
+		const F32 w = (fi + 0.5) / F32(g_consts.m_volumeSize.z);
+		Vec4 lightAndFogDensity = g_lightVolume.SampleLevel(g_linearAnyClampSampler, Vec3(uv, w), 0.0);
 		lightAndFogDensity.xyz *= g_consts.m_fogDiffuse / kPi;
 
 		// Scattering & absorption
-		const RF32 scattering = lightAndFogDensity.w * g_consts.m_fogScatteringCoeff * layerThinkness;
-		const RF32 absorption = lightAndFogDensity.w * g_consts.m_fogAbsorptionCoeff * layerThinkness;
+		const F32 scattering = lightAndFogDensity.w * g_consts.m_fogScatteringCoeff * layerThinkness;
+		const F32 absorption = lightAndFogDensity.w * g_consts.m_fogAbsorptionCoeff * layerThinkness;
 
 		// Integrate
-		const RVec4 colorAndDensityBack = Vec4(lightAndFogDensity.xyz * scattering, scattering + absorption);
+		const Vec4 colorAndDensityBack = Vec4(lightAndFogDensity.xyz * scattering, scattering + absorption);
 
-		const RVec3 l = colorAndDensityFront.rgb + saturate(exp(-colorAndDensityFront.a)) * colorAndDensityBack.rgb;
+		const Vec3 l = colorAndDensityFront.rgb + saturate(exp(-colorAndDensityFront.a)) * colorAndDensityBack.rgb;
 		colorAndDensityFront = Vec4(l.rgb, colorAndDensityFront.a + colorAndDensityBack.a);
 
 		// Write the value
-		const RVec4 valToWrite = Vec4(colorAndDensityFront.rgb, saturate(exp(-colorAndDensityFront.a)));
+		const Vec4 valToWrite = Vec4(colorAndDensityFront.rgb, saturate(exp(-colorAndDensityFront.a)));
 		g_fogVolume[UVec3(svDispatchThreadId.xy, i)] = valToWrite;
 	}
 }

+ 0 - 2
AnKi/Shaders/VolumetricLightingAccumulation.ankiprog

@@ -5,8 +5,6 @@
 
 // This shader accumulates the lighting for every cluster fraction
 
-#pragma anki 16bit
-
 #pragma anki mutator ENABLE_SHADOWS 0 1
 #pragma anki mutator CLIPMAP_DIFFUSE_INDIRECT 0 1
 

+ 36 - 36
AnKi/Shaders/VrsSriGenerationCompute.ankiprog

@@ -15,7 +15,7 @@
 // Find the maximum luma derivative in x and y, relative to the average luma of the block.
 // Each thread handles a 2x2 region when using 8x8 VRS tiles and a 2x4 region when using 16x16 VRS tiles.
 
-Texture2D<RVec4> g_inputTex : register(t0);
+Texture2D<Vec4> g_inputTex : register(t0);
 SamplerState g_nearestClampSampler : register(s0);
 
 #if SRI_TEXEL_DIMENSION == 8
@@ -45,14 +45,14 @@ ANKI_FAST_CONSTANTS(Constants, g_consts)
 // subgroupSize is large enough so we need shared memory as a fallback. We need gl_NumSubgroups entries, but it is not a
 // constant, so estimate it assuming a subgroupSize of at least 8.
 constexpr U32 kSharedMemoryEntries = THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y / 8u;
-groupshared RF32 s_averageLuma[kSharedMemoryEntries];
-groupshared RVec2 s_maxDerivative[kSharedMemoryEntries];
+groupshared F32 s_averageLuma[kSharedMemoryEntries];
+groupshared Vec2 s_maxDerivative[kSharedMemoryEntries];
 groupshared U32 s_waveIndexInsideThreadGroup;
 #endif
 
-RF32 computeLuma(RVec3 color)
+F32 computeLuma(Vec3 color)
 {
-	const RF32 l = computeLuminance(color);
+	const F32 l = computeLuminance(color);
 	return l / (1.0f + l);
 }
 
@@ -74,30 +74,30 @@ RF32 computeLuma(RVec3 color)
 	//       l1.y
 	// l0.z  l0.w  l1.x
 	// l0.x  l0.y
-	RVec4 l0;
+	Vec4 l0;
 	l0.x = sampleLuma(0, 0);
 	l0.y = sampleLuma(1, 0);
 	l0.z = sampleLuma(0, 1);
 	l0.w = sampleLuma(1, 1);
 
-	RVec2 l1;
+	Vec2 l1;
 	l1.x = sampleLuma(2, 1);
 	l1.y = sampleLuma(1, 2);
 
 	// Calculate derivatives.
-	RVec2 a = RVec2(l0.y, l1.x);
-	RVec2 b = RVec2(l0.x, l0.w);
-	const RVec2 dx = abs(a - b);
+	Vec2 a = Vec2(l0.y, l1.x);
+	Vec2 b = Vec2(l0.x, l0.w);
+	const Vec2 dx = abs(a - b);
 
-	a = RVec2(l0.z, l1.y);
-	b = RVec2(l0.x, l0.w);
-	const RVec2 dy = abs(a - b);
+	a = Vec2(l0.z, l1.y);
+	b = Vec2(l0.x, l0.w);
+	const Vec2 dy = abs(a - b);
 
-	RF32 maxDerivativeX = max(dx.x, dx.y);
-	RF32 maxDerivativeY = max(dy.x, dy.y);
+	F32 maxDerivativeX = max(dx.x, dx.y);
+	F32 maxDerivativeY = max(dy.x, dy.y);
 
 	// Calculate average luma.
-	RF32 averageLuma = (l0.x + l0.y + l0.z + l0.w) / 4.0;
+	F32 averageLuma = (l0.x + l0.y + l0.z + l0.w) / 4.0;
 #else
 	// Get luminance.
 	//             l2.z
@@ -105,38 +105,38 @@ RF32 computeLuma(RVec3 color)
 	//       l1.x  l1.y
 	//       l0.z  l0.w  l2.x
 	//       l0.x  l0.y
-	RVec4 l0;
+	Vec4 l0;
 	l0.x = sampleLuma(0, 0);
 	l0.y = sampleLuma(1, 0);
 	l0.z = sampleLuma(0, 1);
 	l0.w = sampleLuma(1, 1);
 
-	RVec4 l1;
+	Vec4 l1;
 	l1.x = sampleLuma(0, 2);
 	l1.y = sampleLuma(1, 2);
 	l1.z = sampleLuma(0, 3);
 	l1.w = sampleLuma(1, 3);
 
-	RVec3 l2;
+	Vec3 l2;
 	l2.x = sampleLuma(2, 1);
 	l2.y = sampleLuma(-1, 3);
 	l2.z = sampleLuma(1, 4);
 
 	// Calculate derivatives.
-	RVec4 a = RVec4(l0.y, l2.x, l1.y, l2.y);
-	RVec4 b = RVec4(l0.x, l0.w, l1.x, l1.z);
-	const RVec4 dx = abs(a - b);
+	Vec4 a = Vec4(l0.y, l2.x, l1.y, l2.y);
+	Vec4 b = Vec4(l0.x, l0.w, l1.x, l1.z);
+	const Vec4 dx = abs(a - b);
 
-	a = RVec4(l0.z, l0.w, l1.z, l2.z);
-	b = RVec4(l0.x, l0.y, l1.x, l1.w);
-	const RVec4 dy = abs(a - b);
+	a = Vec4(l0.z, l0.w, l1.z, l2.z);
+	b = Vec4(l0.x, l0.y, l1.x, l1.w);
+	const Vec4 dy = abs(a - b);
 
-	RF32 maxDerivativeX = max(max(dx.x, dx.y), max(dx.z, dx.w));
-	RF32 maxDerivativeY = max(max(dy.x, dy.y), max(dy.z, dy.w));
+	F32 maxDerivativeX = max(max(dx.x, dx.y), max(dx.z, dx.w));
+	F32 maxDerivativeY = max(max(dy.x, dy.y), max(dy.z, dy.w));
 
 	// Calculate average luma.
-	const RVec4 sumL0L1 = l0 + l1;
-	RF32 averageLuma = (sumL0L1.x + sumL0L1.y + sumL0L1.z + sumL0L1.w) / 8.0;
+	const Vec4 sumL0L1 = l0 + l1;
+	F32 averageLuma = (sumL0L1.x + sumL0L1.y + sumL0L1.z + sumL0L1.w) / 8.0;
 #endif
 
 	// Share values in subgroup.
@@ -149,7 +149,7 @@ RF32 computeLuma(RVec3 color)
 	[branch] if(WaveIsFirstLane())
 	{
 		s_averageLuma[waveIndexInsideThreadGroup] = averageLuma;
-		s_maxDerivative[waveIndexInsideThreadGroup] = RVec2(maxDerivativeX, maxDerivativeY);
+		s_maxDerivative[waveIndexInsideThreadGroup] = Vec2(maxDerivativeX, maxDerivativeY);
 	}
 
 	GroupMemoryBarrierWithGroupSync();
@@ -161,7 +161,7 @@ RF32 computeLuma(RVec3 color)
 		// Get max across all subgroups.
 #if SHARED_MEMORY
 		averageLuma = s_averageLuma[0];
-		RVec2 maxDerivative = s_maxDerivative[0];
+		Vec2 maxDerivative = s_maxDerivative[0];
 
 		for(U32 i = 1u; i < wavesPerThreadGroup; ++i)
 		{
@@ -169,14 +169,14 @@ RF32 computeLuma(RVec3 color)
 			maxDerivative = max(maxDerivative, s_maxDerivative[i]);
 		}
 #else
-		const RVec2 maxDerivative = RVec2(maxDerivativeX, maxDerivativeY);
+		const Vec2 maxDerivative = Vec2(maxDerivativeX, maxDerivativeY);
 #endif
 
 		// Determine shading rate.
-		const RF32 avgLuma = averageLuma / RF32(THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y);
-		const RVec2 lumaDiff = maxDerivative / avgLuma;
-		const RF32 threshold1 = g_consts.m_threshold;
-		const RF32 threshold2 = threshold1 * 0.4;
+		const F32 avgLuma = averageLuma / F32(THREADGROUP_SIZE_X * THREADGROUP_SIZE_Y);
+		const Vec2 lumaDiff = maxDerivative / avgLuma;
+		const F32 threshold1 = g_consts.m_threshold;
+		const F32 threshold2 = threshold1 * 0.4;
 
 		UVec2 rate;
 		rate.x = (lumaDiff.x > threshold1) ? 1u : ((lumaDiff.x > threshold2) ? 2u : 4u);