3 년 전 · 402e45f2d7
--- a/AnKi/ShaderCompiler/Dxc.cpp
+++ b/AnKi/ShaderCompiler/Dxc.cpp
@@ -102,11 +102,12 @@ Error compileHlslToSpirv(CString src, ShaderType shaderType, BaseMemoryPool& tmp
 
				 	dxcArgs.emplaceBack(&tmpPool, spvFilename);
			
 
				 	dxcArgs.emplaceBack(&tmpPool, "-Wall");
			
 
				 	dxcArgs.emplaceBack(&tmpPool, "-Wextra");
			
 
				-	dxcArgs.emplaceBack(&tmpPool, "-Wconversion");
			
 
				+	dxcArgs.emplaceBack(&tmpPool, "-Wno-conversion");
			
 
				 	dxcArgs.emplaceBack(&tmpPool, "-Werror");
			
 
				 	dxcArgs.emplaceBack(&tmpPool, "-Wfatal-errors");
			
 
				+	dxcArgs.emplaceBack(&tmpPool, "-Wundef");
			
 
				 	dxcArgs.emplaceBack(&tmpPool, "-Wno-unused-const-variable");
			
 
				-	dxcArgs.emplaceBack(&tmpPool, "-enable-16bit-types");
			
 
				+	// dxcArgs.emplaceBack(&tmpPool, "-enable-16bit-types");
			
 
				 	dxcArgs.emplaceBack(&tmpPool, "-HV");
			
 
				 	dxcArgs.emplaceBack(&tmpPool, "2021");
			
 
				 	dxcArgs.emplaceBack(&tmpPool, "-E");
			
@@ -152,6 +153,7 @@ Error compileHlslToSpirv(CString src, ShaderType shaderType, BaseMemoryPool& tmp
 
				 			}
			
 
				 			else
			
 
				 			{
			
 
				+				// printf("%s\n", src.cstr());
			
 
				 				return Error::kFunctionFailed;
			
 
				 			}
			
 
				 		}
			
--- a/AnKi/ShaderCompiler/ShaderProgramParser.cpp
+++ b/AnKi/ShaderCompiler/ShaderProgramParser.cpp
@@ -515,6 +515,8 @@ Error ShaderProgramParser::parseLine(CString line, CString fname, Bool& foundPra
 
				 				{
			
 
				 					ANKI_CHECK(checkActiveStruct());
			
 
				 					ANKI_CHECK(parsePragmaStructEnd(token + 1, end, line, fname));
			
 
				+
			
 
				+					m_codeLines.pushBackSprintf("#line %u \"%s\"", lineNumber, fname.cstr());
			
 
				 				}
			
 
				 				else
			
 
				 				{
			
@@ -789,8 +791,8 @@ Error ShaderProgramParser::parsePragmaStructEnd(const StringRaii* begin, const S
 
				 		for(U32 j = 0; j < componentCount; ++j)
			
 
				 		{
			
 
				 			StringRaii tmp(m_pool);
			
 
				-			tmp.sprintf("%s(ssbo[%s_%s_OFFSETOF + offset + %uu])%s", (isIntegral) ? "" : "uintBitsToFloat",
			
 
				-						structName.cstr(), m.m_name.cstr(), j, (j != componentCount - 1) ? "," : "");
			
 
				+			tmp.sprintf("%s(ssbo[%s_%s_OFFSETOF + offset + %uu])%s", (isIntegral) ? "" : "asfloat", structName.cstr(),
			
 
				+						m.m_name.cstr(), j, (j != componentCount - 1) ? "," : "");
			
 
				 
			
 
				 			values.append(tmp);
			
 
				 		}
			
@@ -812,7 +814,8 @@ Error ShaderProgramParser::parsePragmaStructEnd(const StringRaii* begin, const S
 
				 		m_codeLines.pushBack("#endif");
			
 
				 	}
			
 
				 
			
 
				-	// Now define the structure LOAD
			
 
				+	// Now define the structure LOAD in GLSL
			
 
				+	m_codeLines.pushBack("#if ANKI_GLSL");
			
 
				 	m_codeLines.pushBackSprintf("#define load%s(ssbo, offset) %s( \\", structName.cstr(), structName.cstr());
			
 
				 	for(U32 i = 0; i < gstruct.m_members.getSize(); ++i)
			
 
				 	{
			
@@ -821,6 +824,17 @@ Error ShaderProgramParser::parsePragmaStructEnd(const StringRaii* begin, const S
 
				 	}
			
 
				 	m_codeLines.pushBack(")");
			
 
				 
			
 
				+	// Now define the structure LOAD in HLSL
			
 
				+	m_codeLines.pushBack("#else");
			
 
				+	m_codeLines.pushBackSprintf("#define load%s(ssbo, offset) { \\", structName.cstr());
			
 
				+	for(U32 i = 0; i < gstruct.m_members.getSize(); ++i)
			
 
				+	{
			
 
				+		const Member& m = gstruct.m_members[i];
			
 
				+		m_codeLines.pushBackSprintf("\t%s_%s_LOAD(ssbo, offset) \\", structName.cstr(), m.m_name.cstr());
			
 
				+	}
			
 
				+	m_codeLines.pushBack("}");
			
 
				+	m_codeLines.pushBack("#endif");
			
 
				+
			
 
				 	// Define the actual struct
			
 
				 	m_codeLines.pushBackSprintf("#define %s %s_", structName.cstr(), structName.cstr());
			
 
				 
			
--- a/AnKi/Shaders/Common.hlsl
+++ b/AnKi/Shaders/Common.hlsl
@@ -0,0 +1,22 @@
 
				+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
			
 
				+// All rights reserved.
			
 
				+// Code licensed under the BSD License.
			
 
				+// http://www.anki3d.org/LICENSE
			
 
				+
			
 
				+// This file contains common code for all shaders. It's optional but it's recommended to include it
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include <AnKi/Shaders/Include/Common.h>
			
 
				+
			
 
				+// Remaps a value from UV space to NDC space: 0 maps to -1 and 1 maps to 1.
				+template<typename T>
				+T uvToNdc(T x)
				+{
				+	const T doubled = x * 2.0f;
				+	return doubled - 1.0f;
				+}
			
 
				+
			
 
				+// Remaps a value from NDC space to UV space: -1 maps to 0 and 1 maps to 1.
				+template<typename T>
				+T ndcToUv(T x)
				+{
				+	const T halved = x * 0.5f;
				+	return halved + 0.5f;
				+}
			
--- a/AnKi/Shaders/Functions.glsl
+++ b/AnKi/Shaders/Functions.glsl
@@ -613,7 +613,7 @@ F32 computeMipLevel(Vec2 normalizedUvs)
 
				 #endif
			
 
				 
			
 
				 #if ANKI_GLSL
			
 
				-#	if ANKI_SUPPORTS_64BIT
			
 
				+#	if ANKI_SUPPORTS_64BIT_TYPES
			
 
				 /// The regular findLSB in glslang has some issues since it invokes a builtin that is only supposed to be used with
			
 
				 /// 32bit input. This is an alternative implementation but it expects that the input is not zero.
			
 
				 I32 findLSB2(U64 v)
			
--- a/AnKi/Shaders/Functions.hlsl
+++ b/AnKi/Shaders/Functions.hlsl
@@ -0,0 +1,713 @@
 
				+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
			
 
				+// All rights reserved.
			
 
				+// Code licensed under the BSD License.
			
 
				+// http://www.anki3d.org/LICENSE
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include <AnKi/Shaders/Common.hlsl>
			
 
				+
			
 
				+// Converts a single depth value to linear depth given the zNear and zFar values
				+F32 linearizeDepth(F32 depth, F32 zNear, F32 zFar)
				+{
				+	const F32 denominator = (zNear - zFar) + zFar / depth;
				+	return zNear / denominator;
				+}
			
 
				+
			
 
				+// Converts 4 depth values at once to linear depth given the zNear and zFar values
				+Vec4 linearizeDepth(Vec4 depth, F32 zNear, F32 zFar)
				+{
				+	const Vec4 denominator = (zNear - zFar) + zFar / depth;
				+	return zNear / denominator;
				+}
			
 
				+
			
 
				+// The optimal linearizeDepth. The caller supplies the pre-computed a=(n-f)/n and b=f/n
				+F32 linearizeDepthOptimal(F32 depth, F32 a, F32 b)
				+{
				+	const F32 denominator = a + b / depth;
				+	return 1.0 / denominator;
				+}
			
 
				+
			
 
				+// The optimal linearizeDepth for 4 depths at once. The caller supplies the pre-computed a=(n-f)/n and b=f/n
				+Vec4 linearizeDepthOptimal(Vec4 depths, F32 a, F32 b)
				+{
				+	const Vec4 denominator = a + b / depths;
				+	return 1.0 / denominator;
				+}
			
 
				+
			
 
				+// Projects a vector using only the non-zero values (m00, m11, m22, m23) of a perspective matrix
				+Vec4 projectPerspective(Vec4 vec, F32 m00, F32 m11, F32 m22, F32 m23)
				+{
				+	const F32 projX = vec.x * m00;
				+	const F32 projY = vec.y * m11;
				+	const F32 projZ = vec.z * m22 + vec.w * m23;
				+	const F32 projW = -vec.z;
				+	return Vec4(projX, projY, projZ, projW);
				+}
			
 
				+
			
 
				+#if defined(ANKI_FRAGMENT_SHADER)
				+// Filters the texture using 4 samples. Adapted from shadertoy.com/view/4tyGDD
				+Vec4 textureCatmullRom4Samples(Texture2D tex, SamplerState sampl, Vec2 uv, Vec2 texSize)
				+{
				+	const Vec2 halff = 2.0 * frac(0.5 * uv * texSize - 0.25) - 1.0;
				+	const Vec2 f = frac(halff);
				+	const Vec2 sum0 = (2.0 * f - 3.5) * f + 0.5;
				+	const Vec2 sum1 = (2.0 * f - 2.5) * f - 0.5;
				+	Vec4 w = Vec4(f * sum0 + 1.0, f * sum1);
				+
				+	// The 2 sets of coordinates are computed before the sign of the weights gets adjusted
				+	const Vec2 uvA = (((-2.0 * f + 3.0) * f + 0.5) * f - 1.5) * f / (w.xy * texSize) + uv;
				+	const Vec2 uvB = (((-2.0 * f + 5.0) * f - 2.5) * f - 0.5) / (sum1 * texSize) + uv;
				+
				+	const F32 signFlip = (halff.x * halff.y > 0.0) ? 1.0 : -1.0;
				+	w.xz *= signFlip;
				+
				+	const Vec4 sampleAA = tex.Sample(sampl, uvA);
				+	const Vec4 sampleBA = tex.Sample(sampl, Vec2(uvB.x, uvA.y));
				+	const Vec4 sampleAB = tex.Sample(sampl, Vec2(uvA.x, uvB.y));
				+	const Vec4 sampleBB = tex.Sample(sampl, uvB);
				+
				+	return (sampleAA * w.x + sampleBA * w.z) * w.y + (sampleAB * w.x + sampleBB * w.z) * w.w;
				+}
				+#endif
			
 
				+
			
 
				+// Bicubic filtering of a texture at the given LOD using 4 samples. texSize is the size used to convert the UVs to
				+// texel coordinates and back. Stolen from shadertoy.com/view/4df3Dn
				+Vec4 textureBicubic(Texture2D tex, SamplerState sampl, Vec2 uv, F32 lod, Vec2 texSize)
				+{
				+	// Function-local helper macros. Every one of them is #undef'ed at the end of the function so that none leaks into the
				+	// code that follows
				+#define w0(a) ((1.0 / 6.0) * ((a) * ((a) * (-(a) + 3.0) - 3.0) + 1.0))
				+#define w1(a) ((1.0 / 6.0) * ((a) * (a) * (3.0 * (a)-6.0) + 4.0))
				+#define w2(a) ((1.0 / 6.0) * ((a) * ((a) * (-3.0 * (a) + 3.0) + 3.0) + 1.0))
				+#define w3(a) ((1.0 / 6.0) * ((a) * (a) * (a)))
				+#define g0(a) (w0(a) + w1(a))
				+#define g1(a) (w2(a) + w3(a))
				+#define h0(a) (-1.0 + w1(a) / (w0(a) + w1(a)))
				+#define h1(a) (1.0 + w3(a) / (w2(a) + w3(a)))
				+#define texSample(uv) tex.SampleLevel(sampl, uv, lod)
				+
				+	uv = uv * texSize + 0.5;
				+	const Vec2 iuv = floor(uv);
				+	const Vec2 fuv = frac(uv);
				+
				+	const F32 g0x = g0(fuv.x);
				+	const F32 g1x = g1(fuv.x);
				+	const F32 h0x = h0(fuv.x);
				+	const F32 h1x = h1(fuv.x);
				+	const F32 h0y = h0(fuv.y);
				+	const F32 h1y = h1(fuv.y);
				+
				+	const Vec2 p0 = (Vec2(iuv.x + h0x, iuv.y + h0y) - 0.5) / texSize;
				+	const Vec2 p1 = (Vec2(iuv.x + h1x, iuv.y + h0y) - 0.5) / texSize;
				+	const Vec2 p2 = (Vec2(iuv.x + h0x, iuv.y + h1y) - 0.5) / texSize;
				+	const Vec2 p3 = (Vec2(iuv.x + h1x, iuv.y + h1y) - 0.5) / texSize;
				+
				+	return g0(fuv.y) * (g0x * texSample(p0) + g1x * texSample(p1))
				+		   + g1(fuv.y) * (g0x * texSample(p2) + g1x * texSample(p3));
				+
				+#undef w0
				+#undef w1
				+#undef w2
				+#undef w3
				+#undef g0
				+#undef g1
				+#undef h0
				+#undef h1
				+#undef texSample
				+}
			
 
				+
			
 
				+// Hashes a 2D value into a pseudo-random number. frac() yields [0, 1) so the result lies in [0.5, 1.0)
				+F32 rand(Vec2 n)
				+{
				+	const F32 hash = frac(sin(dot(n, Vec2(12.9898, 78.233))) * 43758.5453);
				+	return 0.5 + 0.5 * hash;
				+}
			
 
				+
			
 
				+// Depth aware upscale of colorTex. If the linear depth of all 4 gathered depthHalf texels is within depthThreshold of
				+// the linear depthFull depth the color is sampled directly. Otherwise the color of the gathered texel whose depth is
				+// the closest to the depthFull depth is returned.
				+Vec4 nearestDepthUpscale(Vec2 uv, Texture2D depthFull, Texture2D depthHalf, Texture2D colorTex,
				+						 SamplerState linearAnyClampSampler, Vec2 linearDepthCf, F32 depthThreshold)
				+{
				+	// The sampler is not important for the 2 depth reads
				+	const F32 rawFullDepth = depthFull.SampleLevel(linearAnyClampSampler, uv, 0.0).r;
				+	const F32 refDepth = linearizeDepthOptimal(rawFullDepth, linearDepthCf.x, linearDepthCf.y);
				+
				+	const Vec4 rawHalfDepths = depthHalf.GatherRed(linearAnyClampSampler, uv);
				+	const Vec4 halfDepths = linearizeDepthOptimal(rawHalfDepths, linearDepthCf.x, linearDepthCf.y);
				+
				+	const Vec4 diffs = abs(Vec4(refDepth, refDepth, refDepth, refDepth) - halfDepths);
				+
				+	if(all(diffs < Vec4(depthThreshold, depthThreshold, depthThreshold, depthThreshold)))
				+	{
				+		// No major discontinuities, sample with the provided sampler
				+		return colorTex.SampleLevel(linearAnyClampSampler, uv, 0.0);
				+	}
				+
				+	// Some discontinuities, find the gathered texel with the smallest depth difference
				+	U32 closest = 0u;
				+	F32 smallestDiff = diffs.x;
				+
				+	if(diffs.y < smallestDiff)
				+	{
				+		closest = 1u;
				+		smallestDiff = diffs.y;
				+	}
				+
				+	if(diffs.z < smallestDiff)
				+	{
				+		closest = 2u;
				+		smallestDiff = diffs.z;
				+	}
				+
				+	if(diffs.w < smallestDiff)
				+	{
				+		closest = 3u;
				+	}
				+
				+	const Vec4 reds = colorTex.GatherRed(linearAnyClampSampler, uv);
				+	const Vec4 greens = colorTex.GatherGreen(linearAnyClampSampler, uv);
				+	const Vec4 blues = colorTex.GatherBlue(linearAnyClampSampler, uv);
				+	const Vec4 alphas = colorTex.GatherAlpha(linearAnyClampSampler, uv);
				+
				+	return Vec4(reds[closest], greens[closest], blues[closest], alphas[closest]);
				+}
			
 
				+
			
 
				+// Helper of bilateralUpsample. The returned weight is the inverse of the distance between the reference depth and the
				+// linearized depthLow depth at uv. kEpsilonf is added to the distance before the division.
				+F32 _calcDepthWeight(Texture2D depthLow, SamplerState nearestAnyClamp, Vec2 uv, F32 ref, Vec2 linearDepthCf)
				+{
				+	const F32 rawDepth = depthLow.SampleLevel(nearestAnyClamp, uv, 0.0).r;
				+	const F32 depthDistance = abs(ref - linearizeDepthOptimal(rawDepth, linearDepthCf.x, linearDepthCf.y));
				+	return 1.0 / (kEpsilonf + depthDistance);
				+}
			
 
				+
			
 
				+// Helper of bilateralUpsample. Samples colorLow at uv displaced by offset (in texels of size lowInvSize), scales the
				+// color by the depth weight and the given weight and accumulates the product of the 2 weights into normalize.
				+Vec4 _sampleAndWeight(Texture2D depthLow, Texture2D colorLow, SamplerState linearAnyClamp, SamplerState nearestAnyClamp,
				+					  const Vec2 lowInvSize, Vec2 uv, const Vec2 offset, const F32 ref, const F32 weight,
				+					  const Vec2 linearDepthCf, inout F32 normalize)
				+{
				+	const Vec2 sampleUv = uv + offset * lowInvSize;
				+	const F32 depthWeight = _calcDepthWeight(depthLow, nearestAnyClamp, sampleUv, ref, linearDepthCf);
				+	const Vec4 color = colorLow.SampleLevel(linearAnyClamp, sampleUv, 0.0);
				+
				+	normalize += weight * depthWeight;
				+	return (color * depthWeight) * weight;
				+}
			
 
				+
			
 
				+// Upsamples colorLow by blending the texel at uv and its 8 neighbours. Every sample is weighted by a fixed weight and by
				+// how close its depthLow depth is to the depthHigh depth at uv. The result is divided by the sum of the weights.
				+Vec4 bilateralUpsample(Texture2D depthHigh, Texture2D depthLow, Texture2D colorLow, SamplerState linearAnyClamp,
				+					   SamplerState nearestAnyClamp, const Vec2 lowInvSize, const Vec2 uv, const Vec2 linearDepthCf)
				+{
				+	// x is the weight of the center, y of the 4 edge neighbours and z of the 4 corner neighbours
				+	const Vec3 kWeights = Vec3(0.25, 0.125, 0.0625);
				+
				+	// Neighbour offsets in texels. The first 4 are the edge neighbours and the last 4 are the corner ones
				+	const Vec2 kNeighbourOffsets[8] = {Vec2(-1.0, 0.0), Vec2(0.0, -1.0), Vec2(1.0, 0.0),  Vec2(0.0, 1.0),
				+									   Vec2(1.0, 1.0),  Vec2(1.0, -1.0), Vec2(-1.0, 1.0), Vec2(-1.0, -1.0)};
				+
				+	const F32 rawDepthRef = depthHigh.SampleLevel(nearestAnyClamp, uv, 0.0).r;
				+	const F32 depthRef = linearizeDepthOptimal(rawDepthRef, linearDepthCf.x, linearDepthCf.y);
				+
				+	F32 normalize = 0.0;
				+	Vec4 sum = _sampleAndWeight(depthLow, colorLow, linearAnyClamp, nearestAnyClamp, lowInvSize, uv, Vec2(0.0, 0.0),
				+								depthRef, kWeights.x, linearDepthCf, normalize);
				+
				+	for(U32 i = 0u; i < 8u; ++i)
				+	{
				+		const F32 weight = (i < 4u) ? kWeights.y : kWeights.z;
				+		sum += _sampleAndWeight(depthLow, colorLow, linearAnyClamp, nearestAnyClamp, lowInvSize, uv,
				+								kNeighbourOffsets[i], depthRef, weight, linearDepthCf, normalize);
				+	}
				+
				+	return sum / normalize;
				+}
			
 
				+
			
 
				+// Builds a normalized direction out of a 2D coordinate (norm) on the cubemap face with index faceIdx
				+Vec3 getCubemapDirection(const Vec2 norm, const U32 faceIdx)
				+{
				+	// The odd faces point to the negative side of their axis
				+	const F32 zSign = ((faceIdx & 1u) == 1u) ? -1.0 : 1.0;
				+	Vec3 zDir = Vec3((faceIdx <= 1u) ? 1 : 0, (faceIdx & 2u) >> 1u, (faceIdx & 4u) >> 2u);
				+	zDir *= zSign;
				+
				+	Vec3 yDir;
				+	if(faceIdx == 2u)
				+	{
				+		yDir = Vec3(0.0, 0.0, 1.0);
				+	}
				+	else if(faceIdx == 3u)
				+	{
				+		yDir = Vec3(0.0, 0.0, -1.0);
				+	}
				+	else
				+	{
				+		yDir = Vec3(0.0, -1.0, 0.0);
				+	}
				+
				+	const Vec3 xDir = cross(zDir, yDir);
				+	return normalize(norm.x * xDir + norm.y * yDir + zDir);
				+}
			
 
				+
			
 
				+// Converts a 3D cubemap direction to 2D coordinates and outputs the index of the face as a float. v doesn't need to be
				+// normalized.
				+Vec2 convertCubeUvs(const Vec3 v, out F32 faceIndex)
				+{
				+	const Vec3 absV = abs(v);
				+	F32 mag;
				+	Vec2 uv;
				+
				+	if(absV.z >= absV.x && absV.z >= absV.y)
				+	{
				+		// Z is the major axis
				+		const Bool negative = v.z < 0.0;
				+		faceIndex = (negative) ? 5.0 : 4.0;
				+		uv = Vec2((negative) ? -v.x : v.x, -v.y);
				+		mag = absV.z;
				+	}
				+	else if(absV.y >= absV.x)
				+	{
				+		// Y is the major axis
				+		const Bool negative = v.y < 0.0;
				+		faceIndex = (negative) ? 3.0 : 2.0;
				+		uv = Vec2(v.x, (negative) ? -v.z : v.z);
				+		mag = absV.y;
				+	}
				+	else
				+	{
				+		// X is the major axis
				+		const Bool negative = v.x < 0.0;
				+		faceIndex = (negative) ? 1.0 : 0.0;
				+		uv = Vec2((negative) ? v.z : -v.z, -v.y);
				+		mag = absV.x;
				+	}
				+
				+	return 0.5 / mag * uv + 0.5;
				+}
			
 
				+
			
 
				+// Same as convertCubeUvs but the face index is output as an unsigned integer.
				+Vec2 convertCubeUvsu(const Vec3 v, out U32 faceIndex)
				+{
				+	const Vec3 absV = abs(v);
				+	F32 mag;
				+	Vec2 uv;
				+
				+	if(absV.z >= absV.x && absV.z >= absV.y)
				+	{
				+		// Z is the major axis
				+		const Bool negative = v.z < 0.0;
				+		faceIndex = (negative) ? 5u : 4u;
				+		uv = Vec2((negative) ? -v.x : v.x, -v.y);
				+		mag = absV.z;
				+	}
				+	else if(absV.y >= absV.x)
				+	{
				+		// Y is the major axis
				+		const Bool negative = v.y < 0.0;
				+		faceIndex = (negative) ? 3u : 2u;
				+		uv = Vec2(v.x, (negative) ? -v.z : v.z);
				+		mag = absV.y;
				+	}
				+	else
				+	{
				+		// X is the major axis
				+		const Bool negative = v.x < 0.0;
				+		faceIndex = (negative) ? 1u : 0u;
				+		uv = Vec2((negative) ? v.z : -v.z, -v.y);
				+		mag = absV.x;
				+	}
				+
				+	return 0.5 / mag * uv + 0.5;
				+}
			
 
				+
			
 
				+// Returns a grey color whose 3 channels hold the average of the input's channels
				+RVec3 grayScale(const RVec3 col)
				+{
				+	const F32 average = (col.r + col.g + col.b) * (1.0 / 3.0);
				+	const RVec3 grey = RVec3(average, average, average);
				+	return grey;
				+}
			
 
				+
			
 
				+// Interpolates between the weighted intensity of the color (factor 0) and the color itself (factor 1)
				+Vec3 saturateColor(const Vec3 col, const F32 factor)
				+{
				+	const Vec3 kLuminanceWeights = Vec3(0.2125, 0.7154, 0.0721);
				+	const F32 luminance = dot(col, kLuminanceWeights);
				+	return lerp(Vec3(luminance, luminance, luminance), col, factor);
				+}
			
 
				+
			
 
				+// Raises every channel of the color to the inverse of the per-channel gamma
				+Vec3 gammaCorrection(Vec3 gamma, Vec3 col)
				+{
				+	const Vec3 exponent = 1.0 / gamma;
				+	return pow(col, exponent);
				+}
			
 
				+
			
 
				+// Reads the texture and sharpens the result by subtracting a portion of the neighbouring texels. The 4 diagonal
				+// neighbours are always used and the detailed mode adds the 4 axis aligned ones. Can use 0.15 for sharpenFactor
				+Vec3 readSharpen(Texture2D tex, SamplerState sampl, Vec2 uv, F32 sharpenFactor, Bool detailed)
				+{
				+	const Vec3 center = tex.SampleLevel(sampl, uv, 0.0).rgb;
				+
				+	// Diagonal neighbours
				+	Vec3 neighbours = tex.SampleLevel(sampl, uv, 0.0, IVec2(1, 1)).rgb;
				+	neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, -1)).rgb;
				+	neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(1, -1)).rgb;
				+	neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, 1)).rgb;
				+	F32 neighbourCount = 4.0;
				+
				+	if(detailed)
				+	{
				+		// Axis aligned neighbours
				+		neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(0, 1)).rgb;
				+		neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(1, 0)).rgb;
				+		neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, 0)).rgb;
				+		neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(0, -1)).rgb;
				+		neighbourCount = 8.0;
				+	}
				+
				+	const Vec3 sharpened = center * (neighbourCount * sharpenFactor + 1.0) - sharpenFactor * neighbours;
				+	return max(Vec3(0.0, 0.0, 0.0), sharpened);
				+}
			
 
				+
			
 
				+// Returns the per-channel minimum of the texel at uv and its 8 neighbours
				+Vec3 readErosion(Texture2D tex, SamplerState sampl, const Vec2 uv)
				+{
				+	Vec3 minValue = tex.SampleLevel(sampl, uv, 0.0).rgb;
				+
				+	// Diagonal neighbours
				+	minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(1, 1)).rgb, minValue);
				+	minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, -1)).rgb, minValue);
				+	minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(1, -1)).rgb, minValue);
				+	minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, 1)).rgb, minValue);
				+
				+	// Axis aligned neighbours
				+	minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(0, 1)).rgb, minValue);
				+	minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(1, 0)).rgb, minValue);
				+	minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, 0)).rgb, minValue);
				+	minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(0, -1)).rgb, minValue);
				+
				+	return minValue;
				+}
			
 
				+
			
 
				+// 5 color heatmap from a factor. The colors are black, blue, green, yellow and red, in that order. The factor is
				+// clamped to [0, 1]: 0 gives black and 1 gives red.
				+Vec3 heatmap(const F32 factor)
				+{
				+	// Scale the factor to the 4 gradient segments. The clamp maps out of range factors to the first/last color
				+	const F32 scaled = clamp(factor * 4.0, 0.0, 4.0);
				+
				+	// Keep the segment index in [0, 3]. Without it a factor of 1.0 would restart the last segment and give yellow
				+	// instead of red
				+	const F32 segment = min(floor(scaled), 3.0);
				+	const F32 blend = scaled - segment;
				+
				+	if(segment < 1.0)
				+	{
				+		return lerp(Vec3(0.0, 0.0, 0.0), Vec3(0.0, 0.0, 1.0), blend);
				+	}
				+	else if(segment < 2.0)
				+	{
				+		return lerp(Vec3(0.0, 0.0, 1.0), Vec3(0.0, 1.0, 0.0), blend);
				+	}
				+	else if(segment < 3.0)
				+	{
				+		return lerp(Vec3(0.0, 1.0, 0.0), Vec3(1.0, 1.0, 0.0), blend);
				+	}
				+	else
				+	{
				+		return lerp(Vec3(1.0, 1.0, 0.0), Vec3(1.0, 0.0, 0.0), blend);
				+	}
				+}
			
 
				+
			
 
				+// Return a color per cubemap face. The +X is red, -X dark red, +Y green, -Y dark green, +Z blue, -Z dark blue. Any
				+// index above 4 gives the dark blue.
				+Vec3 colorPerCubeFace(const U32 dir)
				+{
				+	const Vec3 kFaceColors[6] = {Vec3(1.0, 0.0, 0.0), Vec3(0.25, 0.0, 0.0), Vec3(0.0, 1.0, 0.0),
				+								 Vec3(0.0, 0.25, 0.0), Vec3(0.0, 0.0, 1.0), Vec3(0.0, 0.0, 0.25)};
				+	return kFaceColors[min(dir, 5u)];
				+}
			
 
				+
			
 
				+// Returns true if any of the components of the color is NaN or infinite
				+Bool incorrectColor(const Vec3 c)
				+{
				+	const Bool hasNan = any(isnan(c));
				+	const Bool hasInf = any(isinf(c));
				+	return hasNan || hasInf;
				+}
			
 
				+
			
 
				+// Helper of cubeCoordSolidAngle
				+F32 areaElement(const F32 x, const F32 y)
				+{
				+	const F32 denominator = sqrt(x * x + y * y + 1.0);
				+	return atan2(x * y, denominator);
				+}
			
 
				+
			
 
				+// Compute the solid angle of a cube. Solid angle is the area of a sphere when projected into a cubemap. It's also the
				+// delta omega (dω) in the irradiance integral and other integrals that operate in a sphere.
				+// http://www.rorydriscoll.com/2012/01/15/cubemap-texel-solid-angle/
				+F32 cubeCoordSolidAngle(Vec2 norm, F32 cubeFaceSize)
				+{
				+	const F32 invFaceSize = 1.0f / cubeFaceSize;
				+	const Vec2 halfExtent = Vec2(invFaceSize, invFaceSize);
				+
				+	// The 2 opposite corners of the area around norm
				+	const Vec2 minCorner = norm - halfExtent;
				+	const Vec2 maxCorner = norm + halfExtent;
				+
				+	const F32 a00 = areaElement(minCorner.x, minCorner.y);
				+	const F32 a01 = areaElement(minCorner.x, maxCorner.y);
				+	const F32 a10 = areaElement(maxCorner.x, minCorner.y);
				+	const F32 a11 = areaElement(maxCorner.x, maxCorner.y);
				+	return a00 - a01 - a10 + a11;
				+}
			
 
				+
			
 
				+// A convenience function to skip out of bounds invocations on post-process compute shaders. Both the arguments should
				+// be constexpr. Returns true only if the invocation count is not a multiple of the workgroup size and the current
				+// invocation lies outside the count.
				+#if defined(ANKI_COMPUTE_SHADER) && ANKI_GLSL
				+Bool skipOutOfBoundsInvocations(UVec2 workgroupSize, UVec2 globalInvocationCount)
				+{
				+	const Bool hasPartialWorkgroups =
				+		(globalInvocationCount.x % workgroupSize.x) != 0u || (globalInvocationCount.y % workgroupSize.y) != 0u;
				+	const Bool outOfBounds =
				+		gl_GlobalInvocationID.x >= globalInvocationCount.x || gl_GlobalInvocationID.y >= globalInvocationCount.y;
				+
				+	return hasPartialWorkgroups && outOfBounds;
				+}
				+#endif
			
 
				+
			
 
				+// Create a matrix from some direction. The 2 other axes are derived from zAxis and then x, y and zAxis are passed, in
				+// that order, to the Mat3 constructor.
				+Mat3 rotationFromDirection(Vec3 zAxis)
				+{
				+#if 0
				+	// Disabled alternative that uses cross products
				+	const Vec3 z = zAxis;
				+	const Bool alignsWithXBasis = abs(z.x - 1.0) <= kEpsilonf; // aka z == Vec3(1.0, 0.0, 0.0)
				+	Vec3 x = (alignsWithXBasis) ? Vec3(0.0, 0.0, 1.0) : Vec3(1.0, 0.0, 0.0);
				+	const Vec3 y = normalize(cross(x, z));
				+	x = normalize(cross(z, y));
				+	return Mat3(x, y, z);
				+#else
				+	// http://jcgt.org/published/0006/01/01/
				+	const Vec3 z = zAxis;
				+	const F32 sign = (z.z >= 0.0) ? 1.0 : -1.0;
				+	const F32 a = -1.0 / (sign + z.z);
				+	const F32 b = z.x * z.y * a;
				+
				+	// Square with a multiplication instead of pow(). z.x and z.y can be negative and pow() with a negative base is not
				+	// defined (HLSL's pow returns NaN)
				+	const Vec3 x = Vec3(1.0 + sign * a * (z.x * z.x), sign * b, -sign * z.x);
				+	const Vec3 y = Vec3(b, sign + a * (z.y * z.y), -z.y);
				+
				+	return Mat3(x, y, z);
				+#endif
				+}
			
 
				+
			
 
				+#if defined(ANKI_COMPUTE_SHADER) && ANKI_GLSL
			
 
				+// Returns src with its lowest "bits" bits replaced by the lowest "bits" bits of ins.
				+// See getOptimalGlobalInvocationId8x8Amd
				+U32 _ABfiM(U32 src, U32 ins, U32 bits)
				+{
				+	const U32 lowMask = (1u << bits) - 1u;
				+	const U32 inserted = ins & lowMask;
				+	const U32 preserved = src & (~lowMask);
				+	return inserted | preserved;
				+}
			
 
				+
			
 
				+// Extracts "bits" bits of src starting at the bit offset off. See getOptimalGlobalInvocationId8x8Amd
				+U32 _ABfe(U32 src, U32 off, U32 bits)
				+{
				+	const U32 lowMask = (1u << bits) - 1u;
				+	const U32 shifted = src >> off;
				+	return shifted & lowMask;
				+}
			
 
				+
			
 
				+// Remaps an index to a 2D coordinate by rearranging its bits. See getOptimalGlobalInvocationId8x8Amd
				+UVec2 _ARmpRed8x8(U32 a)
				+{
				+	const U32 remappedX = _ABfiM(_ABfe(a, 2u, 3u), a, 1u);
				+	const U32 remappedY = _ABfiM(_ABfe(a, 3u, 3u), _ABfe(a, 1u, 2u), 2u);
				+	return UVec2(remappedX, remappedY);
				+}
			
 
				+
			
 
				+// Computes a global invocation ID for 8x8 workgroups using the remapped local invocation index.
				+// https://github.com/GPUOpen-Effects/FidelityFX-CAS/blob/master/ffx-cas/ffx_a.h
				+UVec2 getOptimalGlobalInvocationId8x8Amd()
				+{
				+	const UVec2 workgroupOrigin = gl_WorkGroupID.xy * UVec2(8u);
				+	return workgroupOrigin + _ARmpRed8x8(gl_LocalInvocationIndex);
				+}
			
 
				+
			
 
				+// Computes a global invocation ID for 8x8 workgroups by re-ordering the workgroups into tiles that are at most 8
				+// workgroups wide. https://github.com/LouisBavoil/ThreadGroupIDSwizzling/blob/master/ThreadGroupTilingX.hlsl
				+UVec2 getOptimalGlobalInvocationId8x8Nvidia()
				+{
				+	const U32 kMaxTileWidth = 8u;
				+	const UVec2 kWorkgroupSize = UVec2(8u);
				+
				+	const U32 workgroupsPerPerfectTile = kMaxTileWidth * gl_NumWorkGroups.y;
				+	const U32 perfectTileCount = gl_NumWorkGroups.x / kMaxTileWidth;
				+	const U32 workgroupsInPerfectTiles = perfectTileCount * kMaxTileWidth * gl_NumWorkGroups.y;
				+
				+	const U32 flatWorkgroupId = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;
				+	const U32 tileId = flatWorkgroupId / workgroupsPerPerfectTile;
				+	const U32 idInTile = flatWorkgroupId % workgroupsPerPerfectTile;
				+
				+	// The workgroups that come after all the perfect tiles belong to the last, narrower tile
				+	const Bool inLastTile = workgroupsInPerfectTiles <= flatWorkgroupId;
				+	const U32 tileWidth = (inLastTile) ? (gl_NumWorkGroups.x % kMaxTileWidth) : kMaxTileWidth;
				+	const U32 idInTileY = idInTile / tileWidth;
				+	const U32 idInTileX = idInTile % tileWidth;
				+
				+	const U32 swizzledFlatId = tileId * kMaxTileWidth + idInTileY * gl_NumWorkGroups.x + idInTileX;
				+	const UVec2 swizzledWorkgroupId = UVec2(swizzledFlatId % gl_NumWorkGroups.x, swizzledFlatId / gl_NumWorkGroups.x);
				+
				+	UVec2 swizzledGlobalId;
				+	swizzledGlobalId.x = kWorkgroupSize.x * swizzledWorkgroupId.x + gl_LocalInvocationID.x;
				+	swizzledGlobalId.y = kWorkgroupSize.y * swizzledWorkgroupId.y + gl_LocalInvocationID.y;
				+	return swizzledGlobalId;
				+}
			
 
				+#endif
			
 
				+
			
 
				+// Gaussian distribution function
				+F32 gaussianWeight(F32 s, F32 x)
				+{
				+	const F32 scale = 1.0 / (s * sqrt(2.0 * kPi));
				+	const F32 falloff = exp((x * x) / (-2.0 * s * s));
				+	return scale * falloff;
				+}
			
 
				+
			
 
				+#if ANKI_GLSL
				+// Manual bilinear filtering using 4 fetches with a nearest sampler. textureSize is the size used to convert the UVs to
				+// texel coordinates.
				+Vec4 bilinearFiltering(Texture2D tex, SamplerState nearestSampler, Vec2 uv, F32 lod, Vec2 textureSize)
				+{
				+	const Vec2 texelSize = 1.0 / textureSize;
				+	const Vec2 unnormTexCoord = (uv * textureSize) - 0.5;
				+	const Vec2 f = frac(unnormTexCoord);
				+
				+	// Center of the first of the 4 texels. The fetches need to start from it and not from the uv. Otherwise the texels
				+	// that get fetched will not match the blend factors computed above
				+	const Vec2 snapTexCoord = (floor(unnormTexCoord) + 0.5) / textureSize;
				+
				+	const Vec4 s1 = textureLod(tex, nearestSampler, snapTexCoord, lod);
				+	const Vec4 s2 = textureLod(tex, nearestSampler, snapTexCoord + Vec2(texelSize.x, 0.0), lod);
				+	const Vec4 s3 = textureLod(tex, nearestSampler, snapTexCoord + Vec2(0.0, texelSize.y), lod);
				+	const Vec4 s4 = textureLod(tex, nearestSampler, snapTexCoord + texelSize, lod);
				+	return mix(mix(s1, s2, f.x), mix(s3, s4, f.x), f.y);
				+}
				+#endif
			
 
				+
			
 
				+// Offsets the noise by a per-frame amount and wraps the result with frac(). The frame index repeats every 64 frames.
				+// https://www.shadertoy.com/view/WsfBDf
				+Vec3 animateBlueNoise(Vec3 inputBlueNoise, U32 frameIdx)
				+{
				+	const F32 kGoldenRatioConjugate = 0.61803398875;
				+	const F32 frameOffset = F32(frameIdx % 64u) * kGoldenRatioConjugate;
				+	return frac(inputBlueNoise + frameOffset);
				+}
			
 
				+
			
 
				+#if defined(ANKI_FRAGMENT_SHADER)
				+/// Computes a mip level out of the coarse screen space derivatives of the coordinates. The result is never negative.
				+/// https://bgolus.medium.com/distinctive-derivative-differences-cce38d36797b
				+/// normalizedUvs is uv*textureResolution
				+F32 computeMipLevel(Vec2 normalizedUvs)
				+{
				+	const Vec2 derivX = ddx_coarse(normalizedUvs);
				+	const Vec2 derivY = ddy_coarse(normalizedUvs);
				+
				+	// Squared length of the longest of the 2 derivatives
				+	const F32 maxSquaredLength = max(dot(derivX, derivX), dot(derivY, derivY));
				+	return max(0.0, 0.5 * log2(maxSquaredLength));
				+}
				+#endif
			
 
				+
			
 
				+#if ANKI_GLSL
				+#	if ANKI_SUPPORTS_64BIT_TYPES
				+/// The regular findLSB in glslang has some issues since it invokes a builtin that is only supposed to be used with
				+/// 32bit input. This is an alternative implementation but it expects that the input is not zero.
				+I32 findLSB2(U64 v)
				+{
				+	// Try the low 32 bits first and fall back to the high 32 bits
				+	const I32 lowLsb = findLSB(U32(v));
				+	if(lowLsb >= 0)
				+	{
				+		return lowLsb;
				+	}
				+
				+	return findLSB(U32(v >> 32ul)) + 32;
				+}
				+#	endif
				+
				+/// Define an alternative findLSB to go in pair with the 64bit version.
				+I32 findLSB2(U32 v)
				+{
				+	const I32 lsb = findLSB(v);
				+	return lsb;
				+}
				+#endif
			
 
				+
			
 
				+/// Encode the shading rate to be stored in an SRI. The rates should be power of two, can't be zero and can't exceed 4.
				+/// So the possible values are 1,2,4
				+U32 encodeVrsRate(UVec2 rateXY)
				+{
				+	const U32 encodedY = rateXY.y >> 1u;
				+	const U32 encodedX = (rateXY.x << 1u) & 12u;
				+	return encodedY | encodedX;
				+}
			
 
				+
			
 
				+/// Maps a shading rate to a debug color. The color goes from red for 1x1 to green for 4x4. Any other combination that
				+/// is not listed gives black.
				+Vec3 visualizeVrsRate(UVec2 rate)
				+{
				+	Vec3 color;
				+
				+	if(all(rate == UVec2(1u, 1u)))
				+	{
				+		color = Vec3(1.0, 0.0, 0.0);
				+	}
				+	else if(all(rate == UVec2(2u, 1u)) || all(rate == UVec2(1u, 2u)))
				+	{
				+		color = Vec3(1.0, 0.5, 0.0);
				+	}
				+	else if(all(rate == UVec2(2u, 2u)) || all(rate == UVec2(4u, 1u)) || all(rate == UVec2(1u, 4u)))
				+	{
				+		color = Vec3(1.0, 1.0, 0.0);
				+	}
				+	else if(all(rate == UVec2(4u, 2u)) || all(rate == UVec2(2u, 4u)))
				+	{
				+		color = Vec3(0.65, 1.0, 0.0);
				+	}
				+	else if(all(rate == UVec2(4u, 4u)))
				+	{
				+		color = Vec3(0.0, 1.0, 0.0);
				+	}
				+	else
				+	{
				+		color = Vec3(0.0, 0.0, 0.0);
				+	}
				+
				+	return color;
				+}
			
 
				+
			
 
				+/// Decodes a number produced by encodeVrsRate(). Returns the shading rates.
				+UVec2 decodeVrsRate(U32 texel)
				+{
				+	const U32 shiftX = (texel >> 2u) & 3u;
				+	const U32 shiftY = texel & 3u;
				+	return UVec2(1u << shiftX, 1u << shiftY);
				+}
			
 
				+
			
 
				+/// 3D coordinates to equirectangular 2D coordinates.
				+Vec2 equirectangularMapping(Vec3 v)
				+{
				+	const Vec2 angles = Vec2(atan2(v.z, v.x), asin(v.y));
				+	const Vec2 kAngleScale = Vec2(0.1591, 0.3183);
				+	return angles * kAngleScale + 0.5;
				+}
			
 
				+
			
 
				+/// Converts a linear color to sRGB. The result is the minimum of the linear and the power segment of the curve.
				+Vec3 linearToSRgb(Vec3 linearRgb)
				+{
				+	const F32 kMinValue = 6.10352e-5;
				+	const F32 kInvGamma = 1.0 / 2.4;
				+
				+	const Vec3 clamped = max(Vec3(kMinValue, kMinValue, kMinValue), linearRgb);
				+	const Vec3 linearSegment = clamped * 12.92;
				+	const Vec3 powerSegment = pow(max(clamped, 0.00313067), Vec3(kInvGamma, kInvGamma, kInvGamma)) * 1.055 - 0.055;
				+	return min(linearSegment, powerSegment);
				+}
			
 
				+
			
 
				+/// Converts an sRGB color to linear. The channels below 0.04045 use the linear segment of the curve and the rest use
				+/// the power segment.
				+Vec3 sRgbToLinear(Vec3 sRgb)
				+{
				+	const Vec3 linearSegment = sRgb / 12.92;
				+#if ANKI_GLSL
				+	const bvec3 belowCutoff = lessThan(sRgb, Vec3(0.04045));
				+	const Vec3 powerSegment = pow((sRgb + 0.055) / 1.055, Vec3(2.4));
				+	return mix(powerSegment, linearSegment, belowCutoff);
				+#else
				+	const bool3 belowCutoff = sRgb < Vec3(0.04045, 0.04045, 0.04045);
				+	const Vec3 powerSegment = pow((sRgb + 0.055) / 1.055, Vec3(2.4, 2.4, 2.4));
				+	return lerp(powerSegment, linearSegment, belowCutoff);
				+#endif
				+}
			
 
				+
			
 
				+/// Scales the color by a grain factor that is derived from the uv and the time. The strength controls how far from
				+/// 1.0 the factor can get.
				+#if ANKI_GLSL
				+ANKI_RP Vec3 filmGrain(ANKI_RP Vec3 color, Vec2 uv, ANKI_RP F32 strength, ANKI_RP F32 time)
				+{
				+	const F32 seed = (uv.x + 4.0) * (uv.y + 4.0) * time;
				+	const F32 grainFactor =
				+		1.0 - (mod((mod(seed, 13.0) + 1.0) * (mod(seed, 123.0) + 1.0), 0.01) - 0.005) * strength;
				+	return color * grainFactor;
				+}
				+#else
				+template<typename TVec3, typename TFloat>
				+TVec3 filmGrain(TVec3 color, Vec2 uv, TFloat strength, TFloat time)
				+{
				+	const TFloat seed = (uv.x + 4.0) * (uv.y + 4.0) * time;
				+	const TFloat grainFactor =
				+		1.0 - (mod((mod(seed, 13.0) + 1.0) * (mod(seed, 123.0) + 1.0), 0.01) - 0.005) * strength;
				+	return color * grainFactor;
				+}
				+#endif
			
 
				+
			
 
				+/// Sin approximation: https://www.desmos.com/calculator/svgcjfskne
				+F32 fastSin(F32 x)
				+{
				+	const F32 k2Pi = 2.0 * kPi;
				+	const F32 kPiOver2 = kPi / 2.0;
				+
				+	// Wrap the input to a single period and remap it to [-1, 1)
				+	const F32 phase = frac((x + kPiOver2) / (k2Pi) + 0.75);
				+	const F32 t = phase * 2.0 - 1.0;
				+
				+	const F32 parabola = t * abs(t) - t;
				+	return parabola * 4.0;
				+}
			
 
				+
			
 
				+/// Cos approximation. It's fastSin() with the input shifted by half pi
				+F32 fastCos(F32 x)
				+{
				+	const F32 shifted = x + kPi / 2.0;
				+	return fastSin(shifted);
				+}
			
 
				+
			
 
				+/// Transforms v using 3 Vec4s. Every component of the result is the dot product of one of them with v
				+Vec3 transform(Vec4 mat[3u], Vec4 v)
				+{
				+	return Vec3(dot(mat[0], v), dot(mat[1], v), dot(mat[2], v));
				+}
			
--- a/AnKi/Shaders/GBufferGeneric2.ankiprog
+++ b/AnKi/Shaders/GBufferGeneric2.ankiprog
@@ -0,0 +1,357 @@
 
				+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
			
 
				+// All rights reserved.
			
 
				+// Code licensed under the BSD License.
			
 
				+// http://www.anki3d.org/LICENSE
			
 
				+
			
 
				+#pragma anki hlsl
			
 
				+
			
 
				+#pragma anki mutator ANKI_LOD 0 1 2
			
 
				+#pragma anki mutator ANKI_VELOCITY 0 1
			
 
				+#pragma anki mutator ANKI_TECHNIQUE 0 1 2
			
 
				+#pragma anki mutator ANKI_BONES 0 1
			
 
				+#pragma anki mutator DIFFUSE_TEX 0 1
			
 
				+#pragma anki mutator SPECULAR_TEX 0 1
			
 
				+#pragma anki mutator ROUGHNESS_TEX 0 1
			
 
				+#pragma anki mutator METAL_TEX 0 1
			
 
				+#pragma anki mutator NORMAL_TEX 0 1
			
 
				+#pragma anki mutator PARALLAX 0 1
			
 
				+#pragma anki mutator EMISSIVE_TEX 0 1
			
 
				+#pragma anki mutator ALPHA_TEST 0 1
			
 
				+
			
 
				+#pragma anki skip_mutation ALPHA_TEST 1 DIFFUSE_TEX 0
			
 
				+#pragma anki skip_mutation ANKI_VELOCITY 1 ANKI_TECHNIQUE 1
			
 
				+#pragma anki skip_mutation ANKI_VELOCITY 1 ANKI_TECHNIQUE 2
			
 
				+#pragma anki skip_mutation ANKI_LOD 1 ANKI_TECHNIQUE 1
			
 
				+#pragma anki skip_mutation ANKI_LOD 2 ANKI_TECHNIQUE 1
			
 
				+#pragma anki skip_mutation ANKI_LOD 1 ANKI_TECHNIQUE 2
			
 
				+#pragma anki skip_mutation ANKI_LOD 2 ANKI_TECHNIQUE 2
			
 
				+
			
 
				+// Some defines to clear things up
			
 
				+#define REALLY_ALPHA_TEST (ALPHA_TEST && DIFFUSE_TEX)
			
 
				+#define UVS (ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER || REALLY_ALPHA_TEST)
			
 
				+#define REALLY_VELOCITY ((ANKI_VELOCITY || ANKI_BONES) && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER)
			
 
				+#define REALLY_USING_PARALLAX \
			
 
				+	(PARALLAX == 1 && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER && ANKI_LOD == 0 && ALPHA_TEST == 0)
			
 
				+
			
 
				+#include <AnKi/Shaders/Include/MaterialTypes.h>
			
 
				+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
			
 
				+#include <AnKi/Shaders/PackFunctions.hlsl>
			
 
				+#include <AnKi/Shaders/Functions.hlsl>
			
 
				+
			
 
				+ANKI_BINDLESS_SET(kMaterialSetBindless)
			
 
				+
			
 
				+[[vk::binding(kMaterialBindingTrilinearRepeatSampler, kMaterialSetGlobal)]] SamplerState u_globalSampler;
			
 
				+[[vk::binding(kMaterialBindingLocalUniforms, kMaterialSetLocal)]] StructuredBuffer<U32> u_localUniforms;
			
 
				+[[vk::binding(kMaterialBindingRenderableGpuView, kMaterialSetLocal)]] StructuredBuffer<RenderableGpuView>
			
 
				+	u_renderableGpuViews;
			
 
				+[[vk::binding(kMaterialBindingGlobalUniforms, kMaterialSetGlobal)]] ConstantBuffer<MaterialGlobalUniforms>
			
 
				+	u_globalUniforms;
			
 
				+
			
 
				+#if ANKI_BONES
			
 
				+[[vk::binding(kMaterialBindingBoneTransforms, kMaterialSetLocal)]] StructuredBuffer<Mat4> u_boneTransforms;
			
 
				+[[vk::binding(kMaterialBindingPreviousBoneTransforms, kMaterialSetLocal)]] StructuredBuffer<Mat4>
			
 
				+	u_prevFrameBoneTransforms;
			
 
				+#endif
			
 
				+
			
 
				+#pragma anki reflect AnKiLocalUniforms
			
 
				+#pragma anki struct AnKiLocalUniforms
			
 
				+#pragma anki member U32 m_normalTex if NORMAL_TEX is 1
			
 
				+
			
 
				+#pragma anki member Vec3 m_diffColor if DIFFUSE_TEX is 0
			
 
				+#pragma anki member U32 m_diffTex if DIFFUSE_TEX is 1
			
 
				+
			
 
				+#pragma anki member F32 m_roughness if ROUGHNESS_TEX is 0
			
 
				+#pragma anki member U32 m_roughnessTex if ROUGHNESS_TEX is 1
			
 
				+
			
 
				+#pragma anki member Vec3 m_specColor if SPECULAR_TEX is 0
			
 
				+#pragma anki member U32 m_specTex if SPECULAR_TEX is 1
			
 
				+
			
 
				+#pragma anki member F32 m_metallic if METAL_TEX is 0
			
 
				+#pragma anki member U32 m_metallicTex if METAL_TEX is 1
			
 
				+
			
 
				+#pragma anki member Vec3 m_emission if EMISSIVE_TEX is 0
			
 
				+#pragma anki member U32 m_emissiveTex if EMISSIVE_TEX is 1
			
 
				+
			
 
				+#pragma anki member F32 m_heightmapScale if PARALLAX is 1
			
 
				+#pragma anki member U32 m_heightTex if PARALLAX is 1
			
 
				+
			
 
				+#pragma anki member F32 m_subsurface
			
 
				+#pragma anki struct end
			
 
				+
			
 
				+struct VertIn // Vertex shader input; which members exist depends on the technique and mutators
			
 
				+{
			
 
				+	[[vk::location(VertexStreamId::kPosition)]] Vec3 m_position : POSITION;
			
 
				+	U32 m_instanceId : SV_INSTANCEID; // Used by the vertex main() to index u_renderableGpuViews
			
 
				+
			
 
				+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
			
 
				+	[[vk::location(VertexStreamId::kNormal)]] RVec3 m_normal : NORMAL;
			
 
				+	[[vk::location(VertexStreamId::kTangent)]] RVec4 m_tangent : TANGENT; // w scales the bitangent computed in the vertex main()
			
 
				+#endif
			
 
				+
			
 
				+#if UVS
			
 
				+	[[vk::location(VertexStreamId::kUv)]] Vec2 m_uv : TEXCOORD; // Present for the GBUFFER technique or when alpha testing is really on
			
 
				+#endif
			
 
				+
			
 
				+#if ANKI_BONES
			
 
				+	[[vk::location(VertexStreamId::kBoneIds)]] UVec4 m_boneIndices : COLOR0; // Four indices into the bone transform buffers
			
 
				+	[[vk::location(VertexStreamId::kBoneWeights)]] RVec4 m_boneWeights : COLOR1; // One weight per bone index above
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+struct VertOut // Vertex shader output, also the input of every fragment main() in this file
			
 
				+{
			
 
				+	Vec4 m_position : SV_POSITION;
			
 
				+
			
 
				+#if UVS
			
 
				+	Vec2 m_uv : TEXCOORD;
			
 
				+#endif
			
 
				+
			
 
				+#if REALLY_VELOCITY
			
 
				+	Vec3 m_prevClipXyw : PREV_CLIP; // x, y and w of the previous frame's clip position (see velocity())
			
 
				+	Vec3 m_crntClipXyw : CRNT_CLIP; // x, y and w of the current clip position (see velocity())
			
 
				+#endif
			
 
				+
			
 
				+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
			
 
				+	RVec3 m_normal : NORMAL;
			
 
				+	RVec3 m_tangent : TANGENT;
			
 
				+	RVec3 m_bitangent : BINTANGENT; // NOTE(review): semantic name looks like a typo of BITANGENT - harmless while both stages share this struct
			
 
				+#endif
			
 
				+};
			
 
				+
			
 
				+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER || ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ
			
 
				+struct FragOut // Fragment outputs, one member per render target; filled by packGBuffer() in the GBUFFER technique
			
 
				+{
			
 
				+	Vec4 m_color0 : SV_TARGET0;
			
 
				+	Vec4 m_color1 : SV_TARGET1;
			
 
				+	Vec4 m_color2 : SV_TARGET2;
			
 
				+	Vec2 m_color3 : SV_TARGET3; // Only two components, unlike the other targets
			
 
				+};
			
 
				+#endif
			
 
				+
			
 
				+#pragma anki start vert
			
 
				+
			
 
				+#if ANKI_BONES
			
 
				+void skinning(VertIn input, inout Vec3 pos, inout Vec3 prevPos, inout RVec3 normal, inout RVec4 tangent) // Blends 4 bone matrices by weight and applies them in place
			
 
				+{
			
 
				+	Mat4 skinMat = u_boneTransforms[input.m_boneIndices[0]] * input.m_boneWeights[0]; // Start the weighted sum with bone 0
			
 
				+	Mat4 prevSkinMat = u_prevFrameBoneTransforms[input.m_boneIndices[0]] * input.m_boneWeights[0]; // Same sum using the previous frame's bones
			
 
				+	[[unroll]] for(U32 i = 1u; i < 4u; ++i) // Accumulate the remaining three influences
			
 
				+	{
			
 
				+		skinMat += u_boneTransforms[input.m_boneIndices[i]] * input.m_boneWeights[i];
			
 
				+		prevSkinMat += u_prevFrameBoneTransforms[input.m_boneIndices[i]] * input.m_boneWeights[i];
			
 
				+	}
			
 
				+
			
 
				+#	if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
			
 
				+	prevPos = mul(prevSkinMat, Vec4(pos, 1.0)).xyz; // Must read pos before it is overwritten at the end of the function
			
 
				+	tangent.xyz = mul(skinMat, Vec4(tangent.xyz, 0.0)).xyz; // w == 0: direction only, tangent.w is left untouched
			
 
				+	normal = mul(skinMat, Vec4(normal, 0.0)).xyz; // w == 0: direction only
			
 
				+#	endif
			
 
				+	ANKI_MAYBE_UNUSED(prevPos); // These three are only written in the GBUFFER technique
			
 
				+	ANKI_MAYBE_UNUSED(tangent);
			
 
				+	ANKI_MAYBE_UNUSED(normal);
			
 
				+
			
 
				+	pos = mul(skinMat, Vec4(pos, 1.0)).xyz; // Skinned position for the current frame
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#if(ANKI_VELOCITY || ANKI_BONES) && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
			
 
				+void velocity(RenderableGpuView view, Vec3 prevLocalPos, inout VertOut output) // Fills the two clip-space varyings used for the velocity output
			
 
				+{
			
 
				+#	if ANKI_VELOCITY
			
 
				+	// Object is also moving
			
 
				+	const Vec4 trf[3] = view.m_previousWorldTransform;
			
 
				+#	else
			
 
				+	// Object is a skin that is not moving
			
 
				+	const Vec4 trf[3] = view.m_worldTransform;
			
 
				+#	endif
			
 
				+
			
 
				+	Vec4 v4 = Vec4(transform(trf, Vec4(prevLocalPos, 1.0)), 1.0); // Previous local position through the transform chosen above
			
 
				+	v4 = mul(u_globalUniforms.m_previousViewProjectionMatrix, v4); // Then through the previous frame's view-projection
			
 
				+
			
 
				+	output.m_prevClipXyw = v4.xyw; // z is dropped; the fragment shader divides xy by w
			
 
				+	output.m_crntClipXyw = output.m_position.xyw; // Reads output.m_position, so the caller must have written it already
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+VertOut main(VertIn input) // Vertex entry point: optional skinning, world and view-projection transform, optional velocity varyings
			
 
				+{
			
 
				+	const RenderableGpuView view = u_renderableGpuViews[input.m_instanceId]; // Per-instance data selected by the instance ID
			
 
				+	VertOut output;
			
 
				+
			
 
				+	// All values in local space
			
 
				+#if ANKI_BONES
			
 
				+	Vec3 pos =
			
 
				+		input.m_position * view.m_positionScaleF32AndTranslationVec3.x + view.m_positionScaleF32AndTranslationVec3.yzw; // x is the scale, yzw the translation
			
 
				+#else
			
 
				+	Vec3 pos = input.m_position; // NOTE(review): no scale/translation here, unlike the bones path - confirm this is intended
			
 
				+#endif
			
 
				+
			
 
				+	Vec3 prevPos = pos; // Replaced by skinning() when bones are on in the GBUFFER technique
			
 
				+	ANKI_MAYBE_UNUSED(prevPos);
			
 
				+
			
 
				+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
			
 
				+	RVec3 normal = input.m_normal;
			
 
				+	RVec4 tangent = input.m_tangent;
			
 
				+#else
			
 
				+	RVec3 normal = RVec3(0.0, 0.0, 0.0); // Placeholders so the skinning() call below still compiles
			
 
				+	RVec4 tangent = RVec4(0.0, 0.0, 0.0, 0.0);
			
 
				+	ANKI_MAYBE_UNUSED(normal);
			
 
				+	ANKI_MAYBE_UNUSED(tangent);
			
 
				+#endif
			
 
				+
			
 
				+#if UVS
			
 
				+	output.m_uv = input.m_uv; // UVs are passed through unchanged
			
 
				+#endif
			
 
				+
			
 
				+	// Apply skinning (bones only); updates pos and, in the GBUFFER technique, prevPos/normal/tangent
			
 
				+#if ANKI_BONES
			
 
				+	skinning(input, pos, prevPos, normal, tangent);
			
 
				+#endif
			
 
				+
			
 
				+	output.m_position = Vec4(transform(view.m_worldTransform, Vec4(pos, 1.0)), 1.0); // Local position through the world transform
			
 
				+	output.m_position = mul(u_globalUniforms.m_viewProjectionMatrix, output.m_position); // Then through the view-projection matrix
			
 
				+
			
 
				+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
			
 
				+	output.m_normal = transform(view.m_worldTransform, Vec4(normal, 0.0)); // w == 0: translation is ignored
			
 
				+	output.m_tangent = transform(view.m_worldTransform, Vec4(tangent.xyz, 0.0));
			
 
				+	output.m_bitangent = cross(output.m_normal, output.m_tangent) * tangent.w; // tangent.w scales (typically flips) the bitangent
			
 
				+#endif
			
 
				+
			
 
				+#if REALLY_VELOCITY
			
 
				+	velocity(view, prevPos, output); // Must run after output.m_position is final, velocity() reads it
			
 
				+#endif
			
 
				+
			
 
				+	return output;
			
 
				+};
			
 
				+
			
 
				+#pragma anki end
			
 
				+
			
 
				+#pragma anki start frag
			
 
				+
			
 
				+// Discards the current fragment when its alpha is exactly zero; any other value passes
			
 
				+void doAlphaTest(RF32 alpha)
			
 
				+{
			
 
				+	const bool fullyTransparent = (alpha == 0.0);
			
 
				+	if(fullyTransparent)
			
 
				+	{
			
 
				+		discard;
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_SHADOWS
			
 
				+void main(VertOut input) // Shadows technique: writes no color, only performs the optional alpha test
			
 
				+{
			
 
				+	ANKI_MAYBE_UNUSED(input); // input is only read when alpha testing is compiled in
			
 
				+#	if REALLY_ALPHA_TEST
			
 
				+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(u_localUniforms, 0u);
			
 
				+	const RVec4 diffColorA = u_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(u_globalSampler, input.m_uv); // Diffuse texture looked up through the bindless array
			
 
				+	doAlphaTest(diffColorA.a); // Discards when alpha is exactly zero
			
 
				+#	endif
			
 
				+}
			
 
				+#elif ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ
			
 
				+FragOut main(VertOut input) // GBUFFER_EZ technique: optional alpha test, then zeroed outputs
			
 
				+{
			
 
				+	ANKI_MAYBE_UNUSED(input); // input is only read when alpha testing is compiled in
			
 
				+#	if REALLY_ALPHA_TEST
			
 
				+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(u_localUniforms, 0u);
			
 
				+	const RVec4 diffColorA = u_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(u_globalSampler, input.m_uv); // Diffuse texture looked up through the bindless array
			
 
				+	doAlphaTest(diffColorA.a); // Discards when alpha is exactly zero
			
 
				+#	endif
			
 
				+	return (FragOut)0; // Every render target output is zero-initialised
			
 
				+}
			
 
				+#elif ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
			
 
				+// Do normal mapping: sample a tangent-space normal and express it in the space of the interpolated T, B, N vectors
			
 
				+RVec3 readNormalFromTexture(VertOut input, Texture2D<RVec4> map, SamplerState sampl, Vec2 texCoords)
			
 
				+{
			
 
				+	// First read the texture and remap it with (c - 0.5) * 2
			
 
				+	const RVec3 nAtTangentspace = normalize((map.Sample(sampl, texCoords).rgb - 0.5) * 2.0);
			
 
				+
			
 
				+	const RVec3 n = normalize(input.m_normal);
			
 
				+	const RVec3 t = normalize(input.m_tangent);
			
 
				+	const RVec3 b = normalize(input.m_bitangent);
			
 
				+
			
 
				+	// HLSL matrix constructors fill ROWS, so t, b and n are the rows here (GLSL's mat3(t, b, n) makes them columns)
			
 
				+	const RMat3 tbnMat = Mat3(t, b, n);
			
 
				+
			
 
				+	// Row-vector multiply gives t * x + b * y + n * z. mul(tbnMat, v) would give (dot(t, v), dot(b, v), dot(n, v))
			
 
				+	return mul(nAtTangentspace, tbnMat);
			
 
				+}
			
 
				+
			
 
				+FragOut main(VertOut input) // GBUFFER technique: gather material inputs (texture or constant per mutator) and pack them
			
 
				+{
			
 
				+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(u_localUniforms, 0u);
			
 
				+
			
 
				+#	if REALLY_USING_PARALLAX
			
 
				+	// TODO: parallax mapping is not implemented yet, the UVs are used unmodified
			
 
				+	const Vec2 uv = input.m_uv;
			
 
				+#	else
			
 
				+	const Vec2 uv = input.m_uv;
			
 
				+#	endif
			
 
				+	ANKI_MAYBE_UNUSED(uv); // uv is unused when no texture mutator is enabled
			
 
				+
			
 
				+#	if DIFFUSE_TEX
			
 
				+#		if REALLY_ALPHA_TEST
			
 
				+	const RVec4 diffColorA = u_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(u_globalSampler, uv);
			
 
				+	doAlphaTest(diffColorA.a); // Discards when alpha is exactly zero
			
 
				+	const RVec3 diffColor = diffColorA.rgb;
			
 
				+#		else
			
 
				+	const RVec3 diffColor = u_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(u_globalSampler, uv).rgb;
			
 
				+#		endif
			
 
				+#	else
			
 
				+	const RVec3 diffColor = localUniforms.m_diffColor;
			
 
				+#	endif
			
 
				+
			
 
				+#	if SPECULAR_TEX
			
 
				+	const RVec3 specColor = u_bindlessTextures2dF32[localUniforms.m_specTex].Sample(u_globalSampler, uv).rgb;
			
 
				+#	else
			
 
				+	const RVec3 specColor = localUniforms.m_specColor;
			
 
				+#	endif
			
 
				+
			
 
				+#	if ROUGHNESS_TEX
			
 
				+	const RF32 roughness = u_bindlessTextures2dF32[localUniforms.m_roughnessTex].Sample(u_globalSampler, uv).g; // Roughness is read from the green channel
			
 
				+#	else
			
 
				+	const RF32 roughness = localUniforms.m_roughness;
			
 
				+#	endif
			
 
				+
			
 
				+#	if METAL_TEX
			
 
				+	const RF32 metallic = u_bindlessTextures2dF32[localUniforms.m_metallicTex].Sample(u_globalSampler, uv).b; // Metallic is read from the blue channel
			
 
				+#	else
			
 
				+	const RF32 metallic = localUniforms.m_metallic;
			
 
				+#	endif
			
 
				+
			
 
				+#	if NORMAL_TEX
			
 
				+	const RVec3 normal =
			
 
				+		readNormalFromTexture(input, u_bindlessTextures2dF32[localUniforms.m_normalTex], u_globalSampler, uv);
			
 
				+#	else
			
 
				+	const RVec3 normal = normalize(input.m_normal); // Interpolated normals need renormalising
			
 
				+#	endif
			
 
				+
			
 
				+#	if EMISSIVE_TEX
			
 
				+	const RVec3 emission = u_bindlessTextures2dF32[localUniforms.m_emissiveTex].Sample(u_globalSampler, uv).rgb;
			
 
				+#	else
			
 
				+	const RVec3 emission = localUniforms.m_emission;
			
 
				+#	endif
			
 
				+
			
 
				+#	if ANKI_VELOCITY || ANKI_BONES
			
 
				+	const Vec2 prevNdc = input.m_prevClipXyw.xy / input.m_prevClipXyw.z; // .z holds clip-space w (the varying stores xyw)
			
 
				+	const Vec2 crntNdc = input.m_crntClipXyw.xy / input.m_crntClipXyw.z; // .z holds clip-space w (the varying stores xyw)
			
 
				+
			
 
				+	// It's NDC_TO_UV(prevNdc) - NDC_TO_UV(crntNdc) or:
			
 
				+	const Vec2 velocity = (prevNdc - crntNdc) * 0.5;
			
 
				+#	else
			
 
				+	const Vec2 velocity = Vec2(1.0, 1.0); // NOTE(review): presumably a "no velocity" marker - confirm against packGBuffer()
			
 
				+#	endif
			
 
				+
			
 
				+	GbufferInfo g;
			
 
				+	g.m_diffuse = diffColor;
			
 
				+	g.m_normal = normal;
			
 
				+	g.m_f0 = specColor; // The specular color is stored as f0
			
 
				+	g.m_roughness = roughness;
			
 
				+	g.m_subsurface = localUniforms.m_subsurface; // Always a constant, there is no subsurface texture mutator
			
 
				+	g.m_emission = emission;
			
 
				+	g.m_metallic = metallic;
			
 
				+	g.m_velocity = velocity;
			
 
				+
			
 
				+	FragOut output;
			
 
				+	packGBuffer(g, output.m_color0, output.m_color1, output.m_color2, output.m_color3); // Encodes g into the four render target outputs
			
 
				+	return output;
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#pragma anki end
			
--- a/AnKi/Shaders/Include/Common.h
+++ b/AnKi/Shaders/Include/Common.h
@@ -40,9 +40,27 @@ ANKI_END_NAMESPACE
 
				 #	define ANKI_SHADER_FUNC_INLINE
			
 
				 
			
 
				 #	define ANKI_SHADER_STATIC_ASSERT(cond_)
			
 
				+#	define ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(enum_)
			
 
				 
			
 
				 #	define constexpr static const
			
 
				 
			
 
				+#	define ANKI_SUPPORTS_16BIT_TYPES 0
			
 
				+#	define ANKI_SUPPORTS_64BIT_TYPES !ANKI_PLATFORM_MOBILE
			
 
				+
			
 
				+template<typename T>
			
 
				+void maybeUnused(T a) // Helper behind ANKI_MAYBE_UNUSED: gives its argument a use without doing anything observable
			
 
				+{
			
 
				+	a = a; // Self-assignment of the by-value copy; the caller's variable is not modified
			
 
				+}
			
 
				+#	define ANKI_MAYBE_UNUSED(x) maybeUnused(x)
			
 
				+
			
 
				+#	define ANKI_BINDLESS_SET(s) \
			
 
				+		[[vk::binding(0, s)]] Texture2D<uint4> u_bindlessTextures2dU32[kMaxBindlessTextures]; \
			
 
				+		[[vk::binding(0, s)]] Texture2D<int4> u_bindlessTextures2dI32[kMaxBindlessTextures]; \
			
 
				+		[[vk::binding(0, s)]] Texture2D<RVec4> u_bindlessTextures2dF32[kMaxBindlessTextures]; \
			
 
				+		[[vk::binding(0, s)]] Texture2DArray<RVec4> u_bindlessTextures2dArrayF32[kMaxBindlessTextures]; \
			
 
				+		[[vk::binding(1, s)]] Buffer<float4> u_bindlessTextureBuffersF32[kMaxBindlessReadonlyTextureBuffers];
			
 
				+
			
 
				 typedef float F32;
			
 
				 constexpr uint kSizeof_F32 = 4u;
			
 
				 typedef float2 Vec2;
			
@@ -52,6 +70,7 @@ constexpr uint kSizeof_Vec3 = 12u;
 
				 typedef float4 Vec4;
			
 
				 constexpr uint kSizeof_Vec4 = 16u;
			
 
				 
			
 
				+#	if ANKI_SUPPORTS_16BIT_TYPES
			
 
				 typedef float16_t F16;
			
 
				 constexpr uint kSizeof_F16 = 2u;
			
 
				 typedef float16_t2 HVec2;
			
@@ -78,6 +97,7 @@ typedef int16_t3 I16Vec3;
 
				 constexpr uint kSizeof_I16Vec3 = 6u;
			
 
				 typedef int16_t4 I16Vec4;
			
 
				 constexpr uint kSizeof_I16Vec4 = 8u;
			
 
				+#	endif
			
 
				 
			
 
				 typedef uint U32;
			
 
				 constexpr uint kSizeof_U32 = 4u;
			
@@ -97,7 +117,7 @@ constexpr uint kSizeof_IVec3 = 12u;
 
				 typedef int32_t4 IVec4;
			
 
				 constexpr uint kSizeof_IVec4 = 16u;
			
 
				 
			
 
				-#	if ANKI_SUPPORTS_64BIT
			
 
				+#	if ANKI_SUPPORTS_64BIT_TYPES
			
 
				 typedef uint64_t U64;
			
 
				 constexpr uint kSizeof_U64 = 8u;
			
 
				 typedef uint64_t2 U64Vec2;
			
@@ -118,23 +138,37 @@ constexpr uint kSizeof_I64Vec4 = 32u;
 
				 #	endif
			
 
				 
			
 
				 typedef float3x3 Mat3;
			
 
				+typedef float4x4 Mat4;
			
 
				+typedef float3x4 Mat3x4;
			
 
				 
			
 
				 typedef bool Bool;
			
 
				 
			
 
				-#	if 0
			
 
				+#	if ANKI_FORCE_FULL_FP_PRECISION
			
 
				+typedef float RF32;
			
 
				+typedef float2 RVec2;
			
 
				+typedef float3 RVec3;
			
 
				+typedef float4 RVec4;
			
 
				+typedef float3x3 RMat3;
			
 
				+#	else
			
 
				 typedef min16float RF32;
			
 
				 typedef min16float2 RVec2;
			
 
				 typedef min16float3 RVec3;
			
 
				 typedef min16float4 RVec4;
			
 
				+typedef min16float3x3 RMat3;
			
 
				 #	endif
			
 
				 
			
 
				 constexpr F32 kEpsilonf = 0.000001f;
			
 
				-constexpr F16 kEpsilonhf = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee/
			
 
				+#	if ANKI_SUPPORTS_16BIT_TYPES
			
 
				+constexpr F16 kEpsilonhf = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
			
 
				+#	endif
			
 
				+constexpr RF32 kEpsilonRf = 0.0001f;
			
 
				 
			
 
				 constexpr U32 kMaxU32 = 0xFFFFFFFFu;
			
 
				 constexpr F32 kMaxF32 = 3.402823e+38;
			
 
				+#	if ANKI_SUPPORTS_16BIT_TYPES
			
 
				 constexpr F16 kMaxF16 = (F16)65504.0;
			
 
				 constexpr F16 kMinF16 = (F16)0.00006104;
			
 
				+#	endif
			
 
				 
			
 
				 constexpr F32 kPi = 3.14159265358979323846f;
			
 
				 
			
@@ -156,7 +190,7 @@ constexpr F32 kPi = 3.14159265358979323846f;
 
				 
			
 
				 #	define constexpr const
			
 
				 
			
 
				-#	define ANKI_SUPPORTS_64BIT !ANKI_PLATFORM_MOBILE
			
 
				+#	define ANKI_SUPPORTS_64BIT_TYPES !ANKI_PLATFORM_MOBILE
			
 
				 
			
 
				 #	extension GL_EXT_control_flow_attributes : require
			
 
				 #	extension GL_KHR_shader_subgroup_vote : require
			
@@ -178,7 +212,7 @@ constexpr F32 kPi = 3.14159265358979323846f;
 
				 #	extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
			
 
				 #	extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
			
 
				 
			
 
				-#	if ANKI_SUPPORTS_64BIT
			
 
				+#	if ANKI_SUPPORTS_64BIT_TYPES
			
 
				 #		extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
			
 
				 #		extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
			
 
				 #		extension GL_EXT_shader_atomic_int64 : enable
			
@@ -265,7 +299,7 @@ const uint kSizeof_ivec3 = 12u;
 
				 #	define IVec4 ivec4
			
 
				 const uint kSizeof_ivec4 = 16u;
			
 
				 
			
 
				-#	if ANKI_SUPPORTS_64BIT
			
 
				+#	if ANKI_SUPPORTS_64BIT_TYPES
			
 
				 #		define U64 uint64_t
			
 
				 const uint kSizeof_uint64_t = 8u;
			
 
				 #		define U64Vec2 u64vec2
			
@@ -296,7 +330,7 @@ const uint kSizeof_mat4x3 = 48u;
 
				 
			
 
				 #	define Bool bool
			
 
				 
			
 
				-#	if ANKI_SUPPORTS_64BIT
			
 
				+#	if ANKI_SUPPORTS_64BIT_TYPES
			
 
				 #		define Address U64
			
 
				 #	else
			
 
				 #		define Address UVec2
			
@@ -409,6 +443,11 @@ Bool all(Bool b)
 
				 #	define lerp(a, b, t) mix(a, b, t)
			
 
				 #	define atan2(x, y) atan(x, y)
			
 
				 
			
 
				+float asfloat(uint u) // GLSL-side stand-in for the HLSL asfloat() name
			
 
				+{
			
 
				+	return uintBitsToFloat(u); // Reinterprets the bits of u as a float, no numeric conversion
			
 
				+}
			
 
				+
			
 
				 constexpr F32 kEpsilonf = 0.000001f;
			
 
				 constexpr F16 kEpsilonhf = 0.0001hf; // Divisions by this should be OK according to http://weitz.de/ieee/
			
 
				 constexpr ANKI_RP F32 kEpsilonRp = F32(kEpsilonhf);
			
--- a/AnKi/Shaders/Include/GpuSceneTypes.h
+++ b/AnKi/Shaders/Include/GpuSceneTypes.h
@@ -32,7 +32,11 @@ struct MeshGpuView
 
				 	Vec3 m_positionTranslation;
			
 
				 	F32 m_positionScale;
			
 
				 
			
 
				+#if ANKI_GLSL
			
 
				 	U32 m_vertexOffsets[kMaxLodCount][kMaxVertexStreamIds];
			
 
				+#else
			
 
				+	U32 m_vertexOffsets[kMaxLodCount][(U32)VertexStreamId::kCount];
			
 
				+#endif
			
 
				 	U32 m_indexCounts[kMaxLodCount];
			
 
				 	U32 m_indexOffsets[kMaxLodCount];
			
 
				 };
			
--- a/AnKi/Shaders/Include/MaterialTypes.h
+++ b/AnKi/Shaders/Include/MaterialTypes.h
@@ -19,30 +19,36 @@ struct MaterialGlobalUniforms
 
				 };
			
 
				 ANKI_SHADER_STATIC_ASSERT(sizeof(MaterialGlobalUniforms) == 14 * sizeof(Vec4));
			
 
				 
			
 
				-const U32 kMaterialSetBindless = 0u;
			
 
				-const U32 kMaterialSetGlobal = 1u;
			
 
				-const U32 kMaterialSetLocal = 2u;
			
 
				+constexpr U32 kMaterialSetBindless = 0u;
			
 
				+constexpr U32 kMaterialSetGlobal = 1u;
			
 
				+constexpr U32 kMaterialSetLocal = 2u;
			
 
				 
			
 
				 // Begin global bindings
			
 
				-const U32 kMaterialBindingTrilinearRepeatSampler = 0u;
			
 
				-const U32 kMaterialBindingGlobalUniforms = 1u;
			
 
				+constexpr U32 kMaterialBindingTrilinearRepeatSampler = 0u;
			
 
				+constexpr U32 kMaterialBindingGlobalUniforms = 1u;
			
 
				 
			
 
				 // For forward shading:
			
 
				-const U32 kMaterialBindingLinearClampSampler = 2u;
			
 
				-const U32 kMaterialBindingDepthRt = 3u;
			
 
				-const U32 kMaterialBindingLightVolume = 4u;
			
 
				-const U32 kMaterialBindingClusterShadingUniforms = 5u;
			
 
				-const U32 kMaterialBindingClusterShadingLights = 6u;
			
 
				-const U32 kMaterialBindingClusters = 9u;
			
 
				-const U32 kMaterialBindingShadowSampler = 10u;
			
 
				+constexpr U32 kMaterialBindingLinearClampSampler = 2u;
			
 
				+constexpr U32 kMaterialBindingDepthRt = 3u;
			
 
				+constexpr U32 kMaterialBindingLightVolume = 4u;
			
 
				+constexpr U32 kMaterialBindingClusterShadingUniforms = 5u;
			
 
				+constexpr U32 kMaterialBindingClusterShadingLights = 6u;
			
 
				+constexpr U32 kMaterialBindingClusters = 9u;
			
 
				+constexpr U32 kMaterialBindingShadowSampler = 10u;
			
 
				 // End global bindings
			
 
				 
			
 
				 // Begin local bindings
			
 
				-const U32 kMaterialBindingLocalUniforms = 0u;
			
 
				-const U32 kMaterialBindingRenderableGpuView = 1u;
			
 
				-const U32 kMaterialBindingBoneTransforms = 2u;
			
 
				-const U32 kMaterialBindingPreviousBoneTransforms = 3u;
			
 
				-const U32 kMaterialBindingFirstNonStandardLocal = 4u;
			
 
				+constexpr U32 kMaterialBindingLocalUniforms = 0u;
			
 
				+constexpr U32 kMaterialBindingRenderableGpuView = 1u;
			
 
				+constexpr U32 kMaterialBindingBoneTransforms = 2u;
			
 
				+constexpr U32 kMaterialBindingPreviousBoneTransforms = 3u;
			
 
				+constexpr U32 kMaterialBindingFirstNonStandardLocal = 4u;
			
 
				 // End local bindings
			
 
				 
			
 
				+// Techniques
			
 
				+#define ANKI_RENDERING_TECHNIQUE_GBUFFER 0
			
 
				+#define ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ 1
			
 
				+#define ANKI_RENDERING_TECHNIQUE_SHADOWS 2
			
 
				+#define ANKI_RENDERING_TECHNIQUE_FORWARD 3
			
 
				+
			
 
				 ANKI_END_NAMESPACE
			
--- a/AnKi/Shaders/Include/MeshTypes.h
+++ b/AnKi/Shaders/Include/MeshTypes.h
@@ -9,8 +9,13 @@
 
				 
			
 
				 ANKI_BEGIN_NAMESPACE
			
 
				 
			
 
				-#if __cplusplus
			
 
				+#if defined(__cplusplus) || ANKI_HLSL
			
 
				+
			
 
				+#	if ANKI_HLSL
			
 
				+enum class VertexStreamId : U32
			
 
				+#	else
			
 
				 enum class VertexStreamId : U8
			
 
				+#	endif
			
 
				 {
			
 
				 	// For regular geometry
			
 
				 	kPosition,
			
@@ -30,10 +35,16 @@ enum class VertexStreamId : U8
 
				 	kParticleLife,
			
 
				 	kParticleStartingLife,
			
 
				 	kParticlePreviousPosition,
			
 
				+
			
 
				+	kCount = 6
			
 
				 };
			
 
				 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexStreamId)
			
 
				 
			
 
				+#	if ANKI_HLSL
			
 
				+enum class VertexStreamMask : U32
			
 
				+#	else
			
 
				 enum class VertexStreamMask : U8
			
 
				+#	endif
			
 
				 {
			
 
				 	kNone,
			
 
				 
			
@@ -53,31 +64,33 @@ enum class VertexStreamMask : U8
 
				 };
			
 
				 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexStreamMask)
			
 
				 
			
 
				+#	if defined(__cplusplus)
			
 
				 inline constexpr Array<Format, U32(VertexStreamId::kMeshRelatedCount)> kMeshRelatedVertexStreamFormats = {
			
 
				 	Format::kR16G16B16_Unorm, Format::kR8G8B8A8_Snorm, Format::kR8G8B8A8_Snorm,
			
 
				 	Format::kR32G32_Sfloat,   Format::kR8G8B8A8_Uint,  Format::kR8G8B8A8_Snorm};
			
 
				 
			
 
				 constexpr U32 kMaxVertexStreamIds = 6u;
			
 
				+#	endif
			
 
				 
			
 
				 #else
			
 
				 
			
 
				 // For regular geometry
			
 
				-const U32 kVertexStreamIdPosition = 0u;
			
 
				-const U32 kVertexStreamIdNormal = 1u;
			
 
				-const U32 kVertexStreamIdTangent = 2u;
			
 
				-const U32 kVertexStreamIdUv = 3u;
			
 
				-const U32 kVertexStreamIdBoneIds = 4u;
			
 
				-const U32 kVertexStreamIdBoneWeights = 5u;
			
 
				+constexpr U32 kVertexStreamIdPosition = 0u;
			
 
				+constexpr U32 kVertexStreamIdNormal = 1u;
			
 
				+constexpr U32 kVertexStreamIdTangent = 2u;
			
 
				+constexpr U32 kVertexStreamIdUv = 3u;
			
 
				+constexpr U32 kVertexStreamIdBoneIds = 4u;
			
 
				+constexpr U32 kVertexStreamIdBoneWeights = 5u;
			
 
				 
			
 
				 // For particles
			
 
				-const U32 kVertexStreamIdParticlePosition = 0u;
			
 
				-const U32 kVertexStreamIdParticleScale = 1u;
			
 
				-const U32 kVertexStreamIdParticleAlpha = 2u;
			
 
				-const U32 kVertexStreamIdParticleLife = 3u;
			
 
				-const U32 kVertexStreamIdParticleStartingLife = 4u;
			
 
				-const U32 kVertexStreamIdParticlePreviousPosition = 5u;
			
 
				-
			
 
				-const U32 kMaxVertexStreamIds = 6u;
			
 
				+constexpr U32 kVertexStreamIdParticlePosition = 0u;
			
 
				+constexpr U32 kVertexStreamIdParticleScale = 1u;
			
 
				+constexpr U32 kVertexStreamIdParticleAlpha = 2u;
			
 
				+constexpr U32 kVertexStreamIdParticleLife = 3u;
			
 
				+constexpr U32 kVertexStreamIdParticleStartingLife = 4u;
			
 
				+constexpr U32 kVertexStreamIdParticlePreviousPosition = 5u;
			
 
				+
			
 
				+constexpr U32 kMaxVertexStreamIds = 6u;
			
 
				 #endif
			
 
				 
			
 
				 ANKI_END_NAMESPACE
			
--- a/AnKi/Shaders/Include/MiscRendererTypes.h
+++ b/AnKi/Shaders/Include/MiscRendererTypes.h
@@ -10,7 +10,7 @@
 
				 ANKI_BEGIN_NAMESPACE
			
 
				 
			
 
				 // RT shadows
			
 
				-const U32 kMaxRtShadowLayers = 8u;
			
 
				+constexpr U32 kMaxRtShadowLayers = 8u;
			
 
				 
			
 
				 struct RtShadowsUniforms
			
 
				 {
			
--- a/AnKi/Shaders/Include/ModelTypes.h
+++ b/AnKi/Shaders/Include/ModelTypes.h
@@ -30,18 +30,18 @@ enum class VertexAttributeId : U8
 
				 };
			
 
				 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexAttributeId)
			
 
				 #else
			
 
				-const U32 kVertexAttributeIdPosition = 0u;
			
 
				-const U32 kVertexAttributeIdUv0 = 1u;
			
 
				-const U32 kVertexAttributeIdUv1 = 2u;
			
 
				-const U32 kVertexAttributeIdNormal = 3u;
			
 
				-const U32 kVertexAttributeIdTangent = 4u;
			
 
				-const U32 kVertexAttributeIdColor = 5u;
			
 
				-const U32 kVertexAttributeIdBoneWeights = 6u;
			
 
				-const U32 kVertexAttributeIdBoneIndices = 7u;
			
 
				-const U32 kVertexAttributeIdCount = 8u;
			
 
				-
			
 
				-const U32 kVertexAttributeIdScale = kVertexAttributeIdUv0; ///< Only for particles.
			
 
				-const U32 kVertexAttributeIdAlpha = kVertexAttributeIdUv1; ///< Only for particles.
			
 
				+constexpr U32 kVertexAttributeIdPosition = 0u;
			
 
				+constexpr U32 kVertexAttributeIdUv0 = 1u;
			
 
				+constexpr U32 kVertexAttributeIdUv1 = 2u;
			
 
				+constexpr U32 kVertexAttributeIdNormal = 3u;
			
 
				+constexpr U32 kVertexAttributeIdTangent = 4u;
			
 
				+constexpr U32 kVertexAttributeIdColor = 5u;
			
 
				+constexpr U32 kVertexAttributeIdBoneWeights = 6u;
			
 
				+constexpr U32 kVertexAttributeIdBoneIndices = 7u;
			
 
				+constexpr U32 kVertexAttributeIdCount = 8u;
			
 
				+
			
 
				+constexpr U32 kVertexAttributeIdScale = kVertexAttributeIdUv0; ///< Only for particles.
			
 
				+constexpr U32 kVertexAttributeIdAlpha = kVertexAttributeIdUv1; ///< Only for particles.
			
 
				 #endif
			
 
				 
			
 
				 // Vertex buffers
			
@@ -56,10 +56,10 @@ enum class VertexAttributeBufferId : U8
 
				 };
			
 
				 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexAttributeBufferId)
			
 
				 #else
			
 
				-const U32 kVertexAttributeBufferIdPosition = 0u;
			
 
				-const U32 kVertexAttributeBufferIdNormalTangentUv0 = 1u;
			
 
				-const U32 kVertexAttributeBufferIdBone = 2u;
			
 
				-const U32 kVertexAttributeBufferIdCount = 3u;
			
 
				+constexpr U32 kVertexAttributeBufferIdPosition = 0u;
			
 
				+constexpr U32 kVertexAttributeBufferIdNormalTangentUv0 = 1u;
			
 
				+constexpr U32 kVertexAttributeBufferIdBone = 2u;
			
 
				+constexpr U32 kVertexAttributeBufferIdCount = 3u;
			
 
				 #endif
			
 
				 
			
 
				 /// The main vertex that contains normals, tangents and UVs.
			
@@ -70,8 +70,8 @@ struct MainVertex
 
				 	Vec2 m_uv0;
			
 
				 };
			
 
				 
			
 
				-const U32 kSizeof_MainVertex = 4u * 4u;
			
 
				-const U32 kAlignof_MainVertex = 4u;
			
 
				+constexpr U32 kSizeof_MainVertex = 4u * 4u;
			
 
				+constexpr U32 kAlignof_MainVertex = 4u;
			
 
				 ANKI_SHADER_STATIC_ASSERT(kSizeof_MainVertex == sizeof(MainVertex));
			
 
				 
			
 
				 /// The vertex that contains the bone influences.
			
@@ -81,8 +81,8 @@ struct BoneInfoVertex
 
				 	U8Vec4 m_boneWeights;
			
 
				 };
			
 
				 
			
 
				-const U32 kSizeof_BoneInfoVertex = 8u;
			
 
				-const U32 kAlignof_BoneInfoVertex = 1u;
			
 
				+constexpr U32 kSizeof_BoneInfoVertex = 8u;
			
 
				+constexpr U32 kAlignof_BoneInfoVertex = 1u;
			
 
				 ANKI_SHADER_STATIC_ASSERT(kSizeof_BoneInfoVertex == sizeof(BoneInfoVertex));
			
 
				 
			
 
				 /// A structure that contains all the info of a geometry.
			
@@ -100,8 +100,8 @@ struct MeshGpuDescriptor
 
				 	Vec3 m_aabbMax;
			
 
				 };
			
 
				 
			
 
				-const U32 kSizeof_MeshGpuDescriptor = 4u * sizeof(UVec2) + 8u * sizeof(F32);
			
 
				-const U32 kAlignof_MeshGpuDescriptor = 8u;
			
 
				+constexpr U32 kSizeof_MeshGpuDescriptor = 4u * sizeof(UVec2) + 8u * sizeof(F32);
			
 
				+constexpr U32 kAlignof_MeshGpuDescriptor = 8u;
			
 
				 ANKI_SHADER_STATIC_ASSERT(kSizeof_MeshGpuDescriptor == sizeof(MeshGpuDescriptor));
			
 
				 
			
 
				 #if defined(__cplusplus)
			
@@ -120,15 +120,15 @@ enum class TextureChannelId : U8
 
				 };
			
 
				 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(TextureChannelId)
			
 
				 #else
			
 
				-const U32 TEXTURE_CHANNEL_ID_DIFFUSE = 0u;
			
 
				-const U32 TEXTURE_CHANNEL_ID_NORMAL = 1u;
			
 
				-const U32 TEXTURE_CHANNEL_ID_ROUGHNESS_METALNESS = 2u;
			
 
				-const U32 TEXTURE_CHANNEL_ID_EMISSION = 3u;
			
 
				-const U32 TEXTURE_CHANNEL_ID_HEIGHT = 4u;
			
 
				-const U32 TEXTURE_CHANNEL_ID_AUX_0 = 5u;
			
 
				-const U32 TEXTURE_CHANNEL_ID_AUX_1 = 6u;
			
 
				-const U32 TEXTURE_CHANNEL_ID_AUX_2 = 7u;
			
 
				-const U32 TEXTURE_CHANNEL_ID_COUNT = 8u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_DIFFUSE = 0u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_NORMAL = 1u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_ROUGHNESS_METALNESS = 2u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_EMISSION = 3u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_HEIGHT = 4u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_AUX_0 = 5u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_AUX_1 = 6u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_AUX_2 = 7u;
			
 
				+constexpr U32 TEXTURE_CHANNEL_ID_COUNT = 8u;
			
 
				 #endif
			
 
				 
			
 
				 struct MaterialGpuDescriptor
			
@@ -145,8 +145,8 @@ struct MaterialGpuDescriptor
 
				 	F32 m_metalness;
			
 
				 };
			
 
				 
			
 
				-const U32 kSizeof_MaterialGpuDescriptor = 8u * sizeof(U16) + 3u * sizeof(Vec3) + 2u * sizeof(F32);
			
 
				-const U32 kAlignof_MaterialGpuDescriptor = 4u;
			
 
				+constexpr U32 kSizeof_MaterialGpuDescriptor = 8u * sizeof(U16) + 3u * sizeof(Vec3) + 2u * sizeof(F32);
			
 
				+constexpr U32 kAlignof_MaterialGpuDescriptor = 4u;
			
 
				 ANKI_SHADER_STATIC_ASSERT(kSizeof_MaterialGpuDescriptor == sizeof(MaterialGpuDescriptor));
			
 
				 
			
 
				 struct ModelGpuDescriptor
			
--- a/AnKi/Shaders/PackFunctions.hlsl
+++ b/AnKi/Shaders/PackFunctions.hlsl
@@ -0,0 +1,195 @@
 
				+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
			
 
				+// All rights reserved.
			
 
				+// Code licensed under the BSD License.
			
 
				+// http://www.anki3d.org/LICENSE
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include <AnKi/Shaders/Common.hlsl>
			
 
				+#include <AnKi/Shaders/TonemappingFunctions.hlsl>
			
 
				+
			
 
				+constexpr RF32 kMinRoughness = 0.05; // Used by unpackRoughnessFromGBuffer(): a stored value of 0.0 maps to kMinRoughness and a stored value of 1.0 maps to 1.0
			
 
				+
			
 
				+/// Pack a 3D normal into a 2D vector: normal.xy is divided by (normal.z + 1.0) * 2.0 * scale and then biased by 0.5. unpackNormal() reverses the mapping.
			
 
				+/// See the clean code in comments in revision < r467
			
 
				+Vec2 packNormal(const Vec3 normal)
			
 
				+{
			
 
				+	const F32 scale = 1.7777; // Has to stay equal to the constant in unpackNormal()
			
 
				+	const F32 scalar1 = (normal.z + 1.0) * (scale * 2.0); // Evaluates to zero for normal.z == -1.0; the division below is not guarded against that
			
 
				+	return normal.xy / scalar1 + 0.5;
			
 
				+}
			
 
				+
			
 
				+/// Reverse packNormal(): rebuild a 3D normal from the 2D encoded vector. The result is normalized before it is returned.
			
 
				+Vec3 unpackNormal(const Vec2 enc)
			
 
				+{
			
 
				+	const F32 scale = 1.7777; // Same constant as in packNormal()
			
 
				+	const Vec2 nn = enc * (2.0 * scale) - scale; // Removes the 0.5 bias and the 1 / (2 * scale) factor, leaving normal.xy / (normal.z + 1.0)
			
 
				+	const F32 g = 2.0 / (dot(nn.xy, nn.xy) + 1.0);
			
 
				+	Vec3 normal;
			
 
				+	normal.xy = g * nn.xy;
			
 
				+	normal.z = g - 1.0;
			
 
				+	return normalize(normal);
			
 
				+}
			
 
				+
			
 
				+// See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
			
 
				+// Result in [0.0, 1.0]. outn.xy hold a mix of the L1-normalized n.x and n.y, outn.z holds a flag derived from the sign of n.z. Decode with signedOctDecode().
			
 
				+Vec3 signedOctEncode(Vec3 n)
			
 
				+{
			
 
				+	Vec3 outn;
			
 
				+
			
 
				+	const Vec3 nabs = abs(n);
			
 
				+	n /= nabs.x + nabs.y + nabs.z; // Rescale so that |x| + |y| + |z| == 1. Not guarded against a zero-length n
			
 
				+
			
 
				+	outn.y = n.y * 0.5 + 0.5; // Temporary value, overwritten two lines below
			
 
				+	outn.x = n.x * 0.5 + outn.y;
			
 
				+	outn.y = n.x * -0.5 + outn.y;
			
 
				+
			
 
				+	outn.z = saturate(n.z * kMaxF32); // 0.0 when n.z <= 0.0, pushed towards 1.0 when n.z > 0.0 (presumably kMaxF32 is the largest finite F32 - confirm in Common.hlsl)
			
 
				+	return outn;
			
 
				+}
			
 
				+
			
 
				+// See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html. Reverses signedOctEncode() and returns a normalized vector.
			
 
				+Vec3 signedOctDecode(const Vec3 n)
			
 
				+{
			
 
				+	Vec3 outn;
			
 
				+
			
 
				+	outn.x = n.x - n.y; // Undoes the xy mixing of signedOctEncode(): the difference of the two stored values is the L1-normalized x
			
 
				+	outn.y = n.x + n.y - 1.0; // ...and their sum minus 1.0 is the L1-normalized y
			
 
				+	outn.z = n.z * 2.0 - 1.0; // Map the stored flag from [0, 1] to [-1, 1]
			
 
				+	outn.z = outn.z * (1.0 - abs(outn.x) - abs(outn.y)); // Magnitude of z follows from |x| + |y| + |z| == 1
			
 
				+
			
 
				+	outn = normalize(outn);
			
 
				+	return outn;
			
 
				+}
			
 
				+
			
 
				+// Vectorized version. Assumes that v is in [0.0, 1.0]. Packs the four components into one U32, 8 bits each: v.x lands in bits 0-7, v.y in 8-15, v.z in 16-23 and v.w in 24-31.
			
 
				+U32 newPackUnorm4x8(const Vec4 v)
			
 
				+{
			
 
				+	Vec4 a = v * 255.0;
			
 
				+	UVec4 b = UVec4(a) << UVec4(0u, 8u, 16u, 24u); // No explicit rounding is applied before the float to uint conversion
			
 
				+	UVec2 c = b.xy | b.zw; // Merge the four shifted lanes pairwise...
			
 
				+	return c.x | c.y; // ...and then into a single scalar
			
 
				+}
			
 
				+
			
 
				+// Vectorized version. Splits u into its four bytes (lowest byte goes to x, highest to w) and scales each by 1.0 / 255.0.
			
 
				+Vec4 newUnpackUnorm4x8(const U32 u)
			
 
				+{
			
 
				+	const UVec4 a = ((UVec4)u) >> UVec4(0u, 8u, 16u, 24u); // Replicate u to all lanes and shift each lane's byte down to bits 0-7
			
 
				+	const UVec4 b = a & ((UVec4)0xFFu); // Keep only the low byte of every lane
			
 
				+	const Vec4 c = Vec4(b);
			
 
				+	return c * (1.0 / 255.0);
			
 
				+}
			
 
				+
			
 
				+// Convert from RGB to YCbCr. The result is returned as Vec3(y, cb, cr).
			
 
				+// The RGB should be in [0, 1] and the output YCbCr will be in [0, 1] as well.
			
 
				+Vec3 rgbToYCbCr(const Vec3 rgb)
			
 
				+{
			
 
				+	const F32 y = dot(rgb, Vec3(0.299, 0.587, 0.114));
			
 
				+	const F32 cb = 0.5 + dot(rgb, Vec3(-0.168736, -0.331264, 0.5)); // Chroma is offset by 0.5; yCbCrToRgb() subtracts it again
			
 
				+	const F32 cr = 0.5 + dot(rgb, Vec3(0.5, -0.418688, -0.081312));
			
 
				+	return Vec3(y, cb, cr);
			
 
				+}
			
 
				+
			
 
				+// Convert the output of rgbToYCbCr back to RGB. Expects ycbcr laid out as (y, cb, cr).
			
 
				+Vec3 yCbCrToRgb(const Vec3 ycbcr)
			
 
				+{
			
 
				+	const F32 cb = ycbcr.y - 0.5; // Remove the 0.5 offset added by rgbToYCbCr()
			
 
				+	const F32 cr = ycbcr.z - 0.5;
			
 
				+	const F32 y = ycbcr.x;
			
 
				+	const F32 r = 1.402 * cr; // r, g and b hold only the chroma contribution at this point
			
 
				+	const F32 g = -0.344 * cb - 0.714 * cr;
			
 
				+	const F32 b = 1.772 * cb;
			
 
				+	return Vec3(r, g, b) + y; // The luma is added to all three channels here
			
 
				+}
			
 
				+
			
 
				+// Pack a Vec2 to a single F32. Each component is quantized to 16 levels with round(comp * 15.0) and the result equals (16 * qx + qy) / 255.
			
 
				+// comp should be in [0, 1] and the output will be in [0, 1].
			
 
				+F32 packUnorm2ToUnorm1(const Vec2 comp)
			
 
				+{
			
 
				+	return dot(round(comp * 15.0), Vec2(1.0 / (255.0 / 16.0), 1.0 / 255.0)); // The first weight is 16 / 255, the second is 1 / 255
			
 
				+}
			
 
				+
			
 
				+// Unpack a single F32 to Vec2. Does the opposite of packUnorm2ToUnorm1.
			
 
				+Vec2 unpackUnorm1ToUnorm2(F32 c)
			
 
				+{
			
 
				+#if 1 // The float-only path is the one that gets compiled; the integer path below is disabled
			
 
				+	const F32 temp = c * (255.0 / 16.0); // Integer part is the first packed component, fractional part is the second one divided by 16
			
 
				+	const F32 a = floor(temp);
			
 
				+	const F32 b = temp - a; // b = fract(temp)
			
 
				+	return Vec2(a, b) * Vec2(1.0 / 15.0, 16.0 / 15.0); // Rescale both components by their 15 quantization steps
			
 
				+#else
			
 
				+	const U32 temp = U32(c * 255.0);
			
 
				+	const U32 a = temp >> 4; // High 4 bits
			
 
				+	const U32 b = temp & 0xF; // Low 4 bits
			
 
				+	return Vec2(a, b) / 15.0;
			
 
				+#endif
			
 
				+}
			
 
				+
			
 
				+// G-Buffer structure. Written to render targets by packGBuffer() and read back by unpackGBufferNoVelocity().
			
 
				+struct GbufferInfo
			
 
				+{
			
 
				+	RVec3 m_diffuse; ///< Stored in rt0.xyz.
			
 
				+	RVec3 m_f0; ///< Fresnel at zero angles. packGBuffer() stores only the x component (rt1.y).
			
 
				+	RVec3 m_normal; ///< Stored in rt2.yzw after signedOctEncode().
			
 
				+	RF32 m_roughness; ///< Stored in rt1.x.
			
 
				+	RF32 m_metallic; ///< Packed together with m_subsurface into rt0.w.
			
 
				+	RF32 m_subsurface; ///< Packed together with m_metallic into rt0.w.
			
 
				+	RVec3 m_emission; ///< Stored after reinhardTonemap(): r and b in rt1.zw, g in rt2.x.
			
 
				+	Vec2 m_velocity; ///< Stored in rt3. Not recovered by unpackGBufferNoVelocity().
			
 
				+};
			
 
				+
			
 
				+// Populate the G buffer: encode the members of g into the four render target outputs rt0..rt3.
			
 
				+void packGBuffer(GbufferInfo g, out Vec4 rt0, out Vec4 rt1, out Vec4 rt2, out Vec2 rt3)
			
 
				+{
			
 
				+	const F32 packedSubsurfaceMetallic = packUnorm2ToUnorm1(Vec2(g.m_subsurface, g.m_metallic)); // Two values share one channel, 16 levels each
			
 
				+
			
 
				+	const Vec3 tonemappedEmission = reinhardTonemap(g.m_emission); // unpackGBufferNoVelocity() undoes this with invertReinhardTonemap()
			
 
				+
			
 
				+	rt0 = Vec4(g.m_diffuse, packedSubsurfaceMetallic);
			
 
				+	rt1 = Vec4(g.m_roughness, g.m_f0.x, tonemappedEmission.rb); // Only the x component of f0 is written; emission r and b go to zw
			
 
				+
			
 
				+	const Vec3 encNorm = signedOctEncode(g.m_normal);
			
 
				+	rt2 = Vec4(tonemappedEmission.g, encNorm); // Emission g goes to x, the encoded normal to yzw
			
 
				+
			
 
				+	rt3 = g.m_velocity;
			
 
				+}
			
 
				+
			
 
				+RVec3 unpackDiffuseFromGBuffer(RVec4 rt0, RF32 metallic) // Returns rt0.xyz scaled by (1.0 - metallic), the same diffuse scaling done at the end of unpackGBufferNoVelocity()
			
 
				+{
			
 
				+	return rt0.xyz *= 1.0 - metallic; // Modifies the by-value parameter in place and returns the updated xyz
			
 
				+}
			
 
				+
			
 
				+Vec3 unpackNormalFromGBuffer(Vec4 rt2) // Decodes only the normal; rt2.x (emission g, see packGBuffer()) is ignored
			
 
				+{
			
 
				+	return signedOctDecode(rt2.yzw);
			
 
				+}
			
 
				+
			
 
				+RF32 unpackRoughnessFromGBuffer(RVec4 rt1) // Reads the roughness from rt1.x and remaps it so that the result never drops below kMinRoughness for inputs >= 0.0
			
 
				+{
			
 
				+	RF32 r = rt1.x;
			
 
				+	r = r * (1.0 - kMinRoughness) + kMinRoughness; // Linear remap: 0.0 -> kMinRoughness, 1.0 -> 1.0
			
 
				+	return r;
			
 
				+}
			
 
				+
			
 
				+// Read part of the G-buffer: decodes everything packGBuffer() wrote to rt0, rt1 and rt2. rt3 (velocity) is not an input, m_velocity gets a placeholder.
			
 
				+void unpackGBufferNoVelocity(Vec4 rt0, Vec4 rt1, Vec4 rt2, out GbufferInfo g)
			
 
				+{
			
 
				+	g.m_diffuse = rt0.xyz; // Raw stored value for now; scaled by (1.0 - metallic) at the end of the function
			
 
				+	const Vec2 unpackedSubsurfaceMetallic = unpackUnorm1ToUnorm2(rt0.w);
			
 
				+	g.m_subsurface = unpackedSubsurfaceMetallic.x;
			
 
				+	g.m_metallic = unpackedSubsurfaceMetallic.y;
			
 
				+
			
 
				+	g.m_roughness = unpackRoughnessFromGBuffer(rt1);
			
 
				+	g.m_f0 = Vec3(rt1.y, rt1.y, rt1.y); // Only one f0 component was stored, replicate it
			
 
				+	g.m_emission = invertReinhardTonemap(Vec3(rt1.z, rt2.x, rt1.w)); // Reassemble r, g, b from the two render targets
			
 
				+
			
 
				+	g.m_normal = signedOctDecode(rt2.yzw);
			
 
				+
			
 
				+	g.m_velocity = Vec2(kMaxF32, kMaxF32); // Put something random
			
 
				+
			
 
				+	// Compute reflectance. Has to run before the diffuse is scaled below because it reads the unscaled g.m_diffuse
			
 
				+	g.m_f0 = lerp(g.m_f0, g.m_diffuse, g.m_metallic);
			
 
				+
			
 
				+	// Compute diffuse
			
 
				+	g.m_diffuse *= 1.0 - g.m_metallic;
			
 
				+}
			
--- a/AnKi/Shaders/RtShadowsHit.ankiprog
+++ b/AnKi/Shaders/RtShadowsHit.ankiprog
@@ -27,14 +27,14 @@ layout(location = 0) rayPayloadInEXT F32 g_payload;
 
				 
			
 
				 hitAttributeEXT vec2 g_attribs;
			
 
				 
			
 
				-#if ALPHA_TEXTURE == 1 && ANKI_SUPPORTS_64BIT
			
 
				+#if ALPHA_TEXTURE == 1 && ANKI_SUPPORTS_64BIT_TYPES
			
 
				 ANKI_DEFINE_LOAD_STORE(U16Vec3, 2)
			
 
				 ANKI_DEFINE_LOAD_STORE(MainVertex, alignof(MainVertex))
			
 
				 #endif
			
 
				 
			
 
				 void main()
			
 
				 {
			
 
				-#if ALPHA_TEXTURE == 1 && ANKI_SUPPORTS_64BIT
			
 
				+#if ALPHA_TEXTURE == 1 && ANKI_SUPPORTS_64BIT_TYPES
			
 
				 	const ModelGpuDescriptor model = u_modelDescriptor;
			
 
				 	const MeshGpuDescriptor mesh = model.m_mesh;
			
 
				 
			
--- a/AnKi/Shaders/TonemappingFunctions.glsl
+++ b/AnKi/Shaders/TonemappingFunctions.glsl
@@ -38,7 +38,7 @@ ANKI_RP Vec3 tonemapReinhard(ANKI_RP Vec3 color, ANKI_RP F32 saturation)
 
				 {
			
 
				 	const ANKI_RP F32 lum = computeLuminance(color);
			
 
				 	const ANKI_RP F32 toneMappedLuminance = lum / (lum + 1.0);
			
 
				-	return toneMappedLuminance * pow(color / lum, Vec3(saturation));
			
 
				+	return toneMappedLuminance * pow(color / lum, Vec3(saturation, saturation, saturation));
			
 
				 }
			
 
				 
			
 
				 // Uncharted 2 operator
			
--- a/AnKi/Shaders/TonemappingFunctions.hlsl
+++ b/AnKi/Shaders/TonemappingFunctions.hlsl
@@ -0,0 +1,103 @@
 
				+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
			
 
				+// All rights reserved.
			
 
				+// Code licensed under the BSD License.
			
 
				+// http://www.anki3d.org/LICENSE
			
 
				+
			
 
				+#pragma once
			
 
				+
			
 
				+#include <AnKi/Shaders/Common.hlsl>
			
 
				+
			
 
				+// A trick to compute log of base 10: log(x) / log(10). NOTE(review): HLSL also has a log10 intrinsic - confirm this overload is still required.
			
 
				+template<typename T>
			
 
				+T log10(T x)
			
 
				+{
			
 
				+	return log(x) / log((T)10.0);
			
 
				+}
			
 
				+
			
 
				+RF32 computeLuminance(RVec3 color) // Returns the scalar luminance of color. The return type is scalar because the body yields a single value; callers that expect a vector still get it through implicit scalar-to-vector conversion
			
 
				+{
			
 
				+	return max(dot(RVec3(0.30, 0.59, 0.11), color), kEpsilonRf); // Weighted sum 0.30 * r + 0.59 * g + 0.11 * b, never less than kEpsilonRf
			
 
				+}
			
 
				+
			
 
				+RF32 computeExposure(RF32 avgLum, RF32 threshold) // Derives an exposure multiplier from the average luminance. threshold is subtracted in log2 space, so the result equals linearExposure * 2^(-threshold)
			
 
				+{
			
 
				+	const RF32 keyValue = 1.03 - (2.0 / (2.0 + log10(avgLum + 1.0)));
			
 
				+	const RF32 linearExposure = (keyValue / avgLum); // Not guarded against avgLum == 0.0
			
 
				+	RF32 exposure = log2(linearExposure);
			
 
				+
			
 
				+	exposure -= threshold;
			
 
				+	return exp2(exposure);
			
 
				+}
			
 
				+
			
 
				+RVec3 computeExposedColor(RVec3 color, RF32 avgLum, RF32 threshold) // Scales color by the exposure that computeExposure() derives from avgLum and threshold
			
 
				+{
			
 
				+	return computeExposure(avgLum, threshold) * color;
			
 
				+}
			
 
				+
			
 
				+// Uncharted 2 operator. Works on a single scalar value; A..F are the fixed coefficients of the rational curve evaluated in the return statement.
			
 
				+RF32 tonemapUncharted2(RF32 color)
			
 
				+{
			
 
				+	const RF32 A = 0.15;
			
 
				+	const RF32 B = 0.50;
			
 
				+	const RF32 C = 0.10;
			
 
				+	const RF32 D = 0.20;
			
 
				+	const RF32 E = 0.02;
			
 
				+	const RF32 F = 0.30;
			
 
				+
			
 
				+	return ((color * (A * color + C * B) + D * E) / (color * (A * color + B) + D * F)) - E / F; // The trailing E / F makes the curve return 0.0 for color == 0.0
			
 
				+}
			
 
				+
			
 
				+constexpr RF32 kAcesA = 2.51; // kAcesA..kAcesE are the coefficients of (x * (A * x + B)) / (x * (C * x + D) + E), shared by tonemapACESFilm() and invertTonemapACESFilm()
			
 
				+constexpr RF32 kAcesB = 0.03;
			
 
				+constexpr RF32 kAcesC = 2.43;
			
 
				+constexpr RF32 kAcesD = 0.59;
			
 
				+constexpr RF32 kAcesE = 0.14;
			
 
				+
			
 
				+// See ACES in action and its inverse at https://www.desmos.com/calculator/n1lkpc6hwq. Applied per component; the result is clamped to [0, 1] by saturate().
			
 
				+RVec3 tonemapACESFilm(RVec3 x)
			
 
				+{
			
 
				+	return saturate((x * (kAcesA * x + kAcesB)) / (x * (kAcesC * x + kAcesD) + kAcesE));
			
 
				+}
			
 
				+
			
 
				+// https://www.desmos.com/calculator/n1lkpc6hwq. Inverse of the unclamped tonemapACESFilm() curve: solves the quadratic (C*x - A)*t^2 + (D*x - B)*t + E*x = 0 for t, per component.
			
 
				+RVec3 invertTonemapACESFilm(RVec3 x)
			
 
				+{
			
 
				+	RVec3 res = kAcesD * x - kAcesB;
			
 
				+	res += sqrt(x * x * (kAcesD * kAcesD - 4.0 * kAcesE * kAcesC) + x * (4.0 * kAcesE * kAcesA - 2.0 * kAcesB * kAcesD)
			
 
				+				+ kAcesB * kAcesB); // Discriminant of the quadratic, expanded in powers of x
			
 
				+	res /= 2.0 * kAcesA - 2.0 * kAcesC * x; // Becomes zero when x == kAcesA / kAcesC; not guarded
			
 
				+
			
 
				+	return res;
			
 
				+}
			
 
				+
			
 
				+RVec3 tonemap(RVec3 color, RF32 exposure) // Multiplies color by exposure and runs the result through tonemapACESFilm(). invertTonemap() is the counterpart
			
 
				+{
			
 
				+	color *= exposure;
			
 
				+	return tonemapACESFilm(color);
			
 
				+}
			
 
				+
			
 
				+RVec3 invertTonemap(RVec3 color, RF32 exposure) // Undoes tonemap(color, exposure): inverts the ACES curve first, then divides the exposure back out
			
 
				+{
			
 
				+	color = invertTonemapACESFilm(color);
			
 
				+	color /= max(kEpsilonRf, exposure); // The divisor is kept at or above kEpsilonRf
			
 
				+	return color;
			
 
				+}
			
 
				+
			
 
				+RVec3 tonemap(RVec3 color, RF32 avgLum, RF32 threshold) // Overload that computes the exposure with computeExposure() and forwards to tonemap(color, exposure)
			
 
				+{
			
 
				+	const RF32 exposure = computeExposure(avgLum, threshold);
			
 
				+	return tonemap(color, exposure);
			
 
				+}
			
 
				+
			
 
				+// https://graphicrants.blogspot.com/2013/12/tone-mapping.html. Divides the colour by one plus its largest component. invertReinhardTonemap() is the counterpart.
			
 
				+RVec3 reinhardTonemap(RVec3 colour)
			
 
				+{
			
 
				+	// rgb / (1 + max(rgb))
			
 
				+	return colour / (1.0 + max(max(colour.r, colour.g), colour.b));
			
 
				+}
			
 
				+
			
 
				+RVec3 invertReinhardTonemap(RVec3 colour) // Counterpart of reinhardTonemap(): divides the colour by one minus its largest component
			
 
				+{
			
 
				+	// rgb / (1 - max(rgb)). The divisor is kept at or above 1 / 32768 so a component of 1.0 does not divide by zero
			
 
				+	return colour / max(1.0 / 32768.0, 1.0 - max(max(colour.r, colour.g), colour.b));
			
 
				+}
			
--- a/ThirdParty/SpirvCross/spirv_glsl.cpp
+++ b/ThirdParty/SpirvCross/spirv_glsl.cpp
@@ -7353,12 +7353,22 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 
				 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
			
 
				 		break;
			
 
				 
			
 
				+	// Panos: Treat NMin as FMin because I hate these isnan
			
 
				+#if 0
			
 
				 	case GLSLstd450NMin:
			
 
				 	case GLSLstd450NMax:
			
 
				 	{
			
 
				 		emit_nminmax_op(result_type, id, args[0], args[1], op);
			
 
				 		break;
			
 
				 	}
			
 
				+#else
			
 
				+	case GLSLstd450NMin:
			
 
				+		emit_binary_func_op(result_type, id, args[0], args[1], "min");
			
 
				+		break;
			
 
				+	case GLSLstd450NMax:
			
 
				+		emit_binary_func_op(result_type, id, args[0], args[1], "max");
			
 
				+		break;
			
 
				+#endif
			
 
				 
			
 
				 	case GLSLstd450NClamp:
			
 
				 	{
			
--- a/Tools/FormatSource.py
+++ b/Tools/FormatSource.py
@@ -10,7 +10,7 @@ import subprocess
 
				 import threading
			
 
				 import multiprocessing
			
 
				 
			
 
				-file_extensions = ["h", "hpp", "c", "cpp", "glsl", "ankiprog"]
			
 
				+file_extensions = ["h", "hpp", "c", "cpp", "glsl", "hlsl", "ankiprog"]
			
 
				 directories = ["AnKi", "Tests", "Sandbox", "Tools", "Samples"]