소스 검색

Convert GBufferGeneric to HLSL

Panagiotis Christopoulos Charitos 3 년 전
부모
커밋
402e45f2d7

+ 4 - 2
AnKi/ShaderCompiler/Dxc.cpp

@@ -102,11 +102,12 @@ Error compileHlslToSpirv(CString src, ShaderType shaderType, BaseMemoryPool& tmp
 	dxcArgs.emplaceBack(&tmpPool, spvFilename);
 	dxcArgs.emplaceBack(&tmpPool, "-Wall");
 	dxcArgs.emplaceBack(&tmpPool, "-Wextra");
-	dxcArgs.emplaceBack(&tmpPool, "-Wconversion");
+	dxcArgs.emplaceBack(&tmpPool, "-Wno-conversion");
 	dxcArgs.emplaceBack(&tmpPool, "-Werror");
 	dxcArgs.emplaceBack(&tmpPool, "-Wfatal-errors");
+	dxcArgs.emplaceBack(&tmpPool, "-Wundef");
 	dxcArgs.emplaceBack(&tmpPool, "-Wno-unused-const-variable");
-	dxcArgs.emplaceBack(&tmpPool, "-enable-16bit-types");
+	// dxcArgs.emplaceBack(&tmpPool, "-enable-16bit-types");
 	dxcArgs.emplaceBack(&tmpPool, "-HV");
 	dxcArgs.emplaceBack(&tmpPool, "2021");
 	dxcArgs.emplaceBack(&tmpPool, "-E");
@@ -152,6 +153,7 @@ Error compileHlslToSpirv(CString src, ShaderType shaderType, BaseMemoryPool& tmp
 			}
 			else
 			{
+				// printf("%s\n", src.cstr());
 				return Error::kFunctionFailed;
 			}
 		}

+ 17 - 3
AnKi/ShaderCompiler/ShaderProgramParser.cpp

@@ -515,6 +515,8 @@ Error ShaderProgramParser::parseLine(CString line, CString fname, Bool& foundPra
 				{
 					ANKI_CHECK(checkActiveStruct());
 					ANKI_CHECK(parsePragmaStructEnd(token + 1, end, line, fname));
+
+					m_codeLines.pushBackSprintf("#line %u \"%s\"", lineNumber, fname.cstr());
 				}
 				else
 				{
@@ -789,8 +791,8 @@ Error ShaderProgramParser::parsePragmaStructEnd(const StringRaii* begin, const S
 		for(U32 j = 0; j < componentCount; ++j)
 		{
 			StringRaii tmp(m_pool);
-			tmp.sprintf("%s(ssbo[%s_%s_OFFSETOF + offset + %uu])%s", (isIntegral) ? "" : "uintBitsToFloat",
-						structName.cstr(), m.m_name.cstr(), j, (j != componentCount - 1) ? "," : "");
+			tmp.sprintf("%s(ssbo[%s_%s_OFFSETOF + offset + %uu])%s", (isIntegral) ? "" : "asfloat", structName.cstr(),
+						m.m_name.cstr(), j, (j != componentCount - 1) ? "," : "");
 
 			values.append(tmp);
 		}
@@ -812,7 +814,8 @@ Error ShaderProgramParser::parsePragmaStructEnd(const StringRaii* begin, const S
 		m_codeLines.pushBack("#endif");
 	}
 
-	// Now define the structure LOAD
+	// Now define the structure LOAD in GLSL
+	m_codeLines.pushBack("#if ANKI_GLSL");
 	m_codeLines.pushBackSprintf("#define load%s(ssbo, offset) %s( \\", structName.cstr(), structName.cstr());
 	for(U32 i = 0; i < gstruct.m_members.getSize(); ++i)
 	{
@@ -821,6 +824,17 @@ Error ShaderProgramParser::parsePragmaStructEnd(const StringRaii* begin, const S
 	}
 	m_codeLines.pushBack(")");
 
+	// Now define the structure LOAD in HLSL
+	m_codeLines.pushBack("#else");
+	m_codeLines.pushBackSprintf("#define load%s(ssbo, offset) { \\", structName.cstr());
+	for(U32 i = 0; i < gstruct.m_members.getSize(); ++i)
+	{
+		const Member& m = gstruct.m_members[i];
+		m_codeLines.pushBackSprintf("\t%s_%s_LOAD(ssbo, offset) \\", structName.cstr(), m.m_name.cstr());
+	}
+	m_codeLines.pushBack("}");
+	m_codeLines.pushBack("#endif");
+
 	// Define the actual struct
 	m_codeLines.pushBackSprintf("#define %s %s_", structName.cstr(), structName.cstr());
 

+ 22 - 0
AnKi/Shaders/Common.hlsl

@@ -0,0 +1,22 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// This file contains common code for all shaders. It's optional but it's recommended to include it
+
+#pragma once
+
+#include <AnKi/Shaders/Include/Common.h>
+
+// Computes x * 2 - 1, which maps the [0, 1] range onto [-1, 1].
+template<typename T>
+T uvToNdc(T x)
+{
+	const T doubled = x * 2.0f;
+	return doubled - 1.0f;
+}
+
+// Computes x * 0.5 + 0.5, which maps the [-1, 1] range onto [0, 1].
+template<typename T>
+T ndcToUv(T x)
+{
+	const T halved = x * 0.5f;
+	return halved + 0.5f;
+}

+ 1 - 1
AnKi/Shaders/Functions.glsl

@@ -613,7 +613,7 @@ F32 computeMipLevel(Vec2 normalizedUvs)
 #endif
 
 #if ANKI_GLSL
-#	if ANKI_SUPPORTS_64BIT
+#	if ANKI_SUPPORTS_64BIT_TYPES
 /// The regular findLSB in glslang has some issues since it invokes a builtin that is only supposed to be used with
 /// 32bit input. This is an alternative implementation but it expects that the input is not zero.
 I32 findLSB2(U64 v)

+ 713 - 0
AnKi/Shaders/Functions.hlsl

@@ -0,0 +1,713 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Common.hlsl>
+
+// Convert a single depth value to linear depth using the near and far plane distances
+F32 linearizeDepth(F32 depth, F32 zNear, F32 zFar)
+{
+	const F32 denominator = (zNear - zFar) + zFar / depth;
+	return zNear / denominator;
+}
+
+// Convert 4 depth values at once to linear depth using the near and far plane distances
+Vec4 linearizeDepth(Vec4 depth, F32 zNear, F32 zFar)
+{
+	const Vec4 denominator = (zNear - zFar) + zFar / depth;
+	return zNear / denominator;
+}
+
+// Cheaper variant of linearizeDepth that expects the precomputed factors a=(n-f)/n and b=f/n
+F32 linearizeDepthOptimal(F32 depth, F32 a, F32 b)
+{
+	const F32 denominator = a + b / depth;
+	return 1.0 / denominator;
+}
+
+// Cheaper variant of linearizeDepth for 4 depths at once. Expects the precomputed factors a=(n-f)/n and b=f/n
+Vec4 linearizeDepthOptimal(Vec4 depths, F32 a, F32 b)
+{
+	const Vec4 denominator = a + b / depths;
+	return 1.0 / denominator;
+}
+
+// Project a vector using only the 4 non zero values (m00, m11, m22, m23) of a perspective matrix. The output w is
+// set to -vec.z.
+Vec4 projectPerspective(Vec4 vec, F32 m00, F32 m11, F32 m22, F32 m23)
+{
+	const F32 projX = vec.x * m00;
+	const F32 projY = vec.y * m11;
+	const F32 projZ = vec.z * m22 + vec.w * m23;
+	return Vec4(projX, projY, projZ, -vec.z);
+}
+
+#if defined(ANKI_FRAGMENT_SHADER)
+// Catmull-Rom style texture filtering that uses 4 samples. texSize is used to convert between uv and texel space. Stolen from shadertoy.com/view/4tyGDD
+Vec4 textureCatmullRom4Samples(Texture2D tex, SamplerState sampl, Vec2 uv, Vec2 texSize)
+{
+	const Vec2 halff = 2.0 * frac(0.5 * uv * texSize - 0.25) - 1.0;
+	const Vec2 f = frac(halff);
+	const Vec2 sum0 = (2.0 * f - 3.5) * f + 0.5;
+	const Vec2 sum1 = (2.0 * f - 2.5) * f - 0.5;
+	Vec4 w = Vec4(f * sum0 + 1.0, f * sum1); // Per-axis weights: xy for the first tap pair, zw for the second
+	const Vec4 pos = Vec4((((-2.0 * f + 3.0) * f + 0.5) * f - 1.5) * f / (w.xy * texSize) + uv,
+						  (((-2.0 * f + 5.0) * f - 2.5) * f - 0.5) / (sum1 * texSize) + uv); // 2 sample positions per axis
+	w.xz *= halff.x * halff.y > 0.0 ? 1.0 : -1.0; // Flip the sign of the x weights depending on the sign of halff.x * halff.y
+
+	return (tex.Sample(sampl, pos.xy) * w.x + tex.Sample(sampl, pos.zy) * w.z) * w.y
+		   + (tex.Sample(sampl, pos.xw) * w.x + tex.Sample(sampl, pos.zw) * w.z) * w.w; // Weighted blend of the 4 taps
+}
+#endif
+
+// Bicubic texture filtering built out of 4 SampleLevel taps at the given lod. texSize is the size of the texture
+// and is used to move between uv and texel space. Stolen from shadertoy.com/view/4df3Dn
+Vec4 textureBicubic(Texture2D tex, SamplerState sampl, Vec2 uv, F32 lod, Vec2 texSize)
+{
+	// Function-local helper macros. All of them are undefined again before the function ends
+#define w0(a) ((1.0 / 6.0) * ((a) * ((a) * (-(a) + 3.0) - 3.0) + 1.0))
+#define w1(a) ((1.0 / 6.0) * ((a) * (a) * (3.0 * (a)-6.0) + 4.0))
+#define w2(a) ((1.0 / 6.0) * ((a) * ((a) * (-3.0 * (a) + 3.0) + 3.0) + 1.0))
+#define w3(a) ((1.0 / 6.0) * ((a) * (a) * (a)))
+#define g0(a) (w0(a) + w1(a))
+#define g1(a) (w2(a) + w3(a))
+#define h0(a) (-1.0 + w1(a) / (w0(a) + w1(a)))
+#define h1(a) (1.0 + w3(a) / (w2(a) + w3(a)))
+#define texSample(uv) tex.SampleLevel(sampl, uv, lod)
+
+	// Move to texel space and split into integer and fractional parts
+	uv = uv * texSize + 0.5;
+	const Vec2 iuv = floor(uv);
+	const Vec2 fuv = frac(uv);
+
+	const F32 g0x = g0(fuv.x);
+	const F32 g1x = g1(fuv.x);
+	const F32 h0x = h0(fuv.x);
+	const F32 h1x = h1(fuv.x);
+	const F32 h0y = h0(fuv.y);
+	const F32 h1y = h1(fuv.y);
+
+	// The 4 tap positions, converted back to uv space
+	const Vec2 p0 = (Vec2(iuv.x + h0x, iuv.y + h0y) - 0.5) / texSize;
+	const Vec2 p1 = (Vec2(iuv.x + h1x, iuv.y + h0y) - 0.5) / texSize;
+	const Vec2 p2 = (Vec2(iuv.x + h0x, iuv.y + h1y) - 0.5) / texSize;
+	const Vec2 p3 = (Vec2(iuv.x + h1x, iuv.y + h1y) - 0.5) / texSize;
+
+	return g0(fuv.y) * (g0x * texSample(p0) + g1x * texSample(p1))
+		   + g1(fuv.y) * (g0x * texSample(p2) + g1x * texSample(p3));
+
+#undef w0
+#undef w1
+#undef w2
+#undef w3
+#undef g0
+#undef g1
+#undef h0
+#undef h1
+#undef texSample
+}
+
+// Hash-style pseudo random value computed from a 2D seed. Since it is 0.5 + 0.5 * frac(...) the result is in [0.5, 1.0)
+F32 rand(Vec2 n)
+{
+	const F32 phase = dot(n, Vec2(12.9898, 78.233));
+	const F32 noise = frac(sin(phase) * 43758.5453);
+	return 0.5 + 0.5 * noise;
+}
+
+// Depth-aware upscale of colorTex. The linearized depth read from depthFull at uv is compared against the 4 gathered
+// (and linearized) depths of depthHalf. If all differences are below depthThreshold the color is fetched with a
+// single SampleLevel, otherwise the gathered color texel with the smallest depth difference is returned.
+Vec4 nearestDepthUpscale(Vec2 uv, Texture2D depthFull, Texture2D depthHalf, Texture2D colorTex,
+						 SamplerState linearAnyClampSampler, Vec2 linearDepthCf, F32 depthThreshold)
+{
+	// Sampler not important for the 2 depth reads.
+	const F32 rawFullDepth = depthFull.SampleLevel(linearAnyClampSampler, uv, 0.0).r;
+	const F32 fullDepth = linearizeDepthOptimal(rawFullDepth, linearDepthCf.x, linearDepthCf.y);
+
+	const Vec4 rawHalfDepths = depthHalf.GatherRed(linearAnyClampSampler, uv);
+	const Vec4 halfDepths = linearizeDepthOptimal(rawHalfDepths, linearDepthCf.x, linearDepthCf.y);
+
+	const Vec4 diffs = abs(Vec4(fullDepth, fullDepth, fullDepth, fullDepth) - halfDepths);
+
+	if(all(diffs < Vec4(depthThreshold, depthThreshold, depthThreshold, depthThreshold)))
+	{
+		// No major discontinuities, one sample with the provided sampler is enough
+		return colorTex.SampleLevel(linearAnyClampSampler, uv, 0.0);
+	}
+
+	// Some discontinuities, find the component with the smallest depth difference. Ties keep the earlier component
+	U32 closest = 0u;
+	F32 smallestDiff = diffs.x;
+
+	if(diffs.y < smallestDiff)
+	{
+		closest = 1u;
+		smallestDiff = diffs.y;
+	}
+
+	if(diffs.z < smallestDiff)
+	{
+		closest = 2u;
+		smallestDiff = diffs.z;
+	}
+
+	if(diffs.w < smallestDiff)
+	{
+		closest = 3u;
+	}
+
+	// Gather every channel and rebuild the color of the chosen texel
+	const Vec4 reds = colorTex.GatherRed(linearAnyClampSampler, uv);
+	const Vec4 greens = colorTex.GatherGreen(linearAnyClampSampler, uv);
+	const Vec4 blues = colorTex.GatherBlue(linearAnyClampSampler, uv);
+	const Vec4 alphas = colorTex.GatherAlpha(linearAnyClampSampler, uv);
+
+	return Vec4(reds[closest], greens[closest], blues[closest], alphas[closest]);
+}
+
+// Weight that grows as the linearized depth read from depthLow at uv gets closer to ref. kEpsilonf keeps the
+// division away from zero.
+F32 _calcDepthWeight(Texture2D depthLow, SamplerState nearestAnyClamp, Vec2 uv, F32 ref, Vec2 linearDepthCf)
+{
+	const F32 rawDepth = depthLow.SampleLevel(nearestAnyClamp, uv, 0.0).r;
+	const F32 depthDelta = abs(ref - linearizeDepthOptimal(rawDepth, linearDepthCf.x, linearDepthCf.y));
+	return 1.0 / (kEpsilonf + depthDelta);
+}
+
+// Single tap of bilateralUpsample. Samples colorLow at uv + offset * lowInvSize, scales it by weight and by the depth
+// weight of that position, and accumulates the applied weight into normalize.
+Vec4 _sampleAndWeight(Texture2D depthLow, Texture2D colorLow, SamplerState linearAnyClamp, SamplerState nearestAnyClamp,
+					  const Vec2 lowInvSize, Vec2 uv, const Vec2 offset, const F32 ref, const F32 weight,
+					  const Vec2 linearDepthCf, inout F32 normalize)
+{
+	uv = uv + offset * lowInvSize;
+	const F32 depthWeight = _calcDepthWeight(depthLow, nearestAnyClamp, uv, ref, linearDepthCf);
+	const Vec4 color = colorLow.SampleLevel(linearAnyClamp, uv, 0.0);
+	normalize = normalize + weight * depthWeight;
+	return color * depthWeight * weight;
+}
+
+// Depth-aware upsample of colorLow. Takes 9 taps (the center and its 8 neighbours, in lowInvSize steps), weights each
+// one by a fixed kernel weight and by how close its linearized depth is to the depth read from depthHigh at uv, and
+// returns the sum divided by the total applied weight.
+Vec4 bilateralUpsample(Texture2D depthHigh, Texture2D depthLow, Texture2D colorLow, SamplerState linearAnyClamp,
+					   SamplerState nearestAnyClamp, const Vec2 lowInvSize, const Vec2 uv, const Vec2 linearDepthCf)
+{
+	const F32 kCenterWeight = 0.25;
+	const F32 kEdgeWeight = 0.125;
+	const F32 kCornerWeight = 0.0625;
+
+	const F32 rawDepthRef = depthHigh.SampleLevel(nearestAnyClamp, uv, 0.0).r;
+	const F32 depthRef = linearizeDepthOptimal(rawDepthRef, linearDepthCf.x, linearDepthCf.y);
+	F32 normalize = 0.0;
+
+#define ANKI_TAP(x, y, w) \
+	_sampleAndWeight(depthLow, colorLow, linearAnyClamp, nearestAnyClamp, lowInvSize, uv, Vec2(x, y), depthRef, w, \
+					 linearDepthCf, normalize)
+
+	// Center
+	Vec4 sum = ANKI_TAP(0.0, 0.0, kCenterWeight);
+
+	// Edges
+	sum += ANKI_TAP(-1.0, 0.0, kEdgeWeight);
+	sum += ANKI_TAP(0.0, -1.0, kEdgeWeight);
+	sum += ANKI_TAP(1.0, 0.0, kEdgeWeight);
+	sum += ANKI_TAP(0.0, 1.0, kEdgeWeight);
+
+	// Corners
+	sum += ANKI_TAP(1.0, 1.0, kCornerWeight);
+	sum += ANKI_TAP(1.0, -1.0, kCornerWeight);
+	sum += ANKI_TAP(-1.0, 1.0, kCornerWeight);
+	sum += ANKI_TAP(-1.0, -1.0, kCornerWeight);
+
+#undef ANKI_TAP
+
+	return sum / normalize;
+}
+
+// Builds a normalized direction out of a 2D coordinate on a cube face (norm) and the index of that face.
+Vec3 getCubemapDirection(const Vec2 norm, const U32 faceIdx)
+{
+	// Main axis of the face. Odd face indices point to the negative side
+	const F32 zSign = ((faceIdx & 1u) == 1u) ? -1.0 : 1.0;
+	Vec3 zDir = Vec3((faceIdx <= 1u) ? 1 : 0, (faceIdx & 2u) >> 1u, (faceIdx & 4u) >> 2u);
+	zDir *= zSign;
+
+	Vec3 yDir;
+	if(faceIdx == 2u)
+	{
+		yDir = Vec3(0.0, 0.0, 1.0);
+	}
+	else if(faceIdx == 3u)
+	{
+		yDir = Vec3(0.0, 0.0, -1.0);
+	}
+	else
+	{
+		yDir = Vec3(0.0, -1.0, 0.0);
+	}
+
+	const Vec3 xDir = cross(zDir, yDir);
+	return normalize(norm.x * xDir + norm.y * yDir + zDir);
+}
+
+// Convert 3D cubemap coordinates to 2D coordinates plus a face index (written to faceIndex as a float). The face is
+// chosen by the component of v with the largest magnitude. v doesn't need to be normalized.
+Vec2 convertCubeUvs(const Vec3 v, out F32 faceIndex)
+{
+	const Vec3 absV = abs(v);
+	const Bool zIsMajor = absV.z >= absV.x && absV.z >= absV.y;
+	const Bool yIsMajor = absV.y >= absV.x;
+	Vec2 uv;
+	F32 mag;
+
+	if(zIsMajor)
+	{
+		const Bool negative = v.z < 0.0;
+		faceIndex = negative ? 5.0 : 4.0;
+		uv = Vec2(negative ? -v.x : v.x, -v.y);
+		mag = absV.z;
+	}
+	else if(yIsMajor)
+	{
+		const Bool negative = v.y < 0.0;
+		faceIndex = negative ? 3.0 : 2.0;
+		uv = Vec2(v.x, negative ? -v.z : v.z);
+		mag = absV.y;
+	}
+	else
+	{
+		const Bool negative = v.x < 0.0;
+		faceIndex = negative ? 1.0 : 0.0;
+		uv = Vec2(negative ? v.z : -v.z, -v.y);
+		mag = absV.x;
+	}
+
+	return 0.5 / mag * uv + 0.5;
+}
+
+// Same as convertCubeUvs but the face index is written as an unsigned integer.
+Vec2 convertCubeUvsu(const Vec3 v, out U32 faceIndex)
+{
+	const Vec3 absV = abs(v);
+	const Bool zIsMajor = absV.z >= absV.x && absV.z >= absV.y;
+	const Bool yIsMajor = absV.y >= absV.x;
+	Vec2 uv;
+	F32 mag;
+
+	if(zIsMajor)
+	{
+		const Bool negative = v.z < 0.0;
+		faceIndex = negative ? 5u : 4u;
+		uv = Vec2(negative ? -v.x : v.x, -v.y);
+		mag = absV.z;
+	}
+	else if(yIsMajor)
+	{
+		const Bool negative = v.y < 0.0;
+		faceIndex = negative ? 3u : 2u;
+		uv = Vec2(v.x, negative ? -v.z : v.z);
+		mag = absV.y;
+	}
+	else
+	{
+		const Bool negative = v.x < 0.0;
+		faceIndex = negative ? 1u : 0u;
+		uv = Vec2(negative ? v.z : -v.z, -v.y);
+		mag = absV.x;
+	}
+
+	return 0.5 / mag * uv + 0.5;
+}
+
+// Returns a grey color whose 3 channels are the plain average of the input channels.
+RVec3 grayScale(const RVec3 col)
+{
+	const F32 channelSum = col.r + col.g + col.b;
+	const F32 average = channelSum * (1.0 / 3.0);
+	return RVec3(average, average, average);
+}
+
+// Interpolates between a weighted grey version of col (factor 0) and col itself (factor 1).
+Vec3 saturateColor(const Vec3 col, const F32 factor)
+{
+	const F32 luma = dot(col, Vec3(0.2125, 0.7154, 0.0721));
+	return lerp(Vec3(luma, luma, luma), col, factor);
+}
+
+// Raises every channel of col to the power of 1/gamma (per channel gamma).
+Vec3 gammaCorrection(Vec3 gamma, Vec3 col)
+{
+	const Vec3 exponent = 1.0 / gamma;
+	return pow(col, exponent);
+}
+
+// Reads a texel and sharpens it against its neighbours: 4 diagonal taps, plus 4 axis-aligned taps when detailed is
+// true. The result is clamped to be non-negative. Can use 0.15 for sharpenFactor
+Vec3 readSharpen(Texture2D tex, SamplerState sampl, Vec2 uv, F32 sharpenFactor, Bool detailed)
+{
+	const Vec3 center = tex.SampleLevel(sampl, uv, 0.0).rgb;
+
+	// Diagonal neighbours
+	Vec3 neighbours = tex.SampleLevel(sampl, uv, 0.0, IVec2(1, 1)).rgb;
+	neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, -1)).rgb;
+	neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(1, -1)).rgb;
+	neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, 1)).rgb;
+	F32 tapCount = 4.0;
+
+	if(detailed)
+	{
+		// Axis-aligned neighbours
+		neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(0, 1)).rgb;
+		neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(1, 0)).rgb;
+		neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(-1, 0)).rgb;
+		neighbours += tex.SampleLevel(sampl, uv, 0.0, IVec2(0, -1)).rgb;
+		tapCount = 8.0;
+	}
+
+	const Vec3 sharpened = center * (tapCount * sharpenFactor + 1.0) - sharpenFactor * neighbours;
+	return max(Vec3(0.0, 0.0, 0.0), sharpened);
+}
+
+// Returns the per-channel minimum of the texel at uv and its 8 neighbours.
+Vec3 readErosion(Texture2D tex, SamplerState sampl, const Vec2 uv)
+{
+	Vec3 minValue = tex.SampleLevel(sampl, uv, 0.0).rgb;
+
+	// The texel offset is passed as literals, so a macro is used instead of a loop
+#define ANKI_EROSION(x, y) minValue = min(tex.SampleLevel(sampl, uv, 0.0, IVec2(x, y)).rgb, minValue)
+
+	ANKI_EROSION(1, 1);
+	ANKI_EROSION(-1, -1);
+	ANKI_EROSION(1, -1);
+	ANKI_EROSION(-1, 1);
+	ANKI_EROSION(0, 1);
+	ANKI_EROSION(1, 0);
+	ANKI_EROSION(-1, 0);
+	ANKI_EROSION(0, -1);
+
+#undef ANKI_EROSION
+
+	return minValue;
+}
+
+// 5 color heatmap from a factor. factor * 4 selects one of 4 bands (black to blue, blue to green, green to yellow,
+// yellow to red) and its fractional part interpolates inside the band.
+Vec3 heatmap(const F32 factor)
+{
+	F32 band;
+	const F32 t = modf(factor * 4.0, band);
+
+	Vec3 from;
+	Vec3 to;
+	if(band < 1.0)
+	{
+		from = Vec3(0.0, 0.0, 0.0);
+		to = Vec3(0.0, 0.0, 1.0);
+	}
+	else if(band < 2.0)
+	{
+		from = Vec3(0.0, 0.0, 1.0);
+		to = Vec3(0.0, 1.0, 0.0);
+	}
+	else if(band < 3.0)
+	{
+		from = Vec3(0.0, 1.0, 0.0);
+		to = Vec3(1.0, 1.0, 0.0);
+	}
+	else
+	{
+		from = Vec3(1.0, 1.0, 0.0);
+		to = Vec3(1.0, 0.0, 0.0);
+	}
+
+	return lerp(from, to, t);
+}
+
+// Return a color per cubemap face. The +X is red, -X dark red, +Y green, -Y dark green, +Z blue, -Z dark blue. Any
+// index other than 0 to 4 gets the dark blue color.
+Vec3 colorPerCubeFace(const U32 dir)
+{
+	if(dir == 0u)
+	{
+		return Vec3(1.0, 0.0, 0.0);
+	}
+	else if(dir == 1u)
+	{
+		return Vec3(0.25, 0.0, 0.0);
+	}
+	else if(dir == 2u)
+	{
+		return Vec3(0.0, 1.0, 0.0);
+	}
+	else if(dir == 3u)
+	{
+		return Vec3(0.0, 0.25, 0.0);
+	}
+	else if(dir == 4u)
+	{
+		return Vec3(0.0, 0.0, 1.0);
+	}
+	else
+	{
+		return Vec3(0.0, 0.0, 0.25);
+	}
+}
+
+// Returns true if any channel of the color is NaN or infinite.
+Bool incorrectColor(const Vec3 c)
+{
+	const Bool hasNan = any(isnan(c));
+	const Bool hasInf = any(isinf(c));
+	return hasNan || hasInf;
+}
+
+// Helper of cubeCoordSolidAngle: atan2(x * y, sqrt(x^2 + y^2 + 1)).
+F32 areaElement(const F32 x, const F32 y)
+{
+	const F32 hypotenuse = sqrt(x * x + y * y + 1.0);
+	return atan2(x * y, hypotenuse);
+}
+
+// Compute the solid angle of a cube. Solid angle is the area of a sphere when projected into a cubemap. It's also the
+// delta omega (dω) in the irradiance integral and other integrals that operate in a sphere.
+// The value is built out of areaElement evaluated at the 4 corners of norm -/+ 1/cubeFaceSize.
+// http://www.rorydriscoll.com/2012/01/15/cubemap-texel-solid-angle/
+F32 cubeCoordSolidAngle(Vec2 norm, F32 cubeFaceSize)
+{
+	const F32 invFaceSize = 1.0f / cubeFaceSize;
+	const Vec2 lo = norm - Vec2(invFaceSize, invFaceSize);
+	const Vec2 hi = norm + Vec2(invFaceSize, invFaceSize);
+
+	const F32 a00 = areaElement(lo.x, lo.y);
+	const F32 a01 = areaElement(lo.x, hi.y);
+	const F32 a10 = areaElement(hi.x, lo.y);
+	const F32 a11 = areaElement(hi.x, hi.y);
+	return a00 - a01 - a10 + a11;
+}
+
+// A convenience function to skip out of bounds invocations on post-process compute shaders. Returns true only when
+// the invocation count is not a multiple of the workgroup size and this invocation is outside globalInvocationCount.
+// Both the arguments should be constexpr.
+#if defined(ANKI_COMPUTE_SHADER) && ANKI_GLSL
+Bool skipOutOfBoundsInvocations(UVec2 workgroupSize, UVec2 globalInvocationCount)
+{
+	const Bool hasPartialWorkgroups =
+		(globalInvocationCount.x % workgroupSize.x) != 0u || (globalInvocationCount.y % workgroupSize.y) != 0u;
+	if(!hasPartialWorkgroups)
+	{
+		return false;
+	}
+
+	return gl_GlobalInvocationID.x >= globalInvocationCount.x || gl_GlobalInvocationID.y >= globalInvocationCount.y;
+}
+#endif
+
+// Create a matrix from some direction. zAxis becomes the 3rd vector passed to the Mat3 constructor and the other 2
+// vectors are derived from it.
+Mat3 rotationFromDirection(Vec3 zAxis)
+{
+#if 0
+	const Vec3 z = zAxis;
+	const Bool alignsWithXBasis = abs(z.x - 1.0) <= kEpsilonf; // aka z == Vec3(1.0, 0.0, 0.0)
+	Vec3 x = (alignsWithXBasis) ? Vec3(0.0, 0.0, 1.0) : Vec3(1.0, 0.0, 0.0);
+	const Vec3 y = normalize(cross(x, z));
+	x = normalize(cross(z, y));
+	return Mat3(x, y, z);
+#else
+	// http://jcgt.org/published/0006/01/01/
+	const Vec3 z = zAxis;
+	const F32 sign = (z.z >= 0.0) ? 1.0 : -1.0;
+	const F32 a = -1.0 / (sign + z.z);
+	const F32 b = z.x * z.y * a;
+
+	// Square with a multiplication. pow() is not defined for a negative base (HLSL returns NaN) and z.x/z.y can be
+	// negative
+	const Vec3 x = Vec3(1.0 + sign * a * (z.x * z.x), sign * b, -sign * z.x);
+	const Vec3 y = Vec3(b, sign + a * (z.y * z.y), -z.y);
+
+	return Mat3(x, y, z);
+#endif
+}
+
+#if defined(ANKI_COMPUTE_SHADER) && ANKI_GLSL
+// Bitfield insert: takes the lowest "bits" bits from ins and the remaining bits from src.
+// See getOptimalGlobalInvocationId8x8Amd
+U32 _ABfiM(U32 src, U32 ins, U32 bits)
+{
+	const U32 lowMask = (1u << bits) - 1u;
+	const U32 inserted = ins & lowMask;
+	const U32 kept = src & (~lowMask);
+	return inserted | kept;
+}
+
+// Bitfield extract: returns "bits" bits of src starting at bit offset off.
+// See getOptimalGlobalInvocationId8x8Amd
+U32 _ABfe(U32 src, U32 off, U32 bits)
+{
+	const U32 lowMask = (1u << bits) - 1u;
+	const U32 shifted = src >> off;
+	return shifted & lowMask;
+}
+
+// Remaps a flat index to a 2D coordinate by shuffling its bits.
+// See getOptimalGlobalInvocationId8x8Amd
+UVec2 _ARmpRed8x8(U32 a)
+{
+	const U32 x = _ABfiM(_ABfe(a, 2u, 3u), a, 1u);
+	const U32 y = _ABfiM(_ABfe(a, 3u, 3u), _ABfe(a, 1u, 2u), 2u);
+	return UVec2(x, y);
+}
+
+// Global invocation ID for 8x8 workgroups where the local part comes from remapping gl_LocalInvocationIndex.
+// https://github.com/GPUOpen-Effects/FidelityFX-CAS/blob/master/ffx-cas/ffx_a.h
+UVec2 getOptimalGlobalInvocationId8x8Amd()
+{
+	const UVec2 workgroupOrigin = gl_WorkGroupID.xy * UVec2(8u);
+	return workgroupOrigin + _ARmpRed8x8(gl_LocalInvocationIndex);
+}
+
+// Global invocation ID for 8x8 workgroups where the workgroup ID is swizzled so that workgroups are walked in
+// vertical tiles that are (up to) 8 workgroups wide.
+// https://github.com/LouisBavoil/ThreadGroupIDSwizzling/blob/master/ThreadGroupTilingX.hlsl
+UVec2 getOptimalGlobalInvocationId8x8Nvidia()
+{
+	const U32 kMaxTileWidth = 8u;
+	const UVec2 kWorkgroupSize = UVec2(8u);
+
+	const U32 groupsPerPerfectTile = kMaxTileWidth * gl_NumWorkGroups.y;
+	const U32 perfectTileCount = gl_NumWorkGroups.x / kMaxTileWidth;
+	const U32 groupsInAllPerfectTiles = perfectTileCount * kMaxTileWidth * gl_NumWorkGroups.y;
+
+	const U32 flatGroupId = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;
+	const U32 tileId = flatGroupId / groupsPerPerfectTile;
+	const U32 idInsideTile = flatGroupId % groupsPerPerfectTile;
+
+	// Workgroups past the perfect tiles belong to the last, narrower, tile
+	U32 tileWidth;
+	if(groupsInAllPerfectTiles <= flatGroupId)
+	{
+		tileWidth = gl_NumWorkGroups.x % kMaxTileWidth;
+	}
+	else
+	{
+		tileWidth = kMaxTileWidth;
+	}
+
+	const U32 yInsideTile = idInsideTile / tileWidth;
+	const U32 xInsideTile = idInsideTile % tileWidth;
+
+	const U32 swizzledFlatGroupId = tileId * kMaxTileWidth + yInsideTile * gl_NumWorkGroups.x + xInsideTile;
+
+	UVec2 swizzledGroupId;
+	swizzledGroupId.x = swizzledFlatGroupId % gl_NumWorkGroups.x;
+	swizzledGroupId.y = swizzledFlatGroupId / gl_NumWorkGroups.x;
+
+	UVec2 swizzledGlobalId;
+	swizzledGlobalId.x = kWorkgroupSize.x * swizzledGroupId.x + gl_LocalInvocationID.x;
+	swizzledGlobalId.y = kWorkgroupSize.y * swizzledGroupId.y + gl_LocalInvocationID.y;
+	return swizzledGlobalId;
+}
+#endif
+
+// Gaussian distribution function. s is the standard deviation and x the distance from the mean
+F32 gaussianWeight(F32 s, F32 x)
+{
+	const F32 scale = 1.0 / (s * sqrt(2.0 * kPi));
+	const F32 falloff = exp((x * x) / (-2.0 * s * s));
+	return scale * falloff;
+}
+
+#if ANKI_GLSL
+// Manual bilinear filtering built out of 4 fetches that are meant to be done with a nearest sampler. textureSize is
+// the size of the texture and lod the mip level to read.
+Vec4 bilinearFiltering(Texture2D tex, SamplerState nearestSampler, Vec2 uv, F32 lod, Vec2 textureSize)
+{
+	const Vec2 texelSize = 1.0 / textureSize;
+	const Vec2 unnormTexCoord = (uv * textureSize) - 0.5;
+	const Vec2 f = frac(unnormTexCoord);
+
+	// Center of the first texel of the 2x2 footprint. The 4 taps have to be relative to it (and not to uv), otherwise
+	// they don't match the interpolation factors in f
+	const Vec2 snapTexCoord = (floor(unnormTexCoord) + 0.5) / textureSize;
+
+	const Vec4 s1 = textureLod(tex, nearestSampler, snapTexCoord, lod);
+	const Vec4 s2 = textureLod(tex, nearestSampler, snapTexCoord + Vec2(texelSize.x, 0.0), lod);
+	const Vec4 s3 = textureLod(tex, nearestSampler, snapTexCoord + Vec2(0.0, texelSize.y), lod);
+	const Vec4 s4 = textureLod(tex, nearestSampler, snapTexCoord + texelSize, lod);
+	return mix(mix(s1, s2, f.x), mix(s3, s4, f.x), f.y);
+}
+#endif
+
+// Offsets a blue noise value by a per-frame amount (the frame index wraps every 64 frames) and keeps the fractional
+// part. https://www.shadertoy.com/view/WsfBDf
+Vec3 animateBlueNoise(Vec3 inputBlueNoise, U32 frameIdx)
+{
+	const F32 kGoldenRatioConjugate = 0.61803398875;
+	const F32 wrappedFrame = F32(frameIdx % 64u);
+	return frac(inputBlueNoise + wrappedFrame * kGoldenRatioConjugate);
+}
+
+#if defined(ANKI_FRAGMENT_SHADER)
+/// Computes a mip level out of the screen space derivatives of normalizedUvs. The result is never negative.
+/// https://bgolus.medium.com/distinctive-derivative-differences-cce38d36797b
+/// normalizedUvs is uv*textureResolution
+F32 computeMipLevel(Vec2 normalizedUvs)
+{
+	const Vec2 derivX = ddx_coarse(normalizedUvs);
+	const Vec2 derivY = ddy_coarse(normalizedUvs);
+	const F32 lenSqX = dot(derivX, derivX);
+	const F32 lenSqY = dot(derivY, derivY);
+	return max(0.0, 0.5 * log2(max(lenSqX, lenSqY)));
+}
+#endif
+
+#if ANKI_GLSL
+#	if ANKI_SUPPORTS_64BIT_TYPES
+/// The regular findLSB in glslang has some issues since it invokes a builtin that is only supposed to be used with
+/// 32bit input. This alternative looks at the low 32 bits first and falls back to the high 32 bits. It expects that
+/// the input is not zero.
+I32 findLSB2(U64 v)
+{
+	const I32 lowHalfLsb = findLSB(U32(v));
+	if(lowHalfLsb >= 0)
+	{
+		return lowHalfLsb;
+	}
+
+	return findLSB(U32(v >> 32ul)) + 32;
+}
+#	endif
+
+/// 32bit overload of findLSB2 that simply forwards to findLSB. It exists to go in pair with the 64bit version.
+I32 findLSB2(U32 v)
+{
+	const I32 lsb = findLSB(v);
+	return lsb;
+}
+#endif
+
+/// Encode the shading rate to be stored in an SRI. The rates should be power of two, can't be zero and can't exceed 4.
+/// So the possible values are 1,2,4. The X rate lands in bits 2-3 and the Y rate in the bits below.
+U32 encodeVrsRate(UVec2 rateXY)
+{
+	const U32 encodedX = (rateXY.x << 1u) & 12u;
+	const U32 encodedY = rateXY.y >> 1u;
+	return encodedY | encodedX;
+}
+
+/// Debug color for a shading rate: red for 1x1 and moving towards green as the rate gets coarser (4x4). Rates that are
+/// not a combination of 1, 2 and 4 produce black.
+Vec3 visualizeVrsRate(UVec2 rate)
+{
+#define ANKI_RATE_IS(x, y) (rate.x == (x) && rate.y == (y))
+
+	Vec3 color = Vec3(0.0, 0.0, 0.0);
+
+	if(ANKI_RATE_IS(1u, 1u))
+	{
+		color = Vec3(1.0, 0.0, 0.0);
+	}
+	else if(ANKI_RATE_IS(2u, 1u) || ANKI_RATE_IS(1u, 2u))
+	{
+		color = Vec3(1.0, 0.5, 0.0);
+	}
+	else if(ANKI_RATE_IS(2u, 2u) || ANKI_RATE_IS(4u, 1u) || ANKI_RATE_IS(1u, 4u))
+	{
+		color = Vec3(1.0, 1.0, 0.0);
+	}
+	else if(ANKI_RATE_IS(4u, 2u) || ANKI_RATE_IS(2u, 4u))
+	{
+		color = Vec3(0.65, 1.0, 0.0);
+	}
+	else if(ANKI_RATE_IS(4u, 4u))
+	{
+		color = Vec3(0.0, 1.0, 0.0);
+	}
+
+#undef ANKI_RATE_IS
+
+	return color;
+}
+
+/// Decodes a number produced by encodeVrsRate(). Returns the shading rates: X is read from bits 2-3 and Y from
+/// bits 0-1.
+UVec2 decodeVrsRate(U32 texel)
+{
+	const U32 log2RateX = (texel >> 2u) & 3u;
+	const U32 log2RateY = texel & 3u;
+	return UVec2(1u << log2RateX, 1u << log2RateY);
+}
+
+/// 3D coordinates to equirectangular 2D coordinates. The angles are scaled by 0.1591 and 0.3183 and then offset by 0.5.
+Vec2 equirectangularMapping(Vec3 v)
+{
+	const Vec2 angles = Vec2(atan2(v.z, v.x), asin(v.y));
+	const Vec2 scaled = angles * Vec2(0.1591, 0.3183);
+	return scaled + 0.5;
+}
+
+/// Converts a linear color to sRGB. The input is first clamped to a small positive minimum and the result is the
+/// smaller of the linear segment and the power segment of the curve.
+Vec3 linearToSRgb(Vec3 linearRgb)
+{
+	const F32 kMinValue = 6.10352e-5;
+	const F32 kInvGamma = 1.0 / 2.4;
+
+	linearRgb = max(Vec3(kMinValue, kMinValue, kMinValue), linearRgb);
+
+	const Vec3 linearSegment = linearRgb * 12.92;
+	const Vec3 powerSegment = pow(max(linearRgb, 0.00313067), Vec3(kInvGamma, kInvGamma, kInvGamma)) * 1.055 - 0.055;
+	return min(linearSegment, powerSegment);
+}
+
+/// Converts an sRGB color to linear. Channels below 0.04045 use the linear segment (value / 12.92) and the rest use
+/// the power segment.
+Vec3 sRgbToLinear(Vec3 sRgb)
+{
+	const Vec3 linearSegment = sRgb / 12.92;
+#if ANKI_GLSL
+	const Vec3 powerSegment = pow((sRgb + 0.055) / 1.055, Vec3(2.4));
+	const bvec3 useLinearSegment = lessThan(sRgb, Vec3(0.04045));
+	return mix(powerSegment, linearSegment, useLinearSegment);
+#else
+	const Vec3 powerSegment = pow((sRgb + 0.055) / 1.055, Vec3(2.4, 2.4, 2.4));
+	const bool3 useLinearSegment = sRgb < Vec3(0.04045, 0.04045, 0.04045);
+	return lerp(powerSegment, linearSegment, useLinearSegment);
+#endif
+}
+
+/// Multiplies the color with a grain value derived from uv and time. strength scales how far the grain can move away
+/// from 1.0.
+#if ANKI_GLSL
+ANKI_RP Vec3 filmGrain(ANKI_RP Vec3 color, Vec2 uv, ANKI_RP F32 strength, ANKI_RP F32 time)
+{
+	const F32 seed = (uv.x + 4.0) * (uv.y + 4.0) * time;
+	const F32 noise = mod((mod(seed, 13.0) + 1.0) * (mod(seed, 123.0) + 1.0), 0.01) - 0.005;
+	const F32 grain = 1.0 - noise * strength;
+	return color * grain;
+}
+#else
+// NOTE(review): mod() is not an HLSL intrinsic. Presumably it is provided by one of the included headers - confirm
+template<typename TVec3, typename TFloat>
+TVec3 filmGrain(TVec3 color, Vec2 uv, TFloat strength, TFloat time)
+{
+	const TFloat seed = (uv.x + 4.0) * (uv.y + 4.0) * time;
+	const TFloat noise = mod((mod(seed, 13.0) + 1.0) * (mod(seed, 123.0) + 1.0), 0.01) - 0.005;
+	const TFloat grain = 1.0 - noise * strength;
+	return color * grain;
+}
+#endif
+
+/// Sin approximation built out of a frac() wrap and a parabola: https://www.desmos.com/calculator/svgcjfskne
+F32 fastSin(F32 x)
+{
+	const F32 k2Pi = 2.0 * kPi;
+	const F32 kPiOver2 = kPi / 2.0;
+
+	// Wrap the (shifted) angle into [0, 1) and then remap it to [-1, 1)
+	const F32 wrapped = frac((x + kPiOver2) / (k2Pi) + 0.75);
+	const F32 t = wrapped * 2.0 - 1.0;
+
+	// Parabolic approximation
+	const F32 parabola = t * abs(t) - t;
+	return parabola * 4.0;
+}
+
+/// Cos approximation. It's fastSin with the input shifted by pi/2
+F32 fastCos(F32 x)
+{
+	const F32 shifted = x + kPi / 2.0;
+	return fastSin(shifted);
+}
+
+/// Transforms v by a matrix that is given as 3 Vec4 rows. Every output component is the dot product of a row with v.
+Vec3 transform(Vec4 mat[3u], Vec4 v)
+{
+	return Vec3(dot(mat[0], v), dot(mat[1], v), dot(mat[2], v));
+}

+ 357 - 0
AnKi/Shaders/GBufferGeneric2.ankiprog

@@ -0,0 +1,357 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// Mark this program as HLSL
+#pragma anki hlsl
+
+// Mutators, each followed by the values it may take
+#pragma anki mutator ANKI_LOD 0 1 2
+#pragma anki mutator ANKI_VELOCITY 0 1
+#pragma anki mutator ANKI_TECHNIQUE 0 1 2
+#pragma anki mutator ANKI_BONES 0 1
+#pragma anki mutator DIFFUSE_TEX 0 1
+#pragma anki mutator SPECULAR_TEX 0 1
+#pragma anki mutator ROUGHNESS_TEX 0 1
+#pragma anki mutator METAL_TEX 0 1
+#pragma anki mutator NORMAL_TEX 0 1
+#pragma anki mutator PARALLAX 0 1
+#pragma anki mutator EMISSIVE_TEX 0 1
+#pragma anki mutator ALPHA_TEST 0 1
+
+// Mutator value combinations to skip: alpha test without a diffuse texture, and velocity or LODs above 0 for
+// techniques 1 and 2
+#pragma anki skip_mutation ALPHA_TEST 1 DIFFUSE_TEX 0
+#pragma anki skip_mutation ANKI_VELOCITY 1 ANKI_TECHNIQUE 1
+#pragma anki skip_mutation ANKI_VELOCITY 1 ANKI_TECHNIQUE 2
+#pragma anki skip_mutation ANKI_LOD 1 ANKI_TECHNIQUE 1
+#pragma anki skip_mutation ANKI_LOD 2 ANKI_TECHNIQUE 1
+#pragma anki skip_mutation ANKI_LOD 1 ANKI_TECHNIQUE 2
+#pragma anki skip_mutation ANKI_LOD 2 ANKI_TECHNIQUE 2
+
+// Some defines to clear up things. They combine the mutators above into the conditions the code actually tests:
+// - REALLY_ALPHA_TEST: alpha testing needs a diffuse texture to read the alpha from
+// - UVS: texture coordinates are needed by the GBuffer technique or by alpha testing
+// - REALLY_VELOCITY: velocity is written for moving or skinned objects, GBuffer technique only
+// - REALLY_USING_PARALLAX: parallax is limited to GBuffer, LOD 0 and no alpha test
+#define REALLY_ALPHA_TEST (ALPHA_TEST && DIFFUSE_TEX)
+#define UVS (ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER || REALLY_ALPHA_TEST)
+#define REALLY_VELOCITY ((ANKI_VELOCITY || ANKI_BONES) && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER)
+#define REALLY_USING_PARALLAX \
+	(PARALLAX == 1 && ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER && ANKI_LOD == 0 && ALPHA_TEST == 0)
+
+#include <AnKi/Shaders/Include/MaterialTypes.h>
+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
+#include <AnKi/Shaders/PackFunctions.hlsl>
+#include <AnKi/Shaders/Functions.hlsl>
+
+// Declare the bindless resource arrays in their own descriptor set
+ANKI_BINDLESS_SET(kMaterialSetBindless)
+
+// Resources of the global and local material sets
+[[vk::binding(kMaterialBindingTrilinearRepeatSampler, kMaterialSetGlobal)]] SamplerState u_globalSampler;
+[[vk::binding(kMaterialBindingLocalUniforms, kMaterialSetLocal)]] StructuredBuffer<U32> u_localUniforms;
+[[vk::binding(kMaterialBindingRenderableGpuView, kMaterialSetLocal)]] StructuredBuffer<RenderableGpuView>
+	u_renderableGpuViews;
+[[vk::binding(kMaterialBindingGlobalUniforms, kMaterialSetGlobal)]] ConstantBuffer<MaterialGlobalUniforms>
+	u_globalUniforms;
+
+// Bone matrices of the current and the previous frame. Only declared when skinning is enabled
+#if ANKI_BONES
+[[vk::binding(kMaterialBindingBoneTransforms, kMaterialSetLocal)]] StructuredBuffer<Mat4> u_boneTransforms;
+[[vk::binding(kMaterialBindingPreviousBoneTransforms, kMaterialSetLocal)]] StructuredBuffer<Mat4>
+	u_prevFrameBoneTransforms;
+#endif
+
+// Per-material uniforms. Most members are conditional on a mutator: a texture index when the corresponding *_TEX
+// mutator is 1 and a constant value otherwise. The struct is read with loadAnKiLocalUniforms() in the fragment shader
+#pragma anki reflect AnKiLocalUniforms
+#pragma anki struct AnKiLocalUniforms
+#pragma anki member U32 m_normalTex if NORMAL_TEX is 1
+
+#pragma anki member Vec3 m_diffColor if DIFFUSE_TEX is 0
+#pragma anki member U32 m_diffTex if DIFFUSE_TEX is 1
+
+#pragma anki member F32 m_roughness if ROUGHNESS_TEX is 0
+#pragma anki member U32 m_roughnessTex if ROUGHNESS_TEX is 1
+
+#pragma anki member Vec3 m_specColor if SPECULAR_TEX is 0
+#pragma anki member U32 m_specTex if SPECULAR_TEX is 1
+
+#pragma anki member F32 m_metallic if METAL_TEX is 0
+#pragma anki member U32 m_metallicTex if METAL_TEX is 1
+
+#pragma anki member Vec3 m_emission if EMISSIVE_TEX is 0
+#pragma anki member U32 m_emissiveTex if EMISSIVE_TEX is 1
+
+#pragma anki member F32 m_heightmapScale if PARALLAX is 1
+#pragma anki member U32 m_heightTex if PARALLAX is 1
+
+#pragma anki member F32 m_subsurface
+#pragma anki struct end
+
+// Vertex shader input. The attribute locations come from VertexStreamId
+struct VertIn
+{
+	[[vk::location(VertexStreamId::kPosition)]] Vec3 m_position : POSITION;
+	U32 m_instanceId : SV_INSTANCEID; // Used to index u_renderableGpuViews
+
+	// Normal and tangent are only consumed by the GBuffer technique
+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
+	[[vk::location(VertexStreamId::kNormal)]] RVec3 m_normal : NORMAL;
+	[[vk::location(VertexStreamId::kTangent)]] RVec4 m_tangent : TANGENT; // w scales the bitangent
+#endif
+
+#if UVS
+	[[vk::location(VertexStreamId::kUv)]] Vec2 m_uv : TEXCOORD;
+#endif
+
+	// Up to 4 bone influences per vertex
+#if ANKI_BONES
+	[[vk::location(VertexStreamId::kBoneIds)]] UVec4 m_boneIndices : COLOR0;
+	[[vk::location(VertexStreamId::kBoneWeights)]] RVec4 m_boneWeights : COLOR1;
+#endif
+};
+
+// Vertex shader output and fragment shader input
+struct VertOut
+{
+	Vec4 m_position : SV_POSITION;
+
+#if UVS
+	Vec2 m_uv : TEXCOORD;
+#endif
+
+	// xyw of the previous and current clip-space positions. The fragment shader does the perspective divide
+#if REALLY_VELOCITY
+	Vec3 m_prevClipXyw : PREV_CLIP;
+	Vec3 m_crntClipXyw : CRNT_CLIP;
+#endif
+
+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
+	RVec3 m_normal : NORMAL;
+	RVec3 m_tangent : TANGENT;
+	RVec3 m_bitangent : BINTANGENT; // NOTE(review): semantic looks like a typo of BITANGENT, confirm before renaming
+#endif
+};
+
+// Fragment shader output: the 4 render targets that packGBuffer() fills. Not declared for the shadows technique
+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER || ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ
+struct FragOut
+{
+	Vec4 m_color0 : SV_TARGET0;
+	Vec4 m_color1 : SV_TARGET1;
+	Vec4 m_color2 : SV_TARGET2;
+	Vec2 m_color3 : SV_TARGET3;
+};
+#endif
+
+#pragma anki start vert
+
+#if ANKI_BONES
+// Blend the 4 bone matrices by their weights and transform the position with the result. For the GBuffer technique
+// it also transforms the normal and tangent and computes the previous frame's position
+void skinning(VertIn input, inout Vec3 pos, inout Vec3 prevPos, inout RVec3 normal, inout RVec4 tangent)
+{
+	// Weighted sum of the bone matrices, for the current and the previous frame
+	Mat4 skinMat = u_boneTransforms[input.m_boneIndices[0]] * input.m_boneWeights[0];
+	Mat4 prevSkinMat = u_prevFrameBoneTransforms[input.m_boneIndices[0]] * input.m_boneWeights[0];
+	// NOTE(review): HLSL documents this attribute as [unroll], confirm the double-bracket spelling is honoured
+	[[unroll]] for(U32 i = 1u; i < 4u; ++i)
+	{
+		skinMat += u_boneTransforms[input.m_boneIndices[i]] * input.m_boneWeights[i];
+		prevSkinMat += u_prevFrameBoneTransforms[input.m_boneIndices[i]] * input.m_boneWeights[i];
+	}
+
+#	if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
+	// pos is still the un-skinned position here, it gets overwritten at the end of the function
+	prevPos = mul(prevSkinMat, Vec4(pos, 1.0)).xyz;
+	tangent.xyz = mul(skinMat, Vec4(tangent.xyz, 0.0)).xyz;
+	normal = mul(skinMat, Vec4(normal, 0.0)).xyz;
+#	endif
+	// These are not touched by the other techniques
+	ANKI_MAYBE_UNUSED(prevPos);
+	ANKI_MAYBE_UNUSED(tangent);
+	ANKI_MAYBE_UNUSED(normal);
+
+	pos = mul(skinMat, Vec4(pos, 1.0)).xyz;
+}
+#endif
+
+// Guarded by the same macro as the call site in main() and the VertOut members it writes
+#if REALLY_VELOCITY
+// Compute the clip-space position of the previous frame and store the xyw of both the previous and the current
+// clip-space positions in the output. Expects output.m_position to be already written
+void velocity(RenderableGpuView view, Vec3 prevLocalPos, inout VertOut output)
+{
+#	if ANKI_VELOCITY
+	// Object is also moving
+	const Vec4 trf[3] = view.m_previousWorldTransform;
+#	else
+	// Object is a skin that is not moving
+	const Vec4 trf[3] = view.m_worldTransform;
+#	endif
+
+	// Local space -> world space -> clip space of the previous frame
+	Vec4 v4 = Vec4(transform(trf, Vec4(prevLocalPos, 1.0)), 1.0);
+	v4 = mul(u_globalUniforms.m_previousViewProjectionMatrix, v4);
+
+	output.m_prevClipXyw = v4.xyw;
+	output.m_crntClipXyw = output.m_position.xyw;
+}
+#endif
+
+// Vertex shader entry point. Transforms the vertex to clip space and, depending on the technique and the mutators,
+// also outputs the UVs, the world-space TBN vectors and the data needed to compute the velocity
+VertOut main(VertIn input)
+{
+	const RenderableGpuView view = u_renderableGpuViews[input.m_instanceId];
+	VertOut output;
+
+	// All values in local space
+#if ANKI_BONES
+	Vec3 pos =
+		input.m_position * view.m_positionScaleF32AndTranslationVec3.x + view.m_positionScaleF32AndTranslationVec3.yzw;
+#else
+	Vec3 pos = input.m_position;
+#endif
+
+	// Without skinning the previous local position is the same as the current one
+	Vec3 prevPos = pos;
+	ANKI_MAYBE_UNUSED(prevPos);
+
+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
+	RVec3 normal = input.m_normal;
+	RVec4 tangent = input.m_tangent;
+#else
+	// Placeholders so that skinning() can be called with the same arguments
+	RVec3 normal = RVec3(0.0, 0.0, 0.0);
+	RVec4 tangent = RVec4(0.0, 0.0, 0.0, 0.0);
+	ANKI_MAYBE_UNUSED(normal);
+	ANKI_MAYBE_UNUSED(tangent);
+#endif
+
+#if UVS
+	output.m_uv = input.m_uv;
+#endif
+
+	// Do stuff
+#if ANKI_BONES
+	skinning(input, pos, prevPos, normal, tangent);
+#endif
+
+	// Local space -> world space -> clip space
+	output.m_position = Vec4(transform(view.m_worldTransform, Vec4(pos, 1.0)), 1.0);
+	output.m_position = mul(u_globalUniforms.m_viewProjectionMatrix, output.m_position);
+
+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
+	// Directions use w == 0.0 so the translation is ignored
+	output.m_normal = transform(view.m_worldTransform, Vec4(normal, 0.0));
+	output.m_tangent = transform(view.m_worldTransform, Vec4(tangent.xyz, 0.0));
+	output.m_bitangent = cross(output.m_normal, output.m_tangent) * tangent.w;
+#endif
+
+#if REALLY_VELOCITY
+	velocity(view, prevPos, output);
+#endif
+
+	return output;
+}
+
+#pragma anki end
+
+#pragma anki start frag
+
+// Discard the fragment if its alpha is exactly zero
+void doAlphaTest(RF32 alpha)
+{
+	const bool fullyTransparent = (alpha == 0.0);
+	if(fullyTransparent)
+	{
+		discard;
+	}
+}
+
+#if ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_SHADOWS
+// Fragment shader of the shadows technique. It has no outputs, it only performs the alpha test when enabled
+void main(VertOut input)
+{
+	ANKI_MAYBE_UNUSED(input);
+#	if REALLY_ALPHA_TEST
+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(u_localUniforms, 0u);
+	const RVec4 diffColorA = u_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(u_globalSampler, input.m_uv);
+	doAlphaTest(diffColorA.a);
+#	endif
+}
+#elif ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ
+// Fragment shader of the GBuffer EZ technique. Performs the alpha test when enabled and outputs zeros
+FragOut main(VertOut input)
+{
+	ANKI_MAYBE_UNUSED(input);
+#	if REALLY_ALPHA_TEST
+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(u_localUniforms, 0u);
+	const RVec4 diffColorA = u_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(u_globalSampler, input.m_uv);
+	doAlphaTest(diffColorA.a);
+#	endif
+	return (FragOut)0;
+}
+#elif ANKI_TECHNIQUE == ANKI_RENDERING_TECHNIQUE_GBUFFER
+// Do normal mapping: read the normal from the texture and rotate it with the interpolated tangent, bitangent and
+// normal of the surface
+RVec3 readNormalFromTexture(VertOut input, Texture2D<RVec4> map, SamplerState sampl, Vec2 texCoords)
+{
+	// First read the texture and remap it with (v - 0.5) * 2.0
+	const RVec3 nAtTangentspace = normalize((map.Sample(sampl, texCoords).rgb - 0.5) * 2.0);
+
+	const RVec3 n = normalize(input.m_normal);
+	const RVec3 t = normalize(input.m_tangent);
+	const RVec3 b = normalize(input.m_bitangent);
+
+	// HLSL matrix constructors fill the matrix row by row, so t, b and n end up as the ROWS of tbnMat
+	const RMat3 tbnMat = Mat3(t, b, n);
+
+	// Multiply with the vector on the left so the result is t * v.x + b * v.y + n * v.z. With rows t, b, n the
+	// mul(tbnMat, v) form would return (dot(t, v), dot(b, v), dot(n, v)), which is the inverse rotation
+	return mul(nAtTangentspace, tbnMat);
+}
+
+// Fragment shader of the GBuffer technique. Gathers every material property either from a bindless texture or from
+// the local uniforms (depending on the mutators) and packs them into the G-buffer render targets
+FragOut main(VertOut input)
+{
+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(u_localUniforms, 0u);
+
+	// Both branches are the same for now, parallax is not implemented
+#	if REALLY_USING_PARALLAX
+	// TODO
+	const Vec2 uv = input.m_uv;
+#	else
+	const Vec2 uv = input.m_uv;
+#	endif
+	ANKI_MAYBE_UNUSED(uv);
+
+#	if DIFFUSE_TEX
+#		if REALLY_ALPHA_TEST
+	// The alpha of the diffuse texture drives the alpha test
+	const RVec4 diffColorA = u_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(u_globalSampler, uv);
+	doAlphaTest(diffColorA.a);
+	const RVec3 diffColor = diffColorA.rgb;
+#		else
+	const RVec3 diffColor = u_bindlessTextures2dF32[localUniforms.m_diffTex].Sample(u_globalSampler, uv).rgb;
+#		endif
+#	else
+	const RVec3 diffColor = localUniforms.m_diffColor;
+#	endif
+
+#	if SPECULAR_TEX
+	const RVec3 specColor = u_bindlessTextures2dF32[localUniforms.m_specTex].Sample(u_globalSampler, uv).rgb;
+#	else
+	const RVec3 specColor = localUniforms.m_specColor;
+#	endif
+
+	// Roughness is read from the green channel
+#	if ROUGHNESS_TEX
+	const RF32 roughness = u_bindlessTextures2dF32[localUniforms.m_roughnessTex].Sample(u_globalSampler, uv).g;
+#	else
+	const RF32 roughness = localUniforms.m_roughness;
+#	endif
+
+	// Metallic is read from the blue channel
+#	if METAL_TEX
+	const RF32 metallic = u_bindlessTextures2dF32[localUniforms.m_metallicTex].Sample(u_globalSampler, uv).b;
+#	else
+	const RF32 metallic = localUniforms.m_metallic;
+#	endif
+
+#	if NORMAL_TEX
+	const RVec3 normal =
+		readNormalFromTexture(input, u_bindlessTextures2dF32[localUniforms.m_normalTex], u_globalSampler, uv);
+#	else
+	const RVec3 normal = normalize(input.m_normal);
+#	endif
+
+#	if EMISSIVE_TEX
+	const RVec3 emission = u_bindlessTextures2dF32[localUniforms.m_emissiveTex].Sample(u_globalSampler, uv).rgb;
+#	else
+	const RVec3 emission = localUniforms.m_emission;
+#	endif
+
+#	if ANKI_VELOCITY || ANKI_BONES
+	// The z component of the *ClipXyw vectors holds the clip-space w
+	const Vec2 prevNdc = input.m_prevClipXyw.xy / input.m_prevClipXyw.z;
+	const Vec2 crntNdc = input.m_crntClipXyw.xy / input.m_crntClipXyw.z;
+
+	// It's NDC_TO_UV(prevNdc) - NDC_TO_UV(crntNdc) or:
+	const Vec2 velocity = (prevNdc - crntNdc) * 0.5;
+#	else
+	// NOTE(review): presumably (1.0, 1.0) is the marker for "no velocity", confirm against the consumers of rt3
+	const Vec2 velocity = Vec2(1.0, 1.0);
+#	endif
+
+	GbufferInfo g;
+	g.m_diffuse = diffColor;
+	g.m_normal = normal;
+	g.m_f0 = specColor;
+	g.m_roughness = roughness;
+	g.m_subsurface = localUniforms.m_subsurface;
+	g.m_emission = emission;
+	g.m_metallic = metallic;
+	g.m_velocity = velocity;
+
+	FragOut output;
+	packGBuffer(g, output.m_color0, output.m_color1, output.m_color2, output.m_color3);
+	return output;
+}
+#endif
+
+#pragma anki end

+ 46 - 7
AnKi/Shaders/Include/Common.h

@@ -40,9 +40,27 @@ ANKI_END_NAMESPACE
 #	define ANKI_SHADER_FUNC_INLINE
 
 #	define ANKI_SHADER_STATIC_ASSERT(cond_)
+#	define ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(enum_)
 
 #	define constexpr static const
 
+#	define ANKI_SUPPORTS_16BIT_TYPES 0
+#	define ANKI_SUPPORTS_64BIT_TYPES !ANKI_PLATFORM_MOBILE
+
+// Does nothing useful with its argument. It only exists so that ANKI_MAYBE_UNUSED() counts as a use of a variable
+template<typename T>
+void maybeUnused(T a)
+{
+	a = a; // Self assignment of the by-value copy, the caller's variable is not modified
+}
+#	define ANKI_MAYBE_UNUSED(x) maybeUnused(x)
+
+#	define ANKI_BINDLESS_SET(s) \
+		[[vk::binding(0, s)]] Texture2D<uint4> u_bindlessTextures2dU32[kMaxBindlessTextures]; \
+		[[vk::binding(0, s)]] Texture2D<int4> u_bindlessTextures2dI32[kMaxBindlessTextures]; \
+		[[vk::binding(0, s)]] Texture2D<RVec4> u_bindlessTextures2dF32[kMaxBindlessTextures]; \
+		[[vk::binding(0, s)]] Texture2DArray<RVec4> u_bindlessTextures2dArrayF32[kMaxBindlessTextures]; \
+		[[vk::binding(1, s)]] Buffer<float4> u_bindlessTextureBuffersF32[kMaxBindlessReadonlyTextureBuffers];
+
 typedef float F32;
 constexpr uint kSizeof_F32 = 4u;
 typedef float2 Vec2;
@@ -52,6 +70,7 @@ constexpr uint kSizeof_Vec3 = 12u;
 typedef float4 Vec4;
 constexpr uint kSizeof_Vec4 = 16u;
 
+#	if ANKI_SUPPORTS_16BIT_TYPES
 typedef float16_t F16;
 constexpr uint kSizeof_F16 = 2u;
 typedef float16_t2 HVec2;
@@ -78,6 +97,7 @@ typedef int16_t3 I16Vec3;
 constexpr uint kSizeof_I16Vec3 = 6u;
 typedef int16_t4 I16Vec4;
 constexpr uint kSizeof_I16Vec4 = 8u;
+#	endif
 
 typedef uint U32;
 constexpr uint kSizeof_U32 = 4u;
@@ -97,7 +117,7 @@ constexpr uint kSizeof_IVec3 = 12u;
 typedef int32_t4 IVec4;
 constexpr uint kSizeof_IVec4 = 16u;
 
-#	if ANKI_SUPPORTS_64BIT
+#	if ANKI_SUPPORTS_64BIT_TYPES
 typedef uint64_t U64;
 constexpr uint kSizeof_U64 = 8u;
 typedef uint64_t2 U64Vec2;
@@ -118,23 +138,37 @@ constexpr uint kSizeof_I64Vec4 = 32u;
 #	endif
 
 typedef float3x3 Mat3;
+typedef float4x4 Mat4;
+typedef float3x4 Mat3x4;
 
 typedef bool Bool;
 
-#	if 0
+#	if ANKI_FORCE_FULL_FP_PRECISION
+typedef float RF32;
+typedef float2 RVec2;
+typedef float3 RVec3;
+typedef float4 RVec4;
+typedef float3x3 RMat3;
+#	else
 typedef min16float RF32;
 typedef min16float2 RVec2;
 typedef min16float3 RVec3;
 typedef min16float4 RVec4;
+typedef min16float3x3 RMat3;
 #	endif
 
 constexpr F32 kEpsilonf = 0.000001f;
-constexpr F16 kEpsilonhf = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee/
+#	if ANKI_SUPPORTS_16BIT_TYPES
+constexpr F16 kEpsilonhf = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
+#	endif
+constexpr RF32 kEpsilonRf = 0.0001f;
 
 constexpr U32 kMaxU32 = 0xFFFFFFFFu;
 constexpr F32 kMaxF32 = 3.402823e+38;
+#	if ANKI_SUPPORTS_16BIT_TYPES
 constexpr F16 kMaxF16 = (F16)65504.0;
 constexpr F16 kMinF16 = (F16)0.00006104;
+#	endif
 
 constexpr F32 kPi = 3.14159265358979323846f;
 
@@ -156,7 +190,7 @@ constexpr F32 kPi = 3.14159265358979323846f;
 
 #	define constexpr const
 
-#	define ANKI_SUPPORTS_64BIT !ANKI_PLATFORM_MOBILE
+#	define ANKI_SUPPORTS_64BIT_TYPES !ANKI_PLATFORM_MOBILE
 
 #	extension GL_EXT_control_flow_attributes : require
 #	extension GL_KHR_shader_subgroup_vote : require
@@ -178,7 +212,7 @@ constexpr F32 kPi = 3.14159265358979323846f;
 #	extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
 #	extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
 
-#	if ANKI_SUPPORTS_64BIT
+#	if ANKI_SUPPORTS_64BIT_TYPES
 #		extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
 #		extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
 #		extension GL_EXT_shader_atomic_int64 : enable
@@ -265,7 +299,7 @@ const uint kSizeof_ivec3 = 12u;
 #	define IVec4 ivec4
 const uint kSizeof_ivec4 = 16u;
 
-#	if ANKI_SUPPORTS_64BIT
+#	if ANKI_SUPPORTS_64BIT_TYPES
 #		define U64 uint64_t
 const uint kSizeof_uint64_t = 8u;
 #		define U64Vec2 u64vec2
@@ -296,7 +330,7 @@ const uint kSizeof_mat4x3 = 48u;
 
 #	define Bool bool
 
-#	if ANKI_SUPPORTS_64BIT
+#	if ANKI_SUPPORTS_64BIT_TYPES
 #		define Address U64
 #	else
 #		define Address UVec2
@@ -409,6 +443,11 @@ Bool all(Bool b)
 #	define lerp(a, b, t) mix(a, b, t)
 #	define atan2(x, y) atan(x, y)
 
+// Provide the HLSL asfloat() name on top of uintBitsToFloat()
+float asfloat(uint u)
+{
+	return uintBitsToFloat(u);
+}
+
 constexpr F32 kEpsilonf = 0.000001f;
 constexpr F16 kEpsilonhf = 0.0001hf; // Divisions by this should be OK according to http://weitz.de/ieee/
 constexpr ANKI_RP F32 kEpsilonRp = F32(kEpsilonhf);

+ 4 - 0
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -32,7 +32,11 @@ struct MeshGpuView
 	Vec3 m_positionTranslation;
 	F32 m_positionScale;
 
+#if ANKI_GLSL
 	U32 m_vertexOffsets[kMaxLodCount][kMaxVertexStreamIds];
+#else
+	U32 m_vertexOffsets[kMaxLodCount][(U32)VertexStreamId::kCount];
+#endif
 	U32 m_indexCounts[kMaxLodCount];
 	U32 m_indexOffsets[kMaxLodCount];
 };

+ 23 - 17
AnKi/Shaders/Include/MaterialTypes.h

@@ -19,30 +19,36 @@ struct MaterialGlobalUniforms
 };
 ANKI_SHADER_STATIC_ASSERT(sizeof(MaterialGlobalUniforms) == 14 * sizeof(Vec4));
 
-const U32 kMaterialSetBindless = 0u;
-const U32 kMaterialSetGlobal = 1u;
-const U32 kMaterialSetLocal = 2u;
+constexpr U32 kMaterialSetBindless = 0u;
+constexpr U32 kMaterialSetGlobal = 1u;
+constexpr U32 kMaterialSetLocal = 2u;
 
 // Begin global bindings
-const U32 kMaterialBindingTrilinearRepeatSampler = 0u;
-const U32 kMaterialBindingGlobalUniforms = 1u;
+constexpr U32 kMaterialBindingTrilinearRepeatSampler = 0u;
+constexpr U32 kMaterialBindingGlobalUniforms = 1u;
 
 // For forward shading:
-const U32 kMaterialBindingLinearClampSampler = 2u;
-const U32 kMaterialBindingDepthRt = 3u;
-const U32 kMaterialBindingLightVolume = 4u;
-const U32 kMaterialBindingClusterShadingUniforms = 5u;
-const U32 kMaterialBindingClusterShadingLights = 6u;
-const U32 kMaterialBindingClusters = 9u;
-const U32 kMaterialBindingShadowSampler = 10u;
+constexpr U32 kMaterialBindingLinearClampSampler = 2u;
+constexpr U32 kMaterialBindingDepthRt = 3u;
+constexpr U32 kMaterialBindingLightVolume = 4u;
+constexpr U32 kMaterialBindingClusterShadingUniforms = 5u;
+constexpr U32 kMaterialBindingClusterShadingLights = 6u;
+constexpr U32 kMaterialBindingClusters = 9u;
+constexpr U32 kMaterialBindingShadowSampler = 10u;
 // End global bindings
 
 // Begin local bindings
-const U32 kMaterialBindingLocalUniforms = 0u;
-const U32 kMaterialBindingRenderableGpuView = 1u;
-const U32 kMaterialBindingBoneTransforms = 2u;
-const U32 kMaterialBindingPreviousBoneTransforms = 3u;
-const U32 kMaterialBindingFirstNonStandardLocal = 4u;
+constexpr U32 kMaterialBindingLocalUniforms = 0u;
+constexpr U32 kMaterialBindingRenderableGpuView = 1u;
+constexpr U32 kMaterialBindingBoneTransforms = 2u;
+constexpr U32 kMaterialBindingPreviousBoneTransforms = 3u;
+constexpr U32 kMaterialBindingFirstNonStandardLocal = 4u;
 // End local bindings
 
+// Techniques
+#define ANKI_RENDERING_TECHNIQUE_GBUFFER 0
+#define ANKI_RENDERING_TECHNIQUE_GBUFFER_EZ 1
+#define ANKI_RENDERING_TECHNIQUE_SHADOWS 2
+#define ANKI_RENDERING_TECHNIQUE_FORWARD 3
+
 ANKI_END_NAMESPACE

+ 28 - 15
AnKi/Shaders/Include/MeshTypes.h

@@ -9,8 +9,13 @@
 
 ANKI_BEGIN_NAMESPACE
 
-#if __cplusplus
+#if defined(__cplusplus) || ANKI_HLSL
+
+#	if ANKI_HLSL
+enum class VertexStreamId : U32
+#	else
 enum class VertexStreamId : U8
+#	endif
 {
 	// For regular geometry
 	kPosition,
@@ -30,10 +35,16 @@ enum class VertexStreamId : U8
 	kParticleLife,
 	kParticleStartingLife,
 	kParticlePreviousPosition,
+
+	kCount = 6
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexStreamId)
 
+#	if ANKI_HLSL
+enum class VertexStreamMask : U32
+#	else
 enum class VertexStreamMask : U8
+#	endif
 {
 	kNone,
 
@@ -53,31 +64,33 @@ enum class VertexStreamMask : U8
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexStreamMask)
 
+#	if defined(__cplusplus)
 inline constexpr Array<Format, U32(VertexStreamId::kMeshRelatedCount)> kMeshRelatedVertexStreamFormats = {
 	Format::kR16G16B16_Unorm, Format::kR8G8B8A8_Snorm, Format::kR8G8B8A8_Snorm,
 	Format::kR32G32_Sfloat,   Format::kR8G8B8A8_Uint,  Format::kR8G8B8A8_Snorm};
 
 constexpr U32 kMaxVertexStreamIds = 6u;
+#	endif
 
 #else
 
 // For regular geometry
-const U32 kVertexStreamIdPosition = 0u;
-const U32 kVertexStreamIdNormal = 1u;
-const U32 kVertexStreamIdTangent = 2u;
-const U32 kVertexStreamIdUv = 3u;
-const U32 kVertexStreamIdBoneIds = 4u;
-const U32 kVertexStreamIdBoneWeights = 5u;
+constexpr U32 kVertexStreamIdPosition = 0u;
+constexpr U32 kVertexStreamIdNormal = 1u;
+constexpr U32 kVertexStreamIdTangent = 2u;
+constexpr U32 kVertexStreamIdUv = 3u;
+constexpr U32 kVertexStreamIdBoneIds = 4u;
+constexpr U32 kVertexStreamIdBoneWeights = 5u;
 
 // For particles
-const U32 kVertexStreamIdParticlePosition = 0u;
-const U32 kVertexStreamIdParticleScale = 1u;
-const U32 kVertexStreamIdParticleAlpha = 2u;
-const U32 kVertexStreamIdParticleLife = 3u;
-const U32 kVertexStreamIdParticleStartingLife = 4u;
-const U32 kVertexStreamIdParticlePreviousPosition = 5u;
-
-const U32 kMaxVertexStreamIds = 6u;
+constexpr U32 kVertexStreamIdParticlePosition = 0u;
+constexpr U32 kVertexStreamIdParticleScale = 1u;
+constexpr U32 kVertexStreamIdParticleAlpha = 2u;
+constexpr U32 kVertexStreamIdParticleLife = 3u;
+constexpr U32 kVertexStreamIdParticleStartingLife = 4u;
+constexpr U32 kVertexStreamIdParticlePreviousPosition = 5u;
+
+constexpr U32 kMaxVertexStreamIds = 6u;
 #endif
 
 ANKI_END_NAMESPACE

+ 1 - 1
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -10,7 +10,7 @@
 ANKI_BEGIN_NAMESPACE
 
 // RT shadows
-const U32 kMaxRtShadowLayers = 8u;
+constexpr U32 kMaxRtShadowLayers = 8u;
 
 struct RtShadowsUniforms
 {

+ 33 - 33
AnKi/Shaders/Include/ModelTypes.h

@@ -30,18 +30,18 @@ enum class VertexAttributeId : U8
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexAttributeId)
 #else
-const U32 kVertexAttributeIdPosition = 0u;
-const U32 kVertexAttributeIdUv0 = 1u;
-const U32 kVertexAttributeIdUv1 = 2u;
-const U32 kVertexAttributeIdNormal = 3u;
-const U32 kVertexAttributeIdTangent = 4u;
-const U32 kVertexAttributeIdColor = 5u;
-const U32 kVertexAttributeIdBoneWeights = 6u;
-const U32 kVertexAttributeIdBoneIndices = 7u;
-const U32 kVertexAttributeIdCount = 8u;
-
-const U32 kVertexAttributeIdScale = kVertexAttributeIdUv0; ///< Only for particles.
-const U32 kVertexAttributeIdAlpha = kVertexAttributeIdUv1; ///< Only for particles.
+constexpr U32 kVertexAttributeIdPosition = 0u;
+constexpr U32 kVertexAttributeIdUv0 = 1u;
+constexpr U32 kVertexAttributeIdUv1 = 2u;
+constexpr U32 kVertexAttributeIdNormal = 3u;
+constexpr U32 kVertexAttributeIdTangent = 4u;
+constexpr U32 kVertexAttributeIdColor = 5u;
+constexpr U32 kVertexAttributeIdBoneWeights = 6u;
+constexpr U32 kVertexAttributeIdBoneIndices = 7u;
+constexpr U32 kVertexAttributeIdCount = 8u;
+
+constexpr U32 kVertexAttributeIdScale = kVertexAttributeIdUv0; ///< Only for particles.
+constexpr U32 kVertexAttributeIdAlpha = kVertexAttributeIdUv1; ///< Only for particles.
 #endif
 
 // Vertex buffers
@@ -56,10 +56,10 @@ enum class VertexAttributeBufferId : U8
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexAttributeBufferId)
 #else
-const U32 kVertexAttributeBufferIdPosition = 0u;
-const U32 kVertexAttributeBufferIdNormalTangentUv0 = 1u;
-const U32 kVertexAttributeBufferIdBone = 2u;
-const U32 kVertexAttributeBufferIdCount = 3u;
+constexpr U32 kVertexAttributeBufferIdPosition = 0u;
+constexpr U32 kVertexAttributeBufferIdNormalTangentUv0 = 1u;
+constexpr U32 kVertexAttributeBufferIdBone = 2u;
+constexpr U32 kVertexAttributeBufferIdCount = 3u;
 #endif
 
 /// The main vertex that contains normals, tangents and UVs.
@@ -70,8 +70,8 @@ struct MainVertex
 	Vec2 m_uv0;
 };
 
-const U32 kSizeof_MainVertex = 4u * 4u;
-const U32 kAlignof_MainVertex = 4u;
+constexpr U32 kSizeof_MainVertex = 4u * 4u;
+constexpr U32 kAlignof_MainVertex = 4u;
 ANKI_SHADER_STATIC_ASSERT(kSizeof_MainVertex == sizeof(MainVertex));
 
 /// The vertex that contains the bone influences.
@@ -81,8 +81,8 @@ struct BoneInfoVertex
 	U8Vec4 m_boneWeights;
 };
 
-const U32 kSizeof_BoneInfoVertex = 8u;
-const U32 kAlignof_BoneInfoVertex = 1u;
+constexpr U32 kSizeof_BoneInfoVertex = 8u;
+constexpr U32 kAlignof_BoneInfoVertex = 1u;
 ANKI_SHADER_STATIC_ASSERT(kSizeof_BoneInfoVertex == sizeof(BoneInfoVertex));
 
 /// A structure that contains all the info of a geometry.
@@ -100,8 +100,8 @@ struct MeshGpuDescriptor
 	Vec3 m_aabbMax;
 };
 
-const U32 kSizeof_MeshGpuDescriptor = 4u * sizeof(UVec2) + 8u * sizeof(F32);
-const U32 kAlignof_MeshGpuDescriptor = 8u;
+constexpr U32 kSizeof_MeshGpuDescriptor = 4u * sizeof(UVec2) + 8u * sizeof(F32);
+constexpr U32 kAlignof_MeshGpuDescriptor = 8u;
 ANKI_SHADER_STATIC_ASSERT(kSizeof_MeshGpuDescriptor == sizeof(MeshGpuDescriptor));
 
 #if defined(__cplusplus)
@@ -120,15 +120,15 @@ enum class TextureChannelId : U8
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(TextureChannelId)
 #else
-const U32 TEXTURE_CHANNEL_ID_DIFFUSE = 0u;
-const U32 TEXTURE_CHANNEL_ID_NORMAL = 1u;
-const U32 TEXTURE_CHANNEL_ID_ROUGHNESS_METALNESS = 2u;
-const U32 TEXTURE_CHANNEL_ID_EMISSION = 3u;
-const U32 TEXTURE_CHANNEL_ID_HEIGHT = 4u;
-const U32 TEXTURE_CHANNEL_ID_AUX_0 = 5u;
-const U32 TEXTURE_CHANNEL_ID_AUX_1 = 6u;
-const U32 TEXTURE_CHANNEL_ID_AUX_2 = 7u;
-const U32 TEXTURE_CHANNEL_ID_COUNT = 8u;
+constexpr U32 TEXTURE_CHANNEL_ID_DIFFUSE = 0u;
+constexpr U32 TEXTURE_CHANNEL_ID_NORMAL = 1u;
+constexpr U32 TEXTURE_CHANNEL_ID_ROUGHNESS_METALNESS = 2u;
+constexpr U32 TEXTURE_CHANNEL_ID_EMISSION = 3u;
+constexpr U32 TEXTURE_CHANNEL_ID_HEIGHT = 4u;
+constexpr U32 TEXTURE_CHANNEL_ID_AUX_0 = 5u;
+constexpr U32 TEXTURE_CHANNEL_ID_AUX_1 = 6u;
+constexpr U32 TEXTURE_CHANNEL_ID_AUX_2 = 7u;
+constexpr U32 TEXTURE_CHANNEL_ID_COUNT = 8u;
 #endif
 
 struct MaterialGpuDescriptor
@@ -145,8 +145,8 @@ struct MaterialGpuDescriptor
 	F32 m_metalness;
 };
 
-const U32 kSizeof_MaterialGpuDescriptor = 8u * sizeof(U16) + 3u * sizeof(Vec3) + 2u * sizeof(F32);
-const U32 kAlignof_MaterialGpuDescriptor = 4u;
+constexpr U32 kSizeof_MaterialGpuDescriptor = 8u * sizeof(U16) + 3u * sizeof(Vec3) + 2u * sizeof(F32);
+constexpr U32 kAlignof_MaterialGpuDescriptor = 4u;
 ANKI_SHADER_STATIC_ASSERT(kSizeof_MaterialGpuDescriptor == sizeof(MaterialGpuDescriptor));
 
 struct ModelGpuDescriptor

+ 195 - 0
AnKi/Shaders/PackFunctions.hlsl

@@ -0,0 +1,195 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Common.hlsl>
+#include <AnKi/Shaders/TonemappingFunctions.hlsl>
+
+constexpr RF32 kMinRoughness = 0.05;
+
+/// Pack a 3D normal to a 2D vector
+/// See the clean code in comments in revision < r467
+Vec2 packNormal(const Vec3 normal)
+{
+	const F32 scale = 1.7777;
+	const F32 divisor = (normal.z + 1.0) * (scale * 2.0);
+	const Vec2 packed = normal.xy / divisor + 0.5;
+	return packed;
+}
+
+/// Reverse of packNormal. The result is normalized
+Vec3 unpackNormal(const Vec2 enc)
+{
+	const F32 scale = 1.7777;
+	const Vec2 scaled = enc * (2.0 * scale) - scale;
+	const F32 factor = 2.0 / (dot(scaled.xy, scaled.xy) + 1.0);
+
+	Vec3 unpacked;
+	unpacked.xy = factor * scaled.xy;
+	unpacked.z = factor - 1.0;
+	return normalize(unpacked);
+}
+
+// Signed octahedron normal encoding
+// See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
+// Result in [0.0, 1.0]
+Vec3 signedOctEncode(Vec3 n)
+{
+	// Project on the octahedron
+	const Vec3 absN = abs(n);
+	n /= absN.x + absN.y + absN.z;
+
+	// Both encoded components share the same y based term
+	const F32 yTerm = n.y * 0.5 + 0.5;
+
+	Vec3 encoded;
+	encoded.x = n.x * 0.5 + yTerm;
+	encoded.y = n.x * -0.5 + yTerm;
+
+	// Only keeps whether z is positive: the huge scale pushes anything above zero to 1.0
+	encoded.z = saturate(n.z * kMaxF32);
+	return encoded;
+}
+
+// Reverse of signedOctEncode. The result is normalized
+// See http://johnwhite3d.blogspot.no/2017/10/signed-octahedron-normal-encoding.html
+Vec3 signedOctDecode(const Vec3 n)
+{
+	Vec3 decoded;
+	decoded.x = n.x - n.y;
+	decoded.y = n.x + n.y - 1.0;
+
+	// n.z carries the sign of z, the magnitude comes from x and y
+	decoded.z = n.z * 2.0 - 1.0;
+	decoded.z = decoded.z * (1.0 - abs(decoded.x) - abs(decoded.y));
+
+	return normalize(decoded);
+}
+
+// Vectorized version. Assumes that v is in [0.0, 1.0]
+// Each component is scaled to [0, 255] and placed in its own byte: x in the lowest, w in the highest
+U32 newPackUnorm4x8(const Vec4 v)
+{
+	const Vec4 scaled = v * 255.0;
+	const UVec4 shifted = UVec4(scaled) << UVec4(0u, 8u, 16u, 24u);
+	const UVec2 halves = shifted.xy | shifted.zw;
+	return halves.x | halves.y;
+}
+
+// Vectorized version. Extracts the 4 bytes of u (lowest byte to x, highest to w) and scales them by 1/255
+Vec4 newUnpackUnorm4x8(const U32 u)
+{
+	const UVec4 shifted = ((UVec4)u) >> UVec4(0u, 8u, 16u, 24u);
+	const UVec4 bytes = shifted & ((UVec4)0xFFu);
+	return Vec4(bytes) * (1.0 / 255.0);
+}
+
+// Convert from RGB to YCbCr.
+// The RGB should be in [0, 1] and the output YCbCr will be in [0, 1] as well.
+Vec3 rgbToYCbCr(const Vec3 rgb)
+{
+	const Vec3 yWeights = Vec3(0.299, 0.587, 0.114);
+	const Vec3 cbWeights = Vec3(-0.168736, -0.331264, 0.5);
+	const Vec3 crWeights = Vec3(0.5, -0.418688, -0.081312);
+
+	// The chroma channels are offset by 0.5
+	return Vec3(dot(rgb, yWeights), 0.5 + dot(rgb, cbWeights), 0.5 + dot(rgb, crWeights));
+}
+
+// Convert the output of rgbToYCbCr back to RGB.
+Vec3 yCbCrToRgb(const Vec3 ycbcr)
+{
+	const F32 luma = ycbcr.x;
+
+	// Remove the 0.5 offset of the chroma channels
+	const F32 cb = ycbcr.y - 0.5;
+	const F32 cr = ycbcr.z - 0.5;
+
+	const Vec3 chroma = Vec3(1.402 * cr, -0.344 * cb - 0.714 * cr, 1.772 * cb);
+	return chroma + luma;
+}
+
+// Pack a Vec2 to a single F32.
+// comp should be in [0, 1] and the output will be in [0, 1].
+F32 packUnorm2ToUnorm1(const Vec2 comp)
+{
+	// Quantize each component to 16 levels
+	const Vec2 quantized = round(comp * 15.0);
+
+	// comp.x goes to the high part and comp.y to the low part of the packed value
+	const Vec2 weights = Vec2(1.0 / (255.0 / 16.0), 1.0 / 255.0);
+	return dot(quantized, weights);
+}
+
+// Unpack a single F32 to Vec2. Does the opposite of packUnorm2ToUnorm1.
+Vec2 unpackUnorm1ToUnorm2(F32 c)
+{
+	// The first branch is the active one, it works purely with floats. The disabled branch does the same with
+	// integer operations
+#if 1
+	const F32 temp = c * (255.0 / 16.0);
+	const F32 a = floor(temp);
+	const F32 b = temp - a; // b = frac(temp)
+	return Vec2(a, b) * Vec2(1.0 / 15.0, 16.0 / 15.0);
+#else
+	const U32 temp = U32(c * 255.0);
+	const U32 a = temp >> 4;
+	const U32 b = temp & 0xF;
+	return Vec2(a, b) / 15.0;
+#endif
+}
+
+// G-Buffer structure. It's the unpacked form of what packGBuffer() writes to the render targets
+struct GbufferInfo
+{
+	RVec3 m_diffuse;
+	RVec3 m_f0; ///< Fresnel at zero angles.
+	RVec3 m_normal;
+	RF32 m_roughness;
+	RF32 m_metallic;
+	RF32 m_subsurface;
+	RVec3 m_emission;
+	Vec2 m_velocity;
+};
+
+// Populate the G buffer. Layout:
+// rt0: diffuse (xyz), packed subsurface and metallic (w)
+// rt1: roughness (x), f0.x (y), tonemapped emission r and b (zw)
+// rt2: tonemapped emission g (x), encoded normal (yzw)
+// rt3: velocity
+void packGBuffer(GbufferInfo g, out Vec4 rt0, out Vec4 rt1, out Vec4 rt2, out Vec2 rt3)
+{
+	const Vec3 encodedNormal = signedOctEncode(g.m_normal);
+	const Vec3 emission = reinhardTonemap(g.m_emission);
+	const F32 subsurfaceMetallic = packUnorm2ToUnorm1(Vec2(g.m_subsurface, g.m_metallic));
+
+	rt0 = Vec4(g.m_diffuse, subsurfaceMetallic);
+	rt1 = Vec4(g.m_roughness, g.m_f0.x, emission.rb);
+	rt2 = Vec4(emission.g, encodedNormal);
+	rt3 = g.m_velocity;
+}
+
+// Returns the xyz of rt0 scaled by (1.0 - metallic)
+RVec3 unpackDiffuseFromGBuffer(RVec4 rt0, RF32 metallic)
+{
+	return rt0.xyz * (1.0 - metallic);
+}
+
+// Decodes the normal stored in the yzw of rt2
+Vec3 unpackNormalFromGBuffer(Vec4 rt2)
+{
+	const Vec3 encoded = rt2.yzw;
+	return signedOctDecode(encoded);
+}
+
+// Reads the roughness from rt1.x and remaps it from [0, 1] to [kMinRoughness, 1]
+RF32 unpackRoughnessFromGBuffer(RVec4 rt1)
+{
+	const RF32 stored = rt1.x;
+	return stored * (1.0 - kMinRoughness) + kMinRoughness;
+}
+
+// Read part of the G-buffer: everything except the velocity
+void unpackGBufferNoVelocity(Vec4 rt0, Vec4 rt1, Vec4 rt2, out GbufferInfo g)
+{
+	// rt0.w holds the subsurface and the metallic packed together
+	const Vec2 subsurfaceMetallic = unpackUnorm1ToUnorm2(rt0.w);
+	g.m_subsurface = subsurfaceMetallic.x;
+	g.m_metallic = subsurfaceMetallic.y;
+
+	g.m_roughness = unpackRoughnessFromGBuffer(rt1);
+	g.m_normal = signedOctDecode(rt2.yzw);
+	g.m_emission = invertReinhardTonemap(Vec3(rt1.z, rt2.x, rt1.w));
+	g.m_velocity = Vec2(kMaxF32, kMaxF32); // Put something random
+
+	// Raw values as stored in the render targets
+	g.m_diffuse = rt0.xyz;
+	g.m_f0 = Vec3(rt1.y, rt1.y, rt1.y);
+
+	// Compute reflectance. This needs the diffuse before it gets scaled below
+	g.m_f0 = lerp(g.m_f0, g.m_diffuse, g.m_metallic);
+
+	// Compute diffuse
+	g.m_diffuse *= 1.0 - g.m_metallic;
+}

+ 2 - 2
AnKi/Shaders/RtShadowsHit.ankiprog

@@ -27,14 +27,14 @@ layout(location = 0) rayPayloadInEXT F32 g_payload;
 
 hitAttributeEXT vec2 g_attribs;
 
-#if ALPHA_TEXTURE == 1 && ANKI_SUPPORTS_64BIT
+#if ALPHA_TEXTURE == 1 && ANKI_SUPPORTS_64BIT_TYPES
 ANKI_DEFINE_LOAD_STORE(U16Vec3, 2)
 ANKI_DEFINE_LOAD_STORE(MainVertex, alignof(MainVertex))
 #endif
 
 void main()
 {
-#if ALPHA_TEXTURE == 1 && ANKI_SUPPORTS_64BIT
+#if ALPHA_TEXTURE == 1 && ANKI_SUPPORTS_64BIT_TYPES
 	const ModelGpuDescriptor model = u_modelDescriptor;
 	const MeshGpuDescriptor mesh = model.m_mesh;
 

+ 1 - 1
AnKi/Shaders/TonemappingFunctions.glsl

@@ -38,7 +38,7 @@ ANKI_RP Vec3 tonemapReinhard(ANKI_RP Vec3 color, ANKI_RP F32 saturation)
 {
 	const ANKI_RP F32 lum = computeLuminance(color);
 	const ANKI_RP F32 toneMappedLuminance = lum / (lum + 1.0);
-	return toneMappedLuminance * pow(color / lum, Vec3(saturation));
+	return toneMappedLuminance * pow(color / lum, Vec3(saturation, saturation, saturation));
 }
 
 // Uncharted 2 operator

+ 103 - 0
AnKi/Shaders/TonemappingFunctions.hlsl

@@ -0,0 +1,103 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Common.hlsl>
+
+// A trick to compute the logarithm of base 10: log10(x) = log(x) / log(10)
+template<typename T>
+T log10(T x)
+{
+	const T logOf10 = log((T)10.0);
+	return log(x) / logOf10;
+}
+
+// Compute the luminance of a color as the weighted sum 0.30 * R + 0.59 * G + 0.11 * B. The result is a scalar that is
+// never less than kEpsilonRf, which makes it safe to divide by
+RF32 computeLuminance(RVec3 color)
+{
+	const RF32 weightedSum = dot(RVec3(0.30, 0.59, 0.11), color);
+	return max(weightedSum, kEpsilonRf);
+}
+
+// Compute the exposure multiplier out of the average luminance. The key value divided by avgLum gives the linear
+// exposure, which is then lowered by threshold in log2 space
+RF32 computeExposure(RF32 avgLum, RF32 threshold)
+{
+	const RF32 key = 1.03 - (2.0 / (2.0 + log10(avgLum + 1.0)));
+	const RF32 linearExp = key / avgLum;
+	const RF32 log2Exp = log2(linearExp) - threshold;
+	return exp2(log2Exp);
+}
+
+// Multiply the color with the exposure that computeExposure() returns for avgLum and threshold
+RVec3 computeExposedColor(RVec3 color, RF32 avgLum, RF32 threshold)
+{
+	const RF32 exposure = computeExposure(avgLum, threshold);
+	return exposure * color;
+}
+
+// Uncharted 2 operator: ((x * (A * x + C * B) + D * E) / (x * (A * x + B) + D * F)) - E / F
+RF32 tonemapUncharted2(RF32 color)
+{
+	// Curve coefficients
+	const RF32 A = 0.15;
+	const RF32 B = 0.50;
+	const RF32 C = 0.10;
+	const RF32 D = 0.20;
+	const RF32 E = 0.02;
+	const RF32 F = 0.30;
+
+	const RF32 numerator = color * (A * color + C * B) + D * E;
+	const RF32 denominator = color * (A * color + B) + D * F;
+	return (numerator / denominator) - E / F;
+}
+
+// Coefficients of the curve that tonemapACESFilm() evaluates and invertTonemapACESFilm() inverts:
+// f(x) = (x * (A * x + B)) / (x * (C * x + D) + E)
+constexpr RF32 kAcesA = 2.51;
+constexpr RF32 kAcesB = 0.03;
+constexpr RF32 kAcesC = 2.43;
+constexpr RF32 kAcesD = 0.59;
+constexpr RF32 kAcesE = 0.14;
+
+// ACES tonemapping curve, the result is clamped to [0, 1]. See it in action together with its inverse at
+// https://www.desmos.com/calculator/n1lkpc6hwq
+RVec3 tonemapACESFilm(RVec3 x)
+{
+	const RVec3 numerator = x * (kAcesA * x + kAcesB);
+	const RVec3 denominator = x * (kAcesC * x + kAcesD) + kAcesE;
+	return saturate(numerator / denominator);
+}
+
+// The inverse of tonemapACESFilm(). It solves (C * x - A) * y^2 + (D * x - B) * y + E * x = 0 for y, where x is the
+// tonemapped value. https://www.desmos.com/calculator/n1lkpc6hwq
+RVec3 invertTonemapACESFilm(RVec3 x)
+{
+	// Discriminant of the quadratic, expanded in terms of x
+	const RVec3 discriminant = x * x * (kAcesD * kAcesD - 4.0 * kAcesE * kAcesC)
+							   + x * (4.0 * kAcesE * kAcesA - 2.0 * kAcesB * kAcesD) + kAcesB * kAcesB;
+
+	const RVec3 numerator = kAcesD * x - kAcesB + sqrt(discriminant);
+	const RVec3 denominator = 2.0 * kAcesA - 2.0 * kAcesC * x;
+	return numerator / denominator;
+}
+
+// Scale the color by the exposure and run it through the ACES curve
+RVec3 tonemap(RVec3 color, RF32 exposure)
+{
+	const RVec3 exposedColor = color * exposure;
+	return tonemapACESFilm(exposedColor);
+}
+
+// Undo tonemap(color, exposure): invert the ACES curve and then divide by the exposure. The exposure is clamped to
+// kEpsilonRf to avoid a division by zero
+RVec3 invertTonemap(RVec3 color, RF32 exposure)
+{
+	const RVec3 exposedColor = invertTonemapACESFilm(color);
+	const RF32 safeExposure = max(kEpsilonRf, exposure);
+	return exposedColor / safeExposure;
+}
+
+// Same as tonemap(color, exposure) but the exposure is derived from avgLum and threshold using computeExposure()
+RVec3 tonemap(RVec3 color, RF32 avgLum, RF32 threshold)
+{
+	return tonemap(color, computeExposure(avgLum, threshold));
+}
+
+// Reinhard tonemapping driven by the largest channel: rgb / (1 + max(rgb))
+// https://graphicrants.blogspot.com/2013/12/tone-mapping.html
+RVec3 reinhardTonemap(RVec3 colour)
+{
+	const RF32 maxChannel = max(max(colour.r, colour.g), colour.b);
+	return colour / (1.0 + maxChannel);
+}
+
+// The inverse of reinhardTonemap(): rgb / (1 - max(rgb)). The divisor is kept at or above 1 / 32768 so a channel that
+// reaches 1.0 doesn't cause a division by zero
+RVec3 invertReinhardTonemap(RVec3 colour)
+{
+	const RF32 maxChannel = max(max(colour.r, colour.g), colour.b);
+	const RF32 divisor = max(1.0 / 32768.0, 1.0 - maxChannel);
+	return colour / divisor;
+}

+ 10 - 0
ThirdParty/SpirvCross/spirv_glsl.cpp

@@ -7353,12 +7353,22 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
 		break;
 
+	// Panos: Treat NMin as FMin because I hate these isnan
+#if 0
 	case GLSLstd450NMin:
 	case GLSLstd450NMax:
 	{
 		emit_nminmax_op(result_type, id, args[0], args[1], op);
 		break;
 	}
+#else
+	case GLSLstd450NMin:
+		emit_binary_func_op(result_type, id, args[0], args[1], "min");
+		break;
+	case GLSLstd450NMax:
+		emit_binary_func_op(result_type, id, args[0], args[1], "max");
+		break;
+#endif
 
 	case GLSLstd450NClamp:
 	{

+ 1 - 1
Tools/FormatSource.py

@@ -10,7 +10,7 @@ import subprocess
 import threading
 import multiprocessing
 
-file_extensions = ["h", "hpp", "c", "cpp", "glsl", "ankiprog"]
+file_extensions = ["h", "hpp", "c", "cpp", "glsl", "hlsl", "ankiprog"]
 directories = ["AnKi", "Tests", "Sandbox", "Tools", "Samples"]