Browse Source

Moving some forward shading shaders to HLSL

Panagiotis Christopoulos Charitos 3 years ago
parent
commit
38157e437c

+ 190 - 0
AnKi/Shaders/ClusteredShadingCommon.hlsl

@@ -0,0 +1,190 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/LightFunctions.hlsl>
+
+//
+// Common uniforms
+//
+#if defined(CLUSTERED_SHADING_UNIFORMS_BINDING)
+[[vk::binding(CLUSTERED_SHADING_UNIFORMS_BINDING, CLUSTERED_SHADING_SET)]] ConstantBuffer<ClusteredShadingUniforms>
+	u_clusteredShading;
+#endif
+
+//
+// Light uniforms (3)
+//
+#if defined(CLUSTERED_SHADING_LIGHTS_BINDING)
+[[vk::binding(CLUSTERED_SHADING_LIGHTS_BINDING, CLUSTERED_SHADING_SET)]] cbuffer b_pointLights
+{
+	PointLight u_pointLights2[kMaxVisiblePointLights];
+};
+
+[[vk::binding(CLUSTERED_SHADING_LIGHTS_BINDING + 1u, CLUSTERED_SHADING_SET)]] cbuffer b_spotLights
+{
+	SpotLight u_spotLights[kMaxVisibleSpotLights];
+};
+
+[[vk::binding(CLUSTERED_SHADING_LIGHTS_BINDING + 2u, CLUSTERED_SHADING_SET)]] Texture2D u_shadowAtlasTex;
+#endif
+
+//
+// Reflection probes (2)
+//
+#if defined(CLUSTERED_SHADING_REFLECTIONS_BINDING)
+[[vk::binding(CLUSTERED_SHADING_REFLECTIONS_BINDING, CLUSTERED_SHADING_SET)]] cbuffer b_reflectionProbes
+{
+	ReflectionProbe u_reflectionProbes[kMaxVisibleReflectionProbes];
+};
+
+[[vk::binding(CLUSTERED_SHADING_REFLECTIONS_BINDING + 1u, CLUSTERED_SHADING_SET)]] TextureCubeArray<RVec4>
+	u_reflectionsTex;
+#endif
+
+//
+// Decal uniforms (3)
+//
+#if defined(CLUSTERED_SHADING_DECALS_BINDING)
+[[vk::binding(CLUSTERED_SHADING_DECALS_BINDING, CLUSTERED_SHADING_SET)]] cbuffer b_decals
+{
+	Decal u_decals2[kMaxVisibleDecals];
+};
+
+[[vk::binding(CLUSTERED_SHADING_DECALS_BINDING + 1u, CLUSTERED_SHADING_SET)]] Texture2D<RVec4> u_diffuseDecalTex;
+[[vk::binding(CLUSTERED_SHADING_DECALS_BINDING + 2u, CLUSTERED_SHADING_SET)]] Texture2D<RVec4>
+	u_specularRoughnessDecalTex;
+#endif
+
+//
+// Fog density uniforms (1)
+//
+#if defined(CLUSTERED_SHADING_FOG_BINDING)
+[[vk::binding(CLUSTERED_SHADING_FOG_BINDING, CLUSTERED_SHADING_SET)]] cbuffer b_fogDensityVolumes
+{
+	FogDensityVolume u_fogDensityVolumes[kMaxVisibleFogDensityVolumes];
+};
+#endif
+
+//
+// GI (2)
+//
+#if defined(CLUSTERED_SHADING_GI_BINDING)
+[[vk::binding(CLUSTERED_SHADING_GI_BINDING, CLUSTERED_SHADING_SET)]] Texture3D<RVec4>
+	u_globalIlluminationTextures[kMaxVisibleGlobalIlluminationProbes];
+
+[[vk::binding(CLUSTERED_SHADING_GI_BINDING + 1u, CLUSTERED_SHADING_SET)]] cbuffer b_giProbes
+{
+	GlobalIlluminationProbe u_giProbes[kMaxVisibleGlobalIlluminationProbes];
+};
+#endif
+
+//
+// Cluster uniforms
+//
+#if defined(CLUSTERED_SHADING_CLUSTERS_BINDING)
+[[vk::binding(CLUSTERED_SHADING_CLUSTERS_BINDING, CLUSTERED_SHADING_SET)]] StructuredBuffer<Cluster> u_clusters;
+#endif
+
+/// Debugging function. Converts how populated a cluster is to a heatmap color.
+/// @param cluster The cluster whose object masks will be counted.
+/// @param objectTypeMask Bitmask built from (1u << kClusterObjectType*). Selects which object types contribute.
+/// @return heatmap() of the number of set mask bits over the sum of the per-type maximums, clamped to 1.0.
+Vec3 clusterHeatmap(Cluster cluster, U32 objectTypeMask)
+{
+	U32 maxObjects = 0u;
+	I32 count = 0;
+
+	if((objectTypeMask & (1u << kClusterObjectTypePointLight)) != 0u)
+	{
+		maxObjects += kMaxVisiblePointLights;
+		count += I32(countbits(cluster.m_pointLightsMask));
+	}
+
+	if((objectTypeMask & (1u << kClusterObjectTypeSpotLight)) != 0u)
+	{
+		maxObjects += kMaxVisibleSpotLights;
+		count += I32(countbits(cluster.m_spotLightsMask));
+	}
+
+	if((objectTypeMask & (1u << kClusterObjectTypeDecal)) != 0u)
+	{
+		maxObjects += kMaxVisibleDecals;
+		count += I32(countbits(cluster.m_decalsMask));
+	}
+
+	if((objectTypeMask & (1u << kClusterObjectTypeFogDensityVolume)) != 0u)
+	{
+		maxObjects += kMaxVisibleFogDensityVolumes;
+		count += I32(countbits(cluster.m_fogDensityVolumesMask));
+	}
+
+	if((objectTypeMask & (1u << kClusterObjectTypeReflectionProbe)) != 0u)
+	{
+		maxObjects += kMaxVisibleReflectionProbes;
+		count += I32(countbits(cluster.m_reflectionProbesMask));
+	}
+
+	if((objectTypeMask & (1u << kClusterObjectTypeGlobalIlluminationProbe)) != 0u)
+	{
+		maxObjects += kMaxVisibleGlobalIlluminationProbes;
+		count += I32(countbits(cluster.m_giProbesMask));
+	}
+
+	// maxObjects stays zero if objectTypeMask selects no known type. Clamp the divisor to avoid a 0/0 division
+	const F32 factor = min(1.0, F32(count) / F32(max(maxObjects, 1u)));
+	return heatmap(factor);
+}
+
+/// Compute the index of the Z split that contains the given depth. It's linearizeDepth(n, f, depth)*zSplitCount
+/// simplified to 1/(a+b/depth) where a=(n-f)/(n*zSplitCount) and b=f/(n*zSplitCount).
+/// The result never exceeds zSplitCount-1.
+U32 computeZSplitClusterIndex(F32 depth, U32 zSplitCount, F32 a, F32 b)
+{
+	const F32 denominator = a + b / depth;
+	const U32 splitIdx = (U32)(1.0 / denominator);
+	return min(zSplitCount - 1u, splitIdx);
+}
+
+/// Return the linear index of the tile that contains fragCoord. Each tile is tileSize wide and tall and there are
+/// tileCountX tiles per row.
+U32 computeTileClusterIndexFragCoord(Vec2 fragCoord, U32 tileSize, U32 tileCountX)
+{
+	const Vec2 tileCoord = fragCoord / (F32)tileSize;
+	const UVec2 tile = UVec2(tileCoord);
+	return tile.x + tile.y * tileCountX;
+}
+
+/// Combine a tile cluster with a Z split cluster. Every mask of the result is the bitwise AND of the two input masks,
+/// so an object survives only if it's present in both clusters.
+Cluster mergeClusters(Cluster tileCluster, Cluster zCluster)
+{
+// The masks pass through untouched for now. The disabled alternative ORs them across the subgroup:
+// #define ANKI_OR_MASKS(x) subgroupOr(x)
+#define ANKI_OR_MASKS(x) (x)
+
+	Cluster merged;
+
+	// Lights
+	merged.m_pointLightsMask = ANKI_OR_MASKS(tileCluster.m_pointLightsMask & zCluster.m_pointLightsMask);
+	merged.m_spotLightsMask = ANKI_OR_MASKS(tileCluster.m_spotLightsMask & zCluster.m_spotLightsMask);
+
+	// Probes
+	merged.m_reflectionProbesMask =
+		ANKI_OR_MASKS(tileCluster.m_reflectionProbesMask & zCluster.m_reflectionProbesMask);
+	merged.m_giProbesMask = ANKI_OR_MASKS(tileCluster.m_giProbesMask & zCluster.m_giProbesMask);
+
+	// The rest
+	merged.m_decalsMask = ANKI_OR_MASKS(tileCluster.m_decalsMask & zCluster.m_decalsMask);
+	merged.m_fogDensityVolumesMask =
+		ANKI_OR_MASKS(tileCluster.m_fogDensityVolumesMask & zCluster.m_fogDensityVolumesMask);
+
+#undef ANKI_OR_MASKS
+
+	return merged;
+}
+
+#if defined(CLUSTERED_SHADING_CLUSTERS_BINDING)
+/// Get the final cluster of a fragment by merging its tile cluster with its Z split cluster.
+/// The Z split clusters are read from u_clusters starting at index tileCounts.x * tileCounts.y, right after the tiles.
+Cluster getClusterFragCoord(Vec3 fragCoord, U32 tileSize, UVec2 tileCounts, U32 zSplitCount, F32 a, F32 b)
+{
+	const U32 tileIdx = computeTileClusterIndexFragCoord(fragCoord.xy, tileSize, tileCounts.x);
+	const U32 zSplitIdx = computeZSplitClusterIndex(fragCoord.z, zSplitCount, a, b) + tileCounts.x * tileCounts.y;
+
+	return mergeClusters(u_clusters[tileIdx], u_clusters[zSplitIdx]);
+}
+
+/// Convenience overload that takes the tile and Z split parameters from u_clusteredShading.
+/// NOTE: u_clusteredShading is only declared when CLUSTERED_SHADING_UNIFORMS_BINDING is defined.
+Cluster getClusterFragCoord(Vec3 fragCoord)
+{
+	return getClusterFragCoord(
+		fragCoord, u_clusteredShading.m_tileSize, u_clusteredShading.m_tileCounts, u_clusteredShading.m_zSplitCount,
+		u_clusteredShading.m_zSplitMagic.x, u_clusteredShading.m_zSplitMagic.y);
+}
+#endif

+ 130 - 0
AnKi/Shaders/CollisionFunctions.hlsl

@@ -0,0 +1,130 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+/// Moller-Trumbore ray/triangle intersection.
+/// https://www.scratchapixel.com/lessons/3d-basic-rendering/ray-tracing-rendering-a-triangle/moller-trumbore-ray-triangle-intersection
+/// @param rayOrigin The origin of the ray.
+/// @param rayDir The direction of the ray.
+/// @param v0 First triangle vertex.
+/// @param v1 Second triangle vertex.
+/// @param v2 Third triangle vertex.
+/// @param backfaceCulling If true then triangles with a determinant below kEpsilonF32 are rejected.
+/// @param[out] t The distance along the ray. The hit point is rayOrigin + rayDir * t.
+/// @param[out] u First barycentric coordinate of the hit.
+/// @param[out] v Second barycentric coordinate of the hit.
+/// @return True if the ray hits the triangle. On false the out params should not be trusted.
+Bool testRayTriangle(Vec3 rayOrigin, Vec3 rayDir, Vec3 v0, Vec3 v1, Vec3 v2, Bool backfaceCulling, out F32 t, out F32 u,
+					 out F32 v)
+{
+	// Give the out params a defined value because the early returns below may skip their assignment
+	t = 0.0;
+	u = 0.0;
+	v = 0.0;
+
+	const Vec3 v0v1 = v1 - v0;
+	const Vec3 v0v2 = v2 - v0;
+	const Vec3 pvec = cross(rayDir, v0v2);
+	const F32 det = dot(v0v1, pvec);
+
+	// A tiny determinant means that the ray is parallel to the triangle. A negative one means a back face
+	if((backfaceCulling && det < kEpsilonF32) || abs(det) < kEpsilonF32)
+	{
+		return false;
+	}
+
+	const F32 invDet = 1.0 / det;
+
+	const Vec3 tvec = rayOrigin - v0;
+	u = dot(tvec, pvec) * invDet;
+	if(u < 0.0 || u > 1.0)
+	{
+		return false;
+	}
+
+	const Vec3 qvec = cross(tvec, v0v1);
+	v = dot(rayDir, qvec) * invDet;
+	if(v < 0.0 || u + v > 1.0)
+	{
+		return false;
+	}
+
+	t = dot(v0v2, qvec) * invDet;
+
+	if(t <= kEpsilonF32)
+	{
+		// This is an addition to the original code. Can't have rays that don't touch the triangle
+		return false;
+	}
+
+	return true;
+}
+
+/// Return true if two AABBs overlap. Boxes that only touch are not considered overlapping.
+Bool testAabbAabb(Vec3 aMin, Vec3 aMax, Vec3 bMin, Vec3 bMax)
+{
+	const Bool aStartsBeforeBEnds = all(aMin < bMax);
+	const Bool bStartsBeforeAEnds = all(bMin < aMax);
+	return aStartsBeforeBEnds && bStartsBeforeAEnds;
+}
+
+/// Intersect a ray against an AABB when the ray starts inside the AABB. Returns the distance 'a' for which
+/// rayOrigin + rayDir * a is the intersection point.
+/// https://community.arm.com/graphics/b/blog/posts/reflections-based-on-local-cubemaps-in-unity
+F32 testRayAabbInside(Vec3 rayOrigin, Vec3 rayDir, Vec3 aabbMin, Vec3 aabbMax)
+{
+	// Per-axis distances to the planes of the two AABB corners
+	const Vec3 distToMaxPlanes = (aabbMax - rayOrigin) / rayDir;
+	const Vec3 distToMinPlanes = (aabbMin - rayOrigin) / rayDir;
+
+	// Keep the larger distance of every axis and then choose the smallest of the 3
+	const Vec3 farthest = max(distToMaxPlanes, distToMinPlanes);
+	return min(min(farthest.x, farthest.y), farthest.z);
+}
+
+/// Ray box intersection by Simon Green.
+/// @param[out] t0 The largest of the per-axis near distances.
+/// @param[out] t1 The smallest of the per-axis far distances.
+/// @return True if t0 < t1 and t1 is greater than kEpsilonF32.
+Bool testRayAabb(Vec3 rayOrigin, Vec3 rayDir, Vec3 aabbMin, Vec3 aabbMax, out F32 t0, out F32 t1)
+{
+	const Vec3 invDir = 1.0 / rayDir;
+	const Vec3 distToMin = invDir * (aabbMin - rayOrigin);
+	const Vec3 distToMax = invDir * (aabbMax - rayOrigin);
+
+	const Vec3 nearDist = min(distToMax, distToMin);
+	const Vec3 farDist = max(distToMax, distToMin);
+
+	t0 = max(nearDist.x, max(nearDist.y, nearDist.z));
+	t1 = min(farDist.x, min(farDist.y, farDist.z));
+
+	const Bool intervalValid = t0 < t1;
+	const Bool exitInFront = t1 > kEpsilonF32;
+	return intervalValid && exitInFront;
+}
+
+/// Intersect a ray against an OBB. The ray is transformed with obbTransformInv and then it's tested against the AABB
+/// that spans from -obbExtend to obbExtend. t0 and t1 are the outputs of testRayAabb() for the transformed ray.
+Bool testRayObb(Vec3 rayOrigin, Vec3 rayDir, Vec3 obbExtend, Mat4 obbTransformInv, out F32 t0, out F32 t1)
+{
+	// The origin is a point (w=1) and the direction is a vector (w=0)
+	const Vec4 originObbSpace = mul(obbTransformInv, Vec4(rayOrigin, 1.0));
+	const Vec4 dirObbSpace = mul(obbTransformInv, Vec4(rayDir, 0.0));
+
+	return testRayAabb(originObbSpace.xyz, dirObbSpace.xyz, -obbExtend, obbExtend, t0, t1);
+}
+
+/// Geometric ray/sphere intersection.
+/// https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-sphere-intersection
+/// NOTE(review): the math projects onto rayDir without normalizing it so rayDir is presumably expected to be unit
+/// length. Confirm with the callers.
+/// @param[out] t0 Distance to the entry point, clamped to zero when the origin is inside the sphere.
+/// @param[out] t1 Distance to the exit point.
+/// @return True if the ray hits the sphere. On false the out params should not be trusted.
+Bool testRaySphere(Vec3 rayOrigin, Vec3 rayDir, Vec3 sphereCenter, F32 sphereRadius, out F32 t0, out F32 t1)
+{
+	// Give the out params a defined value because the first early return skips their assignment
+	t0 = 0.0;
+	t1 = 0.0;
+
+	const Vec3 L = sphereCenter - rayOrigin;
+	const F32 tca = dot(L, rayDir);
+	const F32 d2 = dot(L, L) - tca * tca;
+	const F32 radius2 = sphereRadius * sphereRadius;
+	const F32 diff = radius2 - d2;
+	if(diff < 0.0)
+	{
+		// The ray passes further than a radius away from the center
+		return false;
+	}
+
+	// thc is never negative so t0 <= t1 always holds. No need to swap them
+	const F32 thc = sqrt(diff);
+	t0 = tca - thc;
+	t1 = tca + thc;
+
+	if(t0 < 0.0 && t1 < 0.0)
+	{
+		// The sphere is behind the ray
+		return false;
+	}
+
+	t0 = max(0.0, t0);
+	return true;
+}
+
+/// Evaluate the plane equation for a point: dot(planeNormal, point3d) - planeOffset.
+F32 testPlanePoint(Vec3 planeNormal, F32 planeOffset, Vec3 point3d)
+{
+	const F32 projection = dot(planeNormal, point3d);
+	return projection - planeOffset;
+}

+ 155 - 0
AnKi/Shaders/ForwardShadingCommon.hlsl

@@ -0,0 +1,155 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Shaders/Functions.hlsl>
+#include <AnKi/Shaders/Include/MeshTypes.h>
+#include <AnKi/Shaders/Include/MaterialTypes.h>
+#include <AnKi/Shaders/Include/GpuSceneTypes.h>
+
+ANKI_BINDLESS_SET(kMaterialSetBindless)
+
+//
+// Frag
+//
+#if defined(ANKI_FRAGMENT_SHADER)
+// Global resources
+[[vk::binding(kMaterialBindingLinearClampSampler, kMaterialSetGlobal)]] SamplerState u_linearAnyClampSampler;
+[[vk::binding(kMaterialBindingDepthRt, kMaterialSetGlobal)]] Texture2D u_gbufferDepthRt;
+[[vk::binding(kMaterialBindingLightVolume, kMaterialSetGlobal)]] Texture3D<RVec4> u_lightVol;
+[[vk::binding(kMaterialBindingShadowSampler, kMaterialSetGlobal)]] SamplerComparisonState u_shadowSampler;
+#	define CLUSTERED_SHADING_SET kMaterialSetGlobal
+#	define CLUSTERED_SHADING_UNIFORMS_BINDING kMaterialBindingClusterShadingUniforms
+#	define CLUSTERED_SHADING_LIGHTS_BINDING kMaterialBindingClusterShadingLights
+#	define CLUSTERED_SHADING_CLUSTERS_BINDING kMaterialBindingClusters
+#	include <AnKi/Shaders/ClusteredShadingCommon.hlsl>
+
+/// The output of the forward shading fragment shaders. Filled by packGBuffer().
+struct FragOut
+{
+	RVec4 m_color : SV_TARGET0; ///< Entry point outputs need a semantic. This one goes to the 1st render target.
+};
+
+/// Write the color to the fragment output.
+void packGBuffer(Vec4 color, out FragOut output)
+{
+	const RVec4 packed = RVec4(color.rgb, color.a);
+	output.m_color = packed;
+}
+
+/// Sample a texture array whose layers are the frames of an animation. The layer is chosen from the time and it
+/// wraps around after "period".
+RVec4 readAnimatedTextureRgba(Texture2DArray<RVec4> tex, SamplerState sampl, F32 period, Vec2 uv, F32 time)
+{
+	// Only the layer count is needed, the rest of the dimensions are ignored
+	F32 width;
+	F32 height;
+	F32 layerCount;
+	F32 mipCount;
+	tex.GetDimensions(0, width, height, layerCount, mipCount);
+
+	const F32 layersElapsed = time * layerCount / period;
+	const F32 layer = fmod(layersElapsed, layerCount);
+
+	const Vec3 uvLayer = Vec3(uv, layer);
+	return tex.Sample(sampl, uvLayer);
+}
+
+// Iterate the point and spot lights of the fragment's cluster and accumulate their diffuse contribution.
+// diffCol: The diffuse color of the surface. worldPos: The world space position of the fragment.
+// svPosition: The SV_POSITION of the fragment, used to find the cluster.
+// Shadows are skipped when the LOD mutator is greater than 1.
+Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos, Vec4 svPosition)
+{
+	diffCol = diffuseLobe(diffCol);
+	Vec3 outColor = Vec3(0.0, 0.0, 0.0);
+
+	// Find the cluster. Its masks have one bit per visible light
+	Cluster cluster = getClusterFragCoord(svPosition.xyz);
+
+	// Point lights
+	[[loop]] while(cluster.m_pointLightsMask != (ExtendedClusterObjectMask)0)
+	{
+		// Take the next light index out of the mask and clear its bit so the loop eventually ends.
+		// NOTE(review): findLSB2 presumably returns the index of the lowest set bit, confirm
+		const I32 idx = findLSB2(cluster.m_pointLightsMask);
+		cluster.m_pointLightsMask &= ~((ExtendedClusterObjectMask)1 << (ExtendedClusterObjectMask)idx);
+		const PointLight light = u_pointLights2[idx];
+
+		const Vec3 diffC = diffCol * light.m_diffuseColor;
+
+		const Vec3 frag2Light = light.m_position - worldPos;
+		const F32 att = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light);
+
+#	if LOD > 1
+		const F32 shadow = 1.0;
+#	else
+		F32 shadow = 1.0;
+		// NOTE(review): a negative tile scale presumably marks a light that doesn't cast shadows, confirm
+		if(light.m_shadowAtlasTileScale >= 0.0)
+		{
+			shadow = computeShadowFactorPointLight(light, frag2Light, u_shadowAtlasTex, u_shadowSampler);
+		}
+#	endif
+
+		outColor += diffC * (att * shadow);
+	}
+
+	// Spot lights. Same bit iteration as above
+	[[loop]] while(cluster.m_spotLightsMask != (ExtendedClusterObjectMask)0)
+	{
+		const I32 idx = findLSB2(cluster.m_spotLightsMask);
+		cluster.m_spotLightsMask &= ~((ExtendedClusterObjectMask)1 << (ExtendedClusterObjectMask)idx);
+		const SpotLight light = u_spotLights[idx];
+
+		const Vec3 diffC = diffCol * light.m_diffuseColor;
+
+		const Vec3 frag2Light = light.m_position - worldPos;
+		const F32 att = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light);
+
+		const Vec3 l = normalize(frag2Light);
+
+		const F32 spot = computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_direction);
+
+#	if LOD > 1
+		const F32 shadow = 1.0;
+#	else
+		F32 shadow = 1.0;
+		// A shadow layer of kMaxU32 is treated as "no shadow"
+		[[branch]] if(light.m_shadowLayer != kMaxU32)
+		{
+			shadow = computeShadowFactorSpotLight(light, worldPos, u_shadowAtlasTex, u_shadowSampler);
+		}
+#	endif
+
+		outColor += diffC * (att * spot * shadow);
+	}
+
+	return outColor;
+}
+
+// Cheap alternative to computeLightColorHigh: fetch the light from the light volume texture instead of iterating the
+// lights. worldPos is not used
+RVec3 computeLightColorLow(RVec3 diffCol, RVec3 worldPos, Vec4 svPosition)
+{
+	// UV from the fragment's position
+	const Vec2 uv = svPosition.xy / u_clusteredShading.m_renderingSize;
+
+	// W from the linear depth, rescaled from the whole Z split range to the splits that the volume covers
+	const F32 linearDepth = linearizeDepth(svPosition.z, u_clusteredShading.m_near, u_clusteredShading.m_far);
+	const F32 w =
+		linearDepth * (F32(u_clusteredShading.m_zSplitCount) / F32(u_clusteredShading.m_lightVolumeLastZSplit + 1u));
+
+	const RVec3 light = u_lightVol.SampleLevel(u_linearAnyClampSampler, Vec3(uv, w), 0.0).rgb;
+	return diffuseLobe(diffCol) * light;
+}
+
+/// Output color * scaleColor + biasColor.
+void particleAlpha(RVec4 color, RVec4 scaleColor, RVec4 biasColor, out FragOut output)
+{
+	const RVec4 finalColor = color * scaleColor + biasColor;
+	packGBuffer(finalColor, output);
+}
+
+/// Shade a fog fragment. The alpha fades in according to the view space Z distance between the fragment and the
+/// scene that is stored in the depth buffer.
+/// @param color The fog color.
+/// @param fogAlphaScale Scales the final alpha.
+/// @param fogDistanceOfMaxThikness The Z distance at which the feather factor reaches 1.0.
+/// @param zVSpace The view space Z of the fragment.
+/// @param svPosition The XY of the fragment's position. It's divided by the rendering size to find the UVs.
+/// @param[out] output Gets the color and the computed alpha.
+void fog(RVec3 color, RF32 fogAlphaScale, RF32 fogDistanceOfMaxThikness, F32 zVSpace, Vec2 svPosition,
+		 out FragOut output)
+{
+	const Vec2 texelSize = 1.0 / u_clusteredShading.m_renderingSize;
+	const Vec2 texCoords = svPosition * texelSize;
+
+	// Read mip 0 explicitly. The 3rd argument of Sample() is a texel offset, not a LOD
+	const F32 depth = u_gbufferDepthRt.SampleLevel(u_linearAnyClampSampler, texCoords, 0.0).r;
+
+	// Unproject the scene depth to view space. Matrix by vector needs mul(), operator* is component-wise in HLSL
+	const Vec4 fragPosVspace4 =
+		mul(u_clusteredShading.m_matrices.m_invertedProjectionJitter, Vec4(Vec3(UV_TO_NDC(texCoords), depth), 1.0));
+	const F32 sceneZVspace = fragPosVspace4.z / fragPosVspace4.w;
+
+	const F32 diff = max(0.0, zVSpace - sceneZVspace);
+	const F32 zFeatherFactor = min(1.0, diff / fogDistanceOfMaxThikness);
+
+	packGBuffer(Vec4(color, zFeatherFactor * fogAlphaScale), output);
+}
+
+#endif // defined(ANKI_FRAGMENT_SHADER)

+ 89 - 0
AnKi/Shaders/ForwardShadingParticles2.ankiprog

@@ -0,0 +1,89 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma anki hlsl
+
+#pragma anki mutator ANKI_TECHNIQUE 3
+#pragma anki mutator ANIMATED_TEXTURE 0 1
+#pragma anki mutator LIGHT 0 1
+
+#include <AnKi/Shaders/ForwardShadingCommon.hlsl>
+
+/// The input of the vertex shader. The first 3 members come from the particle vertex streams.
+struct VertIn
+{
+	[[vk::location(VertexStreamId::kParticlePosition)]] Vec3 m_position : POSITION; ///< Added to the quad corner.
+	[[vk::location(VertexStreamId::kParticleScale)]] RF32 m_scale : SCALE; ///< Scales the quad corner offset.
+	[[vk::location(VertexStreamId::kParticleAlpha)]] RF32 m_alpha : ALPHA; ///< Forwarded to the fragment shader.
+	U32 m_vertexId : SV_VERTEXID; ///< Its bits are used to derive the UVs of the corner.
+};
+
+/// The output of the vertex shader.
+struct VertOut
+{
+	nointerpolation RF32 m_alpha : ALPHA; ///< Copy of VertIn::m_alpha. Not interpolated.
+	Vec2 m_uv : TEXCOORD; ///< Used to sample the diffuse map.
+	Vec3 m_worldPos : WORLD_POS; ///< Passed to the lighting when the LIGHT mutator is on.
+	Vec4 m_svPosition : SV_POSITION;
+};
+
+#pragma anki reflect AnKiLocalUniforms
+#pragma anki struct AnKiLocalUniforms
+#pragma anki member F32 m_animationPeriod if ANIMATED_TEXTURE is 1
+#pragma anki member Vec4 m_colorScale
+#pragma anki member Vec4 m_colorBias
+#pragma anki member U32 m_diffuseMap
+#pragma anki struct end
+
+[[vk::binding(kMaterialBindingGlobalUniforms, kMaterialSetGlobal)]] ConstantBuffer<MaterialGlobalUniforms>
+	u_ankiGlobals;
+[[vk::binding(kMaterialBindingTrilinearRepeatSampler, kMaterialSetGlobal)]] SamplerState u_globalSampler;
+[[vk::binding(kMaterialBindingLocalUniforms, kMaterialSetLocal)]] StructuredBuffer<U32> u_localUniforms;
+[[vk::binding(kMaterialBindingRenderableGpuView, kMaterialSetLocal)]] StructuredBuffer<RenderableGpuView>
+	u_renderableGpuViews;
+
+#pragma anki start vert
+
+// Vertex shader. Expands a particle to a quad corner that is oriented using the camera transform
+VertOut main(VertIn input)
+{
+	VertOut output;
+
+	// Bit 0 of the vertex ID gives U and the rest of the bits give V
+	output.m_uv = Vec2(input.m_vertexId & 1, input.m_vertexId >> 1);
+	output.m_alpha = input.m_alpha;
+
+	// Center the corner around zero, scale it, orient it with the camera and move it to the particle's position
+	const Vec4 cornerOffset = Vec4((output.m_uv - 0.5) * input.m_scale, 0.0, 0.0);
+	output.m_worldPos = mul(u_ankiGlobals.m_cameraTransform, cornerOffset) + input.m_position;
+
+	// Apply the renderable's world transform and then project
+	const Vec4 transformedPos =
+		Vec4(transform(u_renderableGpuViews[0].m_worldTransform, Vec4(output.m_worldPos, 1.0)), 1.0);
+	output.m_svPosition = mul(u_ankiGlobals.m_viewProjectionMatrix, transformedPos);
+
+	return output;
+}
+#pragma anki end
+
+#pragma anki start frag
+
+// Fragment shader. Samples the (optionally animated) diffuse map, optionally lights it and then applies the color
+// scale and bias of the material. The input is what the vertex shader outputs
+FragOut main(VertOut input)
+{
+	FragOut output;
+	const AnKiLocalUniforms localUniforms = loadAnKiLocalUniforms(u_localUniforms, 0u);
+
+#if ANIMATED_TEXTURE == 1
+	RVec4 texCol = readAnimatedTextureRgba(u_bindlessTextures2dArrayF32[localUniforms.m_diffuseMap], u_globalSampler,
+										   localUniforms.m_animationPeriod, input.m_uv, u_clusteredShading.m_time);
+#else
+	RVec4 texCol = u_bindlessTextures2dF32[localUniforms.m_diffuseMap].Sample(u_globalSampler, input.m_uv);
+#endif
+
+#if LIGHT
+	texCol.rgb = computeLightColorLow(texCol.rgb, input.m_worldPos, input.m_svPosition);
+#endif
+
+	// The per-particle alpha modulates the alpha of the color scale
+	RVec4 colScale = localUniforms.m_colorScale;
+	colScale.a *= input.m_alpha;
+	particleAlpha(texCol, colScale, localUniforms.m_colorBias, output);
+
+	return output;
+}
+#pragma anki end

+ 2 - 2
AnKi/Shaders/Functions.hlsl

@@ -162,7 +162,7 @@ F32 _calcDepthWeight(Texture2D depthLow, SamplerState nearestAnyClamp, Vec2 uv,
 {
 	const F32 d = depthLow.SampleLevel(nearestAnyClamp, uv, 0.0).r;
 	const F32 linearD = linearizeDepthOptimal(d, linearDepthCf.x, linearDepthCf.y);
-	return 1.0 / (kEpsilonf + abs(ref - linearD));
+	return 1.0 / (kEpsilonF32 + abs(ref - linearD));
 }
 
 Vec4 _sampleAndWeight(Texture2D depthLow, Texture2D colorLow, SamplerState linearAnyClamp, SamplerState nearestAnyClamp,
@@ -437,7 +437,7 @@ Mat3 rotationFromDirection(Vec3 zAxis)
 {
 #if 0
 	const Vec3 z = zAxis;
-	const Bool alignsWithXBasis = abs(z.x - 1.0) <= kEpsilonf; // aka z == Vec3(1.0, 0.0, 0.0)
+	const Bool alignsWithXBasis = abs(z.x - 1.0) <= kEpsilonF32; // aka z == Vec3(1.0, 0.0, 0.0)
 	Vec3 x = (alignsWithXBasis) ? Vec3(0.0, 0.0, 1.0) : Vec3(1.0, 0.0, 0.0);
 	const Vec3 y = normalize(cross(x, z));
 	x = normalize(cross(z, y));

+ 25 - 25
AnKi/Shaders/Include/ClusteredShadingTypes.h

@@ -35,17 +35,17 @@ constexpr U32 kMaxVisibleReflectionProbes = 16u;
 constexpr U32 kMaxVisibleGlobalIlluminationProbes = 8u;
 
 // Other consts
-constexpr ANKI_RP F32 kClusterObjectFrustumNearPlane = 0.1f / 4.0f; ///< Near plane of all clusterer object frustums.
-constexpr ANKI_RP F32 kSubsurfaceMin = 0.01f;
+constexpr RF32 kClusterObjectFrustumNearPlane = 0.1f / 4.0f; ///< Near plane of all clusterer object frustums.
+constexpr RF32 kSubsurfaceMin = 0.01f;
 
 /// Point light.
 struct PointLight
 {
 	Vec3 m_position; ///< Position in world space.
-	ANKI_RP F32 m_radius; ///< Radius
+	RF32 m_radius; ///< Radius
 
-	ANKI_RP Vec3 m_diffuseColor;
-	ANKI_RP F32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	RVec3 m_diffuseColor;
+	RF32 m_squareRadiusOverOne; ///< 1/(radius^2).
 
 	Vec2 m_padding0;
 	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
@@ -64,15 +64,15 @@ struct SpotLight
 
 	Vec4 m_edgePoints[4u]; ///< Edge points in world space.
 
-	ANKI_RP Vec3 m_diffuseColor;
-	ANKI_RP F32 m_radius; ///< Max distance.
+	RVec3 m_diffuseColor;
+	RF32 m_radius; ///< Max distance.
 
-	ANKI_RP Vec3 m_direction; ///< Light direction.
-	ANKI_RP F32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	RVec3 m_direction; ///< Light direction.
+	RF32 m_squareRadiusOverOne; ///< 1/(radius^2).
 
 	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
-	ANKI_RP F32 m_outerCos;
-	ANKI_RP F32 m_innerCos;
+	RF32 m_outerCos;
+	RF32 m_innerCos;
 	U32 m_padding1;
 
 	Mat4 m_textureMatrix;
@@ -85,15 +85,15 @@ struct SpotLightBinning
 {
 	Vec4 m_edgePoints[5u]; ///< Edge points in world space. Point 0 is the eye pos.
 
-	ANKI_RP Vec3 m_diffuseColor;
-	ANKI_RP F32 m_radius; ///< Max distance.
+	RVec3 m_diffuseColor;
+	RF32 m_radius; ///< Max distance.
 
-	ANKI_RP Vec3 m_direction; ///< Light direction.
-	ANKI_RP F32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	RVec3 m_direction; ///< Light direction.
+	RF32 m_squareRadiusOverOne; ///< 1/(radius^2).
 
 	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
-	ANKI_RP F32 m_outerCos;
-	ANKI_RP F32 m_innerCos;
+	RF32 m_outerCos;
+	RF32 m_innerCos;
 	U32 m_padding0;
 
 	Mat4 m_textureMatrix;
@@ -105,10 +105,10 @@ ANKI_SHADER_STATIC_ASSERT(sizeof(SpotLight) == sizeof(SpotLightBinning));
 /// Directional light (sun).
 struct DirectionalLight
 {
-	ANKI_RP Vec3 m_diffuseColor;
+	RVec3 m_diffuseColor;
 	U32 m_shadowCascadeCount; ///< If it's zero then it doesn't cast shadow.
 
-	ANKI_RP Vec3 m_direction;
+	RVec3 m_direction;
 	U32 m_active;
 
 	Vec4 m_shadowCascadeDistances;
@@ -144,7 +144,7 @@ struct Decal
 
 	Vec4 m_normRoughnessUv;
 
-	ANKI_RP Vec4 m_blendFactors;
+	RVec4 m_blendFactors;
 
 	Mat4 m_textureMatrix;
 
@@ -163,7 +163,7 @@ struct FogDensityVolume
 	U32 m_isBox;
 
 	Vec3 m_aabbMaxOrSphereRadiusSquared;
-	ANKI_RP F32 m_density;
+	RF32 m_density;
 };
 constexpr U32 kSizeof_FogDensityVolume = 2u * sizeof(Vec4);
 ANKI_SHADER_STATIC_ASSERT(sizeof(FogDensityVolume) == kSizeof_FogDensityVolume);
@@ -180,7 +180,7 @@ struct GlobalIlluminationProbe
 	U32 m_textureIndex; ///< Index to the array of volume textures.
 	F32 m_halfTexelSizeU; ///< (1.0 / textureSize(texArr[textureIndex]).x) / 2.0
 	/// Used to calculate a factor that is zero when fragPos is close to AABB bounds and 1.0 at fadeDistance and less.
-	ANKI_RP F32 m_fadeDistance;
+	RF32 m_fadeDistance;
 	F32 m_padding2;
 };
 constexpr U32 kSizeof_GlobalIlluminationProbe = 3u * sizeof(Vec4);
@@ -256,7 +256,7 @@ constexpr U32 kSizeof_ClusteredShadingUniforms =
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClusteredShadingUniforms) == kSizeof_ClusteredShadingUniforms);
 
 // Define the type of some cluster object masks
-#if !defined(__cplusplus)
+#if ANKI_GLSL
 #	if ANKI_CLUSTERED_SHADING_USE_64BIT
 #		define ExtendedClusterObjectMask U64
 #	else
@@ -264,9 +264,9 @@ ANKI_SHADER_STATIC_ASSERT(sizeof(ClusteredShadingUniforms) == kSizeof_ClusteredS
 #	endif
 #else
 #	if ANKI_CLUSTERED_SHADING_USE_64BIT
-using ExtendedClusterObjectMask = U64;
+typedef U64 ExtendedClusterObjectMask;
 #	else
-using ExtendedClusterObjectMask = U32;
+typedef U32 ExtendedClusterObjectMask;
 #	endif
 #endif
 

+ 23 - 3
AnKi/Shaders/Include/Common.h

@@ -25,6 +25,12 @@ using Address = U64;
 using ScalarVec4 = Array<F32, 4>;
 using ScalarMat3x4 = Array<F32, 12>;
 using ScalarMat4 = Array<F32, 16>;
+
+using RF32 = F32;
+using RVec2 = Vec2;
+using RVec3 = Vec3;
+using RVec4 = Vec4;
+using RMat3 = Mat3;
 ANKI_END_NAMESPACE
 
 #	define ANKI_RP
@@ -199,11 +205,11 @@ typedef min16float4 RVec4;
 typedef min16float3x3 RMat3;
 #	endif
 
-constexpr F32 kEpsilonf = 0.000001f;
+constexpr F32 kEpsilonF32 = 0.000001f;
 #	if ANKI_SUPPORTS_16BIT_TYPES
-constexpr F16 kEpsilonhf = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
+constexpr F16 kEpsilonhF16 = (F16)0.0001f; // Divisions by this should be OK according to http://weitz.de/ieee
 #	endif
-constexpr RF32 kEpsilonRf = 0.0001f;
+constexpr RF32 kEpsilonRF32 = 0.0001f;
 
 constexpr U32 kMaxU32 = 0xFFFFFFFFu;
 constexpr F32 kMaxF32 = 3.402823e+38;
@@ -378,6 +384,20 @@ const uint kSizeof_mat4x3 = 48u;
 #		define Address UVec2
 #	endif
 
+#	if ANKI_FORCE_FULL_FP_PRECISION
+#		define RF32 F32
+#		define RVec2 Vec2
+#		define RVec3 Vec3
+#		define RVec4 Vec4
+#		define RMat3 Mat3
+#	else
+#		define RF32 mediump F32
+#		define RVec2 mediump Vec2
+#		define RVec3 mediump Vec3
+#		define RVec4 mediump Vec4
+#		define RMat3 mediump Mat3
+#	endif
+
 #	define _ANKI_CONCATENATE(a, b) a##b
 #	define ANKI_CONCATENATE(a, b) _ANKI_CONCATENATE(a, b)
 

+ 12 - 12
AnKi/Shaders/Include/MiscRendererTypes.h

@@ -35,12 +35,12 @@ struct IndirectDiffuseUniforms
 
 	Vec4 m_projectionMat;
 
-	ANKI_RP F32 m_radius; ///< In meters.
+	RF32 m_radius; ///< In meters.
 	U32 m_sampleCount;
-	ANKI_RP F32 m_sampleCountf;
-	ANKI_RP F32 m_ssaoBias;
+	RF32 m_sampleCountf;
+	RF32 m_ssaoBias;
 
-	ANKI_RP F32 m_ssaoStrength;
+	RF32 m_ssaoStrength;
 	F32 m_padding0;
 	F32 m_padding1;
 	F32 m_padding2;
@@ -63,7 +63,7 @@ struct IndirectDiffuseDenoiseUniforms
 struct LensFlareSprite
 {
 	Vec4 m_posScale; // xy: Position, zw: Scale
-	ANKI_RP Vec4 m_color;
+	RVec4 m_color;
 	Vec4 m_depthPad3;
 };
 
@@ -104,20 +104,20 @@ struct SsrUniforms
 // Vol fog
 struct VolumetricFogUniforms
 {
-	ANKI_RP Vec3 m_fogDiffuse;
-	ANKI_RP F32 m_fogScatteringCoeff;
+	RVec3 m_fogDiffuse;
+	RF32 m_fogScatteringCoeff;
 
-	ANKI_RP F32 m_fogAbsorptionCoeff;
-	ANKI_RP F32 m_near;
-	ANKI_RP F32 m_far;
+	RF32 m_fogAbsorptionCoeff;
+	RF32 m_near;
+	RF32 m_far;
 	F32 m_padding;
 };
 
 // Vol lighting
 struct VolumetricLightingUniforms
 {
-	ANKI_RP F32 m_densityAtMinHeight;
-	ANKI_RP F32 m_densityAtMaxHeight;
+	RF32 m_densityAtMinHeight;
+	RF32 m_densityAtMaxHeight;
 	F32 m_minHeight;
 	F32 m_oneOverMaxMinusMinHeight; // NOTE(review): presumably 1 / (maxHeight - minHeight) as the name says, this comment used to read "maxHeight / minHeight". Confirm
 };

+ 543 - 0
AnKi/Shaders/LightFunctions.hlsl

@@ -0,0 +1,543 @@
+// Copyright (C) 2009-2022, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// Contains functions for light calculations
+
+#pragma once
+
+#include <AnKi/Shaders/Functions.hlsl>
+#include <AnKi/Shaders/CollisionFunctions.hlsl>
+#include <AnKi/Shaders/PackFunctions.hlsl>
+#include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
+#include <AnKi/Shaders/Include/MiscRendererTypes.h>
+
+// Four Poisson-disk sample offsets, used by the 4-tap PCF loops in this file.
+constexpr Vec2 kPoissonDisk[4u] = {
+	Vec2(-0.94201624, -0.39906216),
+	Vec2(0.94558609, -0.76890725),
+	Vec2(-0.094184101, -0.92938870),
+	Vec2(0.34495938, 0.29387760)};
+
+// Multiplier applied to every kPoissonDisk offset before it is rotated and scaled by the texel size.
+constexpr RF32 kPcfScale = 2.0f;
+
+// Fresnel term, Unreal variant: F0 + (1 - F0) * 2^((-5.55473 * VoH - 6.98316) * VoH)
+// specular: The specular color aka F0
+Vec3 F_Unreal(Vec3 specular, F32 VoH)
+{
+	const F32 exponent = (-5.55473 * VoH - 6.98316) * VoH;
+	const F32 fresnelWeight = pow(2.0, exponent);
+	return specular + (1.0 - specular) * fresnelWeight;
+}
+
+// Fresnel Schlick: "An Inexpensive BRDF Model for Physically-Based Rendering"
+// It has lower VGPRs than F_Unreal
+// f0: The specular color at normal incidence
+RVec3 F_Schlick(RVec3 f0, RF32 VoH)
+{
+	const RF32 oneMinusVoH = 1.0 - VoH;
+	const RF32 weight = pow(oneMinusVoH, 5.0);
+
+	// Same as f0 + (1 - f0) * weight
+	return weight + f0 * (1.0 - weight);
+}
+
+// D(n,h) aka NDF: GGX Trowbridge-Reitz
+// h and worldNormal are only consumed by the (currently disabled) cross-product path. The result is saturated.
+RF32 D_GGX(RF32 roughness, RF32 NoH, RVec3 h, RVec3 worldNormal)
+{
+#if 0 && ANKI_PLATFORM_MOBILE
+	// Disabled alternative: get 1 - NoH^2 from the squared length of cross(n, h)
+	const RVec3 normalCrossH = cross(worldNormal, h);
+	const RF32 oneMinusNoHSquared = dot(normalCrossH, normalCrossH);
+#else
+	ANKI_MAYBE_UNUSED(h);
+	ANKI_MAYBE_UNUSED(worldNormal);
+	const RF32 oneMinusNoHSquared = 1.0 - NoH * NoH;
+#endif
+
+	const RF32 alpha = roughness * roughness;
+	const RF32 scaledNoH = NoH * alpha;
+	const RF32 ratio = alpha / (oneMinusNoHSquared + scaledNoH * scaledNoH);
+	const RF32 ndf = ratio * ratio * (1.0 / kPi);
+	return saturate(ndf);
+}
+
+// Visibility term: Geometric shadowing divided by BRDF denominator
+// Uses k = roughness^2 / 2 for both the view and the light side.
+RF32 V_Schlick(RF32 roughness, RF32 NoV, RF32 NoL)
+{
+	const RF32 k = (roughness * roughness) * 0.5;
+	const RF32 viewTerm = NoV * (1.0 - k) + k;
+	const RF32 lightTerm = NoL * (1.0 - k) + k;
+	const RF32 denominator = viewTerm * lightTerm;
+	return 0.25 / denominator;
+}
+
+// Visibility term: Hammon 2017, "PBR Diffuse Lighting for GGX+Smith Microsurfaces"
+// The result is saturated before it is returned.
+RF32 V_SmithGGXCorrelatedFast(RF32 roughness, RF32 NoV, RF32 NoL)
+{
+	const RF32 alpha = roughness * roughness;
+	const RF32 denominator = lerp(2.0 * NoL * NoV, NoL + NoV, alpha);
+	const RF32 visibility = 0.5 / denominator;
+	return saturate(visibility);
+}
+
+// Lambertian diffuse term: the constant 1 / pi
+RF32 Fd_Lambert()
+{
+	const RF32 oneOverPi = 1.0 / kPi;
+	return oneOverPi;
+}
+
+// Diffuse BRDF lobe: the diffuse color scaled by the Lambert term
+RVec3 diffuseLobe(RVec3 diffuse)
+{
+	const RF32 lambert = Fd_Lambert();
+	return diffuse * lambert;
+}
+
+// Performs BRDF specular lighting: F_Schlick * (V_SmithGGXCorrelatedFast * D_GGX)
+RVec3 specularIsotropicLobe(GbufferInfo gbuffer, Vec3 viewDir, Vec3 frag2Light)
+{
+	// Half vector between the light and the view directions
+	const RVec3 halfVec = normalize(frag2Light + viewDir);
+
+	// Clamped cosines. Note that NoV is floored at 0.05 instead of 0.0
+	const RF32 NoL = max(0.0, dot(gbuffer.m_normal, frag2Light));
+	const RF32 VoH = max(0.0, dot(viewDir, halfVec));
+	const RF32 NoH = max(0.0, dot(gbuffer.m_normal, halfVec));
+	const RF32 NoV = max(0.05, dot(gbuffer.m_normal, viewDir));
+
+	// Fresnel, normal distribution and visibility
+	const RVec3 fresnel = F_Schlick(gbuffer.m_f0, VoH);
+	const RF32 distribution = D_GGX(gbuffer.m_roughness, NoH, halfVec, gbuffer.m_normal);
+	const RF32 visibility = V_SmithGGXCorrelatedFast(gbuffer.m_roughness, NoV, NoL);
+
+	return fresnel * (visibility * distribution);
+}
+
+// Reads the integration LUT at (roughness, NoV), mip 0, and blends its x and y channels using F0 as the weight.
+Vec3 specularDFG(Vec3 F0, F32 roughness, Texture2D integrationLut, SamplerState integrationLutSampler, F32 NoV)
+{
+	const Vec2 lutUv = Vec2(roughness, NoV);
+	const Vec2 lutValue = integrationLut.SampleLevel(integrationLutSampler, lutUv, 0.0).xy;
+	return lerp(lutValue.xxx, lutValue.yyy, F0);
+}
+
+// Spot light cone factor: smoothstep between outerCos and innerCos of the negated dot(l, spotDir)
+RF32 computeSpotFactor(RVec3 l, RF32 outerCos, RF32 innerCos, RVec3 spotDir)
+{
+	const RF32 cosAngle = -dot(l, spotDir);
+	return smoothstep(outerCos, innerCos, cosAngle);
+}
+
+// Shadow factor of a spot light using 4 PCF taps. The Poisson-disk taps are rotated by the angle
+// randFactor * 2 * pi and the 4 comparison results are averaged.
+RF32 computeShadowFactorSpotLightPcf(SpotLight light, Vec3 worldPos, Texture2D shadowTex,
+									 SamplerComparisonState shadowMapSampler, RF32 randFactor)
+{
+	// Project the world position with the light's texture matrix
+	const Vec4 projected = mul(light.m_textureMatrix, Vec4(worldPos, 1.0));
+	const Vec3 uvDepth = projected.xyz / projected.w;
+
+	// Texel size of mip 0
+	F32 mipCount;
+	Vec2 shadowTexSize;
+	shadowTex.GetDimensions(0, shadowTexSize.x, shadowTexSize.y, mipCount);
+	const Vec2 texelSize = 1.0 / shadowTexSize;
+
+	// Rotation applied to all the taps
+	const F32 sinTheta = sin(randFactor * 2.0 * kPi);
+	const F32 cosTheta = cos(randFactor * 2.0 * kPi);
+
+	RF32 accumulated = 0.0;
+	[[unroll]] for(U32 tap = 0u; tap < 4u; ++tap)
+	{
+		const Vec2 offset = kPoissonDisk[tap] * kPcfScale;
+
+		// 2D rotation of the offset
+		const F32 rotatedX = offset.x * cosTheta - offset.y * sinTheta;
+		const F32 rotatedY = offset.y * cosTheta + offset.x * sinTheta;
+
+		// The offset is expressed in texels
+		const Vec2 tapUv = uvDepth.xy + Vec2(rotatedX, rotatedY) * texelSize;
+
+		accumulated += shadowTex.SampleCmpLevelZero(shadowMapSampler, tapUv, uvDepth.z);
+	}
+
+	accumulated /= 4.0;
+	return accumulated;
+}
+
+// Shadow factor of a spot light using a single comparison tap
+RF32 computeShadowFactorSpotLight(SpotLight light, Vec3 worldPos, Texture2D shadowTex,
+								  SamplerComparisonState shadowMapSampler)
+{
+	// Project the world position with the light's texture matrix
+	const Vec4 projected = mul(light.m_textureMatrix, Vec4(worldPos, 1.0));
+	const Vec3 uvDepth = projected.xyz / projected.w;
+
+	const RF32 shadow = shadowTex.SampleCmpLevelZero(shadowMapSampler, uvDepth.xy, uvDepth.z);
+	return shadow;
+}
+
+// Compute the shadow factor of point (omni) lights.
+// pcf selects between a single comparison tap and the average of 4 rotated Poisson-disk taps. randFactor is only
+// read when pcf is true.
+RF32 computeShadowFactorPointLightGeneric(PointLight light, Vec3 frag2Light, Texture2D shadowMap,
+										  SamplerComparisonState shadowMapSampler, RF32 randFactor, Bool pcf)
+{
+	const Vec3 light2Frag = -frag2Light;
+	const Vec3 absLight2Frag = abs(light2Frag);
+	const F32 majorAxisDist = max(absLight2Frag.x, max(absLight2Frag.y, absLight2Frag.z));
+
+	// 1) Project the distance using the light's near and far planes to get the reference depth
+	const F32 near = kClusterObjectFrustumNearPlane;
+	const F32 far = light.m_radius;
+	const F32 zVSpace = -majorAxisDist;
+	const F32 w = -zVSpace;
+	const F32 refDepth = ((far * zVSpace + far * near) / (near - far)) / w;
+
+	// 2) Find where to read in the shadow atlas: cube direction -> face UV -> atlas tile UV
+	U32 faceIdx;
+	const Vec2 faceUv = convertCubeUvsu(light2Frag, faceIdx);
+	const Vec2 tileOffset = light.m_shadowAtlasTileOffsets[faceIdx].xy;
+	const Vec2 tileScale = Vec2(light.m_shadowAtlasTileScale, light.m_shadowAtlasTileScale);
+	const Vec2 atlasUv = faceUv * tileScale + tileOffset;
+
+	// 3) Single tap path
+	if(!pcf)
+	{
+		const RF32 singleTap = shadowMap.SampleCmpLevelZero(shadowMapSampler, atlasUv, refDepth);
+		return singleTap;
+	}
+
+	// 4) PCF path. Start with the texel size of mip 0
+	F32 mipCount;
+	Vec2 texelSize;
+	shadowMap.GetDimensions(0, texelSize.x, texelSize.y, mipCount);
+	texelSize = 1.0 / texelSize;
+
+	// Rotation applied to all the taps
+	const F32 sinTheta = sin(randFactor * 2.0 * kPi);
+	const F32 cosTheta = cos(randFactor * 2.0 * kPi);
+
+	RF32 accumulated = 0.0;
+	[[unroll]] for(U32 tap = 0u; tap < 4u; ++tap)
+	{
+		const Vec2 offset = kPoissonDisk[tap] * kPcfScale;
+
+		// 2D rotation of the offset
+		const F32 rotatedX = offset.x * cosTheta - offset.y * sinTheta;
+		const F32 rotatedY = offset.y * cosTheta + offset.x * sinTheta;
+
+		// The offset is expressed in texels
+		const Vec2 tapUv = atlasUv + Vec2(rotatedX, rotatedY) * texelSize;
+
+		accumulated += shadowMap.SampleCmpLevelZero(shadowMapSampler, tapUv, refDepth);
+	}
+
+	accumulated /= 4.0;
+	return accumulated;
+}
+
+// Point light shadow factor with a single tap. Forwards to computeShadowFactorPointLightGeneric with PCF disabled.
+RF32 computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, Texture2D shadowMap,
+								   SamplerComparisonState shadowMapSampler)
+{
+	// The random factor is not read by the generic function when PCF is off
+	const RF32 ignoredRandFactor = -1.0;
+	const Bool usePcf = false;
+	return computeShadowFactorPointLightGeneric(light, frag2Light, shadowMap, shadowMapSampler, ignoredRandFactor,
+												usePcf);
+}
+
+// Point light shadow factor with PCF. Forwards to computeShadowFactorPointLightGeneric with PCF enabled.
+RF32 computeShadowFactorPointLightPcf(PointLight light, Vec3 frag2Light, Texture2D shadowMap,
+									  SamplerComparisonState shadowMapSampler, RF32 randFactor)
+{
+	const Bool usePcf = true;
+	return computeShadowFactorPointLightGeneric(light, frag2Light, shadowMap, shadowMapSampler, randFactor, usePcf);
+}
+
+// Compute the shadow factor of a directional light
+// cascadeIdx selects one of the light's texture matrices. pcf selects between a single comparison tap and the
+// average of 4 rotated Poisson-disk taps. randFactor is only read when pcf is true.
+RF32 computeShadowFactorDirLightGeneric(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap,
+										SamplerComparisonState shadowMapSampler, RF32 randFactor, Bool pcf)
+{
+	// NOTE(review): this macro is never #undef'ed so it stays defined for everything that follows this function
+#define ANKI_FAST_CASCADES_WORKAROUND 1 // Doesn't make sense but it's super fast
+
+#if ANKI_FAST_CASCADES_WORKAROUND
+	// Assumes kMaxShadowCascades is 4. Any index other than 0, 1 or 2 ends up using matrix 3
+	Mat4 cascadeMat;
+	switch(cascadeIdx)
+	{
+	case 0:
+		cascadeMat = light.m_textureMatrices[0];
+		break;
+	case 1:
+		cascadeMat = light.m_textureMatrices[1];
+		break;
+	case 2:
+		cascadeMat = light.m_textureMatrices[2];
+		break;
+	default:
+		cascadeMat = light.m_textureMatrices[3];
+		break;
+	}
+#else
+	const Mat4 cascadeMat = light.m_textureMatrices[cascadeIdx];
+#endif
+
+	// Project the world position with the cascade's texture matrix
+	const Vec4 projected = mul(cascadeMat, Vec4(worldPos, 1.0));
+	const Vec3 uvDepth = projected.xyz / projected.w;
+
+	// Single tap path
+	if(!pcf)
+	{
+		const RF32 singleTap = shadowMap.SampleCmpLevelZero(shadowMapSampler, uvDepth.xy, uvDepth.z);
+		return singleTap;
+	}
+
+	// PCF path. Start with the texel size of mip 0
+	F32 mipCount;
+	Vec2 texelSize;
+	shadowMap.GetDimensions(0, texelSize.x, texelSize.y, mipCount);
+	texelSize = 1.0 / texelSize;
+
+	// Rotation applied to all the taps
+	const F32 sinTheta = sin(randFactor * 2.0 * kPi);
+	const F32 cosTheta = cos(randFactor * 2.0 * kPi);
+
+	RF32 accumulated = 0.0;
+	[[unroll]] for(U32 tap = 0u; tap < 4u; ++tap)
+	{
+		const Vec2 offset = kPoissonDisk[tap] * kPcfScale;
+
+		// 2D rotation of the offset
+		const F32 rotatedX = offset.x * cosTheta - offset.y * sinTheta;
+		const F32 rotatedY = offset.y * cosTheta + offset.x * sinTheta;
+
+		// The offset is expressed in texels
+		const Vec2 tapUv = uvDepth.xy + Vec2(rotatedX, rotatedY) * texelSize;
+
+		accumulated += shadowMap.SampleCmpLevelZero(shadowMapSampler, tapUv, uvDepth.z);
+	}
+
+	accumulated /= 4.0;
+	return accumulated;
+}
+
+// Directional light shadow factor with a single tap. Forwards to computeShadowFactorDirLightGeneric with PCF
+// disabled.
+RF32 computeShadowFactorDirLight(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap,
+								 SamplerComparisonState shadowMapSampler)
+{
+	// The random factor is not read by the generic function when PCF is off
+	const RF32 ignoredRandFactor = -1.0;
+	const Bool usePcf = false;
+	return computeShadowFactorDirLightGeneric(light, cascadeIdx, worldPos, shadowMap, shadowMapSampler,
+											  ignoredRandFactor, usePcf);
+}
+
+// Directional light shadow factor with PCF. Forwards to computeShadowFactorDirLightGeneric with PCF enabled.
+RF32 computeShadowFactorDirLightPcf(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, Texture2D shadowMap,
+									SamplerComparisonState shadowMapSampler, F32 randFactor)
+{
+	const Bool usePcf = true;
+	return computeShadowFactorDirLightGeneric(light, cascadeIdx, worldPos, shadowMap, shadowMapSampler, randFactor,
+											  usePcf);
+}
+
+// Compute the shadow factor of a directional light given the light's projection matrix directly. Single tap.
+RF32 computeShadowFactorDirLight(Mat4 lightProjectionMat, Vec3 worldPos, Texture2D shadowMap,
+								 SamplerComparisonState shadowMapSampler)
+{
+	// Project the world position and do the perspective divide
+	const Vec4 projected = mul(lightProjectionMat, Vec4(worldPos, 1.0));
+	const Vec3 uvDepth = projected.xyz / projected.w;
+
+	const RF32 shadow = shadowMap.SampleCmpLevelZero(shadowMapSampler, uvDepth.xy, uvDepth.z);
+	return shadow;
+}
+
+// Compute the cubemap texture lookup vector given the reflection vector (r) the radius squared of the probe (R2) and
+// the frag pos in sphere space (f)
+Vec3 computeCubemapVecAccurate(Vec3 r, F32 R2, Vec3 f)
+{
+	// Everything here is in the sphere's space so the center of the sphere is the origin.
+
+	// p is f with its component along r removed. This is the projection of the sphere's center on the ray "f,r"
+	const Vec3 p = f - r * dot(f, r);
+
+	// The ray hits the sphere at x = p + T * r. p is perpendicular to r so the pythagorean theorem gives
+	// R^2 = dot(p, p) + dot(T * r, T * r). Assuming r is normalized, T = sqrt(R^2 - dot(p, p)).
+	// dot(p, p) is capped to R^2 to keep the value under the sqrt non-negative
+	const F32 pp = min(dot(p, p), R2);
+	const F32 T = sqrt(R2 - pp);
+
+	return p + T * r;
+}
+
+// Cheap version of computeCubemapVecAccurate: returns the reflection vector as is. R2 and f are ignored.
+Vec3 computeCubemapVecCheap(Vec3 r, F32 R2, Vec3 f)
+{
+	ANKI_MAYBE_UNUSED(f);
+	ANKI_MAYBE_UNUSED(R2);
+
+	const Vec3 lookupVec = r;
+	return lookupVec;
+}
+
+// Light attenuation: max(0, 1 - |frag2Light|^2 * squareRadiusOverOne)^2
+RF32 computeAttenuationFactor(RF32 squareRadiusOverOne, RVec3 frag2Light)
+{
+	// dot() of a vector with itself is its squared length
+	const RF32 distSquared = dot(frag2Light, frag2Light);
+	const RF32 falloff = max(0.0, 1.0 - distSquared * squareRadiusOverOne);
+	return falloff * falloff;
+}
+
+// Given the probe properties trace a ray inside the probe and find the cube tex coordinates to sample
+Vec3 intersectProbe(Vec3 fragPos, // Ray origin
+					Vec3 rayDir, // Ray direction
+					Vec3 probeAabbMin, Vec3 probeAabbMax,
+					Vec3 probeOrigin // Cubemap origin
+)
+{
+	// Distance along the ray to the AABB, then the point where the ray hits it
+	const F32 hitDistance = testRayAabbInside(fragPos, rayDir, probeAabbMin, probeAabbMax);
+	const Vec3 hitPoint = fragPos + hitDistance * rayDir;
+
+	// The cubemap vector goes from the cubemap origin to the hit point
+	const Vec3 cubemapVec = hitPoint - probeOrigin;
+	return cubemapVec;
+}
+
+// Compute a weight (factor) of fragPos against some probe's bounds. The weight is 0.0 when fragPos is on or outside
+// the AABB bounds, grows linearly with the distance from the closest AABB face and reaches 1.0 when fragPos is
+// fadeDistance or more inside the AABB.
+F32 computeProbeBlendWeight(Vec3 fragPos, // Doesn't need to be inside the AABB
+							Vec3 probeAabbMin, Vec3 probeAabbMax, F32 fadeDistance)
+{
+	// Per axis distance of fragPos to the closest of the 2 faces. Negative when fragPos is outside on that axis
+	const Vec3 closestFaceDist = min(fragPos - probeAabbMin, probeAabbMax - fragPos);
+
+	// Smallest of the 3 axes
+	const F32 closestDist = min(closestFaceDist.x, min(closestFaceDist.y, closestFaceDist.z));
+
+	// saturate() takes care of the negative distances
+	const F32 weight = closestDist / fadeDistance;
+	return saturate(weight);
+}
+
+// Given the value of the 6 faces of the dice and a normal, sample the correct weighted value.
+// https://www.shadertoy.com/view/XtcBDB
+RVec3 sampleAmbientDice(RVec3 posx, RVec3 negx, RVec3 posy, RVec3 negy, RVec3 posz, RVec3 negz, RVec3 normal)
+{
+	// Per axis: how much the axis contributes (weights) and where between the 2 opposite faces to sample (blend)
+	const RVec3 weights = abs(normal);
+	const RVec3 blend = ndcToUv(normal);
+
+	const RVec3 xContribution = lerp(negx, posx, blend.x) * weights.x;
+	const RVec3 yContribution = lerp(negy, posy, blend.y) * weights.y;
+	const RVec3 zContribution = lerp(negz, posz, blend.z) * weights.z;
+
+	RVec3 color = xContribution;
+	color += yContribution;
+	color += zContribution;
+
+	// Normalize by the sum of the weights. The epsilon protects against a division by zero
+	const RF32 weightSum = weights.x + weights.y + weights.z + kEpsilonRF32;
+	return color / weightSum;
+}
+
+// Sample the irradiance term from the clipmap
+// Reads the probe's 3D texture once per dice direction (6 reads) and combines the results with sampleAmbientDice
+RVec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const GlobalIlluminationProbe probe,
+							   Texture3D<RVec4> textures[kMaxVisibleGlobalIlluminationProbes],
+							   SamplerState linearAnyClampSampler)
+{
+	// Position of worldPos relative to the probe's AABB, [0, 1] when inside
+	const Vec3 probeExtent = probe.m_aabbMax - probe.m_aabbMin;
+	Vec3 uvw = (worldPos - probe.m_aabbMin) / probeExtent;
+
+	// The U axis holds the 6 directions side by side so divide to land in the first one
+	uvw.x /= 6.0;
+
+	// Clamp it to avoid direction leaking
+	const F32 minU = probe.m_halfTexelSizeU;
+	const F32 maxU = (1.0 / 6.0) - probe.m_halfTexelSizeU;
+	uvw.x = clamp(uvw.x, minU, maxU);
+
+	// Read the irradiance of every direction
+	RVec3 dirIrradiance[6u];
+	[[unroll]] for(U32 dir = 0u; dir < 6u; ++dir)
+	{
+		// Move U to the slot of this direction
+		Vec3 dirUvw = uvw;
+		dirUvw.x += (1.0 / 6.0) * F32(dir);
+
+		dirIrradiance[dir] = textures[NonUniformResourceIndex(probe.m_textureIndex)]
+								 .SampleLevel(linearAnyClampSampler, dirUvw, 0.0)
+								 .rgb;
+	}
+
+	// Weight the 6 directions using the normal
+	return sampleAmbientDice(dirIrradiance[0], dirIrradiance[1], dirIrradiance[2], dirIrradiance[3], dirIrradiance[4],
+							 dirIrradiance[5], normal);
+}
+
+/// Sample a normal from the GGX distribution of visible normals. To play with it use https://www.shadertoy.com/view/sttSDf
+/// Follows http://jcgt.org/published/0007/04/01/paper.pdf by Eric Heitz. The "Section" comments refer to that paper
+/// Input v: view direction (the caller in this file first transforms it with the transposed rotationFromDirection(normal))
+/// Input alphaX, alphaY: roughness parameters (the caller in this file passes the same roughness to both)
+/// Input u1, u2: uniform random numbers
+/// Output: normal sampled with PDF D_Ve(nE) = G1(v) * max(0, dot(v, nE)) * D(nE) / v.z
+Vec3 sampleGgxVndf(Vec3 v, F32 alphaX, F32 alphaY, F32 u1, F32 u2)
+{
+	// Section 3.2: transforming the view direction to the hemisphere configuration
+	const Vec3 vH = normalize(Vec3(alphaX * v.x, alphaY * v.y, v.z));
+
+	// Section 4.1: orthonormal basis (with special case if cross product is zero)
+	const F32 lensq = vH.x * vH.x + vH.y * vH.y; // Squared length of the xy part of vH
+	const Vec3 tangent1 = (lensq > 0.0) ? Vec3(-vH.y, vH.x, 0) / sqrt(lensq) : Vec3(1.0, 0.0, 0.0);
+	const Vec3 tangent2 = cross(vH, tangent1);
+
+	// Section 4.2: parameterization of the projected area
+	const F32 r = sqrt(u1);
+	const F32 phi = 2.0 * kPi * u2;
+	const F32 t1 = r * cos(phi);
+	F32 t2 = r * sin(phi);
+	const F32 s = 0.5 * (1.0 + vH.z);
+	t2 = (1.0 - s) * sqrt(1.0 - t1 * t1) + s * t2;
+
+	// Section 4.3: reprojection onto hemisphere. The max() keeps the value under the sqrt non-negative
+	const Vec3 nH = t1 * tangent1 + t2 * tangent2 + sqrt(max(0.0, 1.0 - t1 * t1 - t2 * t2)) * vH;
+
+	// Section 3.4: transforming the normal back to the ellipsoid configuration. The z is clamped to be non-negative
+	const Vec3 nE = normalize(Vec3(alphaX * nH.x, alphaY * nH.y, max(0.0, nH.z)));
+
+	return nE;
+}
+
+/// Calculate the reflection vector based on roughness.
+/// A normal is sampled with sampleGgxVndf (uniformRandom supplies the 2 random numbers) in the space built by
+/// rotationFromDirection(normal), the view direction is reflected around it and the result is transformed back.
+Vec3 sampleReflectionVector(Vec3 viewDir, Vec3 normal, F32 roughness, Vec2 uniformRandom)
+{
+	// Bring the view direction to the space of the normal
+	const Mat3 tbn = rotationFromDirection(normal);
+	const Vec3 viewDirTbn = mul(transpose(tbn), viewDir);
+
+	const Bool perfectReflection = false; // For debugging
+	Vec3 sampledNormalTbn;
+	if(perfectReflection)
+	{
+		sampledNormalTbn = Vec3(0.0, 0.0, 1.0);
+	}
+	else
+	{
+		sampledNormalTbn = sampleGgxVndf(viewDirTbn, roughness, roughness, uniformRandom.x, uniformRandom.y);
+	}
+
+	const Vec3 reflectedDirTbn = reflect(-viewDirTbn, sampledNormalTbn);
+
+	// Transform the reflected direction back to the initial space.
+	const Vec3 reflectedDir = mul(tbn, reflectedDirTbn);
+	return reflectedDir;
+}
+
+/// Get the index of the cascade given the distance from zero.
+/// It's the index of the first of the 3 first cascade distances that is greater than distance, or 3 if there is
+/// none. The result never exceeds shadowCascadeCount - 1.
+U32 computeShadowCascadeIndex(F32 distance, Vec4 cascadeDistances, U32 shadowCascadeCount)
+{
+	const Bool insideCascade0 = distance < cascadeDistances[0u];
+	const Bool insideCascade1 = distance < cascadeDistances[1u];
+	const Bool insideCascade2 = distance < cascadeDistances[2u];
+
+	// The first match wins
+	const U32 cascade = (insideCascade0) ? 0u : ((insideCascade1) ? 1u : ((insideCascade2) ? 2u : 3u));
+
+	const U32 lastCascade = shadowCascadeCount - 1u;
+	return min(lastCascade, cascade);
+}
+
+/// Bring the indices of the closest cascades and a factor to blend them. To visualize what's going on go to:
+/// https://www.desmos.com/calculator/g1ibye6ebg
+/// Returns (cascade, nextCascade) where cascade comes from computeShadowCascadeIndex and nextCascade is the one after
+/// it, limited to shadowCascadeCount - 1.
+/// Output factor: the position of distance between the near and far bounds of the cascade, clamped to [0, 1] and
+/// raised to the 16th power.
+UVec2 computeShadowCascadeIndex2(F32 distance, Vec4 cascadeDistances, U32 shadowCascadeCount, out RF32 factor)
+{
+	const U32 cascade = computeShadowCascadeIndex(distance, cascadeDistances, shadowCascadeCount);
+	const U32 nextCascade = min(cascade + 1u, shadowCascadeCount - 1u);
+
+	// The first cascade starts at zero, the rest start where the previous one ends
+	const F32 minDist = (cascade == 0u) ? 0.0f : cascadeDistances[cascade - 1u];
+	const F32 maxDist = cascadeDistances[cascade];
+
+	// Clamp before pow(). Without it a distance past maxDist (possible when the cascade index got limited by
+	// shadowCascadeCount or when distance is beyond the last cascade) gives a base > 1 that can overflow when raised
+	// to the 16th power in reduced precision, and a negative distance gives a negative base for which pow() is NaN.
+	// The max() protects against a division by zero
+	factor = saturate((distance - minDist) / max(kEpsilonF32, maxDist - minDist));
+	factor = pow(factor, 16.0f);
+
+	return UVec2(cascade, nextCascade);
+}

+ 2 - 2
AnKi/Shaders/TonemappingFunctions.hlsl

@@ -16,7 +16,7 @@ T log10(T x)
 
 RVec3 computeLuminance(RVec3 color)
 {
-	return max(dot(RVec3(0.30, 0.59, 0.11), color), kEpsilonRf);
+	return max(dot(RVec3(0.30, 0.59, 0.11), color), kEpsilonRF32);
 }
 
 RF32 computeExposure(RF32 avgLum, RF32 threshold)
@@ -79,7 +79,7 @@ RVec3 tonemap(RVec3 color, RF32 exposure)
 RVec3 invertTonemap(RVec3 color, RF32 exposure)
 {
 	color = invertTonemapACESFilm(color);
-	color /= max(kEpsilonRf, exposure);
+	color /= max(kEpsilonRF32, exposure);
 	return color;
 }