Browse Source

Merge pull request #44 from godlikepanos/csm

Cascaded Shadowmaps v2
Panagiotis Christopoulos Charitos 7 years ago
parent
commit
18f06eef3f
100 changed files with 2719 additions and 974 deletions
  1. BIN
      samples/physics_playground/assets/physics_playground.blend
  2. 3 4
      samples/physics_playground/assets/scene.lua
  3. BIN
      samples/sponza/assets/metal_rod.ankimesh
  4. 8 29
      samples/sponza/assets/scene.lua
  5. BIN
      samples/sponza/assets/sponza_05.ankimesh
  6. BIN
      samples/sponza/assets/sponza_117.ankimesh
  7. BIN
      samples/sponza/assets/sponza_18.ankimesh
  8. BIN
      samples/sponza/assets/sponza_382.ankimesh
  9. 1 1
      shaders/ApplyIrradianceToReflection.glslp
  10. 7 6
      shaders/ClusteredShadingCommon.glsl
  11. 57 28
      shaders/ExponentialShadowmappingResolve.glslp
  12. 11 13
      shaders/ForwardShadingCommonFrag.glsl
  13. 37 13
      shaders/LightFunctions.glsl
  14. 41 20
      shaders/LightShading.glslp
  15. 69 39
      shaders/TraditionalDeferredShading.glslp
  16. 48 25
      shaders/VolumetricLightingAccumulation.glslp
  17. 50 15
      shaders/glsl_cpp_common/ClusteredShading.h
  18. 5 1
      shaders/glsl_cpp_common/Common.h
  19. 59 11
      shaders/glsl_cpp_common/TraditionalDeferredShading.h
  20. 13 0
      src/anki/collision/Aabb.h
  21. 1 1
      src/anki/collision/CompoundShape.cpp
  22. 1 1
      src/anki/collision/Plane.cpp
  23. 11 4
      src/anki/core/Config.cpp
  24. 2 2
      src/anki/gr/Common.h
  25. 1 1
      src/anki/gr/vulkan/Pipeline.h
  26. 3 3
      src/anki/gr/vulkan/ShaderProgramImpl.cpp
  27. 1 1
      src/anki/gr/vulkan/TextureImpl.cpp
  28. 3 3
      src/anki/input/Input.cpp
  29. 1 1
      src/anki/input/InputAndroid.cpp
  30. 11 0
      src/anki/math/Vec.h
  31. 0 54
      src/anki/physics/Common.h
  32. 2 1
      src/anki/physics/PhysicsBody.h
  33. 7 6
      src/anki/physics/PhysicsCollisionShape.h
  34. 3 2
      src/anki/physics/PhysicsJoint.h
  35. 4 3
      src/anki/physics/PhysicsPlayerController.h
  36. 2 1
      src/anki/physics/PhysicsTrigger.h
  37. 9 9
      src/anki/physics/PhysicsWorld.h
  38. 10 2
      src/anki/renderer/Bloom.cpp
  39. 0 10
      src/anki/renderer/Bloom.h
  40. 22 14
      src/anki/renderer/ClusterBin.cpp
  41. 1 0
      src/anki/renderer/Common.h
  42. 7 1
      src/anki/renderer/Dbg.cpp
  43. 0 7
      src/anki/renderer/Dbg.h
  44. 12 2
      src/anki/renderer/DownscaleBlur.cpp
  45. 0 6
      src/anki/renderer/DownscaleBlur.h
  46. 7 1
      src/anki/renderer/FinalComposite.cpp
  47. 0 7
      src/anki/renderer/FinalComposite.h
  48. 5 1
      src/anki/renderer/GBuffer.cpp
  49. 0 7
      src/anki/renderer/GBuffer.h
  50. 4 1
      src/anki/renderer/GBufferPost.cpp
  51. 0 5
      src/anki/renderer/GBufferPost.h
  52. 152 3
      src/anki/renderer/Indirect.cpp
  53. 11 7
      src/anki/renderer/Indirect.h
  54. 7 1
      src/anki/renderer/LensFlare.cpp
  55. 0 7
      src/anki/renderer/LensFlare.h
  56. 13 2
      src/anki/renderer/MainRenderer.cpp
  57. 0 13
      src/anki/renderer/MainRenderer.h
  58. 31 2
      src/anki/renderer/RenderQueue.h
  59. 27 5
      src/anki/renderer/Renderer.cpp
  60. 455 287
      src/anki/renderer/ShadowMapping.cpp
  61. 36 75
      src/anki/renderer/ShadowMapping.h
  62. 28 4
      src/anki/renderer/Ssao.cpp
  63. 0 14
      src/anki/renderer/Ssao.h
  64. 2 1
      src/anki/renderer/Ssr.cpp
  65. 0 5
      src/anki/renderer/Ssr.h
  66. 7 1
      src/anki/renderer/TemporalAA.cpp
  67. 0 7
      src/anki/renderer/TemporalAA.h
  68. 383 0
      src/anki/renderer/TileAllocator.cpp
  69. 95 0
      src/anki/renderer/TileAllocator.h
  70. 7 1
      src/anki/renderer/Tonemapping.cpp
  71. 0 7
      src/anki/renderer/Tonemapping.h
  72. 62 14
      src/anki/renderer/TraditionalDeferredShading.cpp
  73. 4 0
      src/anki/renderer/TraditionalDeferredShading.h
  74. 2 2
      src/anki/resource/ShaderProgramResource.h
  75. 4 2
      src/anki/scene/CameraNode.cpp
  76. 46 0
      src/anki/scene/LightNode.cpp
  77. 14 0
      src/anki/scene/LightNode.h
  78. 8 1
      src/anki/scene/Octree.cpp
  79. 16 2
      src/anki/scene/Octree.h
  80. 2 1
      src/anki/scene/PhysicsDebugNode.cpp
  81. 6 4
      src/anki/scene/ReflectionProbeNode.cpp
  82. 0 2
      src/anki/scene/ReflectionProbeNode.h
  83. 5 4
      src/anki/scene/SceneGraph.cpp
  84. 18 19
      src/anki/scene/SceneGraph.h
  85. 118 37
      src/anki/scene/Visibility.cpp
  86. 6 0
      src/anki/scene/VisibilityInternal.h
  87. 43 22
      src/anki/scene/components/FrustumComponent.h
  88. 158 5
      src/anki/scene/components/LightComponent.cpp
  89. 61 25
      src/anki/scene/components/LightComponent.h
  90. 1 1
      src/anki/scene/components/SpatialComponent.cpp
  91. 10 3
      src/anki/scene/components/SpatialComponent.h
  92. 133 0
      src/anki/script/Scene.cpp
  93. 15 0
      src/anki/script/Scene.xml
  94. 11 0
      src/anki/util/Allocator.h
  95. 82 0
      src/anki/util/ClassWrapper.h
  96. 0 16
      src/anki/util/Functions.h
  97. 1 1
      src/anki/util/String.cpp
  98. 74 0
      tests/renderer/TileAllocator.cpp
  99. 1 1
      tests/scene/Octree.cpp
  100. 25 8
      tools/scene/Exporter.cpp

BIN
samples/physics_playground/assets/physics_playground.blend


+ 3 - 4
samples/physics_playground/assets/scene.lua

@@ -232,18 +232,17 @@ trf:setScale(1)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
 node = scene:newStaticCollisionNode("wallwalls_001-materialnone20_cl", "assets/wall.ankicl", trf)
 
-node = scene:newPointLightNode("Lamp")
+node = scene:newDirectionalLightNode("Lamp")
 lcomp = node:getSceneNodeBase():getLightComponent()
 lcomp:setDiffuseColor(Vec4.new(50, 50, 50, 1))
-lcomp:setRadius(300)
 trf = Transform.new()
 trf:setOrigin(Vec4.new(4.07624, 74.4447, -1.00545, 0))
 rot = Mat3x4.new()
-rot:setAll(-0.290865, -0.771075, 0.566429, 0, -0.0551891, 0.604562, 0.794644, 0, -0.955171, 0.199873, -0.2184, 0)
+rot:setAll(0.649201, -0.658201, -0.381195, 0, 0.506372, 3.995e-05, 0.862315, 0, -0.567562, -0.752842, 0.33332, 0)
 trf:setRotation(rot)
 trf:setScale(1)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
-lcomp:setShadowEnabled(0)
+lcomp:setShadowEnabled(1)
 
 node = scene:newPointLightNode("Lamp_001")
 lcomp = node:getSceneNodeBase():getLightComponent()

BIN
samples/sponza/assets/metal_rod.ankimesh


+ 8 - 29
samples/sponza/assets/scene.lua

@@ -95,9 +95,9 @@ trf:setRotation(rot)
 trf:setScale(1)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
 
-node = scene:newReflectionProbeNode("reflprobe2", Vec4.new(-24.8339, -4.2659, -4.12719, 0), Vec4.new(24.8339, 4.2659, 4.12719, 0))
+node = scene:newReflectionProbeNode("reflprobe2", Vec4.new(-17.2975, -4.71667, -4.19979, 0), Vec4.new(17.2975, 4.71667, 4.19979, 0))
 trf = Transform.new()
-trf:setOrigin(Vec4.new(-1.00211, 19.8704, -7.63218, 0))
+trf:setOrigin(Vec4.new(-1.83947, 20.1518, -0.637399, 0))
 rot = Mat3x4.new()
 rot:setAll(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0)
 trf:setRotation(rot)
@@ -106,15 +106,6 @@ node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
 
 node = scene:newReflectionProbeNode("reflprobe3", Vec4.new(-24.8339, -4.2659, -4.12719, 0), Vec4.new(24.8339, 4.2659, 4.12719, 0))
 trf = Transform.new()
-trf:setOrigin(Vec4.new(-1.00211, 19.8704, -0.0445416, 0))
-rot = Mat3x4.new()
-rot:setAll(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0)
-trf:setRotation(rot)
-trf:setScale(1)
-node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
-
-node = scene:newReflectionProbeNode("reflprobe4", Vec4.new(-24.8339, -4.2659, -4.12719, 0), Vec4.new(24.8339, 4.2659, 4.12719, 0))
-trf = Transform.new()
 trf:setOrigin(Vec4.new(-1.00211, 11.4761, -0.0445416, 0))
 rot = Mat3x4.new()
 rot:setAll(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0)
@@ -122,7 +113,7 @@ trf:setRotation(rot)
 trf:setScale(1)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
 
-node = scene:newReflectionProbeNode("reflprobe5", Vec4.new(-24.8339, -3.89695, -4.12719, 0), Vec4.new(24.8339, 3.89695, 4.12719, 0))
+node = scene:newReflectionProbeNode("reflprobe4", Vec4.new(-24.8339, -3.89695, -4.12719, 0), Vec4.new(24.8339, 3.89695, 4.12719, 0))
 trf = Transform.new()
 trf:setOrigin(Vec4.new(-1.00211, 3.65293, -0.0445416, 0))
 rot = Mat3x4.new()
@@ -131,7 +122,7 @@ trf:setRotation(rot)
 trf:setScale(1)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
 
-node = scene:newReflectionProbeNode("reflprobe6", Vec4.new(-24.8339, -3.89695, -4.12719, 0), Vec4.new(24.8339, 3.89695, 4.12719, 0))
+node = scene:newReflectionProbeNode("reflprobe5", Vec4.new(-24.8339, -3.89695, -4.12719, 0), Vec4.new(24.8339, 3.89695, 4.12719, 0))
 trf = Transform.new()
 trf:setOrigin(Vec4.new(-1.00211, 3.65293, 6.48553, 0))
 rot = Mat3x4.new()
@@ -140,7 +131,7 @@ trf:setRotation(rot)
 trf:setScale(1)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
 
-node = scene:newReflectionProbeNode("reflprobe7", Vec4.new(-24.8339, -4.2659, -4.12719, 0), Vec4.new(24.8339, 4.2659, 4.12719, 0))
+node = scene:newReflectionProbeNode("reflprobe6", Vec4.new(-24.8339, -4.2659, -4.12719, 0), Vec4.new(24.8339, 4.2659, 4.12719, 0))
 trf = Transform.new()
 trf:setOrigin(Vec4.new(-1.00211, 11.4761, 6.48553, 0))
 rot = Mat3x4.new()
@@ -149,15 +140,6 @@ trf:setRotation(rot)
 trf:setScale(1)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
 
-node = scene:newReflectionProbeNode("reflprobe8", Vec4.new(-24.8339, -4.2659, -4.12719, 0), Vec4.new(24.8339, 4.2659, 4.12719, 0))
-trf = Transform.new()
-trf:setOrigin(Vec4.new(-1.00211, 19.8704, 6.48553, 0))
-rot = Mat3x4.new()
-rot:setAll(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0)
-trf:setRotation(rot)
-trf:setScale(1)
-node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
-
 node = scene:newModelNode("sponza_277leaf-materialnone0", "assets/sponza_277leaf-material.ankimdl")
 trf = Transform.new()
 trf:setOrigin(Vec4.new(-10.8267, 3.02038, -4.74626, 0))
@@ -2507,16 +2489,13 @@ trf:setRotation(rot)
 trf:setScale(0.0170465)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)
 
-node = scene:newSpotLightNode("Lamp")
+node = scene:newDirectionalLightNode("Lamp")
 lcomp = node:getSceneNodeBase():getLightComponent()
 lcomp:setDiffuseColor(Vec4.new(15, 15, 15, 1))
-lcomp:setInnerAngle(0.737402)
-lcomp:setOuterAngle(1.4748)
-lcomp:setDistance(89.9999)
 trf = Transform.new()
-trf:setOrigin(Vec4.new(9.66932, 40.2052, -9.96416, 0))
+trf:setOrigin(Vec4.new(-0.637397, 34.4336, -6.74682, 0))
 rot = Mat3x4.new()
-rot:setAll(-0.175432, -0.931125, 0.319735, 0, -0.392148, 0.363986, 0.844828, 0, -0.903019, 0.0228264, -0.428994, 0)
+rot:setAll(1, 0, 0, 0, 0, -0.376544, 0.926399, 0, 0, -0.926399, -0.376544, 0)
 trf:setRotation(rot)
 trf:setScale(1)
 node:getSceneNodeBase():getMoveComponent():setLocalTransform(trf)

BIN
samples/sponza/assets/sponza_05.ankimesh


BIN
samples/sponza/assets/sponza_117.ankimesh


BIN
samples/sponza/assets/sponza_18.ankimesh


BIN
samples/sponza/assets/sponza_382.ankimesh


+ 1 - 1
shaders/ApplyIrradianceToReflection.glslp

@@ -33,7 +33,7 @@ void main()
 	GbufferInfo gbuffer;
 	readGBuffer(u_gbufferTex0, u_gbufferTex1, u_gbufferTex2, sampleUv, 0.0, gbuffer);
 
-	// Read the irradiance. Use the layer 0 because the C++ sets an appropriate texture view
+	// Read the irradiance. Use the layer 0 because C++ will set the appropriate texture view
 	Vec3 irradiance = textureLod(u_irradianceTex, Vec4(gbuffer.m_normal, 0.0), 0.0).rgb;
 
 	// Compute the indirect term

+ 7 - 6
shaders/ClusteredShadingCommon.glsl

@@ -19,17 +19,17 @@ layout(ANKI_UBO_BINDING(LIGHT_SET, LIGHT_UBO_BINDING), std140, row_major) unifor
 	LightingUniforms u_lightingUniforms;
 };
 
-#	define u_near UNIFORM(u_lightingUniforms.m_rendererSizeTimeNear.w)
-#	define u_far UNIFORM(u_lightingUniforms.m_cameraPosFar.w)
-#	define u_cameraPos UNIFORM(u_lightingUniforms.m_cameraPosFar.xyz)
+#	define u_near UNIFORM(u_lightingUniforms.m_near)
+#	define u_far UNIFORM(u_lightingUniforms.m_far)
+#	define u_cameraPos UNIFORM(u_lightingUniforms.m_cameraPos)
 #	define u_clusterCountX UNIFORM(u_lightingUniforms.m_clusterCount.x)
 #	define u_clusterCountY UNIFORM(u_lightingUniforms.m_clusterCount.y)
 #	define u_clustererMagic u_lightingUniforms.m_clustererMagicValues
 #	define u_prevClustererMagic u_lightingUniforms.m_prevClustererMagicValues
-#	define u_time UNIFORM(u_lightingUniforms.m_rendererSizeTimeNear.z)
+#	define u_time UNIFORM(u_lightingUniforms.m_time)
 #	define u_unprojectionParams UNIFORM(u_lightingUniforms.m_unprojectionParams)
-#	define u_rendererSize u_lightingUniforms.m_rendererSizeTimeNear.xy
-#	define u_lightVolumeLastCluster UNIFORM(u_lightingUniforms.m_lightVolumeLastClusterPad3.x)
+#	define u_rendererSize u_lightingUniforms.m_rendererSize
+#	define u_lightVolumeLastCluster UNIFORM(u_lightingUniforms.m_lightVolumeLastCluster)
 
 #	define u_viewMat u_lightingUniforms.m_viewMat
 #	define u_invViewMat u_lightingUniforms.m_invViewMat
@@ -39,6 +39,7 @@ layout(ANKI_UBO_BINDING(LIGHT_SET, LIGHT_UBO_BINDING), std140, row_major) unifor
 #	define u_invViewProjMat u_lightingUniforms.m_invViewProjMat
 #	define u_prevViewProjMat u_lightingUniforms.m_prevViewProjMat
 #	define u_prevViewProjMatMulInvViewProjMat u_lightingUniforms.m_prevViewProjMatMulInvViewProjMat
+#	define u_dirLight u_lightingUniforms.m_dirLight
 
 #else
 const U32 _NEXT_UBO_BINDING = LIGHT_UBO_BINDING;

+ 57 - 28
shaders/ExponentialShadowmappingResolve.glslp

@@ -10,6 +10,23 @@ const F32 OFFSET = 1.25;
 const Vec2 TEXEL_SIZE = 1.0 / Vec2(INPUT_TEXTURE_SIZE);
 const Vec2 HALF_TEXEL_SIZE = TEXEL_SIZE / 2.0;
 
+struct Uniforms
+{
+	Vec2 m_uvScale;
+	Vec2 m_uvTranslation;
+	F32 m_near;
+	F32 m_far;
+	U32 m_renderingTechnique; // If value is 0: perspective+blur, 1: perspective, 2: ortho+blur, 3: ortho
+	U32 m_padding;
+};
+
+ANKI_PUSH_CONSTANTS(Uniforms, u_regs);
+#define u_uvScale u_regs.m_uvScale
+#define u_uvTranslation u_regs.m_uvTranslation
+#define u_near u_regs.m_near
+#define u_far u_regs.m_far
+#define u_renderingTechnique u_regs.m_renderingTechnique
+
 #pragma anki start vert
 #include <shaders/Common.glsl>
 
@@ -18,12 +35,6 @@ out gl_PerVertex
 	Vec4 gl_Position;
 };
 
-layout(ANKI_UBO_BINDING(0, 0)) uniform u_
-{
-	Vec4 u_nearFarPad2;
-	Vec4 u_uvScaleAndTranslation;
-};
-
 layout(location = 0) out Vec2 out_uv;
 layout(location = 1) flat out Vec2 out_maxUv;
 layout(location = 2) flat out Vec2 out_minUv;
@@ -33,12 +44,12 @@ void main()
 	out_uv = Vec2(gl_VertexID & 1, gl_VertexID >> 1) * 2.0;
 	Vec2 pos = out_uv * 2.0 - 1.0;
 
-	out_uv = fma(out_uv, u_uvScaleAndTranslation.zw, u_uvScaleAndTranslation.xy);
+	out_uv = fma(out_uv, u_uvScale, u_uvTranslation);
 	gl_Position = Vec4(pos, 0.0, 1.0);
 
 	// Compute the limits
-	out_maxUv = fma(Vec2(1.0), u_uvScaleAndTranslation.zw, u_uvScaleAndTranslation.xy) - HALF_TEXEL_SIZE;
-	out_minUv = fma(Vec2(0.0), u_uvScaleAndTranslation.zw, u_uvScaleAndTranslation.xy) + HALF_TEXEL_SIZE;
+	out_maxUv = fma(Vec2(1.0), u_uvScale, u_uvTranslation) - HALF_TEXEL_SIZE;
+	out_minUv = fma(Vec2(0.0), u_uvScale, u_uvTranslation) + HALF_TEXEL_SIZE;
 }
 #pragma anki end
 
@@ -52,35 +63,53 @@ layout(location = 2) flat in Vec2 in_minUv;
 
 layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_inputTex;
 
-layout(ANKI_UBO_BINDING(0, 0)) uniform u_
-{
-	Vec4 u_nearFarPad2;
-	Vec4 u_uvScaleAndTranslation;
-};
-
-#define u_near u_nearFarPad2.x
-#define u_far u_nearFarPad2.y
-
 layout(location = 0) out F32 out_color;
 
-F32 sampleLinearDepth(Vec2 uv)
+F32 sampleLinearDepthPerspective(Vec2 uv)
 {
 	uv = clamp(uv, in_minUv, in_maxUv);
 	return linearizeDepth(textureLod(u_inputTex, uv, 0.0).r, u_near, u_far);
 }
 
+F32 sampleLinearDepthOrhographic(Vec2 uv)
+{
+	uv = clamp(uv, in_minUv, in_maxUv);
+	return textureLod(u_inputTex, uv, 0.0).r;
+}
+
 void main()
 {
 	const Vec2 UV_OFFSET = OFFSET * TEXEL_SIZE;
 
-	out_color = sampleLinearDepth(in_uv) * BOX_WEIGHTS[0u];
-	out_color += sampleLinearDepth(in_uv + Vec2(UV_OFFSET.x, 0.0)) * BOX_WEIGHTS[1u];
-	out_color += sampleLinearDepth(in_uv + Vec2(-UV_OFFSET.x, 0.0)) * BOX_WEIGHTS[1u];
-	out_color += sampleLinearDepth(in_uv + Vec2(0.0, UV_OFFSET.y)) * BOX_WEIGHTS[1u];
-	out_color += sampleLinearDepth(in_uv + Vec2(0.0, -UV_OFFSET.y)) * BOX_WEIGHTS[1u];
-	out_color += sampleLinearDepth(in_uv + Vec2(UV_OFFSET.x, UV_OFFSET.y)) * BOX_WEIGHTS[2u];
-	out_color += sampleLinearDepth(in_uv + Vec2(-UV_OFFSET.x, UV_OFFSET.y)) * BOX_WEIGHTS[2u];
-	out_color += sampleLinearDepth(in_uv + Vec2(UV_OFFSET.x, -UV_OFFSET.y)) * BOX_WEIGHTS[2u];
-	out_color += sampleLinearDepth(in_uv + Vec2(-UV_OFFSET.x, -UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+	switch(u_renderingTechnique)
+	{
+	case 0u:
+		out_color = sampleLinearDepthPerspective(in_uv) * BOX_WEIGHTS[0u];
+		out_color += sampleLinearDepthPerspective(in_uv + Vec2(UV_OFFSET.x, 0.0)) * BOX_WEIGHTS[1u];
+		out_color += sampleLinearDepthPerspective(in_uv + Vec2(-UV_OFFSET.x, 0.0)) * BOX_WEIGHTS[1u];
+		out_color += sampleLinearDepthPerspective(in_uv + Vec2(0.0, UV_OFFSET.y)) * BOX_WEIGHTS[1u];
+		out_color += sampleLinearDepthPerspective(in_uv + Vec2(0.0, -UV_OFFSET.y)) * BOX_WEIGHTS[1u];
+		out_color += sampleLinearDepthPerspective(in_uv + Vec2(UV_OFFSET.x, UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+		out_color += sampleLinearDepthPerspective(in_uv + Vec2(-UV_OFFSET.x, UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+		out_color += sampleLinearDepthPerspective(in_uv + Vec2(UV_OFFSET.x, -UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+		out_color += sampleLinearDepthPerspective(in_uv + Vec2(-UV_OFFSET.x, -UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+		break;
+	case 1u:
+		out_color = sampleLinearDepthPerspective(in_uv);
+		break;
+	case 2u:
+		out_color = sampleLinearDepthOrhographic(in_uv) * BOX_WEIGHTS[0u];
+		out_color += sampleLinearDepthOrhographic(in_uv + Vec2(UV_OFFSET.x, 0.0)) * BOX_WEIGHTS[1u];
+		out_color += sampleLinearDepthOrhographic(in_uv + Vec2(-UV_OFFSET.x, 0.0)) * BOX_WEIGHTS[1u];
+		out_color += sampleLinearDepthOrhographic(in_uv + Vec2(0.0, UV_OFFSET.y)) * BOX_WEIGHTS[1u];
+		out_color += sampleLinearDepthOrhographic(in_uv + Vec2(0.0, -UV_OFFSET.y)) * BOX_WEIGHTS[1u];
+		out_color += sampleLinearDepthOrhographic(in_uv + Vec2(UV_OFFSET.x, UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+		out_color += sampleLinearDepthOrhographic(in_uv + Vec2(-UV_OFFSET.x, UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+		out_color += sampleLinearDepthOrhographic(in_uv + Vec2(UV_OFFSET.x, -UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+		out_color += sampleLinearDepthOrhographic(in_uv + Vec2(-UV_OFFSET.x, -UV_OFFSET.y)) * BOX_WEIGHTS[2u];
+		break;
+	default:
+		out_color = sampleLinearDepthOrhographic(in_uv);
+	}
 }
 #pragma anki end

+ 11 - 13
shaders/ForwardShadingCommonFrag.glsl

@@ -55,19 +55,18 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 	{
 		PointLight light = u_pointLights[idx];
 
-		Vec3 diffC = diffCol * light.m_diffuseColorTileSize.rgb;
+		Vec3 diffC = diffCol * light.m_diffuseColor;
 
-		Vec3 frag2Light = light.m_posRadius.xyz - worldPos;
-		F32 att = computeAttenuationFactor(light.m_posRadius.w, frag2Light);
+		Vec3 frag2Light = light.m_position - worldPos;
+		F32 att = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light);
 
 #if LOD > 1
 		const F32 shadow = 1.0;
 #else
 		F32 shadow = 1.0;
-		if(light.m_diffuseColorTileSize.w >= 0.0)
+		if(light.m_shadowAtlasTileScale >= 0.0)
 		{
-			shadow = computeShadowFactorOmni(
-				frag2Light, light.m_radiusPad1.x, light.m_atlasTiles, light.m_diffuseColorTileSize.w, u_shadowTex);
+			shadow = computeShadowFactorPointLight(light, frag2Light, u_shadowTex);
 		}
 #endif
 
@@ -79,24 +78,23 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 	{
 		SpotLight light = u_spotLights[idx];
 
-		Vec3 diffC = diffCol * light.m_diffuseColorShadowmapId.rgb;
+		Vec3 diffC = diffCol * light.m_diffuseColor;
 
-		Vec3 frag2Light = light.m_posRadius.xyz - worldPos;
-		F32 att = computeAttenuationFactor(light.m_posRadius.w, frag2Light);
+		Vec3 frag2Light = light.m_position - worldPos;
+		F32 att = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light);
 
 		Vec3 l = normalize(frag2Light);
 
-		F32 spot =
-			computeSpotFactor(l, light.m_outerCosInnerCos.x, light.m_outerCosInnerCos.y, light.m_lightDirRadius.xyz);
+		F32 spot = computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_dir);
 
 #if LOD > 1
 		const F32 shadow = 1.0;
 #else
 		F32 shadow = 1.0;
-		F32 shadowmapLayerIdx = light.m_diffuseColorShadowmapId.w;
+		F32 shadowmapLayerIdx = light.m_shadowmapId;
 		if(shadowmapLayerIdx >= 0.0)
 		{
-			shadow = computeShadowFactorSpot(light.m_texProjectionMat, worldPos, light.m_lightDirRadius.w, u_shadowTex);
+			shadow = computeShadowFactorSpotLight(light, worldPos, u_shadowTex);
 		}
 #endif
 

+ 37 - 13
shaders/LightFunctions.glsl

@@ -109,13 +109,13 @@ U32 computeShadowSampleCount(const U32 COUNT, F32 zVSpace)
 	return sampleCount;
 }
 
-F32 computeShadowFactorSpot(Mat4 lightProjectionMat, Vec3 worldPos, F32 distance, sampler2D spotMapArr)
+F32 computeShadowFactorSpotLight(SpotLight light, Vec3 worldPos, sampler2D spotMapArr)
 {
-	Vec4 texCoords4 = lightProjectionMat * Vec4(worldPos, 1.0);
+	Vec4 texCoords4 = light.m_texProjectionMat * Vec4(worldPos, 1.0);
 	Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;
 
 	const F32 near = LIGHT_FRUSTUM_NEAR_PLANE;
-	const F32 far = distance;
+	const F32 far = light.m_radius;
 
 	F32 linearDepth = linearizeDepth(texCoords3.z, near, far);
 
@@ -124,14 +124,15 @@ F32 computeShadowFactorSpot(Mat4 lightProjectionMat, Vec3 worldPos, F32 distance
 	return saturate(exp(ESM_CONSTANT * (shadowFactor - linearDepth)));
 }
 
-F32 computeShadowFactorOmni(Vec3 frag2Light, F32 radius, UVec2 atlasTiles, F32 tileSize, sampler2D shadowMap)
+// Compute the shadow factor of point (omni) lights.
+F32 computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, sampler2D shadowMap)
 {
 	Vec3 dir = -frag2Light;
 	Vec3 dirabs = abs(dir);
 	F32 dist = max(dirabs.x, max(dirabs.y, dirabs.z));
 
 	const F32 near = LIGHT_FRUSTUM_NEAR_PLANE;
-	const F32 far = radius;
+	const F32 far = light.m_radius;
 
 	F32 linearDepth = (dist - near) / (far - near);
 
@@ -142,16 +143,13 @@ F32 computeShadowFactorOmni(Vec3 frag2Light, F32 radius, UVec2 atlasTiles, F32 t
 		U32 faceIdxu;
 		Vec2 uv = convertCubeUvsu(dir, faceIdxu);
 
-		// Clamp uv to a small value to avoid reading from other tiles due to bilinear filtering. It's not a perfect
-		// solution but it works
-		uv = clamp(uv, Vec2(0.001), Vec2(1.0 - 0.001));
-
-		// Compute atlas tile
-		atlasTiles >>= UVec2(faceIdxu * 5u);
-		atlasTiles &= UVec2(31u);
+		// Get the atlas offset
+		Vec2 atlasOffset;
+		atlasOffset.x = light.m_shadowAtlasTileOffsets[faceIdxu >> 1u][(faceIdxu & 1u) << 1u];
+		atlasOffset.y = light.m_shadowAtlasTileOffsets[faceIdxu >> 1u][((faceIdxu & 1u) << 1u) + 1u];
 
 		// Compute UV
-		uv = (uv + Vec2(atlasTiles)) * tileSize;
+		uv = fma(uv, Vec2(light.m_shadowAtlasTileScale), atlasOffset);
 
 		// Sample
 		shadowFactor = textureLod(shadowMap, uv, 0.0).r;
@@ -160,6 +158,32 @@ F32 computeShadowFactorOmni(Vec3 frag2Light, F32 radius, UVec2 atlasTiles, F32 t
 	return saturate(exp(ESM_CONSTANT * (shadowFactor - linearDepth)));
 }
 
+// Compute the shadow factor of a directional light for the given cascade using the exponential shadowmap (ESM) term
+F32 computeShadowFactorDirLight(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, sampler2D shadowMap)
+{
+	Mat4 lightProjectionMat = light.m_textureMatrices[cascadeIdx];
+
+	Vec4 texCoords4 = lightProjectionMat * Vec4(worldPos, 1.0);
+	Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;
+
+	F32 cascadeLinearDepth = texCoords3.z;
+
+	F32 shadowFactor = textureLod(shadowMap, texCoords3.xy, 0.0).r;
+	shadowFactor = saturate(exp(ESM_CONSTANT * 3.0 * (shadowFactor - cascadeLinearDepth)));
+
+	return shadowFactor;
+}
+
+// Compute the shadow factor of a directional light using a depth-comparison lookup on a sampler2DShadow
+F32 computeShadowFactorDirLight(Mat4 lightProjectionMat, Vec3 worldPos, sampler2DShadow shadowMap)
+{
+	Vec4 texCoords4 = lightProjectionMat * Vec4(worldPos, 1.0);
+	Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;
+
+	F32 shadowFactor = textureLod(shadowMap, texCoords3, 0.0);
+	return shadowFactor;
+}
+
 // Compute the cubemap texture lookup vector given the reflection vector (r) the radius squared of the probe (R2) and
 // the frag pos in sphere space (f)
 Vec3 computeCubemapVecAccurate(in Vec3 r, in F32 R2, in Vec3 f)

+ 41 - 20
shaders/LightShading.glslp

@@ -55,8 +55,6 @@ layout(location = 1) in Vec2 in_clusterIJ;
 
 layout(location = 0) out Vec3 out_color;
 
-const F32 SUBSURFACE_MIN = 0.05;
-
 // Note: All calculations in world space
 void readReflectionsAndIrradianceFromProbes(U32 idxOffset,
 	Vec3 worldPos,
@@ -78,10 +76,10 @@ void readReflectionsAndIrradianceFromProbes(U32 idxOffset,
 	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
 		ReflectionProbe probe = u_reflectionProbes[idx];
-		Vec3 aabbMin = probe.m_aabbMinPad1.xyz;
-		Vec3 aabbMax = probe.m_aabbMaxPad1.xyz;
-		Vec3 probeOrigin = probe.m_positionCubemapIndex.xyz;
-		F32 cubemapIndex = probe.m_positionCubemapIndex.w;
+		Vec3 aabbMin = probe.m_aabbMin;
+		Vec3 aabbMax = probe.m_aabbMax;
+		Vec3 probeOrigin = probe.m_position;
+		F32 cubemapIndex = probe.m_cubemapIndex;
 
 		// Compute blend weight
 		F32 blendWeight = computeProbeBlendWeight(worldPos, aabbMin, aabbMax, 0.2);
@@ -105,11 +103,11 @@ void readReflectionsAndIrradianceFromProbes(U32 idxOffset,
 
 // Common code for lighting
 #define LIGHTING_COMMON_BRDF() \
-	Vec3 frag2Light = light.m_posRadius.xyz - worldPos; \
+	Vec3 frag2Light = light.m_position - worldPos; \
 	Vec3 l = normalize(frag2Light); \
 	Vec3 specC = computeSpecularColorBrdf(gbuffer, viewDir, l); \
 	Vec3 diffC = diffuseLambert(gbuffer.m_diffuse); \
-	F32 att = computeAttenuationFactor(light.m_posRadius.w, frag2Light); \
+	F32 att = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light); \
 	F32 lambert = max(0.0, dot(gbuffer.m_normal, l));
 
 void main()
@@ -145,8 +143,35 @@ void main()
 	// Ambient and emissive color
 	out_color = gbuffer.m_diffuse * gbuffer.m_emission;
 
-	// Point lights
+	// Dir light
 	Vec3 viewDir = normalize(u_cameraPos - worldPos);
+	if(u_dirLight.m_active != 0u)
+	{
+		F32 shadowFactor;
+		if(u_dirLight.m_cascadeCount > 0)
+		{
+			F32 linearDepth = linearizeDepth(depth, u_near, u_far);
+			F32 cascadeCountf = F32(u_dirLight.m_cascadeCount);
+			U32 cascadeIdx = min(U32(linearDepth * cascadeCountf), u_dirLight.m_cascadeCount - 1u);
+
+			shadowFactor = computeShadowFactorDirLight(u_dirLight, cascadeIdx, worldPos, u_shadowTex);
+		}
+		else
+		{
+			shadowFactor = 1.0;
+		}
+
+		Vec3 l = -u_dirLight.m_dir;
+
+		F32 lambert = max(gbuffer.m_subsurface, dot(l, gbuffer.m_normal));
+
+		Vec3 diffC = diffuseLambert(gbuffer.m_diffuse);
+		Vec3 specC = computeSpecularColorBrdf(gbuffer, viewDir, l);
+
+		out_color += (diffC + specC) * u_dirLight.m_diffuseColor * (shadowFactor * lambert);
+	}
+
+	// Point lights
 	U32 idx;
 	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
@@ -154,14 +179,13 @@ void main()
 
 		LIGHTING_COMMON_BRDF();
 
-		ANKI_BRANCH if(light.m_diffuseColorTileSize.w >= 0.0)
+		ANKI_BRANCH if(light.m_shadowAtlasTileScale >= 0.0)
 		{
-			F32 shadow = computeShadowFactorOmni(
-				frag2Light, light.m_radiusPad1.x, light.m_atlasTiles, light.m_diffuseColorTileSize.w, u_shadowTex);
+			F32 shadow = computeShadowFactorPointLight(light, frag2Light, u_shadowTex);
 			lambert *= shadow;
 		}
 
-		out_color += (diffC + specC) * light.m_diffuseColorTileSize.rgb * (att * max(gbuffer.m_subsurface, lambert));
+		out_color += (diffC + specC) * light.m_diffuseColor * (att * max(gbuffer.m_subsurface, lambert));
 	}
 
 	// Spot lights
@@ -171,19 +195,16 @@ void main()
 
 		LIGHTING_COMMON_BRDF();
 
-		F32 spot =
-			computeSpotFactor(l, light.m_outerCosInnerCos.x, light.m_outerCosInnerCos.y, light.m_lightDirRadius.xyz);
+		F32 spot = computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_dir);
 
-		F32 shadowmapLayerIdx = light.m_diffuseColorShadowmapId.w;
+		F32 shadowmapLayerIdx = light.m_shadowmapId;
 		ANKI_BRANCH if(shadowmapLayerIdx >= 0.0)
 		{
-			F32 shadow =
-				computeShadowFactorSpot(light.m_texProjectionMat, worldPos, light.m_lightDirRadius.w, u_shadowTex);
+			F32 shadow = computeShadowFactorSpotLight(light, worldPos, u_shadowTex);
 			lambert *= shadow;
 		}
 
-		out_color +=
-			(diffC + specC) * light.m_diffuseColorShadowmapId.rgb * (att * spot * max(gbuffer.m_subsurface, lambert));
+		out_color += (diffC + specC) * light.m_diffuseColor * (att * spot * max(gbuffer.m_subsurface, lambert));
 	}
 
 	// Refl & indirect

+ 69 - 39
shaders/TraditionalDeferredShading.glslp

@@ -5,19 +5,31 @@
 
 // Classic deferred lighting shader
 
-#pragma anki mutator LIGHT_TYPE 0 1
+#pragma anki mutator LIGHT_TYPE 0 1 2
+
+#define POINT_LIGHT_TYPE 0
+#define SPOT_LIGHT_TYPE 1
+#define DIR_LIGHT_TYPE 2
 
 // VERT
 #pragma anki start vert
 #include <shaders/Common.glsl>
 
-layout(location = 0) in Vec3 in_position;
-
 out gl_PerVertex
 {
 	Vec4 gl_Position;
 };
 
+#if LIGHT_TYPE == DIR_LIGHT_TYPE
+void main()
+{
+	Vec2 uv = Vec2(gl_VertexID & 1, gl_VertexID >> 1) * 2.0;
+	Vec2 pos = uv * 2.0 - 1.0;
+	gl_Position = Vec4(pos, 0.0, 1.0);
+}
+#else
+layout(location = 0) in Vec3 in_position;
+
 layout(ANKI_UBO_BINDING(0, 0), row_major) uniform u0_
 {
 	Mat4 u_mvp;
@@ -27,6 +39,7 @@ void main()
 {
 	gl_Position = u_mvp * Vec4(in_position, 1.0);
 }
+#endif
 #pragma anki end
 
 // FRAG
@@ -35,15 +48,12 @@ void main()
 #include <shaders/LightFunctions.glsl>
 #include <shaders/glsl_cpp_common/TraditionalDeferredShading.h>
 
-#define POINT_LIGHT_TYPE 0
-#define SPOT_LIGHT_TYPE 1
-
 layout(location = 0) out Vec3 out_color;
 
-layout(ANKI_TEX_BINDING(GBUFFER_RT0_BINDING.x, GBUFFER_RT0_BINDING.y)) uniform sampler2D u_msRt0;
-layout(ANKI_TEX_BINDING(GBUFFER_RT1_BINDING.x, GBUFFER_RT1_BINDING.y)) uniform sampler2D u_msRt1;
-layout(ANKI_TEX_BINDING(GBUFFER_RT2_BINDING.x, GBUFFER_RT2_BINDING.y)) uniform sampler2D u_msRt2;
-layout(ANKI_TEX_BINDING(GBUFFER_DEPTH_BINDING.x, GBUFFER_DEPTH_BINDING.y)) uniform sampler2D u_msDepthRt;
+layout(ANKI_TEX_BINDING(GBUFFER_RT0_BINDING[0], GBUFFER_RT0_BINDING[1])) uniform sampler2D u_msRt0;
+layout(ANKI_TEX_BINDING(GBUFFER_RT1_BINDING[0], GBUFFER_RT1_BINDING[1])) uniform sampler2D u_msRt1;
+layout(ANKI_TEX_BINDING(GBUFFER_RT2_BINDING[0], GBUFFER_RT2_BINDING[1])) uniform sampler2D u_msRt2;
+layout(ANKI_TEX_BINDING(GBUFFER_DEPTH_BINDING[0], GBUFFER_DEPTH_BINDING[1])) uniform sampler2D u_msDepthRt;
 
 layout(ANKI_UBO_BINDING(0, 1), row_major) uniform u1_
 {
@@ -51,65 +61,85 @@ layout(ANKI_UBO_BINDING(0, 1), row_major) uniform u1_
 	DeferredPointLightUniforms u_unis;
 #elif LIGHT_TYPE == SPOT_LIGHT_TYPE
 	DeferredSpotLightUniforms u_unis;
+#elif LIGHT_TYPE == DIR_LIGHT_TYPE
+	DeferredDirectionalLightUniforms u_unis;
 #else
 #	error See file
 #endif
 };
 
-#if LIGHT_TYPE == POINT_LIGHT_TYPE
-#	define u_ldiff u_unis.m_diffuseColorPad1.xyz
-#else
-#	define u_ldiff u_unis.m_diffuseColorOuterCos.xyz
-#	define u_lightDir u_unis.m_lightDirInnerCos.xyz
-#	define u_outerCos u_unis.m_diffuseColorOuterCos.w
-#	define u_innerCos u_unis.m_lightDirInnerCos.w
+#if LIGHT_TYPE == DIR_LIGHT_TYPE
+layout(ANKI_TEX_BINDING(
+	GBUFFER_SHADOW_ATLAS_BINDING[0], GBUFFER_SHADOW_ATLAS_BINDING[1])) uniform sampler2DShadow u_shadowMap;
 #endif
 
-#define u_pos u_unis.m_posRadius.xyz
-#define u_radius u_unis.m_posRadius.w
-#define u_camPos u_unis.m_camPosPad1.xyz
-#define u_inputTexUvScaleAndOffset u_unis.m_inputTexUvScaleAndOffset
-#define u_invViewProjMat u_unis.m_invViewProjMat
-#define u_fbSize u_unis.m_fbSizePad2.xy
-
 void main()
 {
 	// Compute UV coordinates
-	Vec2 uv = Vec2(gl_FragCoord.xy) / u_fbSize;
-	Vec2 uvToRead = fma(uv, u_inputTexUvScaleAndOffset.xy, u_inputTexUvScaleAndOffset.zw);
+	Vec2 uv = Vec2(gl_FragCoord.xy) / u_unis.m_fbSize;
+	Vec2 uvToRead = fma(uv, u_unis.m_inputTexUvScale, u_unis.m_inputTexUvOffset);
 
-	// Do manual depth test
 	F32 depth = texture(u_msDepthRt, uvToRead).r;
+
+#if LIGHT_TYPE != DIR_LIGHT_TYPE
+	// Do manual depth test
 	if(gl_FragCoord.z < depth)
 	{
 		discard;
 	}
+#endif
 
 	// Decode and process gbuffer
 	GbufferInfo gbuffer;
 	readGBuffer(u_msRt0, u_msRt1, u_msRt2, uvToRead, 0.0, gbuffer);
+	gbuffer.m_subsurface = max(gbuffer.m_subsurface, SUBSURFACE_MIN * 8.0);
 
-	Vec4 worldPos4 = u_invViewProjMat * Vec4(UV_TO_NDC(uv), depth, 1.0);
+	Vec4 worldPos4 = u_unis.m_invViewProjMat * Vec4(UV_TO_NDC(uv), depth, 1.0);
 	Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
-	// Calculate the light color
-	Vec3 viewDir = normalize(u_camPos - worldPos);
-	Vec3 frag2Light = u_pos - worldPos;
+	// Compute diff
+	Vec3 diffC = diffuseLambert(gbuffer.m_diffuse);
+
+	// Compute spec
+	Vec3 viewDir = normalize(u_unis.m_camPos - worldPos);
+#if LIGHT_TYPE == DIR_LIGHT_TYPE
+	Vec3 l = -u_unis.m_lightDir;
+#else
+	Vec3 frag2Light = u_unis.m_position - worldPos;
 	Vec3 l = normalize(frag2Light);
 	F32 nol = max(0.0, dot(gbuffer.m_normal, l));
-
+#endif
 	Vec3 specC = computeSpecularColorBrdf(gbuffer, viewDir, l);
-	Vec3 diffC = diffuseLambert(gbuffer.m_diffuse);
-
-	F32 att = computeAttenuationFactor(u_radius, frag2Light);
-	F32 lambert = nol;
 
+	// Compute factors
 #if LIGHT_TYPE == POINT_LIGHT_TYPE
-	out_color = (specC + diffC) * u_ldiff * (att * max(lambert, gbuffer.m_subsurface));
+	F32 att = computeAttenuationFactor(u_unis.m_oneOverSquareRadius, frag2Light);
+	F32 lambert = nol;
+	F32 factor = att * max(lambert, gbuffer.m_subsurface);
+#elif LIGHT_TYPE == SPOT_LIGHT_TYPE
+	F32 att = computeAttenuationFactor(u_unis.m_oneOverSquareRadius, frag2Light);
+	F32 lambert = nol;
+	F32 spot = computeSpotFactor(l, u_unis.m_outerCos, u_unis.m_innerCos, u_unis.m_lightDir);
+	F32 factor = att * spot * max(lambert, gbuffer.m_subsurface);
 #else
-	F32 spot = computeSpotFactor(l, u_outerCos, u_innerCos, u_lightDir);
-	out_color = (diffC + specC) * u_ldiff * (att * spot * max(lambert, gbuffer.m_subsurface));
+	F32 linearDepth = linearizeDepth(depth, u_unis.m_near, u_unis.m_far);
+	F32 shadowFactor;
+	if(linearDepth * (u_unis.m_far - u_unis.m_near) < u_unis.m_effectiveShadowDistance)
+	{
+		// Acceptable distance
+
+		shadowFactor = computeShadowFactorDirLight(u_unis.m_lightMatrix, worldPos, u_shadowMap);
+	}
+	else
+	{
+		shadowFactor = 1.0;
+	}
+
+	F32 lambert = dot(l, gbuffer.m_normal);
+	F32 factor = shadowFactor * max(gbuffer.m_subsurface, lambert);
 #endif
+
+	out_color = (specC + diffC) * u_unis.m_diffuseColor * factor;
 }
 
 #pragma anki end

+ 48 - 25
shaders/VolumetricLightingAccumulation.glslp

@@ -54,7 +54,7 @@ Vec3 readRand()
 	return textureLod(u_noiseTex, uv, 0.0).rgb;
 }
 
-Vec3 worldPosInsideCluster(Vec3 relativePos)
+Vec3 worldPosInsideClusterAndZViewSpace(Vec3 relativePos, out F32 negativeZViewSpace)
 {
 	// Compute the cluster Z as float
 	F32 clusterKNear = g_globalInvocationID.z * (F32(FINAL_CLUSTER_Z + 1u) / F32(VOLUME_SIZE.z));
@@ -62,7 +62,8 @@ Vec3 worldPosInsideCluster(Vec3 relativePos)
 	F32 clusterK = mix(clusterKNear, clusterKFar, relativePos.z);
 
 	// Get a Z value
-	F32 zVSpace = -computeClusterNearf(u_clustererMagic, clusterK);
+	negativeZViewSpace = computeClusterNearf(u_clustererMagic, clusterK);
+	F32 zVSpace = -negativeZViewSpace;
 
 	// Get a XY value
 	Vec2 uvMin = g_globalInvocationID.xy / Vec2(VOLUME_SIZE.xy);
@@ -78,6 +79,12 @@ Vec3 worldPosInsideCluster(Vec3 relativePos)
 	return worldPos;
 }
 
+Vec3 worldPosInsideCluster(Vec3 relativePos)
+{
+	F32 unused;
+	return worldPosInsideClusterAndZViewSpace(relativePos, unused);
+}
+
 // https://developer.nvidia.com/gpugems/GPUGems2/gpugems2_chapter16.html
 F32 phaseFunction(Vec3 viewDir, Vec3 lightDir, F32 g)
 {
@@ -91,7 +98,7 @@ F32 phaseFunction(Vec3 viewDir, Vec3 lightDir, F32 g)
 	return saturate(a * b);
 }
 
-Vec4 accumulateLightsAndFog(U32 clusterIdx, Vec3 worldPos)
+Vec4 accumulateLightsAndFog(U32 clusterIdx, Vec3 worldPos, F32 linearDepth)
 {
 	Vec3 color = Vec3(0.0);
 	Vec3 viewDir = normalize(u_cameraPos - worldPos);
@@ -99,26 +106,42 @@ Vec4 accumulateLightsAndFog(U32 clusterIdx, Vec3 worldPos)
 	// Get ID offset
 	U32 idxOffset = u_clusters[clusterIdx];
 
+	// Dir light
+	if(u_dirLight.m_active != 0u)
+	{
+		F32 factor = phaseFunction(viewDir, u_dirLight.m_dir, PHASE_FUNCTION_ANISOTROPY);
+
+#if ENABLE_SHADOWS
+		if(u_dirLight.m_cascadeCount > 0u)
+		{
+			F32 cascadeCountf = F32(u_dirLight.m_cascadeCount);
+			U32 cascadeIdx = min(U32(linearDepth * cascadeCountf), u_dirLight.m_cascadeCount - 1u);
+			factor *= computeShadowFactorDirLight(u_dirLight, cascadeIdx, worldPos, u_shadowTex);
+		}
+#endif
+
+		color += u_dirLight.m_diffuseColor * factor;
+	}
+
 	// Point lights
 	U32 idx;
 	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
 		PointLight light = u_pointLights[idx];
 
-		Vec3 frag2Light = light.m_posRadius.xyz - worldPos;
-		F32 factor = computeAttenuationFactor(light.m_posRadius.w, frag2Light);
+		Vec3 frag2Light = light.m_position - worldPos;
+		F32 factor = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light);
 
-		factor *= phaseFunction(viewDir, normalize(worldPos - light.m_posRadius.xyz), PHASE_FUNCTION_ANISOTROPY);
+		factor *= phaseFunction(viewDir, normalize(worldPos - light.m_position), PHASE_FUNCTION_ANISOTROPY);
 
 #if ENABLE_SHADOWS
-		if(light.m_diffuseColorTileSize.w >= 0.0)
+		if(light.m_shadowAtlasTileScale >= 0.0)
 		{
-			factor *= computeShadowFactorOmni(
-				frag2Light, light.m_radiusPad1.x, light.m_atlasTiles, light.m_diffuseColorTileSize.w, u_shadowTex);
+			factor *= computeShadowFactorPointLight(light, frag2Light, u_shadowTex);
 		}
 #endif
 
-		color += light.m_diffuseColorTileSize.rgb * factor;
+		color += light.m_diffuseColor * factor;
 	}
 
 	// Spot lights
@@ -126,26 +149,24 @@ Vec4 accumulateLightsAndFog(U32 clusterIdx, Vec3 worldPos)
 	{
 		SpotLight light = u_spotLights[idx];
 
-		Vec3 frag2Light = light.m_posRadius.xyz - worldPos;
-		F32 factor = computeAttenuationFactor(light.m_posRadius.w, frag2Light);
+		Vec3 frag2Light = light.m_position - worldPos;
+		F32 factor = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light);
 
 		Vec3 l = normalize(frag2Light);
 
-		factor *=
-			computeSpotFactor(l, light.m_outerCosInnerCos.x, light.m_outerCosInnerCos.y, light.m_lightDirRadius.xyz);
+		factor *= computeSpotFactor(l, light.m_outerCos, light.m_innerCos, light.m_dir);
 
-		factor *= phaseFunction(viewDir, light.m_lightDirRadius.xyz, PHASE_FUNCTION_ANISOTROPY);
+		factor *= phaseFunction(viewDir, light.m_dir, PHASE_FUNCTION_ANISOTROPY);
 
 #if ENABLE_SHADOWS
-		F32 shadowmapLayerIdx = light.m_diffuseColorShadowmapId.w;
+		F32 shadowmapLayerIdx = light.m_shadowmapId;
 		if(shadowmapLayerIdx >= 0.0)
 		{
-			factor *=
-				computeShadowFactorSpot(light.m_texProjectionMat, worldPos, light.m_lightDirRadius.w, u_shadowTex);
+			factor *= computeShadowFactorSpotLight(light, worldPos, u_shadowTex);
 		}
 #endif
 
-		color += light.m_diffuseColorShadowmapId.rgb * factor;
+		color += light.m_diffuseColor * factor;
 	}
 
 	// Probes
@@ -154,10 +175,10 @@ Vec4 accumulateLightsAndFog(U32 clusterIdx, Vec3 worldPos)
 	ANKI_LOOP while((idx = u_lightIndices[idxOffset++]) != MAX_U32)
 	{
 		ReflectionProbe probe = u_reflectionProbes[idx];
-		Vec3 aabbMin = probe.m_aabbMinPad1.xyz;
-		Vec3 aabbMax = probe.m_aabbMaxPad1.xyz;
-		Vec3 probeOrigin = probe.m_positionCubemapIndex.xyz;
-		F32 cubemapIndex = probe.m_positionCubemapIndex.w;
+		Vec3 aabbMin = probe.m_aabbMin;
+		Vec3 aabbMax = probe.m_aabbMax;
+		Vec3 probeOrigin = probe.m_position;
+		F32 cubemapIndex = probe.m_cubemapIndex;
 
 		F32 blendWeight = computeProbeBlendWeight(worldPos, aabbMin, aabbMax, 0.2);
 		totalBlendWeight += blendWeight;
@@ -210,10 +231,12 @@ void main()
 	U32 clusterIdx = clusterXYZ.z * (CLUSTER_COUNT.x * CLUSTER_COUNT.y) + clusterXYZ.y * CLUSTER_COUNT.x + clusterXYZ.x;
 
 	// Find a random pos inside the cluster
-	Vec3 worldPos = worldPosInsideCluster(readRand());
+	F32 negativeZViewSpace;
+	Vec3 worldPos = worldPosInsideClusterAndZViewSpace(readRand(), negativeZViewSpace);
 
 	// Get lighting
-	Vec4 lightAndFog = accumulateLightsAndFog(clusterIdx, worldPos);
+	F32 linearDepth = negativeZViewSpace / (u_far - u_near);
+	Vec4 lightAndFog = accumulateLightsAndFog(clusterIdx, worldPos, linearDepth);
 
 	// Read the prev result
 	{

+ 50 - 15
shaders/glsl_cpp_common/ClusteredShading.h

@@ -3,6 +3,8 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+// Mainly contains light related structures. Everything is packed to align with std140
+
 #pragma once
 
 #include <shaders/glsl_cpp_common/Common.h>
@@ -13,6 +15,8 @@ ANKI_BEGIN_NAMESPACE
 const U32 TYPED_OBJECT_COUNT = 5u;
 const F32 INVALID_TEXTURE_INDEX = -1.0;
 const F32 LIGHT_FRUSTUM_NEAR_PLANE = 0.1 / 4.0; // The near plane on the shadow map frustums.
+const U32 MAX_SHADOW_CASCADES = 4u;
+const F32 SUBSURFACE_MIN = 0.05;
 
 // See the documentation in the ClustererBin class.
 struct ClustererMagicValues
@@ -24,32 +28,56 @@ struct ClustererMagicValues
 // Point light
 struct PointLight
 {
-	Vec4 m_posRadius; // xyz: Light pos in world space. w: The 1/(radius^2)
-	Vec4 m_diffuseColorTileSize; // xyz: diff color, w: tile size in the shadow atlas
-	Vec2 m_radiusPad1; // x: radius
-	UVec2 m_atlasTiles; // x: encodes 6 uints with atlas tile indices in the x dir. y: same for y dir.
+	Vec3 m_position; // Position in world space
+	F32 m_squareRadiusOverOne; // 1/(radius^2)
+	Vec3 m_diffuseColor;
+	F32 m_shadowAtlasTileScale; // UV scale for all tiles
+	Vec3 m_padding;
+	F32 m_radius; // Radius
+	Vec4 m_shadowAtlasTileOffsets[3u]; // It's a Vec4 because of the std140 limitations
 };
-const U32 SIZEOF_POINT_LIGHT = 3 * SIZEOF_VEC4;
+const U32 SIZEOF_POINT_LIGHT = 6 * SIZEOF_VEC4;
 ANKI_SHADER_STATIC_ASSERT(sizeof(PointLight) == SIZEOF_POINT_LIGHT)
 
 // Spot light
 struct SpotLight
 {
-	Vec4 m_posRadius; // xyz: Light pos in world space. w: The 1/(radius^2)
-	Vec4 m_diffuseColorShadowmapId; // xyz: diff color, w: shadowmap tex ID
-	Vec4 m_lightDirRadius; // xyz: light direction, w: radius
-	Vec4 m_outerCosInnerCos;
+	Vec3 m_position; // Position in world space
+	F32 m_squareRadiusOverOne; // 1/(radius^2)
+	Vec3 m_diffuseColor;
+	F32 m_shadowmapId; // Shadowmap tex ID
+	Vec3 m_dir; // Light direction
+	F32 m_radius; // Max distance
+	F32 m_outerCos;
+	F32 m_innerCos;
+	F32 m_padding0;
+	F32 m_padding1;
 	Mat4 m_texProjectionMat;
 };
 const U32 SIZEOF_SPOT_LIGHT = 4 * SIZEOF_VEC4 + SIZEOF_MAT4;
 ANKI_SHADER_STATIC_ASSERT(sizeof(SpotLight) == SIZEOF_SPOT_LIGHT)
 
+// Directional light (sun)
+struct DirectionalLight
+{
+	Vec3 m_diffuseColor;
+	U32 m_cascadeCount; // If it's zero then it doesn't cast shadows
+	Vec3 m_dir;
+	U32 m_active;
+	Mat4 m_textureMatrices[MAX_SHADOW_CASCADES];
+};
+const U32 SIZEOF_DIR_LIGHT = 2 * SIZEOF_VEC4 + MAX_SHADOW_CASCADES * SIZEOF_MAT4;
+ANKI_SHADER_STATIC_ASSERT(sizeof(DirectionalLight) == SIZEOF_DIR_LIGHT)
+
 // Representation of a reflection probe
 struct ReflectionProbe
 {
-	Vec4 m_positionCubemapIndex; // xyz: Position of the prove in view space. w: Slice in u_reflectionsTex vector.
-	Vec4 m_aabbMinPad1;
-	Vec4 m_aabbMaxPad1;
+	Vec3 m_position; // Position of the probe in world space
+	F32 m_cubemapIndex; // Slice in cubemap array texture
+	Vec3 m_aabbMin;
+	F32 m_padding0;
+	Vec3 m_aabbMax;
+	F32 m_padding1;
 };
 const U32 SIZEOF_REFLECTION_PROBE = 3 * SIZEOF_VEC4;
 ANKI_SHADER_STATIC_ASSERT(sizeof(ReflectionProbe) == SIZEOF_REFLECTION_PROBE)
@@ -80,12 +108,16 @@ ANKI_SHADER_STATIC_ASSERT(sizeof(FogDensityVolume) == SIZEOF_FOG_DENSITY_VOLUME)
 struct LightingUniforms
 {
 	Vec4 m_unprojectionParams;
-	Vec4 m_rendererSizeTimeNear;
-	Vec4 m_cameraPosFar;
+	Vec2 m_rendererSize;
+	F32 m_time;
+	F32 m_near;
+	Vec3 m_cameraPos;
+	F32 m_far;
 	ClustererMagicValues m_clustererMagicValues;
 	ClustererMagicValues m_prevClustererMagicValues;
 	UVec4 m_clusterCount;
-	UVec4 m_lightVolumeLastClusterPad3;
+	Vec3 m_padding;
+	U32 m_lightVolumeLastCluster;
 	Mat4 m_viewMat;
 	Mat4 m_invViewMat;
 	Mat4 m_projMat;
@@ -94,7 +126,10 @@ struct LightingUniforms
 	Mat4 m_invViewProjMat;
 	Mat4 m_prevViewProjMat;
 	Mat4 m_prevViewProjMatMulInvViewProjMat; // Used to re-project previous frames
+	DirectionalLight m_dirLight;
 };
+const U32 SIZEOF_LIGHTING_UNIFORMS = 9 * SIZEOF_VEC4 + 8 * SIZEOF_MAT4 + SIZEOF_DIR_LIGHT;
+ANKI_SHADER_STATIC_ASSERT(sizeof(LightingUniforms) == SIZEOF_LIGHTING_UNIFORMS)
 
 ANKI_SHADER_FUNC_INLINE F32 computeClusterKf(ClustererMagicValues magic, Vec3 worldPos)
 {

+ 5 - 1
shaders/glsl_cpp_common/Common.h

@@ -6,7 +6,7 @@
 #pragma once
 
 //
-// Macros & functions
+// Macros & functions C++
 //
 #if defined(__cplusplus)
 #	define ANKI_BEGIN_NAMESPACE \
@@ -29,6 +29,9 @@ inline F32 dot(const T& a, const T& b)
 }
 ANKI_END_NAMESPACE
 
+//
+// Macros & functions GLSL
+//
 #else
 #	define x() x
 #	define y() y
@@ -53,6 +56,7 @@ ANKI_END_NAMESPACE
 //
 ANKI_BEGIN_NAMESPACE
 
+const U32 SIZEOF_VEC2 = 2u * 4u;
 const U32 SIZEOF_VEC4 = 4u * 4u;
 const U32 SIZEOF_MAT4 = 4u * SIZEOF_VEC4;
 

+ 59 - 11
shaders/glsl_cpp_common/TraditionalDeferredShading.h

@@ -11,27 +11,74 @@ ANKI_BEGIN_NAMESPACE
 
 struct DeferredPointLightUniforms
 {
-	Vec4 m_inputTexUvScaleAndOffset; // Use this to get the correct face UVs
+	// Use these to get the correct face UVs
+	Vec2 m_inputTexUvScale;
+	Vec2 m_inputTexUvOffset;
+
 	Mat4 m_invViewProjMat;
-	Vec4 m_camPosPad1;
-	Vec4 m_fbSizePad2;
+
+	Vec3 m_camPos;
+	F32 m_padding;
+
+	Vec2 m_fbSize;
+	Vec2 m_padding1;
 
 	// Light props
-	Vec4 m_posRadius; // xyz: Light pos in world space. w: The -1/radius
-	Vec4 m_diffuseColorPad1; // xyz: diff color
+	Vec3 m_position;
+	F32 m_oneOverSquareRadius; // 1/radius^2
+
+	Vec3 m_diffuseColor;
+	F32 m_padding2;
 };
 
 struct DeferredSpotLightUniforms
 {
-	Vec4 m_inputTexUvScaleAndOffset; // Use this to get the correct face UVs
+	// Use these to get the correct face UVs
+	Vec2 m_inputTexUvScale;
+	Vec2 m_inputTexUvOffset;
+
+	Mat4 m_invViewProjMat;
+
+	Vec3 m_camPos;
+	F32 m_padding;
+
+	Vec2 m_fbSize;
+	Vec2 m_padding1;
+
+	// Light props
+	Vec3 m_position;
+	F32 m_oneOverSquareRadius; // 1/radius^2
+
+	Vec3 m_diffuseColor;
+	F32 m_outerCos;
+
+	Vec3 m_lightDir;
+	F32 m_innerCos;
+};
+
+struct DeferredDirectionalLightUniforms
+{
+	// Use these to get the correct face UVs
+	Vec2 m_inputTexUvScale;
+	Vec2 m_inputTexUvOffset;
+
 	Mat4 m_invViewProjMat;
-	Vec4 m_camPosPad1;
-	Vec4 m_fbSizePad2;
+
+	Vec3 m_camPos;
+	F32 m_padding;
+
+	Vec2 m_fbSize;
+	F32 m_near;
+	F32 m_far;
 
 	// Light props
-	Vec4 m_posRadius; // xyz: Light pos in world space. w: The -1/radius
-	Vec4 m_diffuseColorOuterCos; // xyz: diff color, w: outer cosine of spot
-	Vec4 m_lightDirInnerCos; // xyz: light dir, w: inner cosine of spot
+	Vec3 m_diffuseColor;
+	F32 m_padding2;
+
+	Vec3 m_lightDir;
+	F32 m_effectiveShadowDistance;
+
+	Mat4 m_lightMatrix;
 };
 
 struct DeferredVertexUniforms
@@ -43,5 +90,6 @@ const UVec2 GBUFFER_RT0_BINDING = UVec2(0, 0);
 const UVec2 GBUFFER_RT1_BINDING = UVec2(0, 1);
 const UVec2 GBUFFER_RT2_BINDING = UVec2(0, 2);
 const UVec2 GBUFFER_DEPTH_BINDING = UVec2(0, 3);
+const UVec2 GBUFFER_SHADOW_ATLAS_BINDING = UVec2(0, 4);
 
 ANKI_END_NAMESPACE

+ 13 - 0
src/anki/collision/Aabb.h

@@ -125,6 +125,19 @@ public:
 	/// Calculate from a set of points
 	void setFromPointCloud(const void* buff, U count, PtrSize stride, PtrSize buffSize);
 
+	// Intersect a ray against this AABB. The ray origin is expected to be inside the AABB. The function returns the
+	// distance 'a' such that the intersection point is rayOrigin + rayDir * a
+	// https://community.arm.com/graphics/b/blog/posts/reflections-based-on-local-cubemaps-in-unity
+	F32 intersectRayInside(const Vec3& rayOrigin, const Vec3& rayDir) const
+	{
+		const Vec3 reciprocal = rayDir.reciprocal();
+		const Vec3 intersectMaxPointPlanes = (m_max.xyz() - rayOrigin) * reciprocal;
+		const Vec3 intersectMinPointPlanes = (m_min.xyz() - rayOrigin) * reciprocal;
+		const Vec3 largestParams = intersectMaxPointPlanes.max(intersectMinPointPlanes);
+		const F32 distToIntersect = min(min(largestParams.x(), largestParams.y()), largestParams.z());
+		return distToIntersect;
+	}
+
 private:
 	Vec4 m_min;
 	Vec4 m_max;

+ 1 - 1
src/anki/collision/CompoundShape.cpp

@@ -13,7 +13,7 @@ namespace anki
 CompoundShape::CompoundShape()
 	: CollisionShape(CollisionShapeType::COMPOUND)
 {
-	memset(&m_dflt, 0, sizeof(m_dflt));
+	zeroMemory(m_dflt);
 }
 
 F32 CompoundShape::testPlane(const Plane& p) const

+ 1 - 1
src/anki/collision/Plane.cpp

@@ -26,7 +26,7 @@ void Plane::setFrom3Points(const Vec4& p0, const Vec4& p1, const Vec4& p2)
 	m_normal = u.cross(v);
 
 	// length of normal had better not be zero
-	ANKI_ASSERT(!isZero(m_normal.getLengthSquared()));
+	ANKI_ASSERT(m_normal.getLengthSquared() != 0.0f);
 
 	m_normal.normalize();
 	m_offset = m_normal.dot(p0);

+ 11 - 4
src/anki/core/Config.cpp

@@ -5,6 +5,8 @@
 
 #include <anki/core/Config.h>
 #include <anki/util/System.h>
+#include <anki/Math.h>
+#include <shaders/glsl_cpp_common/ClusteredShading.h>
 
 namespace anki
 {
@@ -25,9 +27,12 @@ Config::Config()
 	newOption("r.volumetricLightingAccumulation.finalClusterInZ", 26);
 
 	newOption("r.shadowMapping.enabled", true);
-	newOption("r.shadowMapping.resolution", 512);
-	newOption("r.shadowMapping.tileCountPerRowOrColumn", 8);
-	newOption("r.shadowMapping.scratchTileCount", 8);
+	newOption("r.shadowMapping.tileResolution", 128);
+	newOption("r.shadowMapping.tileCountPerRowOrColumn", 16);
+	newOption("r.shadowMapping.scratchTileCountX", 4 * (MAX_SHADOW_CASCADES + 2));
+	newOption("r.shadowMapping.scratchTileCountY", 4);
+	newOption("r.shadowMapping.lightLodDistance0", 10.0);
+	newOption("r.shadowMapping.lightLodDistance1", 20.0);
 
 	newOption("r.lensFlare.maxSpritesPerFlare", 8);
 	newOption("r.lensFlare.maxFlares", 16);
@@ -37,6 +42,7 @@ Config::Config()
 
 	newOption("r.indirect.reflectionResolution", 128);
 	newOption("r.indirect.maxSimultaneousProbeCount", 32);
+	newOption("r.indirect.shadowMapResolution", 64);
 
 	newOption("r.motionBlur.maxSamples", 32);
 
@@ -48,8 +54,9 @@ Config::Config()
 	newOption("r.final.motionBlurSamples", 32);
 
 	// Scene
-	newOption("scene.imageReflectionMaxDistance", 30.0);
 	newOption("scene.earlyZDistance", 10.0, "Objects with distance lower than that will be used in early Z");
+	newOption("scene.reflectionProbeEffectiveDistance", 256.0, "How far reflection probes can look");
+	newOption("scene.reflectionProbeShadowEffectiveDistance", 32.0, "How far to render shadows for reflection probes");
 
 	// Globals
 	newOption("width", 1280);

+ 2 - 2
src/anki/gr/Common.h

@@ -168,7 +168,7 @@ public:
 
 	ClearValue()
 	{
-		memset(this, 0, sizeof(*this));
+		zeroMemory(*this);
 	}
 
 	ClearValue(const ClearValue& b)
@@ -351,7 +351,7 @@ public:
 	void setName(CString name)
 	{
 		// Zero it because the derived classes may be hashed.
-		memset(&m_name[0], 0, sizeof(m_name));
+		zeroMemory(m_name);
 
 		if(name && name.getLength())
 		{

+ 1 - 1
src/anki/gr/vulkan/Pipeline.h

@@ -155,7 +155,7 @@ public:
 
 		// Do a special construction. The state will be hashed and the padding may contain garbage. This trick zeroes
 		// the padding
-		memset(this, 0, sizeof(*this));
+		zeroMemory(*this);
 
 #define ANKI_CONSTRUCT_AND_ZERO_PADDING(memb_) new(&memb_) decltype(memb_)()
 

+ 3 - 3
src/anki/gr/vulkan/ShaderProgramImpl.cpp

@@ -47,7 +47,7 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 
 			m_stages |= static_cast<ShaderTypeBit>(1 << stype);
 
-			const ShaderImpl& simpl = *scast<const ShaderImpl*>(m_shaders[stype].get());
+			const ShaderImpl& simpl = *static_cast<const ShaderImpl*>(m_shaders[stype].get());
 
 			m_refl.m_activeBindingMask[set] |= simpl.m_activeBindingMask[set];
 
@@ -121,9 +121,9 @@ Error ShaderProgramImpl::init(const ShaderProgramInitInfo& inf)
 	const Bool graphicsProg = !!(m_stages & ShaderTypeBit::VERTEX);
 	if(graphicsProg)
 	{
-		m_refl.m_attributeMask = scast<const ShaderImpl*>(m_shaders[ShaderType::VERTEX].get())->m_attributeMask;
+		m_refl.m_attributeMask = static_cast<const ShaderImpl*>(m_shaders[ShaderType::VERTEX].get())->m_attributeMask;
 		m_refl.m_colorAttachmentWritemask =
-			scast<const ShaderImpl*>(m_shaders[ShaderType::FRAGMENT].get())->m_colorAttachmentWritemask;
+			static_cast<const ShaderImpl*>(m_shaders[ShaderType::FRAGMENT].get())->m_colorAttachmentWritemask;
 
 		const U attachmentCount = m_refl.m_colorAttachmentWritemask.getEnabledBitCount();
 		for(U i = 0; i < attachmentCount; ++i)

+ 1 - 1
src/anki/gr/vulkan/TextureImpl.cpp

@@ -91,7 +91,7 @@ Error TextureImpl::initInternal(VkImage externalImage, const TextureInitInfo& in
 	}
 
 	// Init the template
-	memset(&m_viewCreateInfoTemplate, 0, sizeof(m_viewCreateInfoTemplate)); // memset, it will be used for hashing
+	zeroMemory(m_viewCreateInfoTemplate); // zero it, it will be used for hashing
 	m_viewCreateInfoTemplate.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
 	m_viewCreateInfoTemplate.image = m_imageHandle;
 	m_viewCreateInfoTemplate.viewType = convertTextureViewType(init.m_type);

+ 3 - 3
src/anki/input/Input.cpp

@@ -11,11 +11,11 @@ namespace anki
 
 void Input::reset()
 {
-	std::memset(&m_keys[0], 0, sizeof(m_keys));
-	std::memset(&m_mouseBtns[0], 0, sizeof(m_mouseBtns));
+	zeroMemory(m_keys);
+	zeroMemory(m_mouseBtns);
 	m_mousePosNdc = Vec2(-1.0f);
 	m_mousePosWin = UVec2(0u);
-	std::memset(&m_events[0], 0, sizeof(m_events));
+	zeroMemory(m_events);
 }
 
 } // end namespace anki

+ 1 - 1
src/anki/input/InputAndroid.cpp

@@ -36,7 +36,7 @@ void Input::handleEvents()
 	int outEvents;
 	android_poll_source* source;
 
-	memset(&events[0], 0, sizeof(events));
+	zeroMemory(events);
 
 	while((ident = ALooper_pollAll(0, NULL, &outEvents, (void**)&source)) >= 0)
 	{

+ 11 - 0
src/anki/math/Vec.h

@@ -2239,6 +2239,17 @@ public:
 		return out;
 	}
 
+	/// Get a safe 1 / (*this)
+	TV reciprocal() const
+	{
+		TV out;
+		for(U i = 0; i < N; ++i)
+		{
+			out[i] = T(1) / m_arr[i];
+		}
+		return out;
+	}
+
 	/// Serialize the structure.
 	void serialize(void* data, PtrSize& size) const
 	{

+ 0 - 54
src/anki/physics/Common.h

@@ -109,60 +109,6 @@ ANKI_USE_RESULT inline Transform toAnki(const btTransform& t)
 	out.setScale(1.0f);
 	return out;
 }
-
-/// A wrapper template to compensate for of the fact that Bullet classes get initialized in the constructor.
-template<typename TBtClass>
-class BtClassWrapper
-{
-public:
-	BtClassWrapper()
-	{
-	}
-
-	template<typename... TArgs>
-	void init(TArgs&&... args)
-	{
-		::new(&m_data[0]) TBtClass(std::forward<TArgs>(args)...);
-	}
-
-	void destroy()
-	{
-		reinterpret_cast<TBtClass*>(&m_data[0])->~TBtClass();
-	}
-
-	TBtClass* operator->()
-	{
-		return reinterpret_cast<TBtClass*>(&m_data[0]);
-	}
-
-	const TBtClass* operator->() const
-	{
-		return reinterpret_cast<const TBtClass*>(&m_data[0]);
-	}
-
-	TBtClass& operator*()
-	{
-		return *reinterpret_cast<TBtClass*>(&m_data[0]);
-	}
-
-	const TBtClass& operator*() const
-	{
-		return *reinterpret_cast<const TBtClass*>(&m_data[0]);
-	}
-
-	TBtClass* get()
-	{
-		return reinterpret_cast<TBtClass*>(&m_data[0]);
-	}
-
-	const TBtClass* get() const
-	{
-		return reinterpret_cast<const TBtClass*>(&m_data[0]);
-	}
-
-private:
-	alignas(alignof(TBtClass)) Array<U8, sizeof(TBtClass)> m_data;
-};
 /// @}
 
 } // end namespace anki

+ 2 - 1
src/anki/physics/PhysicsBody.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <anki/physics/PhysicsObject.h>
+#include <anki/util/ClassWrapper.h>
 
 namespace anki
 {
@@ -117,7 +118,7 @@ private:
 	};
 
 	/// Store the data of the btRigidBody in place to avoid additional allocations.
-	BtClassWrapper<btRigidBody> m_body;
+	ClassWrapper<btRigidBody> m_body;
 
 	Transform m_trf = Transform::getIdentity();
 	MotionState m_motionState;

+ 7 - 6
src/anki/physics/PhysicsCollisionShape.h

@@ -7,6 +7,7 @@
 
 #include <anki/physics/PhysicsObject.h>
 #include <anki/util/WeakArray.h>
+#include <anki/util/ClassWrapper.h>
 
 namespace anki
 {
@@ -43,16 +44,16 @@ protected:
 	class TriMesh
 	{
 	public:
-		BtClassWrapper<btGImpactMeshShape> m_dynamic;
-		BtClassWrapper<btBvhTriangleMeshShape> m_static;
+		ClassWrapper<btGImpactMeshShape> m_dynamic;
+		ClassWrapper<btBvhTriangleMeshShape> m_static;
 	};
 
 	// All shapes
 	union
 	{
-		BtClassWrapper<btBoxShape> m_box;
-		BtClassWrapper<btSphereShape> m_sphere;
-		BtClassWrapper<btConvexHullShape> m_convex;
+		ClassWrapper<btBoxShape> m_box;
+		ClassWrapper<btSphereShape> m_sphere;
+		ClassWrapper<btConvexHullShape> m_convex;
 		TriMesh m_triMesh;
 	};
 
@@ -129,7 +130,7 @@ class PhysicsTriangleSoup final : public PhysicsCollisionShape
 	ANKI_PHYSICS_OBJECT
 
 private:
-	BtClassWrapper<btTriangleMesh> m_mesh;
+	ClassWrapper<btTriangleMesh> m_mesh;
 
 	PhysicsTriangleSoup(
 		PhysicsWorld* world, ConstWeakArray<Vec3> positions, ConstWeakArray<U32> indices, Bool convex = false);

+ 3 - 2
src/anki/physics/PhysicsJoint.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <anki/physics/PhysicsObject.h>
+#include <anki/util/ClassWrapper.h>
 
 namespace anki
 {
@@ -40,8 +41,8 @@ public:
 protected:
 	union
 	{
-		BtClassWrapper<btPoint2PointConstraint> m_p2p;
-		BtClassWrapper<btHingeConstraint> m_hinge;
+		ClassWrapper<btPoint2PointConstraint> m_p2p;
+		ClassWrapper<btHingeConstraint> m_hinge;
 	};
 
 	PhysicsBodyPtr m_bodyA;

+ 4 - 3
src/anki/physics/PhysicsPlayerController.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <anki/physics/PhysicsObject.h>
+#include <anki/util/ClassWrapper.h>
 
 namespace anki
 {
@@ -49,9 +50,9 @@ public:
 	}
 
 private:
-	BtClassWrapper<btPairCachingGhostObject> m_ghostObject;
-	BtClassWrapper<btCapsuleShape> m_convexShape;
-	BtClassWrapper<btKinematicCharacterController> m_controller;
+	ClassWrapper<btPairCachingGhostObject> m_ghostObject;
+	ClassWrapper<btCapsuleShape> m_convexShape;
+	ClassWrapper<btKinematicCharacterController> m_controller;
 
 	Transform m_prevTrf = Transform::getIdentity();
 

+ 2 - 1
src/anki/physics/PhysicsTrigger.h

@@ -7,6 +7,7 @@
 
 #include <anki/physics/PhysicsObject.h>
 #include <anki/util/WeakArray.h>
+#include <anki/util/ClassWrapper.h>
 
 namespace anki
 {
@@ -46,7 +47,7 @@ public:
 
 private:
 	PhysicsCollisionShapePtr m_shape;
-	BtClassWrapper<btGhostObject> m_ghostShape;
+	ClassWrapper<btGhostObject> m_ghostShape;
 
 	PhysicsTriggerProcessContactCallback* m_contactCallback = nullptr;
 

+ 9 - 9
src/anki/physics/PhysicsWorld.h

@@ -9,6 +9,7 @@
 #include <anki/physics/PhysicsObject.h>
 #include <anki/util/List.h>
 #include <anki/util/WeakArray.h>
+#include <anki/util/ClassWrapper.h>
 
 namespace anki
 {
@@ -48,10 +49,9 @@ public:
 	template<typename T, typename... TArgs>
 	PhysicsPtr<T> newInstance(TArgs&&... args)
 	{
-		void* mem = m_alloc.getMemoryPool().allocate(sizeof(T), alignof(T));
-		::new(mem) T(this, std::forward<TArgs>(args)...);
+		T* obj = static_cast<T*>(m_alloc.getMemoryPool().allocate(sizeof(T), alignof(T)));
+		::new(obj) T(this, std::forward<TArgs>(args)...);
 
-		T* obj = static_cast<T*>(mem);
 		LockGuard<Mutex> lock(m_objectListsMtx);
 		m_objectLists[obj->getType()].pushBack(obj);
 
@@ -110,14 +110,14 @@ private:
 	HeapAllocator<U8> m_alloc;
 	StackAllocator<U8> m_tmpAlloc;
 
-	BtClassWrapper<btDbvtBroadphase> m_broadphase;
-	BtClassWrapper<btGhostPairCallback> m_gpc;
+	ClassWrapper<btDbvtBroadphase> m_broadphase;
+	ClassWrapper<btGhostPairCallback> m_gpc;
 	MyOverlapFilterCallback* m_filterCallback = nullptr;
 
-	BtClassWrapper<btDefaultCollisionConfiguration> m_collisionConfig;
-	BtClassWrapper<btCollisionDispatcher> m_dispatcher;
-	BtClassWrapper<btSequentialImpulseConstraintSolver> m_solver;
-	BtClassWrapper<btDiscreteDynamicsWorld> m_world;
+	ClassWrapper<btDefaultCollisionConfiguration> m_collisionConfig;
+	ClassWrapper<btCollisionDispatcher> m_dispatcher;
+	ClassWrapper<btSequentialImpulseConstraintSolver> m_solver;
+	ClassWrapper<btDiscreteDynamicsWorld> m_world;
 	mutable Mutex m_btWorldMtx;
 
 	Array<IntrusiveList<PhysicsObject>, U(PhysicsObjectType::COUNT)> m_objectLists;

+ 10 - 2
src/anki/renderer/Bloom.cpp

@@ -88,7 +88,10 @@ void Bloom::populateRenderGraph(RenderingContext& ctx)
 
 		// Set the render pass
 		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("Bloom Main");
-		rpass.setWork(runExposureCallback, this, 0);
+		rpass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) { static_cast<Bloom*>(rgraphCtx.m_userData)->runExposure(rgraphCtx); },
+			this,
+			0);
 
 		TextureSubresourceInfo inputTexSubresource;
 		inputTexSubresource.m_firstMipmap = m_r->getDownscaleBlur().getMipmapCount() - 1;
@@ -103,7 +106,12 @@ void Bloom::populateRenderGraph(RenderingContext& ctx)
 
 		// Set the render pass
 		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("Bloom Upscale");
-		rpass.setWork(runUpscaleAndSslfCallback, this, 0);
+		rpass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				static_cast<Bloom*>(rgraphCtx.m_userData)->runUpscaleAndSslf(rgraphCtx);
+			},
+			this,
+			0);
 
 		rpass.newDependency({m_runCtx.m_exposureRt, TextureUsageBit::SAMPLED_COMPUTE});
 		rpass.newDependency({m_runCtx.m_upscaleRt, TextureUsageBit::IMAGE_COMPUTE_WRITE});

+ 0 - 10
src/anki/renderer/Bloom.h

@@ -91,16 +91,6 @@ private:
 		return Error::NONE;
 	}
 
-	static void runExposureCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		scast<Bloom*>(rgraphCtx.m_userData)->runExposure(rgraphCtx);
-	}
-
-	static void runUpscaleAndSslfCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		scast<Bloom*>(rgraphCtx.m_userData)->runUpscaleAndSslf(rgraphCtx);
-	}
-
 	void runExposure(RenderPassWorkContext& rgraphCtx);
 	void runUpscaleAndSslf(RenderPassWorkContext& rgraphCtx);
 };

+ 22 - 14
src/anki/renderer/ClusterBin.cpp

@@ -557,20 +557,24 @@ void ClusterBin::writeTypedObjectsToGpuBuffers(BinCtx& ctx) const
 			const PointLightQueueElement& in = rqueue.m_pointLights[i];
 			PointLight& out = gpuLights[i];
 
-			out.m_posRadius = Vec4(in.m_worldPosition.xyz(), 1.0f / (in.m_radius * in.m_radius));
-			out.m_diffuseColorTileSize = in.m_diffuseColor.xyz0();
+			out.m_position = in.m_worldPosition;
+			out.m_squareRadiusOverOne = 1.0f / (in.m_radius * in.m_radius);
+			out.m_diffuseColor = in.m_diffuseColor;
 
 			if(in.m_shadowRenderQueues[0] == nullptr || !ctx.m_in->m_shadowsEnabled)
 			{
-				out.m_diffuseColorTileSize.w() = INVALID_TEXTURE_INDEX;
+				out.m_shadowAtlasTileScale = INVALID_TEXTURE_INDEX;
 			}
 			else
 			{
-				out.m_diffuseColorTileSize.w() = in.m_atlasTileSize;
-				out.m_atlasTiles = UVec2(in.m_atlasTiles.x(), in.m_atlasTiles.y());
+				out.m_shadowAtlasTileScale = in.m_shadowAtlasTileSize;
+				ANKI_ASSERT(sizeof(out.m_shadowAtlasTileOffsets) == sizeof(in.m_shadowAtlasTileOffsets));
+				memcpy(&out.m_shadowAtlasTileOffsets[0],
+					&in.m_shadowAtlasTileOffsets[0],
+					sizeof(in.m_shadowAtlasTileOffsets));
 			}
 
-			out.m_radiusPad1 = Vec2(in.m_radius);
+			out.m_radius = in.m_radius;
 		}
 	}
 	else
@@ -603,18 +607,21 @@ void ClusterBin::writeTypedObjectsToGpuBuffers(BinCtx& ctx) const
 			}
 
 			// Pos & dist
-			out.m_posRadius =
-				Vec4(in.m_worldTransform.getTranslationPart().xyz(), 1.0f / (in.m_distance * in.m_distance));
+			out.m_position = in.m_worldTransform.getTranslationPart().xyz();
+			out.m_squareRadiusOverOne = 1.0f / (in.m_distance * in.m_distance);
 
 			// Diff color and shadowmap ID now
-			out.m_diffuseColorShadowmapId = Vec4(in.m_diffuseColor, shadowmapIndex);
+			out.m_diffuseColor = in.m_diffuseColor;
+			out.m_shadowmapId = shadowmapIndex;
 
 			// Light dir & radius
 			Vec3 lightDir = -in.m_worldTransform.getRotationPart().getZAxis();
-			out.m_lightDirRadius = Vec4(lightDir, in.m_distance);
+			out.m_dir = lightDir;
+			out.m_radius = in.m_distance;
 
 			// Angles
-			out.m_outerCosInnerCos = Vec4(cos(in.m_outerAngle / 2.0f), cos(in.m_innerAngle / 2.0f), 1.0f, 1.0f);
+			out.m_outerCos = cos(in.m_outerAngle / 2.0f);
+			out.m_innerCos = cos(in.m_innerAngle / 2.0f);
 		}
 	}
 	else
@@ -684,9 +691,10 @@ void ClusterBin::writeTypedObjectsToGpuBuffers(BinCtx& ctx) const
 			const ReflectionProbeQueueElement& in = rqueue.m_reflectionProbes[i];
 			ReflectionProbe& out = gpuProbes[i];
 
-			out.m_positionCubemapIndex = Vec4(in.m_worldPosition, in.m_textureArrayIndex);
-			out.m_aabbMinPad1 = in.m_aabbMin.xyz0();
-			out.m_aabbMaxPad1 = in.m_aabbMax.xyz0();
+			out.m_position = in.m_worldPosition;
+			out.m_cubemapIndex = in.m_textureArrayIndex;
+			out.m_aabbMin = in.m_aabbMin;
+			out.m_aabbMax = in.m_aabbMax;
 		}
 	}
 	else

+ 1 - 0
src/anki/renderer/Common.h

@@ -45,6 +45,7 @@ class DebugDrawer;
 class RenderQueue;
 class RenderableQueueElement;
 class PointLightQueueElement;
+class DirectionalLightQueueElement;
 class SpotLightQueueElement;
 class ReflectionProbeQueueElement;
 class DecalQueueElement;

+ 7 - 1
src/anki/renderer/Dbg.cpp

@@ -108,7 +108,13 @@ void Dbg::populateRenderGraph(RenderingContext& ctx)
 	// Create pass
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("DBG");
 
-	pass.setWork(runCallback, this, 0);
+	pass.setWork(
+		[](RenderPassWorkContext& rgraphCtx) {
+			Dbg* self = static_cast<Dbg*>(rgraphCtx.m_userData);
+			self->run(rgraphCtx, *self->m_runCtx.m_ctx);
+		},
+		this,
+		0);
 	pass.setFramebufferInfo(m_fbDescr, {{m_runCtx.m_rt}}, m_r->getGBuffer().getDepthRt());
 
 	pass.newDependency({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});

+ 0 - 7
src/anki/renderer/Dbg.h

@@ -91,13 +91,6 @@ private:
 
 	ANKI_USE_RESULT Error lazyInit();
 
-	// A RenderPassWorkCallback for debug pass.
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		Dbg* self = static_cast<Dbg*>(rgraphCtx.m_userData);
-		self->run(rgraphCtx, *self->m_runCtx.m_ctx);
-	}
-
 	void run(RenderPassWorkContext& rgraphCtx, const RenderingContext& ctx);
 };
 /// @}

+ 12 - 2
src/anki/renderer/DownscaleBlur.cpp

@@ -106,7 +106,12 @@ void DownscaleBlur::populateRenderGraph(RenderingContext& ctx)
 		for(U i = 0; i < m_passCount; ++i)
 		{
 			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passNames[i]);
-			pass.setWork(runCallback, this, 0);
+			pass.setWork(
+				[](RenderPassWorkContext& rgraphCtx) {
+					static_cast<DownscaleBlur*>(rgraphCtx.m_userData)->run(rgraphCtx);
+				},
+				this,
+				0);
 
 			if(i > 0)
 			{
@@ -133,7 +138,12 @@ void DownscaleBlur::populateRenderGraph(RenderingContext& ctx)
 		for(U i = 0; i < m_passCount; ++i)
 		{
 			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passNames[i]);
-			pass.setWork(runCallback, this, 0);
+			pass.setWork(
+				[](RenderPassWorkContext& rgraphCtx) {
+					static_cast<DownscaleBlur*>(rgraphCtx.m_userData)->run(rgraphCtx);
+				},
+				this,
+				0);
 			pass.setFramebufferInfo(m_fbDescrs[i], {{m_runCtx.m_rt}}, {});
 
 			if(i > 0)

+ 0 - 6
src/anki/renderer/DownscaleBlur.h

@@ -76,12 +76,6 @@ private:
 	ANKI_USE_RESULT Error initSubpass(U idx, const UVec2& inputTexSize);
 
 	void run(RenderPassWorkContext& rgraphCtx);
-
-	/// A RenderPassWorkCallback for the downscall passes.
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		scast<DownscaleBlur*>(rgraphCtx.m_userData)->run(rgraphCtx);
-	}
 };
 /// @}
 

+ 7 - 1
src/anki/renderer/FinalComposite.cpp

@@ -141,7 +141,13 @@ void FinalComposite::populateRenderGraph(RenderingContext& ctx)
 	// Create the pass
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("Final Composite");
 
-	pass.setWork(runCallback, this, 0);
+	pass.setWork(
+		[](RenderPassWorkContext& rgraphCtx) {
+			FinalComposite* self = static_cast<FinalComposite*>(rgraphCtx.m_userData);
+			self->run(*self->m_runCtx.m_ctx, rgraphCtx);
+		},
+		this,
+		0);
 	pass.setFramebufferInfo(m_fbDescr, {{ctx.m_outRenderTarget}}, {});
 
 	pass.newDependency({ctx.m_outRenderTarget, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});

+ 0 - 7
src/anki/renderer/FinalComposite.h

@@ -52,13 +52,6 @@ private:
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& config);
 
 	void run(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
-
-	/// A RenderPassWorkCallback for the composite pass.
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		FinalComposite* self = scast<FinalComposite*>(rgraphCtx.m_userData);
-		self->run(*self->m_runCtx.m_ctx, rgraphCtx);
-	}
 };
 /// @}
 

+ 5 - 1
src/anki/renderer/GBuffer.cpp

@@ -155,7 +155,11 @@ void GBuffer::populateRenderGraph(RenderingContext& ctx)
 	GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("GBuffer");
 
 	pass.setFramebufferInfo(m_fbDescr, rts, m_depthRt);
-	pass.setWork(runCallback,
+	pass.setWork(
+		[](RenderPassWorkContext& rgraphCtx) {
+			GBuffer* self = static_cast<GBuffer*>(rgraphCtx.m_userData);
+			self->runInThread(*self->m_ctx, rgraphCtx);
+		},
 		this,
 		computeNumberOfSecondLevelCommandBuffers(
 			ctx.m_renderQueue->m_earlyZRenderables.getSize() + ctx.m_renderQueue->m_renderables.getSize()));

+ 0 - 7
src/anki/renderer/GBuffer.h

@@ -51,13 +51,6 @@ private:
 
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& initializer);
 
-	// A RenderPassWorkCallback for G-buffer pass.
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		GBuffer* self = scast<GBuffer*>(rgraphCtx.m_userData);
-		self->runInThread(*self->m_ctx, rgraphCtx);
-	}
-
 	void runInThread(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) const;
 };
 /// @}

+ 4 - 1
src/anki/renderer/GBufferPost.cpp

@@ -59,7 +59,10 @@ void GBufferPost::populateRenderGraph(RenderingContext& ctx)
 	// Create pass
 	GraphicsRenderPassDescription& rpass = rgraph.newGraphicsRenderPass("GBuffPost");
 
-	rpass.setWork(runCallback, this, 0);
+	rpass.setWork(
+		[](RenderPassWorkContext& rgraphCtx) { static_cast<GBufferPost*>(rgraphCtx.m_userData)->run(rgraphCtx); },
+		this,
+		0);
 	rpass.setFramebufferInfo(m_fbDescr, {{m_r->getGBuffer().getColorRt(0), m_r->getGBuffer().getColorRt(1)}}, {});
 
 	rpass.newDependency({m_r->getGBuffer().getColorRt(0), TextureUsageBit::FRAMEBUFFER_ATTACHMENT_READ_WRITE});

+ 0 - 5
src/anki/renderer/GBufferPost.h

@@ -43,11 +43,6 @@ private:
 
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);
 
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		static_cast<GBufferPost*>(rgraphCtx.m_userData)->run(rgraphCtx);
-	}
-
 	void run(RenderPassWorkContext& rgraphCtx);
 };
 /// @}

+ 152 - 3
src/anki/renderer/Indirect.cpp

@@ -50,6 +50,7 @@ Error Indirect::initInternal(const ConfigSet& config)
 	ANKI_CHECK(initLightShading(config));
 	ANKI_CHECK(initIrradiance(config));
 	ANKI_CHECK(initIrradianceToRefl(config));
+	ANKI_CHECK(initShadowMapping(config));
 
 	// Load split sum integration LUT
 	ANKI_CHECK(getResourceManager().loadResource("engine_data/SplitSumIntegration.ankitex", m_integrationLut));
@@ -96,12 +97,12 @@ Error Indirect::initGBuffer(const ConfigSet& config)
 
 		for(U j = 0; j < GBUFFER_COLOR_ATTACHMENT_COUNT; ++j)
 		{
-			m_gbuffer.m_fbDescr.m_colorAttachments[j].m_loadOperation = AttachmentLoadOperation::DONT_CARE;
+			m_gbuffer.m_fbDescr.m_colorAttachments[j].m_loadOperation = AttachmentLoadOperation::CLEAR;
 		}
 
 		m_gbuffer.m_fbDescr.m_depthStencilAttachment.m_aspect = DepthStencilAspectBit::DEPTH;
 		m_gbuffer.m_fbDescr.m_depthStencilAttachment.m_loadOperation = AttachmentLoadOperation::CLEAR;
-		m_gbuffer.m_fbDescr.m_depthStencilAttachment.m_clearValue.m_depthStencil.m_depth = 1.0;
+		m_gbuffer.m_fbDescr.m_depthStencilAttachment.m_clearValue.m_depthStencil.m_depth = 1.0f;
 
 		m_gbuffer.m_fbDescr.bake();
 	}
@@ -186,6 +187,36 @@ Error Indirect::initIrradianceToRefl(const ConfigSet& cfg)
 	return Error::NONE;
 }
 
+Error Indirect::initShadowMapping(const ConfigSet& cfg) // Fills m_shadowMapping: RT descr, FB descr and sampler
+{
+	const U resolution = cfg.getNumber("r.indirect.shadowMapResolution");
+	ANKI_ASSERT(resolution > 8);
+
+	// RT descr: a D32_SFLOAT target that is (resolution * 6) wide and resolution tall
+	m_shadowMapping.m_rtDescr =
+		m_r->create2DRenderTargetDescription(resolution * 6, resolution, Format::D32_SFLOAT, "GI SM");
+	m_shadowMapping.m_rtDescr.bake();
+
+	// FB descr: no color attachments, depth aspect only, depth cleared to 1.0 on load
+	m_shadowMapping.m_fbDescr.m_colorAttachmentCount = 0;
+	m_shadowMapping.m_fbDescr.m_depthStencilAttachment.m_aspect = DepthStencilAspectBit::DEPTH;
+	m_shadowMapping.m_fbDescr.m_depthStencilAttachment.m_clearValue.m_depthStencil.m_depth = 1.0f;
+	m_shadowMapping.m_fbDescr.m_depthStencilAttachment.m_loadOperation = AttachmentLoadOperation::CLEAR;
+	m_shadowMapping.m_fbDescr.bake();
+
+	// Shadow sampler: LESS_EQUAL compare, clamped addressing, linear min/mag filter
+	{
+		SamplerInitInfo inf;
+		inf.m_compareOperation = CompareOperation::LESS_EQUAL;
+		inf.m_addressing = SamplingAddressing::CLAMP;
+		inf.m_mipmapFilter = SamplingFilter::BASE;
+		inf.m_minMagFilter = SamplingFilter::LINEAR;
+		m_shadowMapping.m_shadowSampler = getGrManager().newSampler(inf);
+	}
+
+	return Error::NONE; // This function has no failing path; it always reports success
+}
+
 void Indirect::initCacheEntry(U32 cacheEntryIdx)
 {
 	CacheEntry& cacheEntry = m_cacheEntries[cacheEntryIdx];
@@ -387,12 +418,28 @@ void Indirect::runLightShading(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 		TextureSubresourceInfo(DepthStencilAspectBit::DEPTH),
 		m_r->getNearestSampler());
 
+	// Get shadowmap info
+	const Bool hasDirLight = probe.m_renderQueues[0]->m_directionalLight.m_uuid;
+	if(hasDirLight)
+	{
+		ANKI_ASSERT(m_ctx.m_shadowMapRt.isValid());
+
+		rgraphCtx.bindTextureAndSampler(GBUFFER_SHADOW_ATLAS_BINDING.x(),
+			GBUFFER_SHADOW_ATLAS_BINDING.y(),
+			m_ctx.m_shadowMapRt,
+			TextureSubresourceInfo(DepthStencilAspectBit::DEPTH),
+			m_shadowMapping.m_shadowSampler);
+	}
+
 	m_lightShading.m_deferred.drawLights(rqueue.m_viewProjectionMatrix,
 		rqueue.m_viewProjectionMatrix.getInverse(),
 		rqueue.m_cameraTransform.getTranslationPart(),
 		UVec4(0, 0, m_lightShading.m_tileSize, m_lightShading.m_tileSize),
 		Vec2(faceIdx * (1.0f / 6.0f), 0.0f),
 		Vec2((faceIdx + 1) * (1.0f / 6.0f), 1.0f),
+		probe.m_renderQueues[faceIdx]->m_cameraNear,
+		probe.m_renderQueues[faceIdx]->m_cameraFar,
+		(hasDirLight) ? &probe.m_renderQueues[faceIdx]->m_directionalLight : nullptr,
 		rqueue.m_pointLights,
 		rqueue.m_spotLights,
 		cmdb);
@@ -524,7 +571,12 @@ void Indirect::populateRenderGraph(RenderingContext& rctx)
 		// Pass
 		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("GI gbuff");
 		pass.setFramebufferInfo(m_gbuffer.m_fbDescr, rts, m_ctx.m_gbufferDepthRt);
-		pass.setWork(runGBufferCallback, this, 0);
+		pass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				static_cast<Indirect*>(rgraphCtx.m_userData)->runGBuffer(rgraphCtx.m_commandBuffer);
+			},
+			this,
+			0);
 
 		for(U i = 0; i < GBUFFER_COLOR_ATTACHMENT_COUNT; ++i)
 		{
@@ -535,6 +587,62 @@ void Indirect::populateRenderGraph(RenderingContext& rctx)
 		pass.newDependency({m_ctx.m_gbufferDepthRt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_READ_WRITE, subresource});
 	}
 
+	// Shadow pass. Optional
+	if(probeToUpdate->m_renderQueues[0]->m_directionalLight.m_uuid
+		&& probeToUpdate->m_renderQueues[0]->m_directionalLight.m_shadowCascadeCount > 0)
+	{
+		// Update light matrices
+		for(U i = 0; i < 6; ++i)
+		{
+			ANKI_ASSERT(probeToUpdate->m_renderQueues[i]->m_directionalLight.m_uuid
+						&& probeToUpdate->m_renderQueues[i]->m_directionalLight.m_shadowCascadeCount == 1);
+
+			const F32 xScale = 1.0f / 6.0f;
+			const F32 yScale = 1.0f;
+			const F32 xOffset = F32(i) * (1.0f / 6.0f);
+			const F32 yOffset = 0.0f;
+			const Mat4 atlasMtx(xScale,
+				0.0f,
+				0.0f,
+				xOffset,
+				0.0f,
+				yScale,
+				0.0f,
+				yOffset,
+				0.0f,
+				0.0f,
+				1.0f,
+				0.0f,
+				0.0f,
+				0.0f,
+				0.0f,
+				1.0f);
+
+			Mat4& lightMat = probeToUpdate->m_renderQueues[i]->m_directionalLight.m_textureMatrices[0];
+			lightMat = atlasMtx * lightMat;
+		}
+
+		// RT
+		m_ctx.m_shadowMapRt = rgraph.newRenderTarget(m_shadowMapping.m_rtDescr);
+
+		// Pass
+		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("GI SM");
+		pass.setFramebufferInfo(m_shadowMapping.m_fbDescr, {}, m_ctx.m_shadowMapRt);
+		pass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				static_cast<Indirect*>(rgraphCtx.m_userData)->runShadowMapping(rgraphCtx.m_commandBuffer);
+			},
+			this,
+			0);
+
+		TextureSubresourceInfo subresource(DepthStencilAspectBit::DEPTH);
+		pass.newDependency({m_ctx.m_shadowMapRt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_READ_WRITE, subresource});
+	}
+	else
+	{
+		m_ctx.m_shadowMapRt = {};
+	}
+
 	// Light shading passes
 	{
 		Array<RenderPassWorkCallback, 6> callbacks = {{runLightShadingCallback<0>,
@@ -572,6 +680,11 @@ void Indirect::populateRenderGraph(RenderingContext& rctx)
 			pass.newDependency({m_ctx.m_gbufferDepthRt,
 				TextureUsageBit::SAMPLED_FRAGMENT,
 				TextureSubresourceInfo(DepthStencilAspectBit::DEPTH)});
+
+			if(m_ctx.m_shadowMapRt.isValid())
+			{
+				pass.newDependency({m_ctx.m_shadowMapRt, TextureUsageBit::SAMPLED_FRAGMENT});
+			}
 		}
 	}
 
@@ -771,4 +884,40 @@ Bool Indirect::findBestCacheEntry(U64 probeUuid, U32& cacheEntryIdxAllocated, Bo
 	return failed;
 }
 
+void Indirect::runShadowMapping(CommandBufferPtr& cmdb) // Draws the dir light shadow casters of all 6 probe faces
+{
+	cmdb->setPolygonOffset(1.0f, 1.0f); // Active for all the draws below, reset to zero before returning
+
+	for(U faceIdx = 0; faceIdx < 6; ++faceIdx)
+	{
+		ANKI_ASSERT(m_ctx.m_probe);
+		ANKI_ASSERT(m_ctx.m_probe->m_renderQueues[faceIdx]);
+		const RenderQueue& faceRenderQueue = *m_ctx.m_probe->m_renderQueues[faceIdx];
+		ANKI_ASSERT(faceRenderQueue.m_directionalLight.m_uuid != 0);
+		ANKI_ASSERT(faceRenderQueue.m_directionalLight.m_shadowCascadeCount == 1);
+
+		ANKI_ASSERT(faceRenderQueue.m_directionalLight.m_shadowRenderQueues[0]);
+		const RenderQueue& cascadeRenderQueue = *faceRenderQueue.m_directionalLight.m_shadowRenderQueues[0];
+
+		if(cascadeRenderQueue.m_renderables.getSize() == 0) // Nothing to draw for this face
+		{
+			continue;
+		}
+
+		const U rez = m_shadowMapping.m_rtDescr.m_height; // Tiles are square, the RT height is the tile size
+		cmdb->setViewport(rez * faceIdx, 0, rez, rez); // Face N goes to the Nth tile along X
+		cmdb->setScissor(rez * faceIdx, 0, rez, rez);
+
+		m_r->getSceneDrawer().drawRange(Pass::SM,
+			cascadeRenderQueue.m_viewMatrix,
+			cascadeRenderQueue.m_viewProjectionMatrix,
+			Mat4::getIdentity(), // Don't care about prev matrices here
+			cmdb,
+			cascadeRenderQueue.m_renderables.getBegin(),
+			cascadeRenderQueue.m_renderables.getEnd());
+	}
+
+	cmdb->setPolygonOffset(0.0f, 0.0f); // Undo the offset set at the top of the function
+}
+
 } // end namespace anki

+ 11 - 7
src/anki/renderer/Indirect.h

@@ -100,6 +100,14 @@ private:
 		ShaderProgramPtr m_grProg;
 	} m_irradianceToRefl; ///< Apply irradiance back to the reflection.
 
+	class
+	{
+	public:
+		RenderTargetDescription m_rtDescr;
+		FramebufferDescription m_fbDescr;
+		SamplerPtr m_shadowSampler;
+	} m_shadowMapping;
+
 	class CacheEntry
 	{
 	public:
@@ -128,6 +136,7 @@ private:
 		RenderTargetHandle m_gbufferDepthRt;
 		RenderTargetHandle m_lightShadingRt;
 		RenderTargetHandle m_irradianceRt;
+		RenderTargetHandle m_shadowMapRt;
 	} m_ctx; ///< Runtime context.
 
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);
@@ -135,6 +144,7 @@ private:
 	ANKI_USE_RESULT Error initLightShading(const ConfigSet& cfg);
 	ANKI_USE_RESULT Error initIrradiance(const ConfigSet& cfg);
 	ANKI_USE_RESULT Error initIrradianceToRefl(const ConfigSet& cfg);
+	ANKI_USE_RESULT Error initShadowMapping(const ConfigSet& cfg);
 
 	/// Lazily init the cache entry
 	void initCacheEntry(U32 cacheEntryIdx);
@@ -146,18 +156,12 @@ private:
 	Bool findBestCacheEntry(U64 probeUuid, U32& cacheEntryIdx, Bool& cacheEntryFound);
 
 	void runGBuffer(CommandBufferPtr& cmdb);
+	void runShadowMapping(CommandBufferPtr& cmdb);
 	void runLightShading(U32 faceIdx, RenderPassWorkContext& rgraphCtx);
 	void runMipmappingOfLightShading(U32 faceIdx, RenderPassWorkContext& rgraphCtx);
 	void runIrradiance(U32 faceIdx, RenderPassWorkContext& rgraphCtx);
 	void runIrradianceToRefl(U32 faceIdx, RenderPassWorkContext& rgraphCtx);
 
-	// A RenderPassWorkCallback for G-buffer pass
-	static void runGBufferCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		Indirect* const self = static_cast<Indirect*>(rgraphCtx.m_userData);
-		self->runGBuffer(rgraphCtx.m_commandBuffer);
-	}
-
 	// A RenderPassWorkCallback for the light shading pass into a single face.
 	template<U faceIdx>
 	static void runLightShadingCallback(RenderPassWorkContext& rgraphCtx)

+ 7 - 1
src/anki/renderer/LensFlare.cpp

@@ -129,7 +129,13 @@ void LensFlare::populateRenderGraph(RenderingContext& ctx)
 	{
 		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("LF Upd Ind/ct");
 
-		rpass.setWork(runUpdateIndirectCallback, this, 0);
+		rpass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				LensFlare* const self = static_cast<LensFlare*>(rgraphCtx.m_userData);
+				self->updateIndirectInfo(*self->m_runCtx.m_ctx, rgraphCtx);
+			},
+			this,
+			0);
 
 		rpass.newDependency({m_runCtx.m_indirectBuffHandle, BufferUsageBit::STORAGE_COMPUTE_WRITE});
 		rpass.newDependency({m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_COMPUTE, HIZ_QUARTER_DEPTH});

+ 0 - 7
src/anki/renderer/LensFlare.h

@@ -64,13 +64,6 @@ private:
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& initializer);
 
 	void updateIndirectInfo(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
-
-	/// A RenderPassWorkCallback for updating the indirect info.
-	static void runUpdateIndirectCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		LensFlare* const self = scast<LensFlare*>(rgraphCtx.m_userData);
-		self->updateIndirectInfo(*self->m_runCtx.m_ctx, rgraphCtx);
-	}
 };
 /// @}
 

+ 13 - 2
src/anki/renderer/MainRenderer.cpp

@@ -130,7 +130,13 @@ Error MainRenderer::render(RenderQueue& rqueue, TexturePtr presentTex)
 		fbDescr.bake();
 
 		pass.setFramebufferInfo(fbDescr, {{presentRt}}, {});
-		pass.setWork(runCallback, this, 0);
+		pass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				MainRenderer* const self = static_cast<MainRenderer*>(rgraphCtx.m_userData);
+				self->runBlit(rgraphCtx);
+			},
+			this,
+			0);
 
 		pass.newDependency({presentRt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
 		pass.newDependency({ctx.m_outRenderTarget, TextureUsageBit::SAMPLED_FRAGMENT});
@@ -140,7 +146,12 @@ Error MainRenderer::render(RenderQueue& rqueue, TexturePtr presentTex)
 	{
 		ComputeRenderPassDescription& pass = ctx.m_renderGraphDescr.newComputeRenderPass("Present");
 
-		pass.setWork(presentCallback, nullptr, 0);
+		pass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				// Do nothing. This pass is dummy
+			},
+			nullptr,
+			0);
 		pass.newDependency({presentRt, TextureUsageBit::PRESENT});
 	}
 

+ 0 - 13
src/anki/renderer/MainRenderer.h

@@ -97,19 +97,6 @@ private:
 	void runBlit(RenderPassWorkContext& rgraphCtx);
 	void present(RenderPassWorkContext& rgraphCtx);
 
-	// A RenderPassWorkCallback for blit pass.
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		MainRenderer* const self = scast<MainRenderer*>(rgraphCtx.m_userData);
-		self->runBlit(rgraphCtx);
-	}
-
-	// A RenderPassWorkCallback for present.
-	static void presentCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		// Do nothing. This pass is dummy
-	}
-
 	static void executeSecondaryCallback(
 		void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore);
 };

+ 31 - 2
src/anki/renderer/RenderQueue.h

@@ -8,6 +8,7 @@
 #include <anki/renderer/Common.h>
 #include <anki/resource/RenderingKey.h>
 #include <anki/ui/Canvas.h>
+#include <shaders/glsl_cpp_common/ClusteredShading.h>
 
 namespace anki
 {
@@ -76,8 +77,8 @@ public:
 	const void* m_userData;
 	RenderQueueDrawCallback m_drawCallback;
 
-	UVec2 m_atlasTiles; ///< Renderer internal.
-	F32 m_atlasTileSize; ///< Renderer internal.
+	Array<Vec2, 6> m_shadowAtlasTileOffsets; ///< Renderer internal.
+	F32 m_shadowAtlasTileSize; ///< Renderer internal.
 
 	PointLightQueueElement()
 	{
@@ -119,6 +120,27 @@ public:
 
 static_assert(std::is_trivially_destructible<SpotLightQueueElement>::value == true, "Should be trivially destructible");
 
+/// Directional light render queue element.
+class DirectionalLightQueueElement final
+{
+public:
+	Array<Mat4, MAX_SHADOW_CASCADES> m_textureMatrices; ///< One per cascade. Only the first m_shadowCascadeCount are used.
+	Array<RenderQueue*, MAX_SHADOW_CASCADES> m_shadowRenderQueues; ///< One render queue per cascade.
+	const void* m_userData;
+	RenderQueueDrawCallback m_drawCallback;
+	U64 m_uuid; ///< Zero means that there is no dir light
+	Vec3 m_diffuseColor;
+	Vec3 m_direction;
+	U8 m_shadowCascadeCount; ///< Zero means that it doesn't cast any shadows
+
+	DirectionalLightQueueElement() ///< Leaves all members uninitialized.
+	{
+	}
+};
+
+static_assert(
+	std::is_trivially_destructible<DirectionalLightQueueElement>::value == true, "Should be trivially destructible");
+
 /// Normally the visibility tests don't perform tests on the reflection probes because probes don't change that often.
 /// This callback will be used by the renderer to inform a reflection probe that on the next frame it will be rendered.
 /// In that case the probe should fill the render queues.
@@ -248,6 +270,7 @@ public:
 	WeakArray<PointLightQueueElement> m_pointLights;
 	WeakArray<PointLightQueueElement*> m_shadowPointLights; ///< Points to elements in m_pointLights.
 	WeakArray<SpotLightQueueElement> m_spotLights;
+	DirectionalLightQueueElement m_directionalLight;
 	WeakArray<SpotLightQueueElement*> m_shadowSpotLights; ///< Points to elements in m_spotLights.
 	WeakArray<ReflectionProbeQueueElement> m_reflectionProbes;
 	WeakArray<LensFlareQueueElement> m_lensFlares;
@@ -260,10 +283,16 @@ public:
 
 	F32 m_cameraNear;
 	F32 m_cameraFar;
+	F32 m_effectiveShadowDistance;
 
 	FillCoverageBufferCallback m_fillCoverageBufferCallback = nullptr;
 	void* m_fillCoverageBufferCallbackUserData = nullptr;
 
+	RenderQueue()
+	{
+		// NOTE(review): presumably zero-fills the element so that its m_uuid reads 0 ("no dir light") by default;
+		// confirm the semantics of zeroMemory.
+		zeroMemory(m_directionalLight);
+	}
+
 	U countAllRenderables() const;
 };
 

+ 27 - 5
src/anki/renderer/Renderer.cpp

@@ -537,18 +537,19 @@ void Renderer::updateLightShadingUniforms(RenderingContext& ctx) const
 	// Start writing
 	blk->m_unprojectionParams = ctx.m_unprojParams;
 
-	blk->m_rendererSizeTimeNear =
-		Vec4(m_width, m_height, HighRezTimer::getCurrentTime(), ctx.m_renderQueue->m_cameraNear);
+	blk->m_rendererSize = Vec2(m_width, m_height);
+	blk->m_time = HighRezTimer::getCurrentTime();
+	blk->m_near = ctx.m_renderQueue->m_cameraNear;
 
 	blk->m_clusterCount = UVec4(m_clusterCount[0], m_clusterCount[1], m_clusterCount[2], m_clusterCount[3]);
 
-	blk->m_cameraPosFar =
-		Vec4(ctx.m_renderQueue->m_cameraTransform.getTranslationPart().xyz(), ctx.m_renderQueue->m_cameraFar);
+	blk->m_cameraPos = ctx.m_renderQueue->m_cameraTransform.getTranslationPart().xyz();
+	blk->m_far = ctx.m_renderQueue->m_cameraFar;
 
 	blk->m_clustererMagicValues = ctx.m_clusterBinOut.m_shaderMagicValues;
 	blk->m_prevClustererMagicValues = ctx.m_prevClustererMagicValues;
 
-	blk->m_lightVolumeLastClusterPad3 = UVec4(m_volLighting->getFinalClusterInZ());
+	blk->m_lightVolumeLastCluster = m_volLighting->getFinalClusterInZ();
 
 	// Matrices
 	blk->m_viewMat = ctx.m_renderQueue->m_viewMatrix;
@@ -564,6 +565,27 @@ void Renderer::updateLightShadingUniforms(RenderingContext& ctx) const
 
 	blk->m_prevViewProjMatMulInvViewProjMat =
 		ctx.m_prevMatrices.m_viewProjection * ctx.m_matrices.m_viewProjectionJitter.getInverse();
+
+	// Directional light
+	if(ctx.m_renderQueue->m_directionalLight.m_uuid != 0)
+	{
+		DirectionalLight& out = blk->m_dirLight;
+		const DirectionalLightQueueElement& in = ctx.m_renderQueue->m_directionalLight;
+
+		out.m_diffuseColor = in.m_diffuseColor;
+		out.m_cascadeCount = in.m_shadowCascadeCount;
+		out.m_dir = in.m_direction;
+		out.m_active = 1;
+
+		for(U cascade = 0; cascade < in.m_shadowCascadeCount; ++cascade)
+		{
+			out.m_textureMatrices[cascade] = in.m_textureMatrices[cascade];
+		}
+	}
+	else
+	{
+		blk->m_dirLight.m_active = 0;
+	}
 }
 
 } // end namespace anki

+ 455 - 287
src/anki/renderer/ShadowMapping.cpp

@@ -14,17 +14,37 @@
 namespace anki
 {
 
-struct ShadowMapping::LightToRenderToScratchInfo
+class ShadowMapping::ScratchBufferWorkItem
 {
+public:
+	Array<U32, 4> m_viewport; ///< NOTE(review): presumably the viewport inside the scratch RT; confirm.
+	RenderQueue* m_renderQueue;
+	U32 m_firstRenderableElement; ///< NOTE(review): presumably an index into m_renderQueue's renderables; confirm.
+	U32 m_renderableElementCount; ///< NOTE(review): presumably the number of renderables after the first; confirm.
+	U32 m_threadPoolTaskIdx;
+};
+
+class ShadowMapping::LightToRenderToScratchInfo
+{
+public:
 	Array<U32, 4> m_viewport;
 	RenderQueue* m_renderQueue;
 	U32 m_drawcallCount;
 };
 
+class ShadowMapping::EsmResolveWorkItem
+{
+public:
+	Vec4 m_uvIn; ///< UV + size that point to the scratch buffer. runEsm() uploads xy as UV translation, zw as UV scale.
+	Array<U32, 4> m_viewportOut; ///< Viewport in the ESM RT. runEsm() also uses it as the scissor rect.
+	F32 m_cameraNear; ///< Uploaded by runEsm() as the "near" uniform.
+	F32 m_cameraFar; ///< Uploaded by runEsm() as the "far" uniform.
+	Bool8 m_blur; ///< Together with m_perspectiveProjection selects the rendering technique in runEsm().
+	Bool8 m_perspectiveProjection; ///< True: technique 0 (blur) or 1. False: technique 2 (blur) or 3.
+};
+
 ShadowMapping::~ShadowMapping()
 {
-	m_tiles.destroy(getAllocator());
-	m_lightUuidToTileIdx.destroy(getAllocator());
 }
 
 Error ShadowMapping::init(const ConfigSet& config)
@@ -37,6 +57,12 @@ Error ShadowMapping::init(const ConfigSet& config)
 		ANKI_R_LOGE("Failed to initialize shadowmapping");
 	}
 
+	ANKI_R_LOGI("\tScratch size %ux%u. ESM atlas size %ux%u",
+		m_scratchTileCountX * m_scratchTileResolution,
+		m_scratchTileCountY * m_scratchTileResolution,
+		m_esmTileCountBothAxis * m_esmTileResolution,
+		m_esmTileCountBothAxis * m_esmTileResolution);
+
 	return err;
 }
 
@@ -44,23 +70,26 @@ Error ShadowMapping::initScratch(const ConfigSet& cfg)
 {
 	// Init the shadowmaps and FBs
 	{
-		m_scratchTileCount = cfg.getNumber("r.shadowMapping.scratchTileCount");
-		m_scratchTileResolution = cfg.getNumber("r.shadowMapping.resolution");
+		m_scratchTileCountX = cfg.getNumber("r.shadowMapping.scratchTileCountX");
+		m_scratchTileCountY = cfg.getNumber("r.shadowMapping.scratchTileCountY");
+		m_scratchTileResolution = cfg.getNumber("r.shadowMapping.tileResolution");
 
 		// RT
-		m_scratchRtDescr = m_r->create2DRenderTargetDescription(m_scratchTileResolution * m_scratchTileCount,
-			m_scratchTileResolution,
+		m_scratchRtDescr = m_r->create2DRenderTargetDescription(m_scratchTileResolution * m_scratchTileCountX,
+			m_scratchTileResolution * m_scratchTileCountY,
 			SHADOW_DEPTH_PIXEL_FORMAT,
 			"Scratch ShadMap");
 		m_scratchRtDescr.bake();
 
 		// FB
 		m_scratchFbDescr.m_depthStencilAttachment.m_loadOperation = AttachmentLoadOperation::CLEAR;
-		m_scratchFbDescr.m_depthStencilAttachment.m_clearValue.m_depthStencil.m_depth = 1.0;
+		m_scratchFbDescr.m_depthStencilAttachment.m_clearValue.m_depthStencil.m_depth = 1.0f;
 		m_scratchFbDescr.m_depthStencilAttachment.m_aspect = DepthStencilAspectBit::DEPTH;
 		m_scratchFbDescr.bake();
 	}
 
+	m_scratchTileAlloc.init(getAllocator(), m_scratchTileCountX, m_scratchTileCountY, m_lodCount, false);
+
 	return Error::NONE;
 }
 
@@ -68,13 +97,12 @@ Error ShadowMapping::initEsm(const ConfigSet& cfg)
 {
 	// Init RTs and FBs
 	{
-		m_tileResolution = cfg.getNumber("r.shadowMapping.resolution");
-		m_tileCountPerRowOrColumn = cfg.getNumber("r.shadowMapping.tileCountPerRowOrColumn");
-		m_atlasResolution = m_tileResolution * m_tileCountPerRowOrColumn;
+		m_esmTileResolution = cfg.getNumber("r.shadowMapping.tileResolution");
+		m_esmTileCountBothAxis = cfg.getNumber("r.shadowMapping.tileCountPerRowOrColumn");
 
 		// RT
-		TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(m_atlasResolution,
-			m_atlasResolution,
+		TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(m_esmTileResolution * m_esmTileCountBothAxis,
+			m_esmTileResolution * m_esmTileCountBothAxis,
 			SHADOW_COLOR_PIXEL_FORMAT,
 			TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE
 				| TextureUsageBit::SAMPLED_COMPUTE,
@@ -91,31 +119,7 @@ Error ShadowMapping::initEsm(const ConfigSet& cfg)
 	}
 
 	// Tiles
-	{
-		m_tiles.create(getAllocator(), m_tileCountPerRowOrColumn * m_tileCountPerRowOrColumn);
-
-		for(U y = 0; y < m_tileCountPerRowOrColumn; ++y)
-		{
-			for(U x = 0; x < m_tileCountPerRowOrColumn; ++x)
-			{
-				const U tileIdx = y * m_tileCountPerRowOrColumn + x;
-				Tile& tile = m_tiles[tileIdx];
-
-				tile.m_uv[0] = F32(x) / m_tileCountPerRowOrColumn;
-				tile.m_uv[1] = F32(y) / m_tileCountPerRowOrColumn;
-				tile.m_uv[2] = 1.0f / m_tileCountPerRowOrColumn;
-				tile.m_uv[3] = tile.m_uv[2];
-
-				tile.m_viewport[0] = x * m_tileResolution;
-				tile.m_viewport[1] = y * m_tileResolution;
-				tile.m_viewport[2] = m_tileResolution;
-				tile.m_viewport[3] = m_tileResolution;
-			}
-		}
-
-		// The first tile is always pinned
-		m_tiles[0].m_pinned = true;
-	}
+	m_esmTileAlloc.init(getAllocator(), m_esmTileCountBothAxis, m_esmTileCountBothAxis, m_lodCount, true);
 
 	// Programs and shaders
 	{
@@ -123,7 +127,8 @@ Error ShadowMapping::initEsm(const ConfigSet& cfg)
 			getResourceManager().loadResource("shaders/ExponentialShadowmappingResolve.glslp", m_esmResolveProg));
 
 		ShaderProgramResourceConstantValueInitList<1> consts(m_esmResolveProg);
-		consts.add("INPUT_TEXTURE_SIZE", UVec2(m_scratchTileCount * m_scratchTileResolution, m_scratchTileResolution));
+		consts.add("INPUT_TEXTURE_SIZE",
+			UVec2(m_scratchTileCountX * m_scratchTileResolution, m_scratchTileCountY * m_scratchTileResolution));
 
 		const ShaderProgramResourceVariant* variant;
 		m_esmResolveProg->getOrCreateVariant(consts.get(), variant);
@@ -138,6 +143,9 @@ Error ShadowMapping::initInternal(const ConfigSet& cfg)
 	ANKI_CHECK(initScratch(cfg));
 	ANKI_CHECK(initEsm(cfg));
 
+	m_lodDistances[0] = cfg.getNumber("r.shadowMapping.lightLodDistance0");
+	m_lodDistances[1] = cfg.getNumber("r.shadowMapping.lightLodDistance1");
+
 	return Error::NONE;
 }
 
@@ -161,9 +169,30 @@ void ShadowMapping::runEsm(RenderPassWorkContext& rgraphCtx)
 		cmdb->setScissor(
 			workItem.m_viewportOut[0], workItem.m_viewportOut[1], workItem.m_viewportOut[2], workItem.m_viewportOut[3]);
 
-		Vec4* unis = allocateAndBindUniforms<Vec4*>(sizeof(Vec4) * 2, cmdb, 0, 0);
-		unis[0] = Vec4(workItem.m_cameraNear, workItem.m_cameraFar, 0.0f, 0.0f);
-		unis[1] = workItem.m_uvIn;
+		struct Uniforms
+		{
+			Vec2 m_uvScale;
+			Vec2 m_uvTranslation;
+			F32 m_near;
+			F32 m_far;
+			U32 m_renderingTechnique;
+			U32 m_padding;
+		} unis;
+		unis.m_uvScale = workItem.m_uvIn.zw();
+		unis.m_uvTranslation = workItem.m_uvIn.xy();
+		unis.m_near = workItem.m_cameraNear;
+		unis.m_far = workItem.m_cameraFar;
+
+		if(workItem.m_perspectiveProjection)
+		{
+			unis.m_renderingTechnique = (workItem.m_blur) ? 0 : 1;
+		}
+		else
+		{
+			unis.m_renderingTechnique = (workItem.m_blur) ? 2 : 3;
+		}
+
+		cmdb->setPushConstants(&unis, sizeof(unis));
 
 		drawQuad(cmdb);
 	}
@@ -219,8 +248,9 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 		// Scratch pass
 		{
 			// Compute render area
-			const U32 minx = 0, miny = 0, height = m_scratchTileResolution;
-			const U32 width = m_scratchTileResolution * m_scratchWorkItems.getSize();
+			const U32 minx = 0, miny = 0;
+			const U32 height = m_scratchMaxViewportHeight;
+			const U32 width = m_scratchMaxViewportWidth;
 
 			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SM scratch");
 
@@ -228,7 +258,12 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 			pass.setFramebufferInfo(m_scratchFbDescr, {}, m_scratchRt, minx, miny, width, height);
 			ANKI_ASSERT(
 				threadCountForScratchPass && threadCountForScratchPass <= m_r->getThreadHive().getThreadCount());
-			pass.setWork(runShadowmappingCallback, this, threadCountForScratchPass);
+			pass.setWork(
+				[](RenderPassWorkContext& rgraphCtx) {
+					static_cast<ShadowMapping*>(rgraphCtx.m_userData)->runShadowMapping(rgraphCtx);
+				},
+				this,
+				threadCountForScratchPass);
 
 			TextureSubresourceInfo subresource = TextureSubresourceInfo(DepthStencilAspectBit::DEPTH);
 			pass.newDependency({m_scratchRt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_READ_WRITE, subresource});
@@ -240,7 +275,12 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 
 			m_esmRt = rgraph.importRenderTarget(m_esmAtlas, TextureUsageBit::SAMPLED_FRAGMENT);
 			pass.setFramebufferInfo(m_esmFbDescr, {{m_esmRt}}, {});
-			pass.setWork(runEsmCallback, this, 0);
+			pass.setWork(
+				[](RenderPassWorkContext& rgraphCtx) {
+					static_cast<ShadowMapping*>(rgraphCtx.m_userData)->runEsm(rgraphCtx);
+				},
+				this,
+				0);
 
 			pass.newDependency(
 				{m_scratchRt, TextureUsageBit::SAMPLED_FRAGMENT, TextureSubresourceInfo(DepthStencilAspectBit::DEPTH)});
@@ -254,46 +294,322 @@ void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 	}
 }
 
-Mat4 ShadowMapping::createSpotLightTextureMatrix(const Tile& tile)
+/// Build a scale + translate matrix that maps texture coordinates into the region of the ESM atlas covered by
+/// @a viewport. The viewport is (x, y, width, height) in atlas texels and has to be square.
+Mat4 ShadowMapping::createSpotLightTextureMatrix(const Viewport& viewport) const
+{
+	ANKI_ASSERT(viewport[2] == viewport[3]);
+
+	// The atlas is square: tile resolution times the tile count of one axis
+	const F32 atlasResolution = F32(m_esmTileResolution * m_esmTileCountBothAxis);
+
+	const F32 scale = F32(viewport[2]) / atlasResolution;
+	const Vec2 offset(F32(viewport[0]) / atlasResolution, F32(viewport[1]) / atlasResolution);
+	ANKI_ASSERT(offset >= Vec2(0.0f) && offset <= Vec2(1.0f));
+
+	// Same 16 values as before, one matrix row per line
+	return Mat4(scale, 0.0f, 0.0f, offset.x(),
+		0.0f, scale, 0.0f, offset.y(),
+		0.0f, 0.0f, 1.0f, 0.0f,
+		0.0f, 0.0f, 0.0f, 1.0f);
+}
+
+/// Choose the shadow LOD of a point light.
+/// The metric is the distance from the camera to the light's position minus the light's radius.
+/// @param cameraOrigin The camera position.
+/// @param light The point light.
+/// @param[out] blurEsm True when LOD 1 is chosen, false when LOD 0 is chosen.
+/// @return 1 when the metric is below m_lodDistances[0], 0 otherwise.
+U ShadowMapping::choseLod(const Vec4& cameraOrigin, const PointLightQueueElement& light, Bool& blurEsm) const
+{
+	const Vec4 toCamera = cameraOrigin - light.m_worldPosition.xyz0();
+	const F32 distToLightVolume = toCamera.getLength() - light.m_radius;
+
+	if(!(distToLightVolume < m_lodDistances[0]))
+	{
+		// Far away: lowest LOD, no blur
+		blurEsm = false;
+		return 0;
+	}
+
+	// Close enough: highest LOD point lights may use
+	ANKI_ASSERT(m_pointLightsMaxLod == 1);
+	blurEsm = true;
+	return 1;
+}
+
+/// Choose the shadow LOD of a spot light from the distance between the camera and the light's cone.
+/// @param cameraOrigin The camera position.
+/// @param light The spot light.
+/// @param[out] blurEsm True only when the highest LOD (2) is chosen.
+/// @return 2 when the distance is below m_lodDistances[0], 1 when it is below m_lodDistances[1], 0 otherwise.
+U ShadowMapping::choseLod(const Vec4& cameraOrigin, const SpotLightQueueElement& light, Bool& blurEsm) const
 {
-	return Mat4(tile.m_uv[2],
-		0.0,
-		0.0,
-		tile.m_uv[0],
-		0.0,
-		tile.m_uv[3],
-		0.0,
-		tile.m_uv[1],
-		0.0,
-		0.0,
-		1.0,
-		0.0,
-		0.0,
-		0.0,
-		0.0,
-		1.0);
+	// Get some data
+	const Vec4 coneOrigin = light.m_worldTransform.getTranslationPart().xyz0();
+	const Vec4 coneDir = -light.m_worldTransform.getZAxis().xyz0();
+	// NOTE(review): used below as the angle between the cone's axis and its surface — confirm that m_outerAngle is not
+	// the full aperture
+	const F32 coneAngle = light.m_outerAngle;
+
+	// Compute the distance from the camera to the light cone
+	const Vec4 V = cameraOrigin - coneOrigin;
+	const F32 VlenSq = V.dot(V);
+	const F32 V1len = V.dot(coneDir); // Length of V projected on the cone's axis
+
+	// Squared distance of the camera from the cone's axis. Clamp it to zero: when the camera sits (almost) on the axis
+	// rounding can make the difference slightly negative, sqrt() would then return NaN, every comparison below would
+	// fail and LOD 0 would be silently selected for a light the camera is inside of
+	const F32 perpDistSq = max(0.0f, VlenSq - V1len * V1len);
+
+	// Signed distance to the cone's lateral surface, positive when the camera is outside the cone
+	const F32 distFromTheCamera = cos(coneAngle) * sqrt(perpDistSq) - V1len * sin(coneAngle);
+
+	U lod;
+	if(distFromTheCamera < m_lodDistances[0])
+	{
+		blurEsm = true;
+		lod = 2;
+	}
+	else if(distFromTheCamera < m_lodDistances[1])
+	{
+		blurEsm = false;
+		lod = 1;
+	}
+	else
+	{
+		blurEsm = false;
+		lod = 0;
+	}
+
+	return lod;
+}
+
+/// Allocate an ESM atlas tile for every face of a light and, for every face whose ESM tile is not cached, a scratch
+/// tile as well. All per-face arrays have faceCount elements.
+/// @param lightUuid The UUID of the light. Must be non-zero.
+/// @param faceCount The number of faces to allocate for. Must be non-zero.
+/// @param faceTimestamps Per-face timestamp forwarded to the tile allocators.
+/// @param faceIndices Per-face index forwarded to the tile allocators and used to invalidate cache entries.
+/// @param drawcallsCount Per-face drawcall count forwarded to the tile allocators.
+/// @param lods Per-face LOD forwarded to the tile allocators.
+/// @param[out] esmTileViewports Per-face viewport in the ESM atlas, scaled by m_esmTileResolution.
+/// @param[out] scratchTileViewports Per-face viewport in the scratch buffer, scaled by m_scratchTileResolution. Only
+///             written for faces whose ESM allocation was not CACHED.
+/// @param[out] subResults Per-face result of the ESM tile allocation.
+/// @return ALLOCATION_FAILED if any allocation failed (ESM cache entries are invalidated in that case), otherwise the
+///         result of the last allocation that was performed. The callers in this file only compare the value against
+///         ALLOCATION_FAILED.
+TileAllocatorResult ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid,
+	U32 faceCount,
+	const U64* faceTimestamps,
+	const U32* faceIndices,
+	const U32* drawcallsCount,
+	const U32* lods,
+	Viewport* esmTileViewports,
+	Viewport* scratchTileViewports,
+	TileAllocatorResult* subResults)
+{
+	ANKI_ASSERT(lightUuid > 0);
+	ANKI_ASSERT(faceCount > 0);
+	ANKI_ASSERT(faceTimestamps);
+	ANKI_ASSERT(faceIndices);
+	ANKI_ASSERT(drawcallsCount);
+	ANKI_ASSERT(lods);
+
+	TileAllocatorResult res;
+
+	// Allocate ESM tiles first. They may be cached and that will affect how many scratch tiles we'll need
+	for(U i = 0; i < faceCount; ++i)
+	{
+		res = m_esmTileAlloc.allocate(m_r->getGlobalTimestamp(),
+			faceTimestamps[i],
+			lightUuid,
+			faceIndices[i],
+			drawcallsCount[i],
+			lods[i],
+			esmTileViewports[i]);
+
+		if(res == TileAllocatorResult::ALLOCATION_FAILED)
+		{
+			ANKI_R_LOGW("There is not enough space in the shadow atlas for more shadow maps. "
+						"Increase the r.shadowMapping.tileCountPerRowOrColumn or decrease the scene's shadow casters");
+
+			// Invalidate cache entries for what we already allocated
+			// (only the faces before the failing one got a tile, hence j < i)
+			for(U j = 0; j < i; ++j)
+			{
+				m_esmTileAlloc.invalidateCache(lightUuid, faceIndices[j]);
+			}
+
+			return res;
+		}
+
+		// Remember the per-face outcome, the scratch pass below skips the cached faces
+		subResults[i] = res;
+
+		// Fix viewport
+		// (scale all 4 components by the tile resolution; presumably the allocator works in tile units — confirm
+		// against TileAllocator)
+		esmTileViewports[i][0] *= m_esmTileResolution;
+		esmTileViewports[i][1] *= m_esmTileResolution;
+		esmTileViewports[i][2] *= m_esmTileResolution;
+		esmTileViewports[i][3] *= m_esmTileResolution;
+	}
+
+	// Allocate scratch tiles
+	for(U i = 0; i < faceCount; ++i)
+	{
+		// A cached ESM tile doesn't get a scratch tile
+		if(subResults[i] == TileAllocatorResult::CACHED)
+		{
+			continue;
+		}
+
+		ANKI_ASSERT(subResults[i] == TileAllocatorResult::ALLOCATION_SUCCEEDED);
+
+		res = m_scratchTileAlloc.allocate(m_r->getGlobalTimestamp(),
+			faceTimestamps[i],
+			lightUuid,
+			faceIndices[i],
+			drawcallsCount[i],
+			lods[i],
+			scratchTileViewports[i]);
+
+		if(res == TileAllocatorResult::ALLOCATION_FAILED)
+		{
+			ANKI_R_LOGW("Don't have enough space in the scratch shadow mapping buffer. "
+						"If you see this message too often increase r.shadowMapping.scratchTileCountX/Y");
+
+			// Invalidate ESM tiles
+			// (all the faces this time, every one of them went through the ESM allocation above)
+			for(U j = 0; j < faceCount; ++j)
+			{
+				m_esmTileAlloc.invalidateCache(lightUuid, faceIndices[j]);
+			}
+
+			return res;
+		}
+
+		// Fix viewport
+		// (same scaling as for the ESM viewports but with the scratch tile resolution)
+		scratchTileViewports[i][0] *= m_scratchTileResolution;
+		scratchTileViewports[i][1] *= m_scratchTileResolution;
+		scratchTileViewports[i][2] *= m_scratchTileResolution;
+		scratchTileViewports[i][3] *= m_scratchTileResolution;
+
+		// Update the max view width
+		// (and height: grow both extents so they enclose this viewport)
+		m_scratchMaxViewportWidth =
+			max(m_scratchMaxViewportWidth, scratchTileViewports[i][0] + scratchTileViewports[i][2]);
+		m_scratchMaxViewportHeight =
+			max(m_scratchMaxViewportHeight, scratchTileViewports[i][1] + scratchTileViewports[i][3]);
+	}
+
+	// Outcome of the last allocation that ran (ESM or scratch), never ALLOCATION_FAILED at this point
+	return res;
 }
 
 void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScratchPass)
 {
-	// Reset stuff
-	m_freeScratchTiles = m_scratchTileCount;
+	// Reset the scratch viewport width
+	m_scratchMaxViewportWidth = 0;
+	m_scratchMaxViewportHeight = 0;
 
 	// Vars
+	const Vec4 cameraOrigin = ctx.m_renderQueue->m_cameraTransform.getTranslationPart().xyz0();
 	DynamicArrayAuto<LightToRenderToScratchInfo> lightsToRender(ctx.m_tempAllocator);
 	U32 drawcallCount = 0;
 	DynamicArrayAuto<EsmResolveWorkItem> esmWorkItems(ctx.m_tempAllocator);
 
-	// Process the point lights first.
+	// First thing, allocate an empty tile for empty faces of point lights
+	Viewport emptyTileViewport;
+	{
+		const TileAllocatorResult res = m_esmTileAlloc.allocate(
+			m_r->getGlobalTimestamp(), 1, MAX_U64, 0, 1, m_pointLightsMaxLod, emptyTileViewport);
+
+		(void)res;
+#if ANKI_ASSERTS_ENABLED
+		static Bool firstRun = true;
+		if(firstRun)
+		{
+			ANKI_ASSERT(res == TileAllocatorResult::ALLOCATION_SUCCEEDED);
+			firstRun = false;
+		}
+		else
+		{
+			ANKI_ASSERT(res == TileAllocatorResult::CACHED);
+		}
+#endif
+	}
+
+	// Process the directional light first. Cascades that have renderables are compacted into the arrays below (indexed
+	// by activeCascades), tiles are allocated for them in one go and then work items are pushed per cascade. Empty
+	// cascades are pointed to the empty tile.
+	if(ctx.m_renderQueue->m_directionalLight.m_shadowCascadeCount > 0)
+	{
+		DirectionalLightQueueElement& light = ctx.m_renderQueue->m_directionalLight;
+
+		Array<U64, MAX_SHADOW_CASCADES> timestamps;
+		Array<U32, MAX_SHADOW_CASCADES> cascadeIndices;
+		Array<U32, MAX_SHADOW_CASCADES> drawcallCounts;
+		Array<Viewport, MAX_SHADOW_CASCADES> esmViewports;
+		Array<Viewport, MAX_SHADOW_CASCADES> scratchViewports;
+		Array<TileAllocatorResult, MAX_SHADOW_CASCADES> subResults;
+		Array<U32, MAX_SHADOW_CASCADES> lods;
+		Array<Bool, MAX_SHADOW_CASCADES> blurEsms;
+
+		U activeCascades = 0;
+
+		for(U cascade = 0; cascade < light.m_shadowCascadeCount; ++cascade)
+		{
+			ANKI_ASSERT(light.m_shadowRenderQueues[cascade]);
+			if(light.m_shadowRenderQueues[cascade]->m_renderables.getSize() > 0)
+			{
+				// Cascade with drawcalls, will need tiles
+
+				timestamps[activeCascades] = m_r->getGlobalTimestamp(); // This light is always updated
+				cascadeIndices[activeCascades] = cascade;
+				drawcallCounts[activeCascades] = 1; // Doesn't matter
+
+				// Change the quality per cascade: the first 2 cascades get the highest LOD and blur, the rest get one
+				// LOD less. Don't derive the lower LOD from lods[0], that slot is unwritten when the first active
+				// cascade is not one of the first 2
+				blurEsms[activeCascades] = (cascade <= 1);
+				lods[activeCascades] = (cascade <= 1) ? (m_lodCount - 1) : (m_lodCount - 2);
+
+				++activeCascades;
+			}
+		}
+
+		// No active cascade is treated like a failed allocation: the light won't cast shadows this frame
+		const Bool allocationFailed = activeCascades == 0
+									  || allocateTilesAndScratchTiles(light.m_uuid,
+											 activeCascades,
+											 &timestamps[0],
+											 &cascadeIndices[0],
+											 &drawcallCounts[0],
+											 &lods[0],
+											 &esmViewports[0],
+											 &scratchViewports[0],
+											 &subResults[0])
+											 == TileAllocatorResult::ALLOCATION_FAILED;
+
+		if(!allocationFailed)
+		{
+			// Walk the cascades again, activeCascades is re-used as the index into the compacted arrays
+			activeCascades = 0;
+
+			for(U cascade = 0; cascade < light.m_shadowCascadeCount; ++cascade)
+			{
+				if(light.m_shadowRenderQueues[cascade]->m_renderables.getSize() > 0)
+				{
+					// Cascade with drawcalls, push some work for it
+
+					// Update the texture matrix to point to the correct region in the atlas
+					light.m_textureMatrices[cascade] =
+						createSpotLightTextureMatrix(esmViewports[activeCascades]) * light.m_textureMatrices[cascade];
+
+					// Push work
+					newScratchAndEsmResloveRenderWorkItems(esmViewports[activeCascades],
+						scratchViewports[activeCascades],
+						blurEsms[activeCascades],
+						false,
+						light.m_shadowRenderQueues[cascade],
+						lightsToRender,
+						esmWorkItems,
+						drawcallCount);
+
+					++activeCascades;
+				}
+				else
+				{
+					// Empty cascade, point it to the empty tile
+
+					light.m_textureMatrices[cascade] =
+						createSpotLightTextureMatrix(emptyTileViewport) * light.m_textureMatrices[cascade];
+				}
+			}
+		}
+		else
+		{
+			// Light can't be a caster this frame
+			light.m_shadowCascadeCount = 0;
+			zeroMemory(light.m_shadowRenderQueues);
+		}
+	}
+
+	// Process the point lights.
 	for(PointLightQueueElement* light : ctx.m_renderQueue->m_shadowPointLights)
 	{
 		// Prepare data to allocate tiles and allocate
-		Array<U32, 6> tiles;
-		Array<U32, 6> scratchTiles;
 		Array<U64, 6> timestamps;
 		Array<U32, 6> faceIndices;
 		Array<U32, 6> drawcallCounts;
+		Array<Viewport, 6> esmViewports;
+		Array<Viewport, 6> scratchViewports;
+		Array<TileAllocatorResult, 6> subResults;
+		Array<U32, 6> lods;
 		U numOfFacesThatHaveDrawcalls = 0;
+
+		Bool blurEsm;
+		const U lod = choseLod(cameraOrigin, *light, blurEsm);
+
 		for(U face = 0; face < 6; ++face)
 		{
 			ANKI_ASSERT(light->m_shadowRenderQueues[face]);
@@ -308,24 +624,33 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 				drawcallCounts[numOfFacesThatHaveDrawcalls] =
 					light->m_shadowRenderQueues[face]->m_renderables.getSize();
 
+				lods[numOfFacesThatHaveDrawcalls] = lod;
+
 				++numOfFacesThatHaveDrawcalls;
 			}
 		}
+
 		const Bool allocationFailed = numOfFacesThatHaveDrawcalls == 0
 									  || allocateTilesAndScratchTiles(light->m_uuid,
 											 numOfFacesThatHaveDrawcalls,
 											 &timestamps[0],
 											 &faceIndices[0],
 											 &drawcallCounts[0],
-											 &tiles[0],
-											 &scratchTiles[0]);
+											 &lods[0],
+											 &esmViewports[0],
+											 &scratchViewports[0],
+											 &subResults[0])
+											 == TileAllocatorResult::ALLOCATION_FAILED;
 
 		if(!allocationFailed)
 		{
 			// All good, update the lights
 
-			light->m_atlasTiles = UVec2(0u);
-			light->m_atlasTileSize = 1.0f / m_tileCountPerRowOrColumn;
+			const F32 atlasResolution = F32(m_esmTileResolution * m_esmTileCountBothAxis);
+			F32 superTileSize = esmViewports[0][2]; // Should be the same for all tiles and faces
+			superTileSize -= 1.0f; // Remove 2 half texels to avoid bilinear filtering bleeding
+
+			light->m_shadowAtlasTileSize = superTileSize / atlasResolution;
 
 			numOfFacesThatHaveDrawcalls = 0;
 			for(U face = 0; face < 6; ++face)
@@ -334,17 +659,19 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 				{
 					// Has drawcalls, assign it to a tile
 
-					const U32 tileIdx = tiles[numOfFacesThatHaveDrawcalls];
-					const U32 tileIdxX = tileIdx % m_tileCountPerRowOrColumn;
-					const U32 tileIdxY = tileIdx / m_tileCountPerRowOrColumn;
-					ANKI_ASSERT(tileIdxX <= 31u && tileIdxY <= 31u);
-					light->m_atlasTiles.x() |= tileIdxX << (5u * face);
-					light->m_atlasTiles.y() |= tileIdxY << (5u * face);
+					const Viewport& esmViewport = esmViewports[numOfFacesThatHaveDrawcalls];
+					const Viewport& scratchViewport = scratchViewports[numOfFacesThatHaveDrawcalls];
+
+					// Add a half texel to the viewport's start to avoid bilinear filtering bleeding
+					light->m_shadowAtlasTileOffsets[face].x() = (F32(esmViewport[0]) + 0.5f) / atlasResolution;
+					light->m_shadowAtlasTileOffsets[face].y() = (F32(esmViewport[1]) + 0.5f) / atlasResolution;
 
-					if(scratchTiles[numOfFacesThatHaveDrawcalls] != MAX_U32)
+					if(subResults[numOfFacesThatHaveDrawcalls] != TileAllocatorResult::CACHED)
 					{
-						newScratchAndEsmResloveRenderWorkItems(tiles[numOfFacesThatHaveDrawcalls],
-							scratchTiles[numOfFacesThatHaveDrawcalls],
+						newScratchAndEsmResloveRenderWorkItems(esmViewport,
+							scratchViewport,
+							blurEsm,
+							true,
 							light->m_shadowRenderQueues[face],
 							lightsToRender,
 							esmWorkItems,
@@ -355,16 +682,21 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 				}
 				else
 				{
-					// Doesn't have renderables, point the face to the 1st tile (that is pinned)
-					light->m_atlasTiles.x() |= 0u << (5u * face);
-					light->m_atlasTiles.y() |= 0u << (5u * face);
+					// Doesn't have renderables, point the face to the empty tile
+					Viewport esmViewport = emptyTileViewport;
+					ANKI_ASSERT(esmViewport[2] <= superTileSize && esmViewport[3] <= superTileSize);
+					esmViewport[2] = superTileSize;
+					esmViewport[3] = superTileSize;
+
+					light->m_shadowAtlasTileOffsets[face].x() = (F32(esmViewport[0]) + 0.5f) / atlasResolution;
+					light->m_shadowAtlasTileOffsets[face].y() = (F32(esmViewport[1]) + 0.5f) / atlasResolution;
 				}
 			}
 		}
 		else
 		{
 			// Light can't be a caster this frame
-			memset(&light->m_shadowRenderQueues[0], 0, sizeof(light->m_shadowRenderQueues));
+			zeroMemory(light->m_shadowRenderQueues);
 		}
 	}
 
@@ -374,28 +706,43 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 		ANKI_ASSERT(light->m_shadowRenderQueue);
 
 		// Allocate tiles
-		U32 tileIdx, scratchTileIdx, faceIdx = 0;
+		U32 faceIdx = 0;
+		TileAllocatorResult subResult;
+		Viewport esmViewport;
+		Viewport scratchViewport;
 		const U32 localDrawcallCount = light->m_shadowRenderQueue->m_renderables.getSize();
+
+		Bool blurEsm;
+		const U32 lod = choseLod(cameraOrigin, *light, blurEsm);
 		const Bool allocationFailed = localDrawcallCount == 0
 									  || allocateTilesAndScratchTiles(light->m_uuid,
 											 1,
 											 &light->m_shadowRenderQueue->m_shadowRenderablesLastUpdateTimestamp,
 											 &faceIdx,
 											 &localDrawcallCount,
-											 &tileIdx,
-											 &scratchTileIdx);
+											 &lod,
+											 &esmViewport,
+											 &scratchViewport,
+											 &subResult)
+											 == TileAllocatorResult::ALLOCATION_FAILED;
 
 		if(!allocationFailed)
 		{
 			// All good, update the light
 
 			// Update the texture matrix to point to the correct region in the atlas
-			light->m_textureMatrix = createSpotLightTextureMatrix(m_tiles[tileIdx]) * light->m_textureMatrix;
+			light->m_textureMatrix = createSpotLightTextureMatrix(esmViewport) * light->m_textureMatrix;
 
-			if(scratchTileIdx != MAX_U32)
+			if(subResult != TileAllocatorResult::CACHED)
 			{
-				newScratchAndEsmResloveRenderWorkItems(
-					tileIdx, scratchTileIdx, light->m_shadowRenderQueue, lightsToRender, esmWorkItems, drawcallCount);
+				newScratchAndEsmResloveRenderWorkItems(esmViewport,
+					scratchViewport,
+					blurEsm,
+					true,
+					light->m_shadowRenderQueue,
+					lightsToRender,
+					esmWorkItems,
+					drawcallCount);
 			}
 		}
 		else
@@ -479,8 +826,10 @@ void ShadowMapping::processLights(RenderingContext& ctx, U32& threadCountForScra
 	}
 }
 
-void ShadowMapping::newScratchAndEsmResloveRenderWorkItems(U32 tileIdx,
-	U32 scratchTileIdx,
+void ShadowMapping::newScratchAndEsmResloveRenderWorkItems(const Viewport& esmViewport,
+	const Viewport& scratchVewport,
+	Bool blurEsm,
+	Bool perspectiveProjection,
 	RenderQueue* lightRenderQueue,
 	DynamicArrayAuto<LightToRenderToScratchInfo>& scratchWorkItem,
 	DynamicArrayAuto<EsmResolveWorkItem>& esmResolveWorkItem,
@@ -488,214 +837,33 @@ void ShadowMapping::newScratchAndEsmResloveRenderWorkItems(U32 tileIdx,
 {
 	// Scratch work item
 	{
-		Array<U32, 4> viewport;
-		viewport[0] = scratchTileIdx * m_scratchTileResolution;
-		viewport[1] = 0;
-		viewport[2] = m_scratchTileResolution;
-		viewport[3] = m_scratchTileResolution;
-
 		LightToRenderToScratchInfo toRender = {
-			viewport, lightRenderQueue, U32(lightRenderQueue->m_renderables.getSize())};
+			scratchVewport, lightRenderQueue, U32(lightRenderQueue->m_renderables.getSize())};
 		scratchWorkItem.emplaceBack(toRender);
 		drawcallCount += lightRenderQueue->m_renderables.getSize();
 	}
 
 	// ESM resolve work item
 	{
+		const F32 scratchAtlasWidth = m_scratchTileCountX * m_scratchTileResolution;
+		const F32 scratchAtlasHeight = m_scratchTileCountY * m_scratchTileResolution;
+
 		EsmResolveWorkItem esmItem;
-		esmItem.m_uvIn[0] = F32(scratchTileIdx) / m_scratchTileCount;
-		esmItem.m_uvIn[1] = 0.0f;
-		esmItem.m_uvIn[2] = 1.0f / m_scratchTileCount;
-		esmItem.m_uvIn[3] = 1.0f;
+		esmItem.m_uvIn[0] = F32(scratchVewport[0]) / scratchAtlasWidth;
+		esmItem.m_uvIn[1] = F32(scratchVewport[1]) / scratchAtlasHeight;
+		esmItem.m_uvIn[2] = F32(scratchVewport[2]) / scratchAtlasWidth;
+		esmItem.m_uvIn[3] = F32(scratchVewport[3]) / scratchAtlasHeight;
 
-		esmItem.m_viewportOut = m_tiles[tileIdx].m_viewport;
+		esmItem.m_viewportOut = esmViewport;
 
 		esmItem.m_cameraFar = lightRenderQueue->m_cameraFar;
 		esmItem.m_cameraNear = lightRenderQueue->m_cameraNear;
 
-		esmResolveWorkItem.emplaceBack(esmItem);
-	}
-}
-
-Bool ShadowMapping::allocateTilesAndScratchTiles(U64 lightUuid,
-	U32 faceCount,
-	const U64* faceTimestamps,
-	const U32* faceIndices,
-	const U32* drawcallsCount,
-	U32* tileIndices,
-	U32* scratchTileIndices)
-{
-	ANKI_ASSERT(faceTimestamps);
-	ANKI_ASSERT(lightUuid > 0);
-	ANKI_ASSERT(faceCount > 0 && faceCount <= 6);
-	ANKI_ASSERT(faceIndices && tileIndices && scratchTileIndices && drawcallsCount);
-
-	Bool failed = false;
-	Array<Bool, 6> inTheCache;
-
-	// Allocate ESM tiles
-	{
-		memset(tileIndices, 0xFF, sizeof(*tileIndices) * faceCount);
-		for(U i = 0; i < faceCount && !failed; ++i)
-		{
-			failed = allocateTile(faceTimestamps[i], lightUuid, faceIndices[i], tileIndices[i], inTheCache[i]);
-		}
-
-		// Unpin the tiles
-		for(U i = 0; i < faceCount; ++i)
-		{
-			if(tileIndices[i] != MAX_U32)
-			{
-				m_tiles[tileIndices[i]].m_pinned = false;
-			}
-		}
-	}
-
-	// Allocate scratch tiles
-	{
-		U32 freeScratchTiles = m_freeScratchTiles;
-		for(U i = 0; i < faceCount && !failed; ++i)
-		{
-			scratchTileIndices[i] = MAX_U32;
-			const Bool shouldRender = shouldRenderTile(
-				faceTimestamps[i], lightUuid, faceIndices[i], m_tiles[tileIndices[i]], drawcallsCount[i]);
-			const Bool scratchTileFailed = shouldRender && freeScratchTiles == 0;
-
-			if(scratchTileFailed)
-			{
-				ANKI_R_LOGW("Don't have enough space in the scratch shadow mapping buffer. "
-							"If you see this message too often increase r.shadowMapping.scratchTileCount");
-				failed = true;
-			}
-			else if(shouldRender)
-			{
-				ANKI_ASSERT(m_scratchTileCount >= freeScratchTiles);
-				scratchTileIndices[i] = m_scratchTileCount - freeScratchTiles;
-				--freeScratchTiles;
-			}
-		}
-
-		if(!failed)
-		{
-			m_freeScratchTiles = freeScratchTiles;
-		}
-	}
-
-	// Update the tiles if everything was successful
-	if(!failed)
-	{
-		for(U i = 0; i < faceCount; ++i)
-		{
-			Tile& tile = m_tiles[tileIndices[i]];
-			tile.m_face = faceIndices[i];
-			tile.m_lightUuid = lightUuid;
-			tile.m_lastUsedTimestamp = m_r->getGlobalTimestamp();
-			tile.m_drawcallCount = drawcallsCount[i];
-
-			// Update the cache
-			if(!inTheCache[i])
-			{
-				TileKey key{lightUuid, faceIndices[i]};
-				ANKI_ASSERT(m_lightUuidToTileIdx.find(key) == m_lightUuidToTileIdx.getEnd());
-				m_lightUuidToTileIdx.emplace(getAllocator(), key, tileIndices[i]);
-			}
-		}
-	}
-
-	return failed;
-}
-
-Bool ShadowMapping::shouldRenderTile(
-	U64 lightTimestamp, U64 lightUuid, U32 face, const Tile& tileIdx, U32 drawcallCount)
-{
-	if(tileIdx.m_face == face && tileIdx.m_lightUuid == lightUuid && tileIdx.m_lastUsedTimestamp >= lightTimestamp
-		&& tileIdx.m_drawcallCount == drawcallCount)
-	{
-		return false;
-	}
-	else
-	{
-		return true;
-	}
-}
-
-Bool ShadowMapping::allocateTile(U64 lightTimestamp, U64 lightUuid, U32 face, U32& tileAllocated, Bool& inTheCache)
-{
-	ANKI_ASSERT(lightTimestamp > 0);
-	ANKI_ASSERT(lightUuid > 0);
-	ANKI_ASSERT(face < 6);
+		esmItem.m_blur = blurEsm;
+		esmItem.m_perspectiveProjection = perspectiveProjection;
 
-	// First, try to see if the light/face is in the cache
-	inTheCache = false;
-	TileKey key = TileKey{lightUuid, U64(face)};
-	auto it = m_lightUuidToTileIdx.find(key);
-	if(it != m_lightUuidToTileIdx.getEnd())
-	{
-		const U32 tileIdx = *it;
-		if(m_tiles[tileIdx].m_lightUuid == lightUuid && m_tiles[tileIdx].m_face == face && !m_tiles[tileIdx].m_pinned)
-		{
-			// Found it
-			tileAllocated = tileIdx;
-			inTheCache = true;
-			return false;
-		}
-		else
-		{
-			// Cache entry is wrong, remove it
-			m_lightUuidToTileIdx.erase(getAllocator(), it);
-		}
-	}
-
-	// 2nd and 3rd choice, find an empty tile or some tile to re-use
-	U32 emptyTile = MAX_U32;
-	U32 tileToKick = MAX_U32;
-	Timestamp tileToKickMinTimestamp = MAX_TIMESTAMP;
-	for(U32 tileIdx = 0; tileIdx < m_tiles.getSize(); ++tileIdx)
-	{
-		if(m_tiles[tileIdx].m_pinned)
-		{
-			continue;
-		}
-
-		if(m_tiles[tileIdx].m_lightUuid == 0)
-		{
-			// Found an empty
-			emptyTile = tileIdx;
-			break;
-		}
-		else if(m_tiles[tileIdx].m_lastUsedTimestamp != m_r->getGlobalTimestamp()
-				&& m_tiles[tileIdx].m_lastUsedTimestamp < tileToKickMinTimestamp)
-		{
-			// Found some with low timestamp
-			tileToKick = tileIdx;
-			tileToKickMinTimestamp = m_tiles[tileIdx].m_lastUsedTimestamp;
-		}
-	}
-
-	Bool failed = false;
-	if(emptyTile != MAX_U32)
-	{
-		tileAllocated = emptyTile;
-	}
-	else if(tileToKick != MAX_U32)
-	{
-		tileAllocated = tileToKick;
-	}
-	else
-	{
-		// We have a problem
-		failed = true;
-		ANKI_R_LOGW("There is not enough space in the shadow atlas for more shadow maps. "
-					"Increase the r.shadowMapping.tileCountPerRowOrColumn or decrease the scene's shadow casters");
-	}
-
-	if(!failed)
-	{
-		ANKI_ASSERT(!m_tiles[tileAllocated].m_pinned);
-		m_tiles[tileAllocated].m_pinned = true;
+		esmResolveWorkItem.emplaceBack(esmItem);
 	}
-
-	return failed;
 }
 
 } // end namespace anki

+ 36 - 75
src/anki/renderer/ShadowMapping.h

@@ -8,6 +8,7 @@
 #include <anki/renderer/RendererObject.h>
 #include <anki/Gr.h>
 #include <anki/resource/TextureResource.h>
+#include <anki/renderer/TileAllocator.h>
 
 namespace anki
 {
@@ -37,126 +38,86 @@ anki_internal:
 	}
 
 private:
+	using Viewport = Array<U32, 4>;
+
 	/// @name ESM stuff
 	/// @{
 
-	/// The ESM map consists of tiles.
-	class Tile
-	{
-	public:
-		U64 m_lastUsedTimestamp = 0;
-		U64 m_lightUuid = 0;
-		U32 m_drawcallCount = 0;
-		U8 m_face = 0;
-		Bool8 m_pinned = false; ///< If true we cannot allocate from it.
-
-		Vec4 m_uv;
-		Array<U32, 4> m_viewport;
-	};
-
-	/// A HashMap key.
-	class TileKey
-	{
-	public:
-		U64 m_lightUuid;
-		U64 m_face;
-
-		U64 computeHash() const
-		{
-			return anki::computeHash(this, sizeof(*this), 693);
-		}
-	};
+	TileAllocator m_esmTileAlloc;
 
 	FramebufferDescription m_esmFbDescr; ///< The FB for ESM
-	TexturePtr m_esmAtlas; ///< ESM texture atlas.
+	TexturePtr m_esmAtlas; ///< ESM texture atlas. Size (m_esmTileResolution*m_esmTileCountBothAxis)^2
 	RenderTargetHandle m_esmRt;
 
-	U32 m_tileResolution = 0; ///< Tile resolution.
-	U32 m_atlasResolution = 0; ///< Atlas size is (m_atlasResolution, m_atlasResolution)
-	U32 m_tileCountPerRowOrColumn = 0;
-	DynamicArray<Tile> m_tiles;
+	U32 m_esmTileResolution = 0; ///< Tile resolution.
+	U32 m_esmTileCountBothAxis = 0;
 
 	ShaderProgramResourcePtr m_esmResolveProg;
 	ShaderProgramPtr m_esmResolveGrProg;
 
-	HashMap<TileKey, U32> m_lightUuidToTileIdx;
-
-	Bool allocateTile(U64 lightTimestamp, U64 lightUuid, U32 face, U32& tileAllocated, Bool& inTheCache);
-	static Bool shouldRenderTile(U64 lightTimestamp, U64 lightUuid, U32 face, const Tile& tileIdx, U32 drawcallCount);
-
-	class EsmResolveWorkItem
-	{
-	public:
-		Vec4 m_uvIn; ///< UV + size that point to the scratch buffer.
-		Array<U32, 4> m_viewportOut; ///< Viewport in the ESM RT.
-		F32 m_cameraNear;
-		F32 m_cameraFar;
-	};
+	class EsmResolveWorkItem;
 	WeakArray<EsmResolveWorkItem> m_esmResolveWorkItems;
 
 	ANKI_USE_RESULT Error initEsm(const ConfigSet& cfg);
 
-	static Mat4 createSpotLightTextureMatrix(const Tile& tile);
-
-	/// A RenderPassWorkCallback for ESM
-	static void runEsmCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		scast<ShadowMapping*>(rgraphCtx.m_userData)->runEsm(rgraphCtx);
-	}
+	inline Mat4 createSpotLightTextureMatrix(const Viewport& viewport) const;
 
 	void runEsm(RenderPassWorkContext& rgraphCtx);
 	/// @}
 
 	/// @name Scratch buffer stuff
 	/// @{
+	TileAllocator m_scratchTileAlloc;
+
 	RenderTargetHandle m_scratchRt; ///< Size of the RT is (m_scratchTileSize * m_scratchTileCount, m_scratchTileSize).
 	FramebufferDescription m_scratchFbDescr; ///< FB info.
 	RenderTargetDescription m_scratchRtDescr; ///< Render target.
 
-	U32 m_scratchTileCount = 0;
+	U32 m_scratchTileCountX = 0;
+	U32 m_scratchTileCountY = 0;
 	U32 m_scratchTileResolution = 0;
-	U32 m_freeScratchTiles = 0;
-
-	class ScratchBufferWorkItem
-	{
-	public:
-		Array<U32, 4> m_viewport;
-		RenderQueue* m_renderQueue;
-		U32 m_firstRenderableElement;
-		U32 m_renderableElementCount;
-		U32 m_threadPoolTaskIdx;
-	};
 
-	struct LightToRenderToScratchInfo;
+	class ScratchBufferWorkItem;
+	class LightToRenderToScratchInfo;
 
 	WeakArray<ScratchBufferWorkItem> m_scratchWorkItems;
+	U32 m_scratchMaxViewportWidth = 0;
+	U32 m_scratchMaxViewportHeight = 0;
 
 	ANKI_USE_RESULT Error initScratch(const ConfigSet& cfg);
 
-	/// A RenderPassWorkCallback for shadow passes.
-	static void runShadowmappingCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		scast<ShadowMapping*>(rgraphCtx.m_userData)->runShadowMapping(rgraphCtx);
-	}
-
 	void runShadowMapping(RenderPassWorkContext& rgraphCtx);
 	/// @}
 
 	/// @name Misc & common
 	/// @{
 
+	static const U m_lodCount = 3;
+	static const U m_pointLightsMaxLod = 1;
+
+	Array<F32, m_lodCount - 1> m_lodDistances;
+
+	/// Find the lod of the light
+	U choseLod(const Vec4& cameraOrigin, const PointLightQueueElement& light, Bool& blurEsm) const;
+	/// Find the lod of the light
+	U choseLod(const Vec4& cameraOrigin, const SpotLightQueueElement& light, Bool& blurEsm) const;
+
 	/// Try to allocate a number of scratch tiles and regular tiles.
-	Bool allocateTilesAndScratchTiles(U64 lightUuid,
+	TileAllocatorResult allocateTilesAndScratchTiles(U64 lightUuid,
 		U32 faceCount,
 		const U64* faceTimestamps,
 		const U32* faceIndices,
 		const U32* drawcallsCount,
-		U32* tileIndices,
-		U32* scratchTileIndices);
+		const U32* lods,
+		Viewport* esmTileViewports,
+		Viewport* scratchTileViewports,
+		TileAllocatorResult* subResults);
 
 	/// Add new work to render to scratch buffer and ESM buffer.
-	void newScratchAndEsmResloveRenderWorkItems(U32 tileIdx,
-		U32 scratchTileIdx,
+	void newScratchAndEsmResloveRenderWorkItems(const Viewport& esmViewport,
+		const Viewport& scratchVewport,
+		Bool blurEsm,
+		Bool perspectiveProjection,
 		RenderQueue* lightRenderQueue,
 		DynamicArrayAuto<LightToRenderToScratchInfo>& scratchWorkItem,
 		DynamicArrayAuto<EsmResolveWorkItem>& esmResolveWorkItem,

+ 28 - 4
src/anki/renderer/Ssao.cpp

@@ -209,7 +209,13 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 			pass.newDependency({m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_COMPUTE, HIZ_HALF_DEPTH});
 			pass.newDependency({m_runCtx.m_rts[0], TextureUsageBit::IMAGE_COMPUTE_WRITE});
 
-			pass.setWork(runMainCallback, this, 0);
+			pass.setWork(
+				[](RenderPassWorkContext& rgraphCtx) {
+					Ssao* const self = static_cast<Ssao*>(rgraphCtx.m_userData);
+					self->runMain(*self->m_runCtx.m_ctx, rgraphCtx);
+				},
+				this,
+				0);
 		}
 		else
 		{
@@ -226,7 +232,13 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 				{m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_FRAGMENT, HIZ_HALF_DEPTH});
 			pass.newDependency({m_runCtx.m_rts[0], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
 
-			pass.setWork(runMainCallback, this, 0);
+			pass.setWork(
+				[](RenderPassWorkContext& rgraphCtx) {
+					Ssao* const self = static_cast<Ssao*>(rgraphCtx.m_userData);
+					self->runMain(*self->m_runCtx.m_ctx, rgraphCtx);
+				},
+				this,
+				0);
 		}
 	}
 
@@ -236,7 +248,13 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 		{
 			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("SSAO blur");
 
-			pass.setWork(runBlurCallback, this, 0);
+			pass.setWork(
+				[](RenderPassWorkContext& rgraphCtx) {
+					Ssao* const self = static_cast<Ssao*>(rgraphCtx.m_userData);
+					self->runBlur(rgraphCtx);
+				},
+				this,
+				0);
 
 			pass.newDependency({m_runCtx.m_rts[1], TextureUsageBit::IMAGE_COMPUTE_WRITE});
 			pass.newDependency({m_runCtx.m_rts[0], TextureUsageBit::SAMPLED_COMPUTE});
@@ -245,7 +263,13 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 		{
 			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO blur");
 
-			pass.setWork(runBlurCallback, this, 0);
+			pass.setWork(
+				[](RenderPassWorkContext& rgraphCtx) {
+					Ssao* const self = static_cast<Ssao*>(rgraphCtx.m_userData);
+					self->runBlur(rgraphCtx);
+				},
+				this,
+				0);
 			pass.setFramebufferInfo(m_fbDescr, {{m_runCtx.m_rts[1]}}, {});
 
 			pass.newDependency({m_runCtx.m_rts[1], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});

+ 0 - 14
src/anki/renderer/Ssao.h

@@ -76,20 +76,6 @@ private:
 
 	void runMain(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 	void runBlur(RenderPassWorkContext& rgraphCtx);
-
-	/// A RenderPassWorkCallback for SSAO main pass.
-	static void runMainCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		Ssao* const self = scast<Ssao*>(rgraphCtx.m_userData);
-		self->runMain(*self->m_runCtx.m_ctx, rgraphCtx);
-	}
-
-	/// A RenderPassWorkCallback for SSAO blur.
-	static void runBlurCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		Ssao* const self = scast<Ssao*>(rgraphCtx.m_userData);
-		self->runBlur(rgraphCtx);
-	}
 };
 /// @}
 

+ 2 - 1
src/anki/renderer/Ssr.cpp

@@ -78,7 +78,8 @@ void Ssr::populateRenderGraph(RenderingContext& ctx)
 
 	// Create pass
 	ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("SSR");
-	rpass.setWork(runCallback, this, 0);
+	rpass.setWork(
+		[](RenderPassWorkContext& rgraphCtx) { static_cast<Ssr*>(rgraphCtx.m_userData)->run(rgraphCtx); }, this, 0);
 
 	rpass.newDependency({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_READ_WRITE});
 	rpass.newDependency({m_r->getGBuffer().getColorRt(1), TextureUsageBit::SAMPLED_COMPUTE});

+ 0 - 5
src/anki/renderer/Ssr.h

@@ -51,11 +51,6 @@ private:
 
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);
 
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		static_cast<Ssr*>(rgraphCtx.m_userData)->run(rgraphCtx);
-	}
-
 	void run(RenderPassWorkContext& rgraphCtx);
 };
 /// @}

+ 7 - 1
src/anki/renderer/TemporalAA.cpp

@@ -108,7 +108,13 @@ void TemporalAA::populateRenderGraph(RenderingContext& ctx)
 	// Create pass
 	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("TemporalAA");
 
-	pass.setWork(runCallback, this, 0);
+	pass.setWork(
+		[](RenderPassWorkContext& rgraphCtx) {
+			TemporalAA* const self = static_cast<TemporalAA*>(rgraphCtx.m_userData);
+			self->run(*self->m_runCtx.m_ctx, rgraphCtx);
+		},
+		this,
+		0);
 
 	pass.newDependency({m_runCtx.m_renderRt, TextureUsageBit::IMAGE_COMPUTE_WRITE});
 	pass.newDependency({m_r->getGBuffer().getDepthRt(),

+ 0 - 7
src/anki/renderer/TemporalAA.h

@@ -49,13 +49,6 @@ private:
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);
 
 	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
-
-	/// A RenderPassWorkCallback for the AA pass.
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		TemporalAA* const self = scast<TemporalAA*>(rgraphCtx.m_userData);
-		self->run(*self->m_runCtx.m_ctx, rgraphCtx);
-	}
 };
 /// @}
 

+ 383 - 0
src/anki/renderer/TileAllocator.cpp

@@ -0,0 +1,383 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <anki/renderer/TileAllocator.h>
+
+namespace anki
+{
+
+/// A single tile of the tile hierarchy. The tiles of all the LODs live in the same array (m_allTiles).
+class TileAllocator::Tile
+{
+public:
+	Timestamp m_lightTimestamp = 0; ///< The last timestamp the light got updated
+	Timestamp m_lastUsedTimestamp = 0; ///< The last timestamp this tile was used. Zero means never used
+	U64 m_lightUuid = 0; ///< The UUID of the light that occupies the tile. Zero means no light
+	U32 m_lightDrawcallCount = 0; ///< The drawcall count given by the allocation that occupies the tile
+	/// Filled by init(): [0] and [1] are the tile's x and y shifted left by its LOD, [2] and [3] are 1 << LOD.
+	/// NOTE(review): presumably (x, y, width, height) in LOD 0 tile units, confirm with the users of allocate()
+	Array<U16, 4> m_viewport = {};
+	/// Indices (in m_allTiles) of the 4 tiles of the LOD below that this tile covers. MAX_U16 if there are none
+	Array<U16, 4> m_subTiles = {MAX_U16, MAX_U16, MAX_U16, MAX_U16};
+	U16 m_superTile = MAX_U16; ///< Index (in m_allTiles) of the tile of the LOD above. MAX_U16 if there is none
+	U8 m_lightLod = 0; ///< The LOD given by the allocation that occupies the tile
+	U8 m_lightFace = 0; ///< The light face given by the allocation that occupies the tile
+};
+
+/// The key of the light cache. It identifies one face of a light.
+class TileAllocator::HashMapKey
+{
+public:
+	U64 m_lightUuid;
+	/// NOTE(review): presumably U64 (and not something smaller) so there are no padding bytes to end up in the
+	/// hash, confirm
+	U64 m_face;
+
+	/// Hash the raw bytes of the whole key.
+	U64 computeHash() const
+	{
+		return anki::computeHash(this, sizeof(*this), 693);
+	}
+};
+
+/// Destroy all the containers using the allocator that was stored by init().
+TileAllocator::~TileAllocator()
+{
+	m_lightInfoToTileIdx.destroy(m_alloc);
+	m_allTiles.destroy(m_alloc);
+	m_lodFirstTileIndex.destroy(m_alloc);
+}
+
+/// Build the tile hierarchy. LOD 0 has tileCountX*tileCountY tiles and every next LOD has half the tiles in each
+/// dimension. All the tiles are stored in a single array, LOD after LOD, and every tile of LOD > 0 is linked with the
+/// 4 tiles of the LOD below that it covers.
+void TileAllocator::init(HeapAllocator<U8> alloc, U32 tileCountX, U32 tileCountY, U32 lodCount, Bool enableCaching)
+{
+	// Validate the input
+	ANKI_ASSERT(tileCountX > 0);
+	ANKI_ASSERT(tileCountY > 0);
+	ANKI_ASSERT(lodCount > 0);
+
+	// Keep the configuration around
+	m_alloc = alloc;
+	m_cachingEnabled = enableCaching;
+	m_lodCount = lodCount;
+	m_tileCountX = tileCountX;
+	m_tileCountY = tileCountY;
+
+	// Find where the tiles of every LOD start inside the tile array and count the tiles
+	m_lodFirstTileIndex.create(m_alloc, lodCount + 1);
+	U tileCount = 0;
+	for(U lod = 0; lod < lodCount; ++lod)
+	{
+		const U lodTileCountX = tileCountX >> lod;
+		const U lodTileCountY = tileCountY >> lod;
+		ANKI_ASSERT((lodTileCountX << lod) == tileCountX && "Every LOD should be power of 2 of its parent LOD");
+		ANKI_ASSERT((lodTileCountY << lod) == tileCountY && "Every LOD should be power of 2 of its parent LOD");
+
+		m_lodFirstTileIndex[lod] = tileCount;
+		tileCount += lodTileCountX * lodTileCountY;
+	}
+	ANKI_ASSERT(tileCount >= tileCountX * tileCountY && tileCount < MAX_U16);
+
+	// The extra element holds the index of the very last tile
+	m_lodFirstTileIndex[lodCount] = tileCount - 1;
+	m_allTiles.create(m_alloc, tileCount);
+
+	// Visit the tiles in storage order and initialize them
+	U tileIdx = 0;
+	for(U lod = 0; lod < lodCount; ++lod)
+	{
+		const U lodTileCountX = tileCountX >> lod;
+		const U lodTileCountY = tileCountY >> lod;
+		const U16 lodTileSize = 1 << lod;
+
+		for(U y = 0; y < lodTileCountY; ++y)
+		{
+			for(U x = 0; x < lodTileCountX; ++x, ++tileIdx)
+			{
+				ANKI_ASSERT(tileIdx >= m_lodFirstTileIndex[lod] && tileIdx <= m_lodFirstTileIndex[lod + 1]);
+				Tile& tile = m_allTiles[tileIdx];
+
+				tile.m_viewport[0] = x << lod;
+				tile.m_viewport[1] = y << lod;
+				tile.m_viewport[2] = lodTileSize;
+				tile.m_viewport[3] = lodTileSize;
+
+				if(lod == 0)
+				{
+					// The tiles of LOD 0 don't have sub tiles
+					continue;
+				}
+
+				// Link with the 2x2 tiles of the LOD below. Bit 0 of the counter is the X offset, bit 1 the Y
+				for(U k = 0; k < 4; ++k)
+				{
+					const U subTileIdx = translateTileIdx((x << 1) + (k & 1), (y << 1) + (k >> 1), lod - 1);
+					m_allTiles[subTileIdx].m_superTile = tileIdx;
+					tile.m_subTiles[k] = subTileIdx;
+				}
+			}
+		}
+	}
+}
+
+/// Copy the light information and the timestamps of a tile to all the tiles below it in the hierarchy.
+void TileAllocator::updateSubTiles(const Tile& updateFrom)
+{
+	// init() sets either all 4 sub tiles or none, checking the first one is enough
+	const Bool hasSubTiles = updateFrom.m_subTiles[0] != MAX_U16;
+	if(!hasSubTiles)
+	{
+		return;
+	}
+
+	for(const U16 subTileIdx : updateFrom.m_subTiles)
+	{
+		Tile& subTile = m_allTiles[subTileIdx];
+
+		subTile.m_lightTimestamp = updateFrom.m_lightTimestamp;
+		subTile.m_lastUsedTimestamp = updateFrom.m_lastUsedTimestamp;
+		subTile.m_lightUuid = updateFrom.m_lightUuid;
+		subTile.m_lightDrawcallCount = updateFrom.m_lightDrawcallCount;
+		subTile.m_lightLod = updateFrom.m_lightLod;
+		subTile.m_lightFace = updateFrom.m_lightFace;
+
+		// Keep moving down the hierarchy
+		updateSubTiles(subTile);
+	}
+}
+
+/// Walk up the hierarchy and, for every tile above the given one, clear its light and stamp it with the last used
+/// timestamp of the given tile.
+void TileAllocator::updateSuperTiles(const Tile& updateFrom)
+{
+	if(updateFrom.m_superTile == MAX_U16)
+	{
+		// Reached the top of the hierarchy
+		return;
+	}
+
+	Tile& superTile = m_allTiles[updateFrom.m_superTile];
+	superTile.m_lightUuid = 0;
+	superTile.m_lastUsedTimestamp = updateFrom.m_lastUsedTimestamp;
+
+	updateSuperTiles(superTile);
+}
+
+/// Search a tile and the hierarchy below it for a tile of a specific LOD that can be used for an allocation.
+/// @param crntTileIdx The index (in m_allTiles) of the tile to visit.
+/// @param crntTileLod The LOD of the tile to visit.
+/// @param allocationLod The LOD of the tiles that are candidates. It can't be greater than crntTileLod.
+/// @param crntTimestamp The timestamp of the allocation, forwarded to evaluateCandidate().
+/// @param[in,out] emptyTileIdx Forwarded to evaluateCandidate().
+/// @param[in,out] toKickTileIdx Forwarded to evaluateCandidate().
+/// @param[in,out] tileToKickMinTimestamp Forwarded to evaluateCandidate().
+/// @return True if evaluateCandidate() reported that the search can stop.
+Bool TileAllocator::searchTileRecursively(U crntTileIdx,
+	U crntTileLod,
+	U allocationLod,
+	Timestamp crntTimestamp,
+	U& emptyTileIdx,
+	U& toKickTileIdx,
+	Timestamp& tileToKickMinTimestamp) const
+{
+	const Tile& tile = m_allTiles[crntTileIdx];
+
+	if(crntTileLod == allocationLod)
+	{
+		// Reached the requested LOD, this tile is a candidate. Don't go any lower
+		return evaluateCandidate(crntTileIdx, crntTimestamp, emptyTileIdx, toKickTileIdx, tileToKickMinTimestamp);
+	}
+
+	if(tile.m_subTiles[0] == MAX_U16)
+	{
+		// No sub tiles, nowhere to go
+		return false;
+	}
+
+	// Move down the hierarchy
+	ANKI_ASSERT(allocationLod < crntTileLod);
+
+	for(const U16 idx : tile.m_subTiles)
+	{
+		// The sub tiles are exactly one LOD below this tile. Halving the LOD instead (crntTileLod >> 1) reports the
+		// wrong LOD for every tile above LOD 2 and the search ends up evaluating tiles of the wrong size
+		const Bool done = searchTileRecursively(idx,
+			crntTileLod - 1,
+			allocationLod,
+			crntTimestamp,
+			emptyTileIdx,
+			toKickTileIdx,
+			tileToKickMinTimestamp);
+
+		if(done)
+		{
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/// Check if a tile can be used for a new allocation.
+/// @param tileIdx The index (in m_allTiles) of the tile to check.
+/// @param crntTimestamp The timestamp of the allocation. Tiles that were used in this timestamp are never picked.
+/// @param[out] emptyTileIdx Set to tileIdx if the tile can be used right away.
+/// @param[in,out] toKickTileIdx With caching enabled, the least recently used tile found so far.
+/// @param[in,out] tileToKickMinTimestamp With caching enabled, the last used timestamp of the toKickTileIdx tile.
+/// @return True if emptyTileIdx was set, which means that the search can stop.
+Bool TileAllocator::evaluateCandidate(
+	U tileIdx, Timestamp crntTimestamp, U& emptyTileIdx, U& toKickTileIdx, Timestamp& tileToKickMinTimestamp) const
+{
+	const Tile& tile = m_allTiles[tileIdx];
+
+	if(m_cachingEnabled)
+	{
+		if(tile.m_lastUsedTimestamp == 0)
+		{
+			// Found empty
+			emptyTileIdx = tileIdx;
+			return true;
+		}
+		else if(tile.m_lastUsedTimestamp != crntTimestamp && tile.m_lastUsedTimestamp < tileToKickMinTimestamp)
+		{
+			// Found one with low timestamp. Remember the same timestamp that was compared above (the last used one
+			// and not the light's) so that the next candidates are compared against this tile and the least
+			// recently used tile wins
+			toKickTileIdx = tileIdx;
+			tileToKickMinTimestamp = tile.m_lastUsedTimestamp;
+		}
+	}
+	else
+	{
+		// Nothing is kept between timestamps, anything not used in this timestamp is free to use
+		if(tile.m_lastUsedTimestamp != crntTimestamp)
+		{
+			emptyTileIdx = tileIdx;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/// Allocate a tile of a specific LOD for a face of a light.
+/// @param crntTimestamp The current timestamp. Should be greater than zero.
+/// @param lightTimestamp The timestamp of the last update of the light. Should be in (0, crntTimestamp].
+/// @param lightUuid The UUID of the light. Can't be zero.
+/// @param lightFace The face of the light. Should be less than 6.
+/// @param drawcallCount The drawcall count of the light. With caching enabled a change from the stored value
+///                      forces a re-render.
+/// @param lod The LOD of the tile to allocate. Should be less than the LOD count given in init().
+/// @param[out] tileViewport The viewport of the tile. Not written if the allocation failed.
+/// @return CACHED if a cached tile was found and it doesn't need re-rendering, ALLOCATION_SUCCEEDED if the tile
+///         (cached or new) needs to be rendered and ALLOCATION_FAILED if no tile could be found.
+TileAllocatorResult TileAllocator::allocate(Timestamp crntTimestamp,
+	Timestamp lightTimestamp,
+	U64 lightUuid,
+	U32 lightFace,
+	U32 drawcallCount,
+	U32 lod,
+	Array<U32, 4>& tileViewport)
+{
+	// Preconditions
+	ANKI_ASSERT(crntTimestamp > 0);
+	ANKI_ASSERT(lightTimestamp > 0);
+	ANKI_ASSERT(lightTimestamp <= crntTimestamp);
+	ANKI_ASSERT(lightUuid != 0);
+	ANKI_ASSERT(lightFace < 6);
+	ANKI_ASSERT(lod < m_lodCount);
+
+	// 1) Search if it's already cached
+	HashMapKey key; // Only initialized (and used) when caching is enabled
+	if(m_cachingEnabled)
+	{
+		key.m_lightUuid = lightUuid;
+		key.m_face = lightFace;
+		auto it = m_lightInfoToTileIdx.find(key);
+		if(it != m_lightInfoToTileIdx.getEnd())
+		{
+			Tile& tile = m_allTiles[*it];
+
+			if(tile.m_lightUuid != lightUuid || tile.m_lightLod != lod || tile.m_lightFace != lightFace)
+			{
+				// Cache entry is wrong, remove it
+				m_lightInfoToTileIdx.erase(m_alloc, it);
+			}
+			else
+			{
+				// Same light & lod & face, found the cache entry.
+
+				ANKI_ASSERT(tile.m_lastUsedTimestamp != crntTimestamp
+							&& "Trying to allocate the same thing twice in this timestamp?");
+
+				ANKI_ASSERT(tile.m_lightUuid == lightUuid && tile.m_lightLod == lod && tile.m_lightFace == lightFace);
+
+				tileViewport = {tile.m_viewport[0], tile.m_viewport[1], tile.m_viewport[2], tile.m_viewport[3]};
+
+				// The cached contents are stale if the light changed or if it renders a different number of drawcalls
+				const Bool needsReRendering =
+					tile.m_lightDrawcallCount != drawcallCount || tile.m_lightTimestamp != lightTimestamp;
+
+				tile.m_lightTimestamp = lightTimestamp;
+				tile.m_lastUsedTimestamp = crntTimestamp;
+				tile.m_lightDrawcallCount = drawcallCount;
+
+				updateTileHierarchy(tile);
+
+				return (needsReRendering) ? TileAllocatorResult::ALLOCATION_SUCCEEDED : TileAllocatorResult::CACHED;
+			}
+		}
+	}
+
+	// 2) Start searching for a suitable tile. Do a hierarchical search to end up with better locality and not better
+	// utilization of the atlas' space
+	U emptyTileIdx = MAX_U;
+	U toKickTileIdx = MAX_U;
+	Timestamp tileToKickMinTimestamp = MAX_TIMESTAMP;
+	const U maxLod = m_lodCount - 1;
+	if(lod == maxLod)
+	{
+		// This search is simple, iterate the tiles of the max LOD
+
+		// m_lodFirstTileIndex[maxLod + 1] holds the index of the last tile, that's why the range is inclusive
+		for(U tileIdx = m_lodFirstTileIndex[maxLod]; tileIdx <= m_lodFirstTileIndex[maxLod + 1]; ++tileIdx)
+		{
+			const Bool done =
+				evaluateCandidate(tileIdx, crntTimestamp, emptyTileIdx, toKickTileIdx, tileToKickMinTimestamp);
+
+			if(done)
+			{
+				break;
+			}
+		}
+	}
+	else
+	{
+		// Need to do a recursive search
+
+		// Start from every tile of the max LOD and move down the hierarchy
+		for(U tileIdx = m_lodFirstTileIndex[maxLod]; tileIdx <= m_lodFirstTileIndex[maxLod + 1]; ++tileIdx)
+		{
+			const Bool done = searchTileRecursively(
+				tileIdx, maxLod, lod, crntTimestamp, emptyTileIdx, toKickTileIdx, tileToKickMinTimestamp);
+
+			if(done)
+			{
+				break;
+			}
+		}
+	}
+
+	// 3) Prefer the empty tile over kicking another light out of its tile
+	U allocatedTileIdx;
+	if(emptyTileIdx != MAX_U)
+	{
+		allocatedTileIdx = emptyTileIdx;
+	}
+	else if(toKickTileIdx != MAX_U)
+	{
+		allocatedTileIdx = toKickTileIdx;
+	}
+	else
+	{
+		// Out of tiles
+		return TileAllocatorResult::ALLOCATION_FAILED;
+	}
+
+	// Allocation succeeded, need to do some bookkeeping
+
+	// Mark the allocated tile
+	Tile& allocatedTile = m_allTiles[allocatedTileIdx];
+	allocatedTile.m_lightTimestamp = lightTimestamp;
+	allocatedTile.m_lastUsedTimestamp = crntTimestamp;
+	allocatedTile.m_lightUuid = lightUuid;
+	allocatedTile.m_lightDrawcallCount = drawcallCount;
+	allocatedTile.m_lightLod = lod;
+	allocatedTile.m_lightFace = lightFace;
+
+	updateTileHierarchy(allocatedTile);
+
+	// Update the cache. The entry of a light that got kicked is not removed here, it's detected as wrong and erased
+	// by the check at the top the next time that light asks for a tile
+	if(m_cachingEnabled)
+	{
+		m_lightInfoToTileIdx.emplace(m_alloc, key, allocatedTileIdx);
+	}
+
+	// Return
+	tileViewport = {allocatedTile.m_viewport[0],
+		allocatedTile.m_viewport[1],
+		allocatedTile.m_viewport[2],
+		allocatedTile.m_viewport[3]};
+
+	return TileAllocatorResult::ALLOCATION_SUCCEEDED;
+}
+
+/// Drop the cache entry of a light's face, if there is one. Only the entry is removed, the tiles are left as they
+/// are. Should be used only if caching is enabled.
+void TileAllocator::invalidateCache(U64 lightUuid, U32 lightFace)
+{
+	ANKI_ASSERT(m_cachingEnabled);
+	ANKI_ASSERT(lightUuid > 0);
+
+	// Build the key of the entry to drop
+	HashMapKey cacheKey;
+	cacheKey.m_lightUuid = lightUuid;
+	cacheKey.m_face = lightFace;
+
+	auto entry = m_lightInfoToTileIdx.find(cacheKey);
+	const Bool found = entry != m_lightInfoToTileIdx.getEnd();
+	if(found)
+	{
+		m_lightInfoToTileIdx.erase(m_alloc, entry);
+	}
+}
+
+} // end namespace anki

+ 95 - 0
src/anki/renderer/TileAllocator.h

@@ -0,0 +1,95 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <anki/renderer/Common.h>
+
+namespace anki
+{
+
+/// @addtogroup renderer
+/// @{
+
+/// The result of a tile allocation.
+enum class TileAllocatorResult : U32
+{
+	CACHED, ///< The tile is cached. No need to re-render it.
+	ALLOCATION_FAILED, ///< No more available tiles.
+	ALLOCATION_SUCCEEDED ///< Allocation succeeded but the tile needs update.
+};
+
+/// Allocates tiles out of a tilemap suitable for shadow mapping.
+class TileAllocator : public NonCopyable
+{
+public:
+	~TileAllocator();
+
+	/// Initialize the allocator.
+	/// @param alloc The allocator to use for the internal containers.
+	/// @param tileCountX The number of tiles of LOD 0 in the X dimension. Should be greater than zero.
+	/// @param tileCountY The number of tiles of LOD 0 in the Y dimension. Should be greater than zero.
+	/// @param lodCount The number of LODs. Every LOD has half the tiles of the previous LOD in each dimension so
+	///                 the tile counts should be divisible by 2^(lodCount-1).
+	/// @param enableCaching If true remember which tile was given to which light between allocations.
+	void init(HeapAllocator<U8> alloc, U32 tileCountX, U32 tileCountY, U32 lodCount, Bool enableCaching);
+
+	/// Allocate some tiles.
+	/// @param crntTimestamp The current timestamp. Should be greater than zero.
+	/// @param lightTimestamp The timestamp of the last update of the light. Should be in (0, crntTimestamp].
+	/// @param lightUuid The UUID of the light. Can't be zero.
+	/// @param lightFace The face of the light. Should be less than 6.
+	/// @param drawcallCount The drawcall count of the light.
+	/// @param lod The LOD of the tile to allocate. Should be less than the LOD count.
+	/// @param[out] tileViewport The viewport of the allocated tile. Not written if the allocation failed.
+	ANKI_USE_RESULT TileAllocatorResult allocate(Timestamp crntTimestamp,
+		Timestamp lightTimestamp,
+		U64 lightUuid,
+		U32 lightFace,
+		U32 drawcallCount,
+		U32 lod,
+		Array<U32, 4>& tileViewport);
+
+	/// Remove a light from the cache. Should be used only if caching is enabled.
+	void invalidateCache(U64 lightUuid, U32 lightFace);
+
+private:
+	class Tile;
+
+	/// A HashMap key.
+	class HashMapKey;
+
+	HeapAllocator<U8> m_alloc;
+	DynamicArray<Tile> m_allTiles; ///< The tiles of all the LODs. The tiles of LOD 0 are first
+	/// For every LOD the index of its first tile in m_allTiles. It has an extra element at the end that holds the
+	/// index of the very last tile
+	DynamicArray<U32> m_lodFirstTileIndex;
+
+	HashMap<HashMapKey, U32> m_lightInfoToTileIdx; ///< The cache. Maps a light's face to an index in m_allTiles
+
+	U16 m_tileCountX = 0; ///< Tile count for LOD 0
+	U16 m_tileCountY = 0; ///< Tile count for LOD 0
+	U8 m_lodCount = 0;
+	Bool8 m_cachingEnabled = false;
+
+	/// Convert the 2D coordinates of a tile inside its LOD to an index in m_allTiles.
+	U32 translateTileIdx(U32 x, U32 y, U32 lod) const
+	{
+		const U lodWidth = m_tileCountX >> lod;
+		const U idx = y * lodWidth + x + m_lodFirstTileIndex[lod];
+		ANKI_ASSERT(idx < m_allTiles.getSize());
+		return idx;
+	}
+
+	/// Copy the light info and the timestamps of a tile to all the tiles below it.
+	void updateSubTiles(const Tile& updateFrom);
+
+	/// Clear the light of all the tiles above a tile and copy the tile's last used timestamp to them.
+	void updateSuperTiles(const Tile& updateFrom);
+
+	/// Given a tile move the hierarchy up and down to update the hierarchy this tile belongs to.
+	void updateTileHierarchy(const Tile& updateFrom)
+	{
+		updateSubTiles(updateFrom);
+		updateSuperTiles(updateFrom);
+	}
+
+	/// Search for a tile recursively.
+	/// @return True if the search is done.
+	Bool searchTileRecursively(U crntTileIdx,
+		U crntTileLod,
+		U allocationLod,
+		Timestamp crntTimestamp,
+		U& emptyTileIdx,
+		U& toKickTileIdx,
+		Timestamp& tileToKickMinTimestamp) const;
+
+	/// Check if a tile can be used for an allocation.
+	/// @return True if emptyTileIdx was set and the search is done.
+	Bool evaluateCandidate(
+		U tileIdx, Timestamp crntTimestamp, U& emptyTileIdx, U& toKickTileIdx, Timestamp& tileToKickMinTimestamp) const;
+};
+/// @}
+
+} // end namespace anki

+ 7 - 1
src/anki/renderer/Tonemapping.cpp

@@ -78,7 +78,13 @@ void Tonemapping::populateRenderGraph(RenderingContext& ctx)
 	// Create the pass
 	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Avg lum");
 
-	pass.setWork(runCallback, this, 0);
+	pass.setWork(
+		[](RenderPassWorkContext& rgraphCtx) {
+			Tonemapping* const self = static_cast<Tonemapping*>(rgraphCtx.m_userData);
+			self->run(rgraphCtx);
+		},
+		this,
+		0);
 
 	pass.newDependency({m_runCtx.m_buffHandle, BufferUsageBit::STORAGE_COMPUTE_READ_WRITE});
 

+ 0 - 7
src/anki/renderer/Tonemapping.h

@@ -50,13 +50,6 @@ private:
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);
 
 	void run(RenderPassWorkContext& rgraphCtx);
-
-	/// A RenderPassWorkCallback to run the compute pass.
-	static void runCallback(RenderPassWorkContext& rgraphCtx)
-	{
-		Tonemapping* const self = scast<Tonemapping*>(rgraphCtx.m_userData);
-		self->run(rgraphCtx);
-	}
 };
 /// @}
 

+ 62 - 14
src/anki/renderer/TraditionalDeferredShading.cpp

@@ -38,6 +38,10 @@ Error TraditionalDeferredLightShading::init()
 		mutators[0].m_value = 1;
 		m_lightProg->getOrCreateVariant(mutators.get(), variant);
 		m_slightGrProg = variant->getProgram();
+
+		mutators[0].m_value = 2;
+		m_lightProg->getOrCreateVariant(mutators.get(), variant);
+		m_dirLightGrProg = variant->getProgram();
 	}
 
 	// Init meshes
@@ -78,6 +82,9 @@ void TraditionalDeferredLightShading::drawLights(const Mat4& vpMat,
 	const UVec4& viewport,
 	const Vec2& gbufferTexCoordsMin,
 	const Vec2& gbufferTexCoordsMax,
+	F32 cameraNear,
+	F32 cameraFar,
+	DirectionalLightQueueElement* directionalLight,
 	ConstWeakArray<PointLightQueueElement> plights,
 	ConstWeakArray<SpotLightQueueElement> slights,
 	CommandBufferPtr& cmdb)
@@ -93,9 +100,46 @@ void TraditionalDeferredLightShading::drawLights(const Mat4& vpMat,
 
 	// Set common state for all lights
 	cmdb->setBlendFactors(0, BlendFactor::ONE, BlendFactor::ONE);
-	cmdb->setCullMode(FaceSelectionBit::FRONT);
 	cmdb->setViewport(viewport.x(), viewport.y(), viewport.z(), viewport.w());
 
+	// Dir light
+	if(directionalLight)
+	{
+		ANKI_ASSERT(directionalLight->m_uuid && directionalLight->m_shadowCascadeCount == 1);
+
+		cmdb->bindShaderProgram(m_dirLightGrProg);
+
+		DeferredDirectionalLightUniforms* unis = allocateAndBindUniforms<DeferredDirectionalLightUniforms*>(
+			sizeof(DeferredDirectionalLightUniforms), cmdb, 0, 1);
+
+		unis->m_inputTexUvScale = inputTexUvScaleAndOffset.xy();
+		unis->m_inputTexUvOffset = inputTexUvScaleAndOffset.zw();
+		unis->m_invViewProjMat = invViewProjMat;
+		unis->m_camPos = cameraPosWSpace.xyz();
+		unis->m_fbSize = Vec2(viewport.z(), viewport.w());
+
+		unis->m_diffuseColor = directionalLight->m_diffuseColor;
+		unis->m_lightDir = directionalLight->m_direction;
+		unis->m_lightMatrix = directionalLight->m_textureMatrices[0];
+
+		unis->m_near = cameraNear;
+		unis->m_far = cameraFar;
+
+		if(directionalLight->m_shadowCascadeCount > 0)
+		{
+			unis->m_effectiveShadowDistance = directionalLight->m_shadowRenderQueues[0]->m_effectiveShadowDistance;
+		}
+		else
+		{
+			unis->m_effectiveShadowDistance = 0.0f;
+		}
+
+		drawQuad(cmdb);
+	}
+
+	// Set other light state
+	cmdb->setCullMode(FaceSelectionBit::FRONT);
+
 	// Do point lights
 	U32 indexCount;
 	bindVertexIndexBuffers(m_plightMesh, cmdb, indexCount);
@@ -114,12 +158,14 @@ void TraditionalDeferredLightShading::drawLights(const Mat4& vpMat,
 		DeferredPointLightUniforms* light =
 			allocateAndBindUniforms<DeferredPointLightUniforms*>(sizeof(DeferredPointLightUniforms), cmdb, 0, 1);
 
-		light->m_inputTexUvScaleAndOffset = inputTexUvScaleAndOffset;
+		light->m_inputTexUvScale = inputTexUvScaleAndOffset.xy();
+		light->m_inputTexUvOffset = inputTexUvScaleAndOffset.zw();
 		light->m_invViewProjMat = invViewProjMat;
-		light->m_camPosPad1 = cameraPosWSpace.xyz0();
-		light->m_fbSizePad2 = Vec4(viewport.z(), viewport.w(), 0.0f, 0.0f);
-		light->m_posRadius = Vec4(plightEl.m_worldPosition.xyz(), 1.0f / (plightEl.m_radius * plightEl.m_radius));
-		light->m_diffuseColorPad1 = plightEl.m_diffuseColor.xyz0();
+		light->m_camPos = cameraPosWSpace.xyz();
+		light->m_fbSize = Vec2(viewport.z(), viewport.w());
+		light->m_position = plightEl.m_worldPosition;
+		light->m_oneOverSquareRadius = 1.0f / (plightEl.m_radius * plightEl.m_radius);
+		light->m_diffuseColor = plightEl.m_diffuseColor;
 
 		// Draw
 		cmdb->drawElements(PrimitiveTopology::TRIANGLES, indexCount);
@@ -153,18 +199,20 @@ void TraditionalDeferredLightShading::drawLights(const Mat4& vpMat,
 		DeferredSpotLightUniforms* light =
 			allocateAndBindUniforms<DeferredSpotLightUniforms*>(sizeof(DeferredSpotLightUniforms), cmdb, 0, 1);
 
-		light->m_inputTexUvScaleAndOffset = inputTexUvScaleAndOffset;
+		light->m_inputTexUvScale = inputTexUvScaleAndOffset.xy();
+		light->m_inputTexUvOffset = inputTexUvScaleAndOffset.zw();
 		light->m_invViewProjMat = invViewProjMat;
-		light->m_camPosPad1 = cameraPosWSpace.xyz0();
-		light->m_fbSizePad2 = Vec4(viewport.z(), viewport.w(), 0.0f, 0.0f);
+		light->m_camPos = cameraPosWSpace.xyz();
+		light->m_fbSize = Vec2(viewport.z(), viewport.w());
 
-		light->m_posRadius = Vec4(splightEl.m_worldTransform.getTranslationPart().xyz(),
-			1.0f / (splightEl.m_distance * splightEl.m_distance));
+		light->m_position = splightEl.m_worldTransform.getTranslationPart().xyz();
+		light->m_oneOverSquareRadius = 1.0f / (splightEl.m_distance * splightEl.m_distance);
 
-		light->m_diffuseColorOuterCos = Vec4(splightEl.m_diffuseColor, cos(splightEl.m_outerAngle / 2.0f));
+		light->m_diffuseColor = splightEl.m_diffuseColor;
+		light->m_outerCos = cos(splightEl.m_outerAngle / 2.0f);
 
-		Vec3 lightDir = -splightEl.m_worldTransform.getZAxis().xyz();
-		light->m_lightDirInnerCos = Vec4(lightDir, cos(splightEl.m_innerAngle / 2.0f));
+		light->m_lightDir = -splightEl.m_worldTransform.getZAxis().xyz();
+		light->m_innerCos = cos(splightEl.m_innerAngle / 2.0f);
 
 		// Draw
 		cmdb->drawElements(PrimitiveTopology::TRIANGLES, indexCount);

+ 4 - 0
src/anki/renderer/TraditionalDeferredShading.h

@@ -31,6 +31,9 @@ public:
 		const UVec4& viewport,
 		const Vec2& gbufferTexCoordsMin,
 		const Vec2& gbufferTexCoordsMax,
+		F32 cameraNear,
+		F32 cameraFar,
+		DirectionalLightQueueElement* directionalLight,
 		ConstWeakArray<PointLightQueueElement> plights,
 		ConstWeakArray<SpotLightQueueElement> slights,
 		CommandBufferPtr& cmdb);
@@ -39,6 +42,7 @@ private:
 	ShaderProgramResourcePtr m_lightProg;
 	ShaderProgramPtr m_plightGrProg;
 	ShaderProgramPtr m_slightGrProg;
+	ShaderProgramPtr m_dirLightGrProg;
 
 	/// @name Meshes of light volumes.
 	/// @{

+ 2 - 2
src/anki/resource/ShaderProgramResource.h

@@ -65,7 +65,7 @@ public:
 
 	ShaderProgramResourceMutation()
 	{
-		memset(this, 0, sizeof(*this));
+		zeroMemory(*this);
 	}
 };
 
@@ -228,7 +228,7 @@ public:
 
 	ShaderProgramResourceConstantValue()
 	{
-		memset(this, 0, sizeof(*this));
+		zeroMemory(*this);
 	}
 };
 

+ 4 - 2
src/anki/scene/CameraNode.cpp

@@ -79,13 +79,15 @@ Error CameraNode::init(Frustum* frustum)
 		| FrustumComponentVisibilityTestFlag::LENS_FLARE_COMPONENTS
 		| FrustumComponentVisibilityTestFlag::REFLECTION_PROBES | FrustumComponentVisibilityTestFlag::REFLECTION_PROXIES
 		| FrustumComponentVisibilityTestFlag::OCCLUDERS | FrustumComponentVisibilityTestFlag::DECALS
-		| FrustumComponentVisibilityTestFlag::FOG_DENSITY_COMPONENTS | FrustumComponentVisibilityTestFlag::EARLY_Z);
+		| FrustumComponentVisibilityTestFlag::FOG_DENSITY_COMPONENTS | FrustumComponentVisibilityTestFlag::EARLY_Z
+		| FrustumComponentVisibilityTestFlag::ALL_SHADOWS_ENABLED);
 
 	// Feedback component #2
 	newComponent<FrustumFeedbackComponent>();
 
 	// Spatial component
-	newComponent<SpatialComponent>(this, frustum);
+	SpatialComponent* spatialc = newComponent<SpatialComponent>(this, frustum);
+	spatialc->setUpdateOctreeBounds(false);
 
 	return Error::NONE;
 }

+ 46 - 0
src/anki/scene/LightNode.cpp

@@ -297,4 +297,50 @@ Error SpotLightNode::frameUpdate(Second prevUpdateTime, Second crntTime)
 	return Error::NONE;
 }
 
+/// Feedback component. When the node moves it passes the new transform to the light and the spatial components.
+class DirectionalLightNode::FeedbackComponent : public SceneComponent
+{
+public:
+	FeedbackComponent()
+		: SceneComponent(SceneComponentType::NONE)
+	{
+	}
+
+	/// NOTE(review): "updated" is never written in here, confirm that the caller initializes it.
+	Error update(SceneNode& node, Second prevTime, Second crntTime, Bool& updated) override
+	{
+		// DirectionalLightNode::init() creates the move component first, that's why it sits at index 0
+		const MoveComponent& movec = node.getComponentAt<MoveComponent>(0);
+		if(movec.getTimestamp() != node.getGlobalTimestamp())
+		{
+			// The node didn't move in this update, nothing to do
+			return Error::NONE;
+		}
+
+		// The node moved, inform the light...
+		node.getComponent<LightComponent>().updateWorldTransform(movec.getWorldTransform());
+
+		// ...and the spatial
+		SpatialComponent& spatialc = node.getComponent<SpatialComponent>();
+		spatialc.setSpatialOrigin(movec.getWorldTransform().getOrigin());
+		spatialc.markForUpdate();
+
+		return Error::NONE;
+	}
+};
+
+/// Only forwards to SceneNode. The components are created by init().
+DirectionalLightNode::DirectionalLightNode(SceneGraph* scene, CString name)
+	: SceneNode(scene, name)
+{
+}
+
+/// Create the components of the node: move, feedback, light (directional) and spatial.
+Error DirectionalLightNode::init()
+{
+	// The move component should stay first, FeedbackComponent::update() looks for it at index 0
+	newComponent<MoveComponent>();
+	newComponent<FeedbackComponent>();
+	newComponent<LightComponent>(LightComponentType::DIRECTIONAL, getSceneGraph().getNewUuid());
+	SpatialComponent* spatialc = newComponent<SpatialComponent>(this, &m_boundingBox);
+
+	// Make the bounding box large enough so it will always be visible. Because of that don't update the octree bounds
+	m_boundingBox.setMin(getSceneGraph().getSceneMin());
+	m_boundingBox.setMax(getSceneGraph().getSceneMax());
+	spatialc->setUpdateOctreeBounds(false);
+
+	return Error::NONE;
+}
+
 } // end namespace anki

+ 14 - 0
src/anki/scene/LightNode.h

@@ -89,6 +89,20 @@ private:
 	void onMoveUpdate(const MoveComponent& move) override;
 	void onShapeUpdate(LightComponent& light) override;
 };
+
+/// Directional light (the sun).
+class DirectionalLightNode : public SceneNode
+{
+public:
+	DirectionalLightNode(SceneGraph* scene, CString name);
+
+	/// Create the components of the node.
+	ANKI_USE_RESULT Error init();
+
+private:
+	/// Reacts to the movement of the node.
+	class FeedbackComponent;
+
+	Aabb m_boundingBox; ///< The shape given to the spatial component. init() sets it to the bounds of the scene
+};
 /// @}
 
 } // end namespace anki

+ 8 - 1
src/anki/scene/Octree.cpp

@@ -72,7 +72,7 @@ void Octree::init(const Vec3& sceneAabbMin, const Vec3& sceneAabbMax, U32 maxDep
 	m_sceneAabbMax = sceneAabbMax;
 }
 
-void Octree::place(const Aabb& volume, OctreePlaceable* placeable)
+void Octree::place(const Aabb& volume, OctreePlaceable* placeable, Bool updateActualSceneBounds)
 {
 	ANKI_ASSERT(placeable);
 	ANKI_ASSERT(testCollisionShapes(volume, Aabb(m_sceneAabbMin, m_sceneAabbMax)) && "volume is outside the scene");
@@ -93,6 +93,13 @@ void Octree::place(const Aabb& volume, OctreePlaceable* placeable)
 	// And re-place it
 	placeRecursive(volume, placeable, m_rootLeaf, 0);
 	++m_placeableCount;
+
+	// Update the actual scene bounds
+	if(updateActualSceneBounds)
+	{
+		m_actualSceneAabbMin = m_actualSceneAabbMin.min(volume.getMin().xyz());
+		m_actualSceneAabbMax = m_actualSceneAabbMax.max(volume.getMax().xyz());
+	}
 }
 
 void Octree::remove(OctreePlaceable& placeable)

+ 16 - 2
src/anki/scene/Octree.h

@@ -52,7 +52,7 @@ public:
 
 	/// Place or re-place an element in the tree.
 	/// @note It's thread-safe against place and remove methods.
-	void place(const Aabb& volume, OctreePlaceable* placeable);
+	void place(const Aabb& volume, OctreePlaceable* placeable, Bool updateActualSceneBounds);
 
 	/// Remove an element from the tree.
 	/// @note It's thread-safe against place and remove methods.
@@ -106,6 +106,16 @@ public:
 		debugDrawRecursive(*m_rootLeaf, drawer);
 	}
 
+	/// Get the bounds of the scene as calculated by the objects that were placed inside the Octree.
+	/// @param[out] min The min of the bounds.
+	/// @param[out] max The max of the bounds.
+	void getActualSceneBounds(Vec3& min, Vec3& max) const
+	{
+		LockGuard<Mutex> lock(m_globalMtx);
+		// The bounds start as (MAX_F32, MIN_F32). The asserts catch the case where the bounds were never updated
+		// (only the X component is checked)
+		ANKI_ASSERT(m_actualSceneAabbMin.x() < MAX_F32);
+		ANKI_ASSERT(m_actualSceneAabbMax.x() > MIN_F32);
+		min = m_actualSceneAabbMin;
+		max = m_actualSceneAabbMax;
+	}
+
 private:
 	class GatherParallelCtx;
 	class GatherParallelTaskCtx;
@@ -192,7 +202,7 @@ private:
 	U32 m_maxDepth = 0;
 	Vec3 m_sceneAabbMin = Vec3(0.0f);
 	Vec3 m_sceneAabbMax = Vec3(0.0f);
-	Mutex m_globalMtx;
+	mutable Mutex m_globalMtx;
 
 	ObjectAllocatorSameType<Leaf, 256> m_leafAlloc;
 	ObjectAllocatorSameType<LeafNode, 128> m_leafNodeAlloc;
@@ -201,6 +211,10 @@ private:
 	Leaf* m_rootLeaf = nullptr;
 	U32 m_placeableCount = 0;
 
+	/// The min and max of the scene bounds, computed from what is placed inside the octree.
+	Vec3 m_actualSceneAabbMin = Vec3(MAX_F32);
+	Vec3 m_actualSceneAabbMax = Vec3(MIN_F32);
+
 	Leaf* newLeaf()
 	{
 		return m_leafAlloc.newInstance(m_alloc);

+ 2 - 1
src/anki/scene/PhysicsDebugNode.cpp

@@ -72,11 +72,12 @@ Error PhysicsDebugNode::init()
 	ANKI_CHECK(rcomp->init());
 
 	ObbSpatialComponent* scomp = newComponent<ObbSpatialComponent>(this);
-	Vec3 center = (getSceneGraph().getSceneMax() + getSceneGraph().getSceneMin()) / 2.0f;
+	const Vec3 center = (getSceneGraph().getSceneMax() + getSceneGraph().getSceneMin()) / 2.0f;
 	scomp->m_obb.setCenter(center.xyz0());
 	scomp->m_obb.setExtend((getSceneGraph().getSceneMax() - center).xyz0());
 	scomp->m_obb.setRotation(Mat3x4::getIdentity());
 	scomp->setSpatialOrigin(Vec4(0.0f));
+	scomp->setUpdateOctreeBounds(false); // Don't mess with the bounds
 
 	return Error::NONE;
 }

+ 6 - 4
src/anki/scene/ReflectionProbeNode.cpp

@@ -16,7 +16,8 @@ namespace anki
 {
 
 const FrustumComponentVisibilityTestFlag FRUSTUM_TEST_FLAGS =
-	FrustumComponentVisibilityTestFlag::RENDER_COMPONENTS | FrustumComponentVisibilityTestFlag::LIGHT_COMPONENTS;
+	FrustumComponentVisibilityTestFlag::RENDER_COMPONENTS | FrustumComponentVisibilityTestFlag::LIGHT_COMPONENTS
+	| FrustumComponentVisibilityTestFlag::DIRECTIONAL_LIGHT_SHADOWS_1_CASCADE;
 
 /// Feedback component
 class ReflectionProbeNode::MoveFeedbackComponent : public SceneComponent
@@ -53,7 +54,7 @@ Error ReflectionProbeNode::init(const Vec4& aabbMinLSpace, const Vec4& aabbMaxLS
 	F32 effectiveDistance = aabbMaxLSpace.x() - aabbMinLSpace.x();
 	effectiveDistance = max(effectiveDistance, aabbMaxLSpace.y() - aabbMinLSpace.y());
 	effectiveDistance = max(effectiveDistance, aabbMaxLSpace.z() - aabbMinLSpace.z());
-	effectiveDistance = max(effectiveDistance, EFFECTIVE_DISTANCE);
+	effectiveDistance = max(effectiveDistance, getSceneGraph().getLimits().m_reflectionProbeEffectiveDistance);
 
 	// Move component first
 	newComponent<MoveComponent>();
@@ -89,8 +90,8 @@ Error ReflectionProbeNode::init(const Vec4& aabbMinLSpace, const Vec4& aabbMaxLS
 		m_cubeSides[i].m_frustum.resetTransform(m_cubeSides[i].m_localTrf);
 
 		FrustumComponent* frc = newComponent<FrustumComponent>(this, &m_cubeSides[i].m_frustum);
-
 		frc->setEnabledVisibilityTests(FrustumComponentVisibilityTestFlag::NONE);
+		frc->setEffectiveShadowDistance(getSceneGraph().getLimits().m_reflectionProbeShadowEffectiveDistance);
 	}
 
 	// Spatial component
@@ -98,7 +99,8 @@ Error ReflectionProbeNode::init(const Vec4& aabbMinLSpace, const Vec4& aabbMaxLS
 	m_aabbMaxLSpace = aabbMaxLSpace.xyz();
 	m_spatialAabb.setMin(aabbMinLSpace);
 	m_spatialAabb.setMax(aabbMaxLSpace);
-	newComponent<SpatialComponent>(this, &m_spatialAabb);
+	SpatialComponent* spatialc = newComponent<SpatialComponent>(this, &m_spatialAabb);
+	spatialc->setUpdateOctreeBounds(false);
 
 	// Reflection probe comp
 	ReflectionProbeComponent* reflc = newComponent<ReflectionProbeComponent>(getSceneGraph().getNewUuid());

+ 0 - 2
src/anki/scene/ReflectionProbeNode.h

@@ -20,8 +20,6 @@ namespace anki
 class ReflectionProbeNode : public SceneNode
 {
 public:
-	const F32 EFFECTIVE_DISTANCE = 256.0f;
-
 	ReflectionProbeNode(SceneGraph* scene, CString name)
 		: SceneNode(scene, name)
 	{

+ 5 - 4
src/anki/scene/SceneGraph.cpp

@@ -64,7 +64,6 @@ Error SceneGraph::init(AllocAlignedCallback allocCb,
 	m_globalTimestamp = globalTimestamp;
 	m_threadHive = threadHive;
 	m_resources = resources;
-	m_objectsMarkedForDeletionCount.store(0);
 	m_gr = &m_resources->getGrManager();
 	m_physics = &m_resources->getPhysicsWorld();
 	m_input = input;
@@ -73,12 +72,14 @@ Error SceneGraph::init(AllocAlignedCallback allocCb,
 	m_alloc = SceneAllocator<U8>(allocCb, allocCbData);
 	m_frameAlloc = SceneFrameAllocator<U8>(allocCb, allocCbData, 1 * 1024 * 1024);
 
-	m_earlyZDist = config.getNumber("scene.earlyZDistance");
+	// Limits
+	m_limits.m_earlyZDistance = config.getNumber("scene.earlyZDistance");
+	m_limits.m_reflectionProbeEffectiveDistance = config.getNumber("scene.reflectionProbeEffectiveDistance");
+	m_limits.m_reflectionProbeShadowEffectiveDistance =
+		config.getNumber("scene.reflectionProbeShadowEffectiveDistance");
 
 	ANKI_CHECK(m_events.init(this));
 
-	m_maxReflectionProxyDistance = config.getNumber("scene.imageReflectionMaxDistance");
-
 	m_octree = m_alloc.newInstance<Octree>(m_alloc);
 	m_octree->init(m_sceneMin, m_sceneMax, 5); // TODO
 

+ 18 - 19
src/anki/scene/SceneGraph.h

@@ -39,6 +39,15 @@ public:
 	Second m_physicsUpdate ANKI_DBG_NULLIFY;
 };
 
+/// SceneGraph limits. SceneGraph::init() reads them from the config ("scene.earlyZDistance",
+/// "scene.reflectionProbeEffectiveDistance" and "scene.reflectionProbeShadowEffectiveDistance"). Until then they
+/// hold -1.
+class SceneGraphLimits
+{
+public:
+	F32 m_earlyZDistance = -1.0f; ///< Objects with distance lower than that will be used in early Z.
+	F32 m_reflectionProbeEffectiveDistance = -1.0f; ///< How far reflection probes can look.
+	F32 m_reflectionProbeShadowEffectiveDistance = -1.0f; ///< How far to render shadows for reflection probes.
+};
+
 /// The scene graph that  all the scene entities
 class SceneGraph
 {
@@ -161,6 +170,11 @@ public:
 		return m_stats;
 	}
 
+	/// Get the limits that were read from the config in init().
+	const SceneGraphLimits& getLimits() const
+	{
+		return m_limits;
+	}
+
 	const Vec3& getSceneMin() const
 	{
 		return m_sceneMin;
@@ -171,7 +185,6 @@ public:
 		return m_sceneMax;
 	}
 
-anki_internal:
 	ResourceManager& getResourceManager()
 	{
 		return *m_resources;
@@ -204,20 +217,9 @@ anki_internal:
 		return *m_input;
 	}
 
-	F32 getMaxReflectionProxyDistance() const
-	{
-		ANKI_ASSERT(m_maxReflectionProxyDistance > 0.0);
-		return m_maxReflectionProxyDistance;
-	}
-
 	U64 getNewUuid()
 	{
-		return m_nodesUuid++;
-	}
-
-	F32 getEarlyZDistance() const
-	{
-		return m_earlyZDist;
+		return m_nodesUuid.fetchAdd(1);
 	}
 
 	Octree& getOctree()
@@ -258,14 +260,11 @@ private:
 	Vec3 m_sceneMin = {-1000.0f, -200.0f, -1000.0f};
 	Vec3 m_sceneMax = {1000.0f, 200.0f, 1000.0f};
 
-	Atomic<U32> m_objectsMarkedForDeletionCount;
-
-	F32 m_maxReflectionProxyDistance = 0.0;
-
-	U64 m_nodesUuid = 0;
+	Atomic<U32> m_objectsMarkedForDeletionCount = {0};
 
-	F32 m_earlyZDist = -1.0;
+	Atomic<U64> m_nodesUuid = {1};
 
+	SceneGraphLimits m_limits;
 	SceneGraphStats m_stats;
 
 	/// Put a node in the appropriate containers

+ 118 - 37
src/anki/scene/Visibility.cpp

@@ -38,6 +38,7 @@ void VisibilityContext::submitNewWork(const FrustumComponent& frc, RenderQueue&
 	rqueue.m_previousViewProjectionMatrix = frc.getPreviousViewProjectionMatrix();
 	rqueue.m_cameraNear = frc.getFrustum().getNear();
 	rqueue.m_cameraFar = frc.getFrustum().getFar();
+	rqueue.m_effectiveShadowDistance = frc.getEffectiveShadowDistance();
 
 	auto alloc = m_scene->getFrameAllocator();
 
@@ -242,45 +243,24 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 		Bool wantNode = false;
 
 		const RenderComponent* rc = nullptr;
-		if(wantsRenderComponents && (rc = node.tryGetComponent<RenderComponent>()))
-		{
-			wantNode = true;
-		}
+		wantNode |= wantsRenderComponents && (rc = node.tryGetComponent<RenderComponent>());
 
-		if(wantsShadowCasters && (rc = node.tryGetComponent<RenderComponent>()) && rc->getCastsShadow())
-		{
-			wantNode = true;
-		}
+		wantNode |= wantsShadowCasters && (rc = node.tryGetComponent<RenderComponent>()) && rc->getCastsShadow();
 
 		const LightComponent* lc = nullptr;
-		if(wantsLightComponents && (lc = node.tryGetComponent<LightComponent>()))
-		{
-			wantNode = true;
-		}
+		wantNode |= wantsLightComponents && (lc = node.tryGetComponent<LightComponent>());
 
 		const LensFlareComponent* lfc = nullptr;
-		if(wantsFlareComponents && (lfc = node.tryGetComponent<LensFlareComponent>()))
-		{
-			wantNode = true;
-		}
+		wantNode |= wantsFlareComponents && (lfc = node.tryGetComponent<LensFlareComponent>());
 
 		const ReflectionProbeComponent* reflc = nullptr;
-		if(wantsReflectionProbes && (reflc = node.tryGetComponent<ReflectionProbeComponent>()))
-		{
-			wantNode = true;
-		}
+		wantNode |= wantsReflectionProbes && (reflc = node.tryGetComponent<ReflectionProbeComponent>());
 
 		DecalComponent* decalc = nullptr;
-		if(wantsDecals && (decalc = node.tryGetComponent<DecalComponent>()))
-		{
-			wantNode = true;
-		}
+		wantNode |= wantsDecals && (decalc = node.tryGetComponent<DecalComponent>());
 
 		const FogDensityComponent* fogc = nullptr;
-		if(wantsFogDensityComponents && (fogc = node.tryGetComponent<FogDensityComponent>()))
-		{
-			wantNode = true;
-		}
+		wantNode |= wantsFogDensityComponents && (fogc = node.tryGetComponent<FogDensityComponent>());
 
 		if(ANKI_UNLIKELY(!wantNode))
 		{
@@ -321,7 +301,7 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 		ANKI_ASSERT(count == 1 && "TODO: Support sub-spatials");
 
 		// Sort sub-spatials
-		Vec4 origin = testedFrc.getFrustumOrigin();
+		const Vec4 origin = testedFrc.getFrustumOrigin();
 		std::sort(sps.begin(), sps.begin() + count, [origin](const SpatialTemp& a, const SpatialTemp& b) -> Bool {
 			const Vec4& spa = a.m_origin;
 			const Vec4& spb = b.m_origin;
@@ -333,6 +313,7 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 		});
 
 		WeakArray<RenderQueue> nextQueues;
+		WeakArray<FrustumComponent> nextQueueFrustumComponents; // Optional
 
 		if(rc)
 		{
@@ -361,6 +342,19 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 
 		if(lc)
 		{
+			// Check if it casts shadow
+			Bool castsShadow = lc->getShadowEnabled();
+			if(castsShadow)
+			{
+				// Extra check
+
+				// Compute distance from the frustum
+				const Plane& nearPlane = testedFrc.getFrustum().getPlanesWorldSpace()[FrustumPlaneType::NEAR];
+				const F32 distFromFrustum = max(0.0f, sps[0].m_sp->getAabb().testPlane(nearPlane));
+
+				castsShadow = distFromFrustum < testedFrc.getEffectiveShadowDistance();
+			}
+
 			switch(lc->getLightComponentType())
 			{
 			case LightComponentType::POINT:
@@ -368,7 +362,9 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 				PointLightQueueElement* el = result.m_pointLights.newElement(alloc);
 				lc->setupPointLightQueueElement(*el);
 
-				if(lc->getShadowEnabled())
+				if(castsShadow
+					&& testedFrc.visibilityTestsEnabled(
+						   FrustumComponentVisibilityTestFlag::POINT_LIGHT_SHADOWS_ENABLED))
 				{
 					RenderQueue* a = alloc.newArray<RenderQueue>(6);
 					nextQueues = WeakArray<RenderQueue>(a, 6);
@@ -395,7 +391,8 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 				SpotLightQueueElement* el = result.m_spotLights.newElement(alloc);
 				lc->setupSpotLightQueueElement(*el);
 
-				if(lc->getShadowEnabled())
+				if(castsShadow
+					&& testedFrc.visibilityTestsEnabled(FrustumComponentVisibilityTestFlag::SPOT_LIGHT_SHADOWS_ENABLED))
 				{
 					RenderQueue* a = alloc.newInstance<RenderQueue>();
 					nextQueues = WeakArray<RenderQueue>(a, 1);
@@ -411,6 +408,71 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 
 				break;
 			}
+			case LightComponentType::DIRECTIONAL:
+			{
+				ANKI_ASSERT(lc->getShadowEnabled() == true && "Only with shadow for now");
+
+				U cascadeCount;
+				if(ANKI_UNLIKELY(!castsShadow))
+				{
+					cascadeCount = 0;
+				}
+				else if(testedFrc.visibilityTestsEnabled(
+							FrustumComponentVisibilityTestFlag::DIRECTIONAL_LIGHT_SHADOWS_1_CASCADE))
+				{
+					cascadeCount = 1;
+				}
+				else
+				{
+					ANKI_ASSERT(testedFrc.visibilityTestsEnabled(
+						FrustumComponentVisibilityTestFlag::DIRECTIONAL_LIGHT_SHADOWS_ALL_CASCADES));
+					cascadeCount = MAX_SHADOW_CASCADES;
+				}
+				ANKI_ASSERT(cascadeCount <= MAX_SHADOW_CASCADES);
+
+				WeakArray<OrthographicFrustum> cascadeFrustums(
+					(cascadeCount) ? alloc.newArray<OrthographicFrustum>(cascadeCount) : nullptr, cascadeCount);
+
+				lc->setupDirectionalLightQueueElement(testedFrc.getFrustum(),
+					testedFrc.getEffectiveShadowDistance(),
+					result.m_directionalLight,
+					cascadeFrustums);
+
+				nextQueues = WeakArray<RenderQueue>(
+					(cascadeCount) ? alloc.newArray<RenderQueue>(cascadeCount) : nullptr, cascadeCount);
+				for(U i = 0; i < cascadeCount; ++i)
+				{
+					result.m_directionalLight.m_shadowRenderQueues[i] = &nextQueues[i];
+				}
+
+				// Despite the fact that it's the same light it will have different properties if viewed by different
+				// cameras. If the renderer finds the same UUID it will think it's cached and use wrong shadow tiles.
+				// That's why we need to change its UUID and bind it to the frustum that is currently viewing the light
+				result.m_directionalLight.m_uuid = testedNode.getUuid();
+
+				// Create some dummy frustum components and manually update them
+				FrustumComponent* cascadeFrustumComponents =
+					(cascadeCount) ? reinterpret_cast<FrustumComponent*>(alloc.allocate(
+										 cascadeCount * sizeof(FrustumComponent), alignof(FrustumComponent)))
+								   : nullptr;
+				for(U i = 0; i < cascadeCount; ++i)
+				{
+					::new(&cascadeFrustumComponents[i]) FrustumComponent(&node, &cascadeFrustums[i]);
+					cascadeFrustumComponents[i].setEnabledVisibilityTests(
+						FrustumComponentVisibilityTestFlag::SHADOW_CASTERS);
+					cascadeFrustumComponents[i].markShapeForUpdate();
+					cascadeFrustumComponents[i].markTransformForUpdate();
+					Bool updated;
+					Error err = cascadeFrustumComponents[i].update(node, 0.0f, 1.0f, updated);
+					ANKI_ASSERT(updated == true && !err);
+					(void)err;
+					(void)updated;
+				}
+
+				nextQueueFrustumComponents = WeakArray<FrustumComponent>(cascadeFrustumComponents, cascadeCount);
+
+				break;
+			}
 			default:
 				ANKI_ASSERT(0);
 			}
@@ -461,11 +523,22 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 		if(nextQueues.getSize() > 0)
 		{
 			count = 0;
-			err = node.iterateComponentsOfType<FrustumComponent>([&](FrustumComponent& frc) {
-				m_frcCtx->m_visCtx->submitNewWork(frc, nextQueues[count++], hive);
-				return Error::NONE;
-			});
-			(void)err;
+
+			if(ANKI_LIKELY(nextQueueFrustumComponents.getSize() == 0))
+			{
+				err = node.iterateComponentsOfType<FrustumComponent>([&](FrustumComponent& frc) {
+					m_frcCtx->m_visCtx->submitNewWork(frc, nextQueues[count++], hive);
+					return Error::NONE;
+				});
+				(void)err;
+			}
+			else
+			{
+				for(FrustumComponent& frc : nextQueueFrustumComponents)
+				{
+					m_frcCtx->m_visCtx->submitNewWork(frc, nextQueues[count++], hive);
+				}
+			}
 		}
 
 		// Update timestamp
@@ -531,6 +604,14 @@ void CombineResultsTask::combine()
 	ANKI_VIS_COMBINE(DecalQueueElement, m_decals);
 	ANKI_VIS_COMBINE(FogDensityQueueElement, m_fogDensityVolumes);
 
+	for(U i = 0; i < threadCount; ++i)
+	{
+		if(m_frcCtx->m_queueViews[i].m_directionalLight.m_uuid != 0)
+		{
+			results.m_directionalLight = m_frcCtx->m_queueViews[i].m_directionalLight;
+		}
+	}
+
 #undef ANKI_VIS_COMBINE
 #undef ANKI_VIS_COMBINE_AND_PTR
 
@@ -668,7 +749,7 @@ void SceneGraph::doVisibilityTests(SceneNode& fsn, SceneGraph& scene, RenderQueu
 
 	VisibilityContext ctx;
 	ctx.m_scene = &scene;
-	ctx.m_earlyZDist = scene.getEarlyZDistance();
+	ctx.m_earlyZDist = scene.getLimits().m_earlyZDistance;
 	ctx.submitNewWork(fsn.getComponent<FrustumComponent>(), rqueue, hive);
 
 	hive.waitAllTasks();

+ 6 - 0
src/anki/scene/VisibilityInternal.h

@@ -110,12 +110,18 @@ public:
 	TRenderQueueElementStorage<U32> m_shadowPointLights;
 	TRenderQueueElementStorage<SpotLightQueueElement> m_spotLights;
 	TRenderQueueElementStorage<U32> m_shadowSpotLights;
+	DirectionalLightQueueElement m_directionalLight;
 	TRenderQueueElementStorage<ReflectionProbeQueueElement> m_reflectionProbes;
 	TRenderQueueElementStorage<LensFlareQueueElement> m_lensFlares;
 	TRenderQueueElementStorage<DecalQueueElement> m_decals;
 	TRenderQueueElementStorage<FogDensityQueueElement> m_fogDensityVolumes;
 
 	Timestamp m_timestamp = 0;
+
+	RenderQueueView()
+	{
+		zeroMemory(m_directionalLight);
+	}
 };
 
 static_assert(std::is_trivially_destructible<RenderQueueView>::value == true, "Should be trivially destructible");

+ 43 - 22
src/anki/scene/components/FrustumComponent.h

@@ -25,16 +25,26 @@ enum class FrustumComponentVisibilityTestFlag : U16
 	RENDER_COMPONENTS = 1 << 0,
 	LIGHT_COMPONENTS = 1 << 1,
 	LENS_FLARE_COMPONENTS = 1 << 2,
-	SHADOW_CASTERS = 1 << 3,
-	REFLECTION_PROBES = 1 << 4,
-	REFLECTION_PROXIES = 1 << 5,
-	OCCLUDERS = 1 << 6,
-	DECALS = 1 << 7,
-	FOG_DENSITY_COMPONENTS = 1 << 8,
-	EARLY_Z = 1 << 9,
-
-	ALL_TESTS = RENDER_COMPONENTS | LIGHT_COMPONENTS | LENS_FLARE_COMPONENTS | SHADOW_CASTERS | REFLECTION_PROBES
-				| REFLECTION_PROXIES | DECALS | FOG_DENSITY_COMPONENTS | EARLY_Z
+	SHADOW_CASTERS = 1 << 3, ///< Render components that cast shadow
+	POINT_LIGHT_SHADOWS_ENABLED = 1 << 4,
+	SPOT_LIGHT_SHADOWS_ENABLED = 1 << 5,
+	DIRECTIONAL_LIGHT_SHADOWS_ALL_CASCADES = 1 << 6,
+	DIRECTIONAL_LIGHT_SHADOWS_1_CASCADE = 1 << 7,
+	REFLECTION_PROBES = 1 << 8,
+	REFLECTION_PROXIES = 1 << 9,
+	OCCLUDERS = 1 << 10,
+	DECALS = 1 << 11,
+	FOG_DENSITY_COMPONENTS = 1 << 12,
+	EARLY_Z = 1 << 13,
+
+	LAST = EARLY_Z,
+
+	ALL = RENDER_COMPONENTS | LIGHT_COMPONENTS | LENS_FLARE_COMPONENTS | SHADOW_CASTERS | POINT_LIGHT_SHADOWS_ENABLED
+		  | SPOT_LIGHT_SHADOWS_ENABLED | DIRECTIONAL_LIGHT_SHADOWS_ALL_CASCADES | DIRECTIONAL_LIGHT_SHADOWS_1_CASCADE
+		  | REFLECTION_PROBES | REFLECTION_PROXIES | OCCLUDERS | DECALS | FOG_DENSITY_COMPONENTS | EARLY_Z,
+
+	ALL_SHADOWS_ENABLED =
+		POINT_LIGHT_SHADOWS_ENABLED | SPOT_LIGHT_SHADOWS_ENABLED | DIRECTIONAL_LIGHT_SHADOWS_ALL_CASCADES
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(FrustumComponentVisibilityTestFlag, inline)
 
@@ -44,12 +54,6 @@ class FrustumComponent : public SceneComponent
 public:
 	static const SceneComponentType CLASS_TYPE = SceneComponentType::FRUSTUM;
 
-	struct VisibilityStats
-	{
-		U32 m_renderablesCount = 0;
-		U32 m_lightsCount = 0;
-	};
-
 	/// Pass the frustum here so we can avoid the virtuals
 	FrustumComponent(SceneNode* node, Frustum* frustum);
 
@@ -132,7 +136,7 @@ public:
 
 	void setEnabledVisibilityTests(FrustumComponentVisibilityTestFlag bits)
 	{
-		m_flags.unset(FrustumComponentVisibilityTestFlag::ALL_TESTS);
+		m_flags.unset(FrustumComponentVisibilityTestFlag::ALL);
 		m_flags.set(bits, true);
 
 #if ANKI_ASSERTS_ENABLED
@@ -145,6 +149,8 @@ public:
 				ANKI_ASSERT(0 && "Cannot have them both");
 			}
 		}
+
+		// TODO
 #endif
 	}
 
@@ -155,7 +161,7 @@ public:
 
 	Bool anyVisibilityTestEnabled() const
 	{
-		return m_flags.getAny(FrustumComponentVisibilityTestFlag::ALL_TESTS);
+		return m_flags.getAny(FrustumComponentVisibilityTestFlag::ALL);
 	}
 
 	/// The type is FillCoverageBufferCallback.
@@ -181,11 +187,23 @@ public:
 		}
 	}
 
+	/// How far to render shadows for this frustum.
+	F32 getEffectiveShadowDistance() const
+	{
+		return (m_effectiveShadowDist < 0.0f) ? m_frustum->getFar() : m_effectiveShadowDist;
+	}
+
+	/// Set how far to render shadows for this frustum or set to negative if you want to use the m_frustum's far.
+	void setEffectiveShadowDistance(F32 dist)
+	{
+		m_effectiveShadowDist = dist;
+	}
+
 private:
-	enum Flags
+	enum Flags : U16
 	{
-		SHAPE_MARKED_FOR_UPDATE = 1 << 10,
-		TRANSFORM_MARKED_FOR_UPDATE = 1 << 12,
+		SHAPE_MARKED_FOR_UPDATE = static_cast<U16>(FrustumComponentVisibilityTestFlag::LAST) << 1,
+		TRANSFORM_MARKED_FOR_UPDATE = static_cast<U16>(FrustumComponentVisibilityTestFlag::LAST) << 2,
 	};
 	ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(Flags, friend)
 
@@ -196,7 +214,8 @@ private:
 	Mat4 m_viewProjMat = Mat4::getIdentity(); ///< View projection matrix
 	Mat4 m_prevViewProjMat = Mat4::getIdentity();
 
-	BitMask<U16> m_flags;
+	/// How far to render shadows for this frustum. If negative it's the m_frustum's far.
+	F32 m_effectiveShadowDist = -1.0f;
 
 	class
 	{
@@ -205,6 +224,8 @@ private:
 		U32 m_depthMapWidth = 0;
 		U32 m_depthMapHeight = 0;
 	} m_coverageBuff; ///< Coverage buffer for extra visibility tests.
+
+	BitMask<U16> m_flags;
 };
 /// @}
 

+ 158 - 5
src/anki/scene/components/LightComponent.cpp

@@ -4,6 +4,12 @@
 // http://www.anki3d.org/LICENSE
 
 #include <anki/scene/components/LightComponent.h>
+#include <anki/scene/SceneNode.h>
+#include <anki/scene/SceneGraph.h>
+#include <anki/scene/Octree.h>
+#include <anki/collision/Frustum.h>
+#include <anki/collision/Sphere.h>
+#include <anki/collision/Plane.h>
 #include <shaders/glsl_cpp_common/ClusteredShading.h>
 
 namespace anki
@@ -15,9 +21,25 @@ LightComponent::LightComponent(LightComponentType type, U64 uuid)
 	, m_type(type)
 {
 	ANKI_ASSERT(m_uuid > 0);
-	setInnerAngle(toRad(45.0));
-	setOuterAngle(toRad(30.0));
-	m_radius = 1.0;
+
+	switch(type)
+	{
+	case LightComponentType::POINT:
+		m_point.m_radius = 1.0f;
+		break;
+	case LightComponentType::SPOT:
+		setInnerAngle(toRad(45.0));
+		setOuterAngle(toRad(30.0));
+		m_spot.m_distance = 1.0f;
+		m_spot.m_textureMat = Mat4::getIdentity();
+		break;
+	case LightComponentType::DIRECTIONAL:
+		m_dir.m_sceneMax = Vec3(MIN_F32);
+		m_dir.m_sceneMin = Vec3(MAX_F32);
+		break;
+	default:
+		ANKI_ASSERT(0);
+	}
 }
 
 Error LightComponent::update(SceneNode& node, Second prevTime, Second crntTime, Bool& updated)
@@ -37,14 +59,145 @@ Error LightComponent::update(SceneNode& node, Second prevTime, Second crntTime,
 		{
 			static const Mat4 biasMat4(0.5, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
 			Mat4 proj = Mat4::calculatePerspectiveProjectionMatrix(
-				m_outerAngle, m_outerAngle, LIGHT_FRUSTUM_NEAR_PLANE, m_distance);
-			m_spotTextureMatrix = biasMat4 * proj * Mat4(m_trf.getInverse());
+				m_spot.m_outerAngle, m_spot.m_outerAngle, LIGHT_FRUSTUM_NEAR_PLANE, m_spot.m_distance);
+			m_spot.m_textureMat = biasMat4 * proj * Mat4(m_trf.getInverse());
 		}
 	}
 
 	m_flags.unset(DIRTY | TRF_DIRTY);
 
+	// Update the scene bounds always
+	if(m_type == LightComponentType::DIRECTIONAL)
+	{
+		node.getSceneGraph().getOctree().getActualSceneBounds(m_dir.m_sceneMin, m_dir.m_sceneMax);
+	}
+
 	return Error::NONE;
 }
 
+void LightComponent::setupDirectionalLightQueueElement(const Frustum& frustum,
+	F32 overrideFrustumFar,
+	DirectionalLightQueueElement& el,
+	WeakArray<OrthographicFrustum> cascadeFrustums) const
+{
+	ANKI_ASSERT(m_type == LightComponentType::DIRECTIONAL);
+	ANKI_ASSERT(cascadeFrustums.getSize() <= MAX_SHADOW_CASCADES);
+
+	const U shadowCascadeCount = cascadeFrustums.getSize();
+
+	el.m_userData = this;
+	el.m_drawCallback = derectionalLightDebugDrawCallback;
+	el.m_uuid = m_uuid;
+	el.m_diffuseColor = m_diffColor.xyz();
+	el.m_direction = -m_trf.getRotation().getZAxis().xyz();
+	el.m_shadowCascadeCount = shadowCascadeCount;
+
+	// Compute the texture matrices
+	if(shadowCascadeCount == 0)
+	{
+		return;
+	}
+
+	const Mat4 lightTrf(m_trf);
+	if(frustum.getType() == FrustumType::PERSPECTIVE)
+	{
+		// Get some stuff
+		const PerspectiveFrustum& pfrustum = static_cast<const PerspectiveFrustum&>(frustum);
+		const F32 fovX = pfrustum.getFovX();
+		const F32 fovY = pfrustum.getFovY();
+		const F32 far = (overrideFrustumFar > 0.0f) ? overrideFrustumFar : pfrustum.getFar();
+
+		// Compute a sphere per cascade
+		Array<Sphere, MAX_SHADOW_CASCADES> boundingSpheres;
+		for(U i = 0; i < shadowCascadeCount; ++i)
+		{
+			const F32 cascadeFarNearDist = far / F32(shadowCascadeCount);
+
+			// Compute the center of the sphere
+			//           ^ z
+			//           |
+			// ----------|---------- A(a, -f)
+			//  \        |        /
+			//   \       |       /
+			//    \    C(0,z)   /
+			//     \     |     /
+			//      \    |    /
+			//       \---|---/ B(b, -n)
+			//        \  |  /
+			//         \ | /
+			//           v
+			// --------------------------> x
+			//           |
+			// The square distance of A-C is equal to B-C. Solve the equation to find the z.
+			const F32 f = F32(i + 1) * cascadeFarNearDist; // Cascade far
+			const F32 n = max(frustum.getNear(), F32(i) * cascadeFarNearDist); // Cascade near
+			const F32 a = f * tan(fovY / 2.0f) * fovX / fovY;
+			const F32 b = n * tan(fovY / 2.0f) * fovX / fovY;
+			const F32 z = (b * b + n * n - a * a - f * f) / (2.0f * (f - n));
+			ANKI_ASSERT(absolute((Vec2(a, -f) - Vec2(0, z)).getLength() - (Vec2(b, -n) - Vec2(0, z)).getLength())
+						<= EPSILON * 100.0f);
+
+			Vec3 C(0.0f, 0.0f, z); // Sphere center
+
+			// Compute the radius of the sphere
+			const Vec3 A(a, tan(fovY / 2.0f) * f, -f);
+			const F32 r = (A - C).getLength();
+
+			// Set the sphere
+			boundingSpheres[i].setRadius(r);
+			boundingSpheres[i].setCenter(frustum.getTransform().transform(C));
+		}
+
+		// Compute the matrices
+		for(U i = 0; i < shadowCascadeCount; ++i)
+		{
+			const Sphere& sphere = boundingSpheres[i];
+			const Vec3 sphereCenter = sphere.getCenter().xyz();
+			const F32 sphereRadius = sphere.getRadius();
+			const Vec3& lightDir = el.m_direction;
+			const Vec3 sceneMin = m_dir.m_sceneMin - Vec3(sphereRadius); // Push the bounds a bit
+			const Vec3 sceneMax = m_dir.m_sceneMax + Vec3(sphereRadius);
+
+			// Compute the intersections with the scene bounds
+			Vec3 eye;
+			if(sphereCenter > sceneMin && sphereCenter < sceneMax)
+			{
+				// Inside the scene bounds
+				const Aabb sceneBox(sceneMin, sceneMax);
+				const F32 t = sceneBox.intersectRayInside(sphereCenter, -lightDir);
+				eye = sphereCenter + t * (-lightDir);
+			}
+			else
+			{
+				eye = sphereCenter + sphereRadius * (-lightDir);
+			}
+
+			// Projection
+			const F32 far = (eye - sphereCenter).getLength() + sphereRadius;
+			const Mat4 cascadeProjMat = Mat4::calculateOrthographicProjectionMatrix(
+				sphereRadius, -sphereRadius, sphereRadius, -sphereRadius, LIGHT_FRUSTUM_NEAR_PLANE, far);
+
+			// View
+			Transform cascadeTransform = m_trf;
+			cascadeTransform.setOrigin(eye.xyz0());
+			const Mat4 cascadeViewMat = Mat4(cascadeTransform.getInverse());
+
+			// Light matrix
+			static const Mat4 biasMat4(
+				0.5f, 0.0f, 0.0f, 0.5f, 0.0f, 0.5f, 0.0f, 0.5f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f);
+			el.m_textureMatrices[i] = biasMat4 * cascadeProjMat * cascadeViewMat;
+
+			// Fill the frustum
+			OrthographicFrustum& cascadeFrustum = cascadeFrustums[i];
+			cascadeFrustum.setAll(
+				-sphereRadius, sphereRadius, LIGHT_FRUSTUM_NEAR_PLANE, far, sphereRadius, -sphereRadius);
+			cascadeFrustum.transform(cascadeTransform);
+		}
+	}
+	else
+	{
+		ANKI_ASSERT(!"TODO");
+	}
+}
+
 } // end namespace anki

+ 61 - 25
src/anki/scene/components/LightComponent.h

@@ -12,6 +12,10 @@
 namespace anki
 {
 
+// Forward
+class Frustum;
+class OrthographicFrustum;
+
 /// @addtogroup scene
 /// @{
 
@@ -54,58 +58,58 @@ public:
 
 	void setRadius(F32 x)
 	{
-		m_radius = x;
+		m_point.m_radius = x;
 		m_flags.set(DIRTY);
 	}
 
 	F32 getRadius() const
 	{
-		return m_radius;
+		return m_point.m_radius;
 	}
 
 	void setDistance(F32 x)
 	{
-		m_distance = x;
+		m_spot.m_distance = x;
 		m_flags.set(DIRTY);
 	}
 
 	F32 getDistance() const
 	{
-		return m_distance;
+		return m_spot.m_distance;
 	}
 
 	void setInnerAngle(F32 ang)
 	{
-		m_innerAngleCos = cos(ang / 2.0);
-		m_innerAngle = ang;
+		m_spot.m_innerAngleCos = cos(ang / 2.0);
+		m_spot.m_innerAngle = ang;
 		m_flags.set(DIRTY);
 	}
 
 	F32 getInnerAngleCos() const
 	{
-		return m_innerAngleCos;
+		return m_spot.m_innerAngleCos;
 	}
 
 	F32 getInnerAngle() const
 	{
-		return m_innerAngle;
+		return m_spot.m_innerAngle;
 	}
 
 	void setOuterAngle(F32 ang)
 	{
-		m_outerAngleCos = cos(ang / 2.0);
-		m_outerAngle = ang;
+		m_spot.m_outerAngleCos = cos(ang / 2.0);
+		m_spot.m_outerAngle = ang;
 		m_flags.set(DIRTY);
 	}
 
 	F32 getOuterAngle() const
 	{
-		return m_outerAngle;
+		return m_spot.m_outerAngle;
 	}
 
 	F32 getOuterAngleCos() const
 	{
-		return m_outerAngleCos;
+		return m_spot.m_outerAngleCos;
 	}
 
 	Bool getShadowEnabled() const
@@ -125,7 +129,7 @@ public:
 		ANKI_ASSERT(m_type == LightComponentType::POINT);
 		el.m_uuid = m_uuid;
 		el.m_worldPosition = m_trf.getOrigin().xyz();
-		el.m_radius = m_radius;
+		el.m_radius = m_point.m_radius;
 		el.m_diffuseColor = m_diffColor.xyz();
 		el.m_userData = this;
 		el.m_drawCallback = pointLightDebugDrawCallback;
@@ -136,31 +140,57 @@ public:
 		ANKI_ASSERT(m_type == LightComponentType::SPOT);
 		el.m_uuid = m_uuid;
 		el.m_worldTransform = Mat4(m_trf);
-		el.m_textureMatrix = m_spotTextureMatrix;
-		el.m_distance = m_distance;
-		el.m_outerAngle = m_outerAngle;
-		el.m_innerAngle = m_innerAngle;
+		el.m_textureMatrix = m_spot.m_textureMat;
+		el.m_distance = m_spot.m_distance;
+		el.m_outerAngle = m_spot.m_outerAngle;
+		el.m_innerAngle = m_spot.m_innerAngle;
 		el.m_diffuseColor = m_diffColor.xyz();
 		el.m_userData = this;
 		el.m_drawCallback = spotLightDebugDrawCallback;
 	}
 
+	/// Setup a directional queue element.
+	/// @param[in] frustum The frustum that is looking at that directional light. Used to calculate the cascades.
+	/// @param overrideFrustumFar Override frustum's far or set it to <0.0 to ignore that value.
+	/// @param[out] el The queue element to fill out.
+	/// @param[out] cascadeFrustums Fill those frustums as well. The size of this array is the count of the cascades.
+	void setupDirectionalLightQueueElement(const Frustum& frustum,
+		F32 overrideFrustumFar,
+		DirectionalLightQueueElement& el,
+		WeakArray<OrthographicFrustum> cascadeFrustums) const;
+
 private:
 	U64 m_uuid;
-	LightComponentType m_type;
 	Vec4 m_diffColor = Vec4(0.5f);
-	union
+	Transform m_trf = Transform::getIdentity();
+
+	struct Point
 	{
 		F32 m_radius;
+	};
+
+	struct Spot
+	{
+		Mat4 m_textureMat;
 		F32 m_distance;
+		F32 m_innerAngleCos;
+		F32 m_outerAngleCos;
+		F32 m_outerAngle;
+		F32 m_innerAngle;
 	};
-	F32 m_innerAngleCos;
-	F32 m_outerAngleCos;
-	F32 m_outerAngle;
-	F32 m_innerAngle;
 
-	Transform m_trf = Transform::getIdentity();
-	Mat4 m_spotTextureMatrix = Mat4::getIdentity();
+	struct Dir
+	{
+		Vec3 m_sceneMin;
+		Vec3 m_sceneMax;
+	};
+
+	union
+	{
+		Point m_point;
+		Spot m_spot;
+		Dir m_dir;
+	};
 
 	enum
 	{
@@ -169,6 +199,7 @@ private:
 		TRF_DIRTY = 1 << 2
 	};
 
+	LightComponentType m_type;
 	BitMask<U8> m_flags = BitMask<U8>(DIRTY | TRF_DIRTY);
 
 	static void pointLightDebugDrawCallback(RenderQueueDrawContext& ctx, ConstWeakArray<void*> userData)
@@ -180,6 +211,11 @@ private:
 	{
 		// TODO
 	}
+
+	static void derectionalLightDebugDrawCallback(RenderQueueDrawContext& ctx, ConstWeakArray<void*> userData)
+	{
+		// TODO
+	}
 };
 /// @}
 

+ 1 - 1
src/anki/scene/components/SpatialComponent.cpp

@@ -39,7 +39,7 @@ Error SpatialComponent::update(SceneNode& node, Second prevTime, Second crntTime
 		m_shape->computeAabb(m_aabb);
 		m_markedForUpdate = false;
 
-		m_node->getSceneGraph().getOctree().place(m_aabb, &m_octreeInfo);
+		m_node->getSceneGraph().getOctree().place(m_aabb, &m_octreeInfo, m_updateOctreeBounds);
 		m_placed = true;
 	}
 

+ 10 - 3
src/anki/scene/components/SpatialComponent.h

@@ -81,6 +81,12 @@ public:
 		m_markedForUpdate = true;
 	}
 
+	/// Update the "actual scene bounds" of the octree or not.
+	void setUpdateOctreeBounds(Bool update)
+	{
+		m_updateOctreeBounds = update;
+	}
+
 	/// @name SceneComponent overrides
 	/// @{
 	ANKI_USE_RESULT Error update(SceneNode& node, Second prevTime, Second crntTime, Bool& updated) override;
@@ -90,12 +96,13 @@ private:
 	SceneNode* m_node;
 	const CollisionShape* m_shape;
 	Aabb m_aabb; ///< A faster shape
-	Vec4 m_origin = Vec4(MAX_F32, MAX_F32, MAX_F32, 0.0);
+	Vec4 m_origin = Vec4(MAX_F32, MAX_F32, MAX_F32, 0.0f);
+
+	OctreePlaceable m_octreeInfo;
 
 	Bool8 m_markedForUpdate = false;
 	Bool8 m_placed = false;
-
-	OctreePlaceable m_octreeInfo;
+	Bool8 m_updateOctreeBounds = true;
 };
 
 /// A class that holds spatial information and implements the SpatialComponent virtuals. You just need to update the

+ 133 - 0
src/anki/script/Scene.cpp

@@ -2755,6 +2755,75 @@ static inline void wrapSpotLightNode(lua_State* l)
 	lua_settop(l, 0);
 }
 
+LuaUserDataTypeInfo luaUserDataTypeInfoDirectionalLightNode = {3634924534632382552,
+	"DirectionalLightNode",
+	LuaUserData::computeSizeForGarbageCollected<DirectionalLightNode>(),
+	nullptr,
+	nullptr};
+
+template<>
+const LuaUserDataTypeInfo& LuaUserData::getDataTypeInfoFor<DirectionalLightNode>()
+{
+	return luaUserDataTypeInfoDirectionalLightNode;
+}
+
+/// Pre-wrap method DirectionalLightNode::getSceneNodeBase.
+static inline int pwrapDirectionalLightNodegetSceneNodeBase(lua_State* l)
+{
+	LuaUserData* ud;
+	(void)ud;
+	void* voidp;
+	(void)voidp;
+	PtrSize size;
+	(void)size;
+
+	if(ANKI_UNLIKELY(LuaBinder::checkArgsCount(l, 1)))
+	{
+		return -1;
+	}
+
+	// Get "this" as "self"
+	if(LuaBinder::checkUserData(l, 1, luaUserDataTypeInfoDirectionalLightNode, ud))
+	{
+		return -1;
+	}
+
+	DirectionalLightNode* self = ud->getData<DirectionalLightNode>();
+
+	// Call the method
+	SceneNode& ret = *self;
+
+	// Push return value
+	voidp = lua_newuserdata(l, sizeof(LuaUserData));
+	ud = static_cast<LuaUserData*>(voidp);
+	luaL_setmetatable(l, "SceneNode");
+	extern LuaUserDataTypeInfo luaUserDataTypeInfoSceneNode;
+	ud->initPointed(&luaUserDataTypeInfoSceneNode, const_cast<SceneNode*>(&ret));
+
+	return 1;
+}
+
+/// Wrap method DirectionalLightNode::getSceneNodeBase.
+static int wrapDirectionalLightNodegetSceneNodeBase(lua_State* l)
+{
+	int res = pwrapDirectionalLightNodegetSceneNodeBase(l);
+	if(res >= 0)
+	{
+		return res;
+	}
+
+	lua_error(l);
+	return 0;
+}
+
+/// Wrap class DirectionalLightNode.
+static inline void wrapDirectionalLightNode(lua_State* l)
+{
+	LuaBinder::createClass(l, &luaUserDataTypeInfoDirectionalLightNode);
+	LuaBinder::pushLuaCFuncMethod(l, "getSceneNodeBase", wrapDirectionalLightNodegetSceneNodeBase);
+	lua_settop(l, 0);
+}
+
 LuaUserDataTypeInfo luaUserDataTypeInfoStaticCollisionNode = {-4376619865753613291,
 	"StaticCollisionNode",
 	LuaUserData::computeSizeForGarbageCollected<StaticCollisionNode>(),
@@ -3495,6 +3564,68 @@ static int wrapSceneGraphnewSpotLightNode(lua_State* l)
 	return 0;
 }
 
+/// Pre-wrap method SceneGraph::newDirectionalLightNode.
+static inline int pwrapSceneGraphnewDirectionalLightNode(lua_State* l)
+{
+	LuaUserData* ud;
+	(void)ud;
+	void* voidp;
+	(void)voidp;
+	PtrSize size;
+	(void)size;
+
+	if(ANKI_UNLIKELY(LuaBinder::checkArgsCount(l, 2)))
+	{
+		return -1;
+	}
+
+	// Get "this" as "self"
+	if(LuaBinder::checkUserData(l, 1, luaUserDataTypeInfoSceneGraph, ud))
+	{
+		return -1;
+	}
+
+	SceneGraph* self = ud->getData<SceneGraph>();
+
+	// Pop arguments
+	const char* arg0;
+	if(ANKI_UNLIKELY(LuaBinder::checkString(l, 2, arg0)))
+	{
+		return -1;
+	}
+
+	// Call the method
+	DirectionalLightNode* ret = newSceneNode<DirectionalLightNode>(self, arg0);
+
+	// Push return value
+	if(ANKI_UNLIKELY(ret == nullptr))
+	{
+		lua_pushstring(l, "Glue code returned nullptr");
+		return -1;
+	}
+
+	voidp = lua_newuserdata(l, sizeof(LuaUserData));
+	ud = static_cast<LuaUserData*>(voidp);
+	luaL_setmetatable(l, "DirectionalLightNode");
+	extern LuaUserDataTypeInfo luaUserDataTypeInfoDirectionalLightNode;
+	ud->initPointed(&luaUserDataTypeInfoDirectionalLightNode, const_cast<DirectionalLightNode*>(ret));
+
+	return 1;
+}
+
+/// Wrap method SceneGraph::newDirectionalLightNode.
+static int wrapSceneGraphnewDirectionalLightNode(lua_State* l)
+{
+	int res = pwrapSceneGraphnewDirectionalLightNode(l);
+	if(res >= 0)
+	{
+		return res;
+	}
+
+	lua_error(l);
+	return 0;
+}
+
 /// Pre-wrap method SceneGraph::newStaticCollisionNode.
 static inline int pwrapSceneGraphnewStaticCollisionNode(lua_State* l)
 {
@@ -3978,6 +4109,7 @@ static inline void wrapSceneGraph(lua_State* l)
 	LuaBinder::pushLuaCFuncMethod(l, "newModelNode", wrapSceneGraphnewModelNode);
 	LuaBinder::pushLuaCFuncMethod(l, "newPointLightNode", wrapSceneGraphnewPointLightNode);
 	LuaBinder::pushLuaCFuncMethod(l, "newSpotLightNode", wrapSceneGraphnewSpotLightNode);
+	LuaBinder::pushLuaCFuncMethod(l, "newDirectionalLightNode", wrapSceneGraphnewDirectionalLightNode);
 	LuaBinder::pushLuaCFuncMethod(l, "newStaticCollisionNode", wrapSceneGraphnewStaticCollisionNode);
 	LuaBinder::pushLuaCFuncMethod(l, "newParticleEmitterNode", wrapSceneGraphnewParticleEmitterNode);
 	LuaBinder::pushLuaCFuncMethod(l, "newReflectionProbeNode", wrapSceneGraphnewReflectionProbeNode);
@@ -4387,6 +4519,7 @@ void wrapModuleScene(lua_State* l)
 	wrapPerspectiveCameraNode(l);
 	wrapPointLightNode(l);
 	wrapSpotLightNode(l);
+	wrapDirectionalLightNode(l);
 	wrapStaticCollisionNode(l);
 	wrapParticleEmitterNode(l);
 	wrapReflectionProbeNode(l);

+ 15 - 0
src/anki/script/Scene.xml

@@ -316,6 +316,14 @@ using WeakArraySceneNodePtr = WeakArray<SceneNode*>;
 				</method>
 			</methods>
 		</class>
+		<class name="DirectionalLightNode">
+			<methods>
+				<method name="getSceneNodeBase">
+					<overrideCall>SceneNode&amp; ret = *self;</overrideCall>
+					<return>SceneNode&amp;</return>
+				</method>
+			</methods>
+		</class>
 		<class name="StaticCollisionNode">
 			<methods>
 				<method name="getSceneNodeBase">
@@ -403,6 +411,13 @@ using WeakArraySceneNodePtr = WeakArray<SceneNode*>;
 					</args>
 					<return>SpotLightNode*</return>
 				</method>
+				<method name="newDirectionalLightNode">
+					<overrideCall><![CDATA[DirectionalLightNode* ret = newSceneNode<DirectionalLightNode>(self, arg0);]]></overrideCall>
+					<args>
+						<arg>const CString&amp;</arg>
+					</args>
+					<return>DirectionalLightNode*</return>
+				</method>
 				<method name="newStaticCollisionNode">
 					<overrideCall><![CDATA[StaticCollisionNode* ret = newSceneNode<StaticCollisionNode>(self, arg0, arg1, arg2);]]></overrideCall>
 					<args>

+ 11 - 0
src/anki/util/Allocator.h

@@ -159,6 +159,17 @@ public:
 		return static_cast<pointer>(out);
 	}
 
+	/// Allocate memory with an explicit alignment by forwarding to the allocate(n, hint) overload.
+	/// @param n The number of elements of type T to allocate.
+	/// @param alignment The alignment of the allocation.
+	/// @return The pointer returned by the allocate(n, hint) overload.
+	/// @note It's not part of the STL interface
+	pointer allocate(size_type n, U32 alignment)
+	{
+		PtrSize hint = alignment; // The alignment is handed over through the hint pointer (TODO confirm in the other overload).
+		return allocate(n, &hint);
+	}
+
 	/// Deallocate memory
 	void deallocate(void* p, size_type n)
 	{

+ 82 - 0
src/anki/util/ClassWrapper.h

@@ -0,0 +1,82 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <anki/util/Array.h>
+
+namespace anki
+{
+
+/// @addtogroup util_misc
+/// @{
+
+/// A wrapper template to compensate for the fact that some classes get initialized in the constructor but that's not
+/// always desirable. One solution is to use a pointer and dynamic allocation but that creates an indirection and it
+/// might cause bad cache locality. With this wrapper that scenario is avoided. Call init() and destroy() explicitly.
+template<typename TClass>
+class ClassWrapper
+{
+public:
+	ClassWrapper()
+	{
+	}
+
+	/// Construct the TClass in place inside the internal storage, forwarding the arguments to its constructor.
+	template<typename... TArgs>
+	void init(TArgs&&... args)
+	{
+		::new(&m_data[0]) TClass(std::forward<TArgs>(args)...);
+	}
+
+	/// Call the destructor of the TClass. Nothing else is done to the internal storage.
+	void destroy()
+	{
+		reinterpret_cast<TClass*>(&m_data[0])->~TClass();
+	}
+
+	/// Access the members of the instance. There is no check that init() has been called.
+	TClass* operator->()
+	{
+		return reinterpret_cast<TClass*>(&m_data[0]);
+	}
+
+	/// Access the members of the instance (const version). There is no check that init() has been called.
+	const TClass* operator->() const
+	{
+		return reinterpret_cast<const TClass*>(&m_data[0]);
+	}
+
+	/// Get a reference to the instance. There is no check that init() has been called.
+	TClass& operator*()
+	{
+		return *reinterpret_cast<TClass*>(&m_data[0]);
+	}
+
+	/// Get a const reference to the instance. There is no check that init() has been called.
+	const TClass& operator*() const
+	{
+		return *reinterpret_cast<const TClass*>(&m_data[0]);
+	}
+
+	/// Get a pointer to the instance. The pointer is the address of the internal storage and is never null.
+	TClass* get()
+	{
+		return reinterpret_cast<TClass*>(&m_data[0]);
+	}
+
+	/// Get a const pointer to the instance. The pointer is the address of the internal storage and is never null.
+	const TClass* get() const
+	{
+		return reinterpret_cast<const TClass*>(&m_data[0]);
+	}
+
+private:
+	/// The raw bytes that hold the TClass: sizeof(TClass) bytes, aligned to alignof(TClass).
+	alignas(alignof(TClass)) Array<U8, sizeof(TClass)> m_data;
+};
+/// @}
+
+} // end namespace anki

+ 0 - 16
src/anki/util/Functions.h

@@ -238,22 +238,6 @@ inline void splitThreadedProblem(
 	ANKI_ASSERT(!(threadId == threadCount - 1 && end != problemSize));
 }
 
-/// Equivelent to static_cast.
-template<typename T, typename Y>
-inline T scast(Y from)
-{
-	ANKI_ASSERT(from);
-	return static_cast<T>(from);
-}
-
-/// Equivelent to reinterpret_cast.
-template<typename T, typename Y>
-inline T rcast(Y from)
-{
-	ANKI_ASSERT(from);
-	return reinterpret_cast<T>(from);
-}
-
 #define _ANKI_CONCATENATE(a, b) a##b
 
 /// Concatenate 2 preprocessor tokens.

+ 1 - 1
src/anki/util/String.cpp

@@ -167,7 +167,7 @@ void String::create(Allocator alloc, Char c, PtrSize length)
 	ANKI_ASSERT(c != '\0');
 	m_data.create(alloc, length + 1);
 
-	std::memset(&m_data[0], c, length);
+	memset(&m_data[0], c, length);
 	m_data[length] = '\0';
 }
 

+ 74 - 0
tests/renderer/TileAllocator.cpp

@@ -0,0 +1,74 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <tests/framework/Framework.h>
+#include <anki/renderer/TileAllocator.h>
+
+namespace anki
+{
+
+ANKI_TEST(Renderer, TileAllocator)
+{
+	HeapAllocator<U8> alloc(allocAligned, nullptr);
+
+	TileAllocator talloc;
+	talloc.init(alloc, 8, 8, 3, true);
+
+	Array<U32, 4> viewport;
+	TileAllocatorResult res;
+
+	const U lightUuid = 1;
+	const U dcCount = 666;
+	Timestamp crntTimestamp = 1;
+	Timestamp lightTimestamp = 1;
+
+	// Allocate 1 medium tile (6th argument is 1; presumably the tile size level, verify against TileAllocator.h)
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 1, 0, dcCount, 1, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+
+	// Allocate 3 big tiles (6th argument is 2), each for a different light id
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 2, 0, dcCount, 2, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 3, 0, dcCount, 2, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 4, 0, dcCount, 2, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+
+	// A 4th big tile in the same frame is expected to fail
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 5, 0, dcCount, 2, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_FAILED);
+
+	// Allocate 3 more medium tiles for the same light id as the first one, with the 4th argument going 1..3
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 1, 1, dcCount, 1, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 1, 2, dcCount, 1, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 1, 3, dcCount, 1, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+
+	// Even a small tile (6th argument is 0) is now expected to fail in this frame
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 6, 0, dcCount, 0, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_FAILED);
+
+	// New frame: advance the current timestamp
+	++crntTimestamp;
+
+	// Request the 3 big tiles again: unchanged dcCount is expected to be CACHED, dcCount + 1 to be allocated anew
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 2, 0, dcCount + 1, 2, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 3, 0, dcCount, 2, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::CACHED);
+	res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 4, 0, dcCount + 1, 2, viewport);
+	ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+
+	// 16 small tiles with light ids not used in this frame are all expected to succeed
+	for(U i = 0; i < 16; ++i)
+	{
+		res = talloc.allocate(crntTimestamp, lightTimestamp, lightUuid + 6 + i, 0, dcCount, 0, viewport);
+		ANKI_TEST_EXPECT_EQ(res, TileAllocatorResult::ALLOCATION_SUCCEEDED);
+	}
+}
+
+} // end namespace anki

+ 1 - 1
tests/scene/Octree.cpp

@@ -36,7 +36,7 @@ ANKI_TEST(Scene, Octree)
 			{
 				// Place
 				placeables[i].m_userData = &placeables[i];
-				octree.place(volume, &placeables[i]);
+				octree.place(volume, &placeables[i], true);
 				placed.push_back(i);
 			}
 			else if(mode == 1 && placed.size() > 0)

+ 25 - 8
tools/scene/Exporter.cpp

@@ -451,7 +451,8 @@ void Exporter::exportLight(const aiLight& light)
 
 	LOGI("Exporting light %s", light.mName.C_Str());
 
-	if(light.mType != aiLightSource_POINT && light.mType != aiLightSource_SPOT)
+	if(light.mType != aiLightSource_POINT && light.mType != aiLightSource_SPOT
+		&& light.mType != aiLightSource_DIRECTIONAL)
 	{
 		LOGW("Skipping light %s. Unsupported type (0x%x)", light.mName.C_Str(), light.mType);
 		return;
@@ -463,8 +464,24 @@ void Exporter::exportLight(const aiLight& light)
 		return;
 	}
 
-	file << "\nnode = scene:new" << ((light.mType == aiLightSource_POINT) ? "Point" : "Spot") << "LightNode(\""
-		 << light.mName.C_Str() << "\")\n";
+	const char* lightType;
+	switch(light.mType)
+	{
+	case aiLightSource_POINT:
+		lightType = "Point";
+		break;
+	case aiLightSource_SPOT:
+		lightType = "Spot";
+		break;
+	case aiLightSource_DIRECTIONAL:
+		lightType = "Directional";
+		break;
+	default:
+		lightType = nullptr;
+		assert(0);
+	}
+
+	file << "\nnode = scene:new" << lightType << "LightNode(\"" << light.mName.C_Str() << "\")\n";
 
 	file << "lcomp = node:getSceneNodeBase():getLightComponent()\n";
 
@@ -474,15 +491,12 @@ void Exporter::exportLight(const aiLight& light)
 	file << "lcomp:setDiffuseColor(Vec4.new(" << linear[0] << ", " << linear[1] << ", " << linear[2] << ", 1))\n";
 
 	// Geometry
-	aiVector3D direction(0.0, 0.0, 1.0);
-
 	switch(light.mType)
 	{
 	case aiLightSource_POINT:
 	{
 		// At this point I want the radius and have the attenuation factors
-		// att = Ac + Al*d + Aq*d^2. When d = r then att = 0.0. Also if we
-		// assume that Al is 0 then:
+		// att = Ac + Al*d + Aq*d^2. When d = r then att = 0.0. Also if we assume that Al is 0 then:
 		// 0 = Ac + Aq*r^2. Solving by r is easy
 		float r = sqrt(light.mAttenuationConstant / light.mAttenuationQuadratic);
 		file << "lcomp:setRadius(" << r << ")\n";
@@ -502,8 +516,11 @@ void Exporter::exportLight(const aiLight& light)
 		file << "lcomp:setInnerAngle(" << inner << ")\n"
 			 << "lcomp:setOuterAngle(" << outer << ")\n"
 			 << "lcomp:setDistance(" << dist << ")\n";
+		break;
+	}
+	case aiLightSource_DIRECTIONAL:
+	{
 
-		direction = light.mDirection;
 		break;
 	}
 	default: