Преглед изворни кода

Merge pull request #67 from godlikepanos/ssgi

Ssgi
Panagiotis Christopoulos Charitos пре 5 година
родитељ
комит
253aebcc94
54 измењених фајлова са 1533 додато и 114 уклоњено
  1. 14 0
      .gitattributes
  2. 10 0
      samples/common/Framework.cpp
  3. 23 1
      sandbox/Main.cpp
  4. 6 6
      shaders/Bloom.ankiprog
  5. 8 6
      shaders/FinalComposite.ankiprog
  6. 24 0
      shaders/Functions.glsl
  7. 69 0
      shaders/ImportanceSampling.glsl
  8. 3 1
      shaders/LightFunctions.glsl
  9. 3 0
      shaders/LightShading.ankiprog
  10. 241 0
      shaders/SsRaymarching.glsl
  11. 159 0
      shaders/Ssgi.ankiprog
  12. 103 0
      shaders/SsgiDenoise.ankiprog
  13. 122 0
      shaders/SsgiReconstruct.ankiprog
  14. 42 13
      shaders/Ssr.ankiprog
  15. 31 0
      shaders/glsl_cpp_common/Ssgi.h
  16. 2 0
      shaders/glsl_cpp_common/Ssr.h
  17. 1 0
      src/anki/Renderer.h
  18. 3 2
      src/anki/core/ConfigDefs.h
  19. 1 1
      src/anki/core/NativeWindowSdl.cpp
  20. 2 1
      src/anki/gr/RenderGraph.cpp
  21. 2 1
      src/anki/gr/vulkan/Buffer.cpp
  22. 2 1
      src/anki/gr/vulkan/CommandBuffer.cpp
  23. 25 18
      src/anki/gr/vulkan/CommandBufferFactory.cpp
  24. 1 1
      src/anki/gr/vulkan/CommandBufferFactory.h
  25. 1 0
      src/anki/gr/vulkan/DescriptorSet.cpp
  26. 2 1
      src/anki/gr/vulkan/Framebuffer.cpp
  27. 1 0
      src/anki/gr/vulkan/OcclusionQuery.cpp
  28. 2 1
      src/anki/gr/vulkan/Sampler.cpp
  29. 2 1
      src/anki/gr/vulkan/Shader.cpp
  30. 2 1
      src/anki/gr/vulkan/ShaderProgram.cpp
  31. 2 1
      src/anki/gr/vulkan/Texture.cpp
  32. 2 1
      src/anki/gr/vulkan/TextureView.cpp
  33. 1 0
      src/anki/gr/vulkan/TimestampQuery.cpp
  34. 19 23
      src/anki/math/Mat.h
  35. 2 3
      src/anki/math/Transform.h
  36. 1 1
      src/anki/math/Vec.h
  37. 1 1
      src/anki/physics/Common.h
  38. 2 0
      src/anki/renderer/Common.h
  39. 4 1
      src/anki/renderer/ConfigDefs.h
  40. 28 9
      src/anki/renderer/FinalComposite.cpp
  41. 1 1
      src/anki/renderer/FinalComposite.h
  42. 3 0
      src/anki/renderer/LightShading.cpp
  43. 60 0
      src/anki/renderer/Renderer.cpp
  44. 34 0
      src/anki/renderer/Renderer.h
  45. 5 0
      src/anki/renderer/RendererObject.cpp
  46. 7 0
      src/anki/renderer/RendererObject.h
  47. 274 0
      src/anki/renderer/Ssgi.cpp
  48. 92 0
      src/anki/renderer/Ssgi.h
  49. 6 3
      src/anki/renderer/Ssr.cpp
  50. 9 0
      src/anki/renderer/Ssr.h
  51. 57 4
      src/anki/script/Renderer.cpp
  52. 8 2
      src/anki/script/Renderer.xml
  53. 6 6
      src/anki/util/Memory.h
  54. 2 2
      src/anki/util/Thread.h

+ 14 - 0
.gitattributes

@@ -0,0 +1,14 @@
+*.cpp text eol=lf
+*.c text eol=lf
+*.h text eol=lf
+*.hpp text eol=lf
+*.glsl text eol=lf
+*.lua text eol=lf
+*.py text eol=lf
+*.cmake text eol=lf
+*.txt text eol=lf
+*.md text eol=lf
+*.ankiprog text eol=lf
+*.ankimtl text eol=lf
+*.ankimdl text eol=lf
+*.ankipart text eol=lf

+ 10 - 0
samples/common/Framework.cpp

@@ -61,6 +61,16 @@ Error SampleApp::userMainLoop(Bool& quit)
 		setDisplayDeveloperConsole(!getDisplayDeveloperConsole());
 	}
 
+	if(in.getKey(KeyCode::U) == 1)
+	{
+		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "SSGI") ? "" : "SSGI");
+	}
+
+	if(in.getKey(KeyCode::I) == 1)
+	{
+		renderer.setCurrentDebugRenderTarget((renderer.getCurrentDebugRenderTarget() == "SSR") ? "" : "SSR");
+	}
+
 	if(!getDisplayDeveloperConsole())
 	{
 		in.hideCursor(true);

+ 23 - 1
sandbox/Main.cpp

@@ -114,7 +114,7 @@ Error MyApp::userMainLoop(Bool& quit)
 	}
 	if(in.getKey(KeyCode::_2))
 	{
-		mover = &scene.findSceneNode("Spot_004").getComponent<MoveComponent>();
+		mover = &scene.findSceneNode("Cylinder.049").getComponent<MoveComponent>();
 	}
 
 	if(in.getKey(KeyCode::L) == 1)
@@ -216,6 +216,28 @@ Error MyApp::userMainLoop(Bool& quit)
 	}
 #endif
 
+	{
+		static Bool pressed = false;
+		Bool somethingPressed = false;
+		if(in.getKey(KeyCode::U) == 1)
+		{
+			pressed = !pressed;
+			somethingPressed = true;
+		}
+
+		if(somethingPressed)
+		{
+			if(pressed)
+			{
+				renderer.getOffscreenRenderer().setCurrentDebugRenderTarget("SSGI");
+			}
+			else
+			{
+				renderer.getOffscreenRenderer().setCurrentDebugRenderTarget("");
+			}
+		}
+	}
+
 	if(in.getEvent(InputEvent::WINDOW_CLOSED))
 	{
 		quit = true;

+ 6 - 6
shaders/Bloom.ankiprog

@@ -26,7 +26,7 @@ layout(set = 0, binding = 2, std140) readonly buffer ss0_
 	Vec4 u_averageLuminancePad3;
 };
 
-layout(set = 0, binding = 3) writeonly uniform image2D out_img;
+layout(set = 0, binding = 3) writeonly uniform image2D u_outImg;
 
 void main()
 {
@@ -41,15 +41,15 @@ void main()
 	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
 
 	Vec3 color = textureLod(u_tex, u_linearAnyClampSampler, uv, 0.0).rgb;
-	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(+1, +1)).rgb;
-	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(-1, -1)).rgb;
-	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(-1, +1)).rgb;
-	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(+1, -1)).rgb;
+	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(+1, +1)).rgb;
+	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(-1, -1)).rgb;
+	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(-1, +1)).rgb;
+	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(+1, -1)).rgb;
 
 	color *= (1.0 / 5.0);
 
 	color = tonemap(color, u_averageLuminancePad3.x, u_thresholdScalePad2.x) * u_thresholdScalePad2.y;
 
-	imageStore(out_img, IVec2(gl_GlobalInvocationID.xy), Vec4(color, 0.0));
+	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), Vec4(color, 0.0));
 }
 #pragma anki end

+ 8 - 6
shaders/FinalComposite.ankiprog

@@ -6,6 +6,7 @@
 #pragma anki mutator BLUE_NOISE 0 1
 #pragma anki mutator BLOOM_ENABLED 0 1
 #pragma anki mutator DBG_ENABLED 0 1
+#pragma anki mutator DBG_RENDER_TARGET_ENABLED 0 1
 
 ANKI_SPECIALIZATION_CONSTANT_U32(LUT_SIZE, 0, 1);
 ANKI_SPECIALIZATION_CONSTANT_UVEC2(FB_SIZE, 1, UVec2(1, 1));
@@ -36,7 +37,10 @@ layout(set = 0, binding = 7) uniform texture2DArray u_blueNoise;
 layout(set = 0, binding = 8) uniform texture2D u_velocityRt;
 layout(set = 0, binding = 9) uniform texture2D u_depthRt;
 #if DBG_ENABLED
-layout(set = 0, binding = 10) uniform texture2D u_dbgRt;
+layout(set = 0, binding = 10) uniform texture2D u_dbgOutlineRt;
+#endif
+#if DBG_RENDER_TARGET_ENABLED
+layout(set = 0, binding = 11) uniform texture2D u_dbgRt;
 #endif
 
 layout(push_constant, row_major, std430) uniform pc_
@@ -97,14 +101,12 @@ void main()
 	out_color += blueNoise / 255.0;
 #endif
 
-#if 0
-	{
-		out_color = textureLod(u_lightShadingRt, u_linearAnyClampSampler, uv, 0.0).rgb;
-	}
+#if DBG_RENDER_TARGET_ENABLED
+	out_color = textureLod(u_dbgRt, u_linearAnyClampSampler, uv, 0.0).rgb;
 #endif
 
 #if DBG_ENABLED
-	const Vec4 dbg = textureLod(u_dbgRt, u_linearAnyClampSampler, uv, 0.0);
+	const Vec4 dbg = textureLod(u_dbgOutlineRt, u_linearAnyClampSampler, uv, 0.0);
 	out_color = mix(out_color, dbg.rgb, dbg.a);
 #endif
 }

+ 24 - 0
shaders/Functions.glsl

@@ -517,3 +517,27 @@ Bool aabbsOverlap(const Vec3 aMin, const Vec3 aMax, const Vec3 bMin, const Vec3
 			return; \
 		} \
 	}
+
+// Create a rotation matrix whose 3rd column is the given direction. The other two columns are derived from it.
+// NOTE(review): zAxis is presumably expected to be normalized, confirm with the callers.
+Mat3 rotationFromDirection(Vec3 zAxis)
+{
+#if 0
+	const Vec3 z = zAxis;
+	const Bool alignsWithXBasis = abs(z.x - 1.0) <= EPSILON; // aka z == Vec3(1.0, 0.0, 0.0)
+	Vec3 x = (alignsWithXBasis) ? Vec3(0.0, 0.0, 1.0) : Vec3(1.0, 0.0, 0.0);
+	const Vec3 y = normalize(cross(x, z));
+	x = normalize(cross(z, y));
+	return Mat3(x, y, z);
+#else
+	// http://jcgt.org/published/0006/01/01/
+	const Vec3 z = zAxis;
+	const F32 sign = (z.z >= 0.0) ? 1.0 : -1.0;
+	const F32 a = -1.0 / (sign + z.z);
+	const F32 b = z.x * z.y * a;
+
+	// Square with a multiplication. pow() is undefined for a negative base and z.x/z.y can be negative
+	const Vec3 x = Vec3(1.0 + sign * a * (z.x * z.x), sign * b, -sign * z.x);
+	const Vec3 y = Vec3(b, sign + a * (z.y * z.y), -z.y);
+
+	return Mat3(x, y, z);
+#endif
+}

+ 69 - 0
shaders/ImportanceSampling.glsl

@@ -0,0 +1,69 @@
+// Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// NOTE: To visualize some of these functions go to https://www.shadertoy.com/view/wsBBzV
+
+#pragma once
+
+#include <shaders/Common.glsl>
+
+// Radical inverse as described in http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html
+// The bits are mirrored with bitfieldReverse instead of manual bitwise ops
+F32 radicalInverseVdC(U32 bits)
+{
+	const U32 reversedBits = bitfieldReverse(bits);
+	const F32 scale = 2.3283064365386963e-10; // 1.0 / 0x100000000
+	return F32(reversedBits) * scale;
+}
+
+// The i-th point out of N of the 2D Hammersley set
+// See http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html
+Vec2 hammersley2d(U32 i, U32 N)
+{
+	const F32 x = F32(i) / F32(N);
+	const F32 y = radicalInverseVdC(i);
+	return Vec2(x, y);
+}
+
+// Taken from Unreal
+// Hashes the input and returns three elements with 16 random bits each (0-0xffff)
+UVec3 rand3DPCG16(UVec3 v)
+{
+	// Linear congruential step
+	v = v * 1664525u + 1013904223u;
+
+	// Two identical rounds that mix the components with each other. The order of the statements matters
+	for(U32 round = 0u; round < 2u; ++round)
+	{
+		v.x += v.y * v.z;
+		v.y += v.z * v.x;
+		v.z += v.x * v.y;
+	}
+
+	// Keep the upper 16 bits of every component
+	return v >> 16u;
+}
+
+// Taken from Unreal
+// Returns a 2D Hammersley point that is randomized by two 16bit values. rand3DPCG16() can provide the random values
+Vec2 hammersleyRandom16(U32 sampleIdx, U32 sampleCount, UVec2 random)
+{
+	const F32 inv16Bit = 1.0 / 65536.0;
+
+	// 1st dimension: sample fraction shifted by the 1st random value and wrapped around
+	const F32 shift = F32(random.x) * inv16Bit;
+	const F32 e1 = fract(F32(sampleIdx) / sampleCount + shift);
+
+	// 2nd dimension: upper 16 bits of the mirrored index scrambled by the 2nd random value
+	const U32 mirroredHi = bitfieldReverse(sampleIdx) >> 16;
+	const F32 e2 = F32(mirroredHi ^ random.y) * inv16Bit;
+
+	return Vec2(e1, e2);
+}
+
+// Map a uniform 2D point to a 3D point on the surface of a hemisphere that is oriented in the z axis
+// See http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html
+Vec3 hemisphereSampleUniform(Vec2 uv)
+{
+	// uv.x drives the elevation
+	const F32 cosTheta = 1.0 - uv.x;
+	const F32 sinTheta = sqrt(1.0 - cosTheta * cosTheta);
+
+	// uv.y drives the angle around the z axis
+	const F32 phi = uv.y * 2.0 * PI;
+	const Vec2 xy = Vec2(cos(phi), sin(phi)) * sinTheta;
+
+	return Vec3(xy, cosTheta);
+}
+
+// Like hemisphereSampleUniform but the points are distributed closer to the z axis
+// See http://holger.dammertz.org/stuff/notes_HammersleyOnHemisphere.html
+Vec3 hemisphereSampleCos(Vec2 uv)
+{
+	// uv.x drives the elevation
+	const F32 cosTheta = sqrt(1.0 - uv.x);
+	const F32 sinTheta = sqrt(1.0 - cosTheta * cosTheta);
+
+	// uv.y drives the angle around the z axis
+	const F32 phi = uv.y * 2.0 * PI;
+	const Vec2 xy = Vec2(cos(phi), sin(phi)) * sinTheta;
+
+	return Vec3(xy, cosTheta);
+}

+ 3 - 1
shaders/LightFunctions.glsl

@@ -218,7 +218,9 @@ F32 computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, texture2D s
 F32 computeShadowFactorDirLight(
 	DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, texture2D shadowMap, sampler shadowMapSampler)
 {
-#if defined(ANKI_VENDOR_NVIDIA)
+#define ANKI_FAST_CASCADES_WORKAROUND 1 // Doesn't make sense but it's super fast
+
+#if ANKI_FAST_CASCADES_WORKAROUND
 	// Assumes MAX_SHADOW_CASCADES is 4
 	Mat4 lightProjectionMat;
 	switch(cascadeIdx)

+ 3 - 0
shaders/LightShading.ankiprog

@@ -51,6 +51,7 @@ layout(set = 0, binding = 15) uniform texture2D u_msRt2;
 layout(set = 0, binding = 16) uniform texture2D u_msDepthRt;
 layout(set = 0, binding = 17) uniform texture2D u_ssrRt;
 layout(set = 0, binding = 18) uniform texture2D u_ssaoRt;
+layout(set = 0, binding = 19) uniform texture2D u_ssgiRt;
 
 layout(location = 0) in Vec2 in_uv;
 layout(location = 1) in Vec2 in_clusterIJ;
@@ -280,6 +281,8 @@ void main()
 			diffIndirect /= totalBlendWeight;
 		}
 
+		const Vec3 ssgi = textureLod(u_ssgiRt, u_trilinearClampSampler, in_uv, 0.0).rgb;
+		diffIndirect += ssgi;
 		out_color += diffIndirect * gbuffer.m_diffuse;
 	}
 }

+ 241 - 0
shaders/SsRaymarching.glsl

@@ -0,0 +1,241 @@
+// Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// Screen space ray marching
+
+#pragma once
+
+#include <shaders/Common.glsl>
+
+// Compute the ray parameter t where a ray that starts inside an AABB crosses the AABB's boundary
+void rayAabbIntersectionInside2d(Vec2 rayOrigin, Vec2 rayDir, Vec2 aabbMin, Vec2 aabbMax, out F32 t)
+{
+	// Per axis, pick the side of the AABB the ray is heading to
+	const Vec2 boundary =
+		Vec2((rayDir.x > 0.0) ? aabbMax.x : aabbMin.x, (rayDir.y > 0.0) ? aabbMax.y : aabbMin.y);
+
+	// Per axis, solve rayOrigin + T * rayDir = boundary for T. The x component is the intersection with the line
+	// x=boundary.x and the y component the intersection with the line y=boundary.y
+	const Vec2 t2 = (boundary - rayOrigin) / rayDir;
+
+	// Choose the shortest t
+	t = min(t2.y, t2.x);
+}
+
+// Locate the cell of the given mip that contains rayOrigin and move the origin, along rayDir, just outside of it
+void stepToNextCell(Vec3 rayOrigin, Vec3 rayDir, U32 mipLevel, UVec2 hizSize, out Vec3 newRayOrigin)
+{
+	// Size of the mip
+	const Vec2 mipDims = Vec2(hizSize >> mipLevel);
+
+	// The ray origin in texture space and the cell around it in UV space
+	const Vec2 texelPos = rayOrigin.xy * mipDims;
+	const Vec2 cellLo = floor(texelPos) / mipDims;
+	const Vec2 cellHi = ceil(texelPos) / mipDims;
+
+	// Where does the ray leave the cell
+	F32 exitT;
+	rayAabbIntersectionInside2d(rayOrigin.xy, rayDir.xy, cellLo, cellHi, exitT);
+
+	// Push a tenth of the texel's width further to stop touching the cell
+	const F32 bump = (1.0 / mipDims.x) / 10.0;
+	exitT += bump;
+
+	newRayOrigin = rayOrigin + rayDir * exitT;
+}
+
+// Screen space ray march that traverses the mips of a HiZ depth texture.
+// Note: The inputs are in view space. The output hitPoint holds UV coordinates in xy and depth in z. The attenuation
+// is zero for rays that point towards the camera and fades out near the screen edges. uv and depthRef are not used
+// by this function
+void raymarch(Vec3 rayOrigin, // Ray origin in view space
+	Vec3 rayDir, // Ray dir in view space
+	F32 tmin, // Shoot rays from
+	Vec2 uv, // UV the ray starts
+	F32 depthRef, // Depth the ray starts
+	Mat4 projMat, // Projection matrix
+	U32 randFrom0To3,
+	U32 maxIterations,
+	texture2D hizTex,
+	sampler hizSampler,
+	U32 hizMipCount,
+	UVec2 hizMip0Size,
+	out Vec3 hitPoint,
+	out F32 attenuation)
+{
+	// Give the outputs a defined value because of the early exit that follows
+	hitPoint = Vec3(0.0);
+	attenuation = 0.0;
+
+	// Check for view facing reflections [sakibsaikia]
+	const Vec3 viewDir = normalize(rayOrigin);
+	const F32 cameraContribution = 1.0 - smoothstep(0.25, 0.5, dot(-viewDir, rayDir));
+	ANKI_BRANCH if(cameraContribution <= 0.0)
+	{
+		return;
+	}
+
+	// Dither and set starting pos
+	const F32 bayerMat[4] = F32[](1.0, 4.0, 2.0, 3.0);
+	const Vec3 p0 = rayOrigin + rayDir * (tmin * bayerMat[randFrom0To3]);
+
+	// p1
+	const F32 tmax = 10.0;
+	const Vec3 p1 = rayOrigin + rayDir * tmax;
+
+	// Compute start & end in clip space (well not clip space since x,y are in [0, 1])
+	Vec4 v4 = projMat * Vec4(p0, 1.0);
+	Vec3 start = v4.xyz / v4.w;
+	start.xy = NDC_TO_UV(start.xy);
+	v4 = projMat * Vec4(p1, 1.0);
+	Vec3 end = v4.xyz / v4.w;
+	end.xy = NDC_TO_UV(end.xy);
+
+	// Ray
+	Vec3 origin = start;
+	const Vec3 dir = normalize(end - start);
+
+	// Start looping. Move to coarser mips while the ray is in front of the depth and to finer ones when it gets
+	// behind it. Dropping below mip 0 ends the loop
+	I32 mipLevel = 0;
+	while(mipLevel > -1 && maxIterations > 0)
+	{
+		// Step to the next cell
+		Vec3 newOrigin;
+		stepToNextCell(origin, dir, U32(mipLevel), hizMip0Size, newOrigin);
+		origin = newOrigin;
+
+		if(all(greaterThan(origin.xy, Vec2(0.0))) && all(lessThan(origin.xy, Vec2(1.0))))
+		{
+			const F32 newDepth = textureLod(hizTex, hizSampler, origin.xy, F32(mipLevel)).r;
+
+			if(origin.z < newDepth)
+			{
+				// In front of depth
+				mipLevel = min(mipLevel + 1, I32(hizMipCount - 1u));
+			}
+			else
+			{
+				// Behind depth, move the origin back to the sampled depth
+				const F32 t = (origin.z - newDepth) / dir.z;
+				origin -= dir * t;
+				--mipLevel;
+			}
+
+			--maxIterations;
+		}
+		else
+		{
+			// Out of the screen
+			break;
+		}
+	}
+
+	// Write the values
+	const F32 blackMargin = 0.05 / 4.0;
+	const F32 whiteMargin = 0.1 / 2.0;
+	const Vec2 marginAttenuation2d = smoothstep(blackMargin, whiteMargin, origin.xy)
+									 * (1.0 - smoothstep(1.0 - whiteMargin, 1.0 - blackMargin, origin.xy));
+	const F32 marginAttenuation = marginAttenuation2d.x * marginAttenuation2d.y;
+	attenuation = marginAttenuation * cameraContribution;
+
+	hitPoint = origin;
+}
+
+// Screen space ray march that steps linearly over the depth texture. It starts with big steps (bigStep) and every time
+// it gets behind the depth it goes back and halves the step, until the step is one.
+// Note: The inputs are in view space. The output hitPoint holds UV coordinates in xy and depth in z. When nothing is
+// hit the attenuation is zero and the hitPoint is zeroed as well
+void raymarchGroundTruth(Vec3 rayOrigin, // Ray origin in view space
+	Vec3 rayDir, // Ray dir in view space
+	Vec2 uv, // UV the ray starts
+	F32 depthRef, // Depth the ray starts
+	Mat4 projMat, // Projection matrix
+	U32 maxIterations,
+	texture2D depthTex,
+	sampler depthSampler,
+	F32 depthLod,
+	UVec2 depthTexSize,
+	U32 bigStep,
+	U32 randInitialStep,
+	out Vec3 hitPoint,
+	out F32 attenuation)
+{
+	// Give the outputs a defined value. Not every path that follows writes them
+	hitPoint = Vec3(0.0);
+	attenuation = 0.0;
+
+	// Check for view facing reflections [sakibsaikia]
+	const Vec3 viewDir = normalize(rayOrigin);
+	const F32 cameraContribution = 1.0 - smoothstep(0.25, 0.5, dot(-viewDir, rayDir));
+	if(cameraContribution <= 0.0)
+	{
+		return;
+	}
+
+	// Start point
+	const Vec3 start = Vec3(uv, depthRef);
+
+	// Project end point
+	const Vec3 p1 = rayOrigin + rayDir * 0.1;
+	const Vec4 end4 = projMat * Vec4(p1, 1.0);
+	Vec3 end = end4.xyz / end4.w;
+	end.xy = NDC_TO_UV(end.xy);
+
+	// Compute the ray and step size. One step covers one texel in the axis the ray moves the most
+	Vec3 dir = end - start;
+	const Vec2 texelSize = abs(dir.xy) * Vec2(depthTexSize);
+	const F32 stepSize = length(dir.xy) / max(texelSize.x, texelSize.y);
+	dir = normalize(dir);
+
+	// Compute step
+	I32 stepSkip = I32(bigStep);
+	I32 step = I32(randInitialStep);
+
+	// Iterate
+	Vec3 origin;
+	ANKI_LOOP while(maxIterations-- != 0)
+	{
+		origin = start + dir * (F32(step) * stepSize);
+
+		// Check if it's out of the view
+		if(origin.x <= 0.0 || origin.y <= 0.0 || origin.x >= 1.0 || origin.y >= 1.0)
+		{
+			break;
+		}
+
+		const F32 depth = textureLod(depthTex, depthSampler, origin.xy, depthLod).r;
+		const Bool hit = origin.z - depth >= 0.0;
+		if(!hit)
+		{
+			step += stepSkip;
+		}
+		else if(stepSkip > 1)
+		{
+			// Got behind the depth with a big step. Go back and retry with half the step
+			step = max(1, step - stepSkip + 1);
+			stepSkip = stepSkip / 2;
+		}
+		else
+		{
+			// Found it
+
+			// Compute attenuation
+			const F32 blackMargin = 0.05 / 4.0;
+			const F32 whiteMargin = 0.1 / 2.0;
+			const Vec2 marginAttenuation2d = smoothstep(blackMargin, whiteMargin, origin.xy)
+											 * (1.0 - smoothstep(1.0 - whiteMargin, 1.0 - blackMargin, origin.xy));
+			const F32 marginAttenuation = marginAttenuation2d.x * marginAttenuation2d.y;
+			attenuation = marginAttenuation * cameraContribution;
+
+			// ...and hit point
+			hitPoint = origin;
+			break;
+		}
+	}
+}
+
+// Compute an attenuation that is 1.0 when the surface at the hit point faces the incoming ray and fades to 0.0 as the
+// surface turns away from it
+void rejectBackFaces(Vec3 reflection, Vec3 normalAtHitPoint, out F32 attenuation)
+{
+	const F32 facing = dot(normalAtHitPoint, -reflection);
+	attenuation = smoothstep(-0.17, 0.0, facing);
+}

+ 159 - 0
shaders/Ssgi.ankiprog

@@ -0,0 +1,159 @@
+// Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// The VARIANT points to the fragment in the quad that will be processed:
+// -----
+// |3|2|
+// |0|1|
+// -----
+
+#pragma anki mutator VARIANT 0 1 2 3
+
+#pragma anki start comp
+#include <shaders/SsRaymarching.glsl>
+#include <shaders/Functions.glsl>
+#include <shaders/Pack.glsl>
+#include <shaders/ImportanceSampling.glsl>
+#include <shaders/glsl_cpp_common/Ssgi.h>
+
+const UVec2 WORKGROUP_SIZE = UVec2(16, 16);
+layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
+
+layout(set = 0, binding = 0, rgba16f) uniform image2D out_img;
+
+layout(set = 0, binding = 1, row_major, std140) uniform u_
+{
+	SsgiUniforms u_unis;
+};
+
+layout(set = 0, binding = 2) uniform sampler u_trilinearClampSampler;
+layout(set = 0, binding = 3) uniform texture2D u_gbufferRt2;
+layout(set = 0, binding = 4) uniform texture2D u_depthRt;
+layout(set = 0, binding = 5) uniform texture2D u_lightBufferRt;
+layout(set = 0, binding = 6) uniform texture2D u_historyTex;
+
+void main() // Trace one ray for the quad fragment chosen by VARIANT, read the light buffer at the hit and blend with history
+{
+	// Compute the framebuffer coordinate this invocation processes: the invocation ID is doubled and then offset according to the VARIANT
+	UVec2 fixedGlobalInvocationId = IVec2(gl_GlobalInvocationID.xy);
+	fixedGlobalInvocationId *= 2;
+#if VARIANT == 0
+	// Nothing
+#elif VARIANT == 1
+	fixedGlobalInvocationId.x += 1;
+#elif VARIANT == 2
+	fixedGlobalInvocationId += 1;
+#else
+	fixedGlobalInvocationId.y += 1;
+#endif
+
+	if(fixedGlobalInvocationId.x >= u_unis.m_framebufferSize.x
+		|| fixedGlobalInvocationId.y >= u_unis.m_framebufferSize.y)
+	{
+		// Skip threads outside the writable image
+		return;
+	}
+
+	const Vec2 uv = (Vec2(fixedGlobalInvocationId.xy) + 0.5) / Vec2(u_unis.m_framebufferSize);
+	const Vec2 ndc = UV_TO_NDC(uv);
+
+	// Get the normal from the GBuffer and transform it with m_normalMat
+	const Vec3 worldNormal = readNormalFromGBuffer(u_gbufferRt2, u_trilinearClampSampler, uv);
+	const Vec3 viewNormal = u_unis.m_normalMat * worldNormal;
+
+	// Get depth
+	const F32 depth = textureLod(u_depthRt, u_trilinearClampSampler, uv, 0.0).r;
+
+	// Compute view pos by unprojecting the NDC position
+	const Vec4 viewPos4 = u_unis.m_invProjMat * Vec4(ndc, depth, 1.0);
+	const Vec3 viewPos = viewPos4.xyz / viewPos4.w;
+
+	// Get a random direction in the hemisphere around viewNormal. Use hemisphereSampleCos to avoid perpendicular vecs to viewNormal
+	const UVec2 random = rand3DPCG16(UVec3(gl_GlobalInvocationID.xy, u_unis.m_frameCount)).xy;
+	const Vec2 randomCircle = hammersleyRandom16(0, 0xFFFFu, random);
+	const Vec3 randomHemisphere = rotationFromDirection(viewNormal) * hemisphereSampleCos(randomCircle);
+
+	// Trace
+	Vec3 hitPoint;
+	F32 hitAttenuation;
+	const U32 lod = 0;
+	const F32 minStepf = 4.0;
+	const F32 noise = F32(random.x) * (1.0 / 65536.0); // random.x holds 16 random bits, scale it to [0.0, 1.0)
+	raymarchGroundTruth(viewPos,
+		randomHemisphere,
+		uv,
+		depth,
+		u_unis.m_projMat,
+		u_unis.m_maxSteps,
+		u_depthRt,
+		u_trilinearClampSampler,
+		F32(lod),
+		u_unis.m_depthBufferSize,
+		u_unis.m_firstStepPixels,
+		U32(mix(minStepf, F32(u_unis.m_firstStepPixels), noise)), // Initial step, randomized between minStepf and m_firstStepPixels
+		hitPoint,
+		hitAttenuation);
+
+	// Reject backfacing: attenuate hits on surfaces that face away from the ray
+	ANKI_BRANCH if(hitAttenuation > 0.0)
+	{
+		const Vec3 hitNormal =
+			u_unis.m_normalMat * readNormalFromGBuffer(u_gbufferRt2, u_trilinearClampSampler, hitPoint.xy);
+		F32 backFaceAttenuation;
+		rejectBackFaces(randomHemisphere, hitNormal, backFaceAttenuation);
+
+		hitAttenuation *= backFaceAttenuation;
+	}
+
+	// Read the light buffer
+	Vec3 outColor;
+	ANKI_BRANCH if(hitAttenuation > 0.0)
+	{
+		// Reproject the UV because you are reading the previous frame
+		const Vec4 v4 = u_unis.m_prevViewProjMatMulInvViewProjMat * Vec4(UV_TO_NDC(hitPoint.xy), hitPoint.z, 1.0);
+		hitPoint.xy = NDC_TO_UV(v4.xy / v4.w);
+
+		// Read the light buffer
+		outColor = textureLod(u_lightBufferRt, u_trilinearClampSampler, hitPoint.xy, 100.0).rgb; // NOTE(review): LOD 100.0 presumably ends up reading the smallest mip, confirm that this is intended
+		outColor = clamp(outColor, 0.0, FLT_MAX); // Fix the value just in case
+		outColor *= hitAttenuation;
+
+#if 0
+		// Compute a new normal based on the new hit point
+		const F32 depth = textureLod(u_depthRt, u_trilinearClampSampler, hitPoint.xy, 0.0).r;
+		const Vec4 viewPos4 = u_unis.m_invProjMat * Vec4(UV_TO_NDC(hitPoint.xy), depth, 1.0);
+		const Vec3 hitViewPos = viewPos4.xyz / viewPos4.w;
+		const Vec3 newViewNormal = normalize(hitViewPos - viewPos);
+#else
+		const Vec3 newViewNormal = viewNormal;
+#endif
+
+		// Modulate with the cosine between the ray and the normal
+		const F32 NoL = max(0.0, dot(randomHemisphere, newViewNormal));
+		outColor *= NoL;
+		outColor *= 2.0 * PI;
+	}
+	else
+	{
+		outColor = Vec3(0.0, 0.0, 0.0);
+	}
+
+	// Blend with history. The history is read at the reprojected position of this fragment
+	const Vec4 v4 = u_unis.m_prevViewProjMatMulInvViewProjMat * Vec4(ndc, depth, 1.0);
+	const Vec2 historyUv = NDC_TO_UV(v4.xy / v4.w);
+	const Vec3 history = textureLod(u_historyTex, u_trilinearClampSampler, historyUv, 0.0).rgb;
+	outColor = mix(history, outColor, 0.15); // 85% history, 15% new value
+
+	// Remove fireflies (disabled)
+#if 0
+	const Vec3 avgColor = subgroupAdd(outColor) / F32(gl_SubgroupSize);
+	outColor = min(outColor, avgColor);
+#endif
+
+	// Store. The image is indexed with the original invocation ID, not the doubled one
+	imageStore(out_img, IVec2(gl_GlobalInvocationID.xy), Vec4(outColor, 1.0));
+}
+
+#pragma anki end

+ 103 - 0
shaders/SsgiDenoise.ankiprog

@@ -0,0 +1,103 @@
+// Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma anki mutator VARIANT 0 1 2 3
+#pragma anki mutator ORIENTATION 0 1 // 0: VERTICAL, 1: HORIZONTAL
+#pragma anki mutator SAMPLE_COUNT 3 5 7 9 11 13 15
+
+ANKI_SPECIALIZATION_CONSTANT_UVEC2(IN_TEXTURE_SIZE, 0, UVec2(1));
+
+#pragma anki start comp
+
+#include <shaders/Common.glsl>
+
+#if SAMPLE_COUNT < 3
+#	error See file
+#endif
+
+const UVec2 WORKGROUP_SIZE = UVec2(8u, 8u);
+layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
+layout(set = 0, binding = 1) uniform texture2D u_inTex;
+layout(set = 0, binding = 2) uniform texture2D u_depthTex;
+layout(set = 0, binding = 3) writeonly uniform image2D u_outImg;
+
+// Weight of a sample given its depth and the reference depth. The closer the two depths the higher the weight
+F32 computeDepthWeight(F32 refDepth, F32 depth)
+{
+	const F32 depthDistance = abs(refDepth - depth);
+	return sqrt(1.0 / (EPSILON + depthDistance));
+}
+
+// Read the depth texture at the given UV
+F32 readDepth(Vec2 uv)
+{
+	const Vec4 texel = textureLod(u_depthTex, u_linearAnyClampSampler, uv, 0.0);
+	return texel.r;
+}
+
+// Read a color sample, weight it by how close its depth is to refDepth and accumulate it and its weight
+void sampleTex(Vec2 inUv, Vec2 depthUv, F32 refDepth, inout Vec3 col, inout F32 weight)
+{
+	const F32 sampleDepth = readDepth(depthUv);
+	const F32 sampleWeight = computeDepthWeight(refDepth, sampleDepth);
+	const Vec3 sampleColor = textureLod(u_inTex, u_linearAnyClampSampler, inUv, 0.0).rgb;
+
+	weight += sampleWeight;
+	col += sampleColor * sampleWeight;
+}
+
+// Depth aware blur in one direction (see ORIENTATION). The input is half the size of the depth buffer so the depth is
+// read at the fragment of the 2x2 quad that the VARIANT points to
+void main()
+{
+	// Set UVs
+	ANKI_BRANCH if(gl_GlobalInvocationID.x >= IN_TEXTURE_SIZE.x || gl_GlobalInvocationID.y >= IN_TEXTURE_SIZE.y)
+	{
+		// Out of bounds
+		return;
+	}
+
+	const Vec2 inUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(IN_TEXTURE_SIZE);
+
+	// Offset of the processed fragment inside the quad:
+	// -----
+	// |3|2|
+	// |0|1|
+	// -----
+#if VARIANT == 0
+	const UVec2 depthReadOffset = UVec2(0, 0);
+#elif VARIANT == 1
+	const UVec2 depthReadOffset = UVec2(1, 0);
+#elif VARIANT == 2
+	const UVec2 depthReadOffset = UVec2(1, 1);
+#else
+	// Fragment 3 is the one above fragment 0
+	const UVec2 depthReadOffset = UVec2(0, 1);
+#endif
+	const Vec2 depthUv = (Vec2(gl_GlobalInvocationID.xy * 2 + depthReadOffset) + 0.5) / Vec2(IN_TEXTURE_SIZE * 2);
+
+	const Vec2 IN_TEXEL_SIZE = 1.0 / Vec2(IN_TEXTURE_SIZE);
+	const Vec2 DEPTH_TEXEL_SIZE = 1.0 / Vec2(IN_TEXTURE_SIZE * 2);
+
+	// Reference
+	Vec3 color = textureLod(u_inTex, u_linearAnyClampSampler, inUv, 0.0).rgb;
+	const F32 refDepth = readDepth(depthUv);
+	F32 weight = 1.0;
+
+#if ORIENTATION == 1
+#	define X_OR_Y x
+#else
+#	define X_OR_Y y
+#endif
+
+	// One input texel is two depth texels
+	Vec2 inUvOffset = Vec2(0.0);
+	inUvOffset.X_OR_Y = 1.0 * IN_TEXEL_SIZE.X_OR_Y;
+	Vec2 depthUvOffset = Vec2(0.0);
+	depthUvOffset.X_OR_Y = 2.0 * DEPTH_TEXEL_SIZE.X_OR_Y;
+
+	// Every iteration takes one sample from each side of the reference
+	ANKI_UNROLL for(U32 i = 0u; i < (SAMPLE_COUNT - 1u) / 2u; ++i)
+	{
+		sampleTex(inUv + inUvOffset, depthUv + depthUvOffset, refDepth, color, weight);
+		sampleTex(inUv - inUvOffset, depthUv - depthUvOffset, refDepth, color, weight);
+
+		inUvOffset.X_OR_Y += IN_TEXEL_SIZE.X_OR_Y;
+		depthUvOffset.X_OR_Y += 2.0 * DEPTH_TEXEL_SIZE.X_OR_Y;
+	}
+
+	color /= weight;
+
+	// Write value
+	imageStore(u_outImg, IVec2(gl_GlobalInvocationID.xy), Vec4(color, 0.0));
+}
+
+#pragma anki end

+ 122 - 0
shaders/SsgiReconstruct.ankiprog

@@ -0,0 +1,122 @@
+// Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+// The VARIANT points to the master fragment in the quad:
+// -----
+// |3|2|
+// |0|1|
+// -----
+
+#pragma anki mutator VARIANT 0 1 2 3
+
+ANKI_SPECIALIZATION_CONSTANT_UVEC2(FB_SIZE, 0, UVec2(1));
+
+#pragma anki start comp
+
+#include <shaders/Common.glsl>
+
+const UVec2 WORKGROUP_SIZE = UVec2(16u, 16u);
+layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
+layout(set = 0, binding = 1) uniform texture2D u_inTex;
+layout(set = 0, binding = 2) uniform texture2D u_depthTex;
+layout(set = 0, binding = 3) writeonly uniform image2D u_outImg;
+
+shared Vec3 s_colors[WORKGROUP_SIZE.y][WORKGROUP_SIZE.x];
+shared Vec4 s_depths[WORKGROUP_SIZE.y][WORKGROUP_SIZE.x];
+
+// Weight of a sample given its depth and the reference depth. The closer the two depths the higher the weight
+F32 computeDepthWeights(F32 refDepth, F32 depth)
+{
+	const F32 depthDistance = abs(refDepth - depth);
+	return sqrt(1.0 / (EPSILON + depthDistance));
+}
+
+// Blend two colors, each weighted by how close its depth (stored in w) is to depthRef, and store the result
+void reconstruct(IVec2 storeCoord, F32 depthRef, Vec4 colorAndDepth0, Vec4 colorAndDepth1)
+{
+	const F32 w0 = computeDepthWeights(depthRef, colorAndDepth0.w);
+	const F32 w1 = computeDepthWeights(depthRef, colorAndDepth1.w);
+
+	Vec3 blended = colorAndDepth0.rgb * w0;
+	blended += colorAndDepth1.rgb * w1;
+	blended /= (w0 + w1);
+
+	imageStore(u_outImg, storeCoord, Vec4(blended, 0.0));
+}
+
+void reconstructAll(Vec4 depthRefs, Vec3 masterColor) // Store the master fragment of the quad and reconstruct the other three (slaves)
+{
+	const IVec2 localInvocationId = IVec2(gl_LocalInvocationID.xy);
+
+#if VARIANT == 0
+	const IVec2 masterStoreCoord = IVec2(gl_GlobalInvocationID.xy * 2);
+	const IVec2 slaveRelativeCoords[3] = IVec2[](IVec2(1, 0), IVec2(1, 1), IVec2(0, 1)); // Slave coords relative to the master
+	const U32 masterDrefIdx = 3; // Component of depthRefs that holds the master's depth
+	const U32 slaveDrefIdx[3] = U32[](2, 1, 0); // Components of depthRefs that hold the slaves' depths
+#elif VARIANT == 1
+	const IVec2 masterStoreCoord = IVec2(gl_GlobalInvocationID.xy * 2) + IVec2(1, 0);
+	const IVec2 slaveRelativeCoords[3] = IVec2[](IVec2(-1, 0), IVec2(0, 1), IVec2(-1, 1));
+	const U32 masterDrefIdx = 2;
+	const U32 slaveDrefIdx[3] = U32[](3, 1, 0);
+#elif VARIANT == 2
+	const IVec2 masterStoreCoord = IVec2(gl_GlobalInvocationID.xy * 2) + IVec2(1, 1);
+	const IVec2 slaveRelativeCoords[3] = IVec2[](IVec2(-1, -1), IVec2(0, -1), IVec2(-1, 0));
+	const U32 masterDrefIdx = 1;
+	const U32 slaveDrefIdx[3] = U32[](3, 2, 0);
+#else
+	const IVec2 masterStoreCoord = IVec2(gl_GlobalInvocationID.xy * 2) + IVec2(0, 1);
+	const IVec2 slaveRelativeCoords[3] = IVec2[](IVec2(0, -1), IVec2(1, -1), IVec2(1, 0));
+	const U32 masterDrefIdx = 0;
+	const U32 slaveDrefIdx[3] = U32[](3, 2, 1);
+#endif
+
+	const Vec4 masterColorAndDepth = Vec4(masterColor, depthRefs[masterDrefIdx]);
+	imageStore(u_outImg, masterStoreCoord, Vec4(masterColor, 0.0)); // The master is stored as is
+
+	ANKI_UNROLL for(U32 i = 0; i < 3; ++i)
+	{
+		const IVec2 sharedCoord =
+			clamp(localInvocationId + slaveRelativeCoords[i], IVec2(0), IVec2(WORKGROUP_SIZE) - 1); // Clamped to stay inside the shared arrays
+		const Vec3 masterColor2 = s_colors[sharedCoord.y][sharedCoord.x]; // Color of the neighbour invocation's master
+		const F32 masterDepth2 = s_depths[sharedCoord.y][sharedCoord.x][masterDrefIdx]; // ...and its depth
+		const IVec2 storeCoord = masterStoreCoord + slaveRelativeCoords[i];
+		reconstruct(storeCoord, depthRefs[slaveDrefIdx[i]], masterColorAndDepth, Vec4(masterColor2, masterDepth2)); // Blend the two masters using the slave's depth as reference
+	}
+}
+
+// Read the color and the 4 depths of the quad this invocation owns, share them with the rest of the workgroup and
+// reconstruct the quad
+void main()
+{
+	const UVec2 IN_TEXTURE_SIZE = FB_SIZE / 2;
+	const Bool outOfBounds =
+		gl_GlobalInvocationID.x >= IN_TEXTURE_SIZE.x || gl_GlobalInvocationID.y >= IN_TEXTURE_SIZE.y;
+
+	// Values of the out of bounds invocations
+	Vec3 color = Vec3(0.0);
+	Vec4 depthRefs = Vec4(1000.0); // High value so it has low weight
+
+	ANKI_BRANCH if(!outOfBounds)
+	{
+		const Vec2 inUv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(IN_TEXTURE_SIZE);
+		const Vec2 fbUv = (Vec2(gl_GlobalInvocationID.xy) * 2.0 + 1.0) / Vec2(FB_SIZE);
+
+		color = textureLod(u_inTex, u_linearAnyClampSampler, inUv, 0.0).rgb;
+		depthRefs = textureGather(sampler2D(u_depthTex, u_linearAnyClampSampler), fbUv, 0);
+	}
+
+	s_colors[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = color;
+	s_depths[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = depthRefs;
+
+	// All invocations of the workgroup need to reach the barrier, even the out of bounds ones. Because of that there
+	// is no early return above
+	memoryBarrierShared();
+	barrier();
+
+	ANKI_BRANCH if(!outOfBounds)
+	{
+		reconstructAll(depthRefs, color);
+	}
+}
+
+#pragma anki end

+ 42 - 13
shaders/Ssr.ankiprog

@@ -10,6 +10,7 @@
 // -----
 
 #pragma anki mutator VARIANT 0 1
+#define EXTRA_REJECTION 0
 
 #pragma anki start comp
 #include <shaders/Functions.glsl>
@@ -41,21 +42,22 @@ const Vec2 NOISE_TEX_SIZE = Vec2(16.0);
 void main()
 {
 	// Compute a global invocation ID that takes the checkerboard pattern into account
-	IVec2 fixedInvocationId = IVec2(gl_GlobalInvocationID.xy);
-	fixedInvocationId.x *= 2;
+	IVec2 fixedGlobalInvocationId = IVec2(gl_GlobalInvocationID.xy);
+	fixedGlobalInvocationId.x *= 2;
 #if VARIANT == 0
-	fixedInvocationId.x += ((fixedInvocationId.y + 1) & 1);
+	fixedGlobalInvocationId.x += ((fixedGlobalInvocationId.y + 1) & 1);
 #else
-	fixedInvocationId.x += ((fixedInvocationId.y + 0) & 1);
+	fixedGlobalInvocationId.x += ((fixedGlobalInvocationId.y + 0) & 1);
 #endif
 
-	if(fixedInvocationId.x >= I32(u_unis.m_framebufferSize.x) || fixedInvocationId.y >= I32(u_unis.m_framebufferSize.y))
+	if(fixedGlobalInvocationId.x >= I32(u_unis.m_framebufferSize.x)
+		|| fixedGlobalInvocationId.y >= I32(u_unis.m_framebufferSize.y))
 	{
 		// Skip threads outside the writable image
 		return;
 	}
 
-	const Vec2 uv = (Vec2(fixedInvocationId.xy) + 0.5) / Vec2(u_unis.m_framebufferSize);
+	const Vec2 uv = (Vec2(fixedGlobalInvocationId.xy) + 0.5) / Vec2(u_unis.m_framebufferSize);
 
 	// Read part of the G-buffer
 	const F32 roughness = readRoughnessFromGBuffer(u_gbufferRt1, u_trilinearClampSampler, uv);
@@ -81,10 +83,10 @@ void main()
 	// Do the heavy work
 	Vec3 hitPoint;
 	F32 hitAttenuation;
-	const U32 lod = 1;
-	const U32 step = 16u;
+	const U32 lod = 0;
+	const U32 step = u_unis.m_firstStepPixels;
 	const F32 stepf = step;
-	const F32 minStepf = 4.0;
+	const F32 minStepf = stepf / 4.0;
 	raymarchGroundTruth(viewPos,
 		reflVec,
 		uv,
@@ -94,18 +96,45 @@ void main()
 		u_depthRt,
 		u_trilinearClampSampler,
 		F32(lod),
-		u_unis.m_depthBufferSize >> lod,
+		u_unis.m_depthBufferSize,
 		step,
 		U32((stepf - minStepf) * noise + minStepf),
 		hitPoint,
 		hitAttenuation);
 
+#if EXTRA_REJECTION
+	// Reject backfacing
+	ANKI_BRANCH if(hitAttenuation > 0.0)
+	{
+		const Vec3 hitNormal =
+			u_unis.m_normalMat * readNormalFromGBuffer(u_gbufferRt2, u_trilinearClampSampler, hitPoint.xy);
+		F32 backFaceAttenuation;
+		rejectBackFaces(reflVec, hitNormal, backFaceAttenuation);
+
+		hitAttenuation *= backFaceAttenuation;
+	}
+
+	// Reject far from hit point
+	ANKI_BRANCH if(hitAttenuation > 0.0)
+	{
+		const F32 depth = textureLod(u_depthRt, u_trilinearClampSampler, hitPoint.xy, 0.0).r;
+		Vec4 viewPos4 = u_unis.m_invProjMat * Vec4(UV_TO_NDC(hitPoint.xy), depth, 1.0);
+		const F32 actualZ = viewPos4.z / viewPos4.w;
+
+		viewPos4 = u_unis.m_invProjMat * Vec4(UV_TO_NDC(hitPoint.xy), hitPoint.z, 1.0);
+		const F32 hitZ = viewPos4.z / viewPos4.w;
+
+		const F32 rejectionMeters = 1.0;
+		const F32 diff = abs(actualZ - hitZ);
+		const F32 distAttenuation = (diff < rejectionMeters) ? 1.0 : 0.0;
+		hitAttenuation *= distAttenuation;
+	}
+#endif
+
 	// Read the reflection
 	Vec4 outColor;
 	ANKI_BRANCH if(hitAttenuation > 0.0)
 	{
-		// Read the refl
-
 		// Reproject the UV because you are reading the previous frame
 		const Vec4 v4 = u_unis.m_prevViewProjMatMulInvViewProjMat * Vec4(UV_TO_NDC(hitPoint.xy), hitPoint.z, 1.0);
 		hitPoint.xy = NDC_TO_UV(v4.xy / v4.w);
@@ -125,6 +154,6 @@ void main()
 	}
 
 	// Store
-	imageStore(out_img, fixedInvocationId, outColor);
+	imageStore(out_img, fixedGlobalInvocationId, outColor);
 }
 #pragma anki end

+ 31 - 0
shaders/glsl_cpp_common/Ssgi.h

@@ -0,0 +1,31 @@
+// Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <shaders/glsl_cpp_common/Common.h>
+
+ANKI_BEGIN_NAMESPACE
+
+// SSGI uniforms. Declared once for both GLSL and C++, see the __cplusplus switch on m_normalMat
+struct SsgiUniforms
+{
+	UVec2 m_depthBufferSize;
+	UVec2 m_framebufferSize;
+	U32 m_frameCount;
+	U32 m_maxSteps;
+	U32 m_firstStepPixels;
+	U32 m_padding0; // NOTE(review): presumably there to keep the Mat4s that follow 16 byte aligned - confirm
+	Mat4 m_invProjMat;
+	Mat4 m_projMat;
+	Mat4 m_prevViewProjMatMulInvViewProjMat;
+#ifdef __cplusplus
+	Mat3x4 m_normalMat; // NOTE(review): presumably mirrors the padded layout of the GLSL Mat3 below - confirm
+#else
+	Mat3 m_normalMat;
+#endif
+};
+
+ANKI_END_NAMESPACE

+ 2 - 0
shaders/glsl_cpp_common/Ssr.h

@@ -18,6 +18,8 @@ struct SsrUniforms
 	U32 m_depthMipCount;
 	U32 m_maxSteps;
 	U32 m_lightBufferMipCount;
+	UVec3 m_padding0;
+	U32 m_firstStepPixels;
 	Mat4 m_prevViewProjMatMulInvViewProjMat;
 	Mat4 m_projMat;
 	Mat4 m_invProjMat;

+ 1 - 0
src/anki/Renderer.h

@@ -21,6 +21,7 @@
 #include <anki/renderer/TemporalAA.h>
 #include <anki/renderer/RenderQueue.h>
 #include <anki/renderer/Ssr.h>
+#include <anki/renderer/Ssgi.h>
 #include <anki/renderer/ProbeReflections.h>
 #include <anki/renderer/Dbg.h>
 #include <anki/renderer/Ssao.h>

+ 3 - 2
src/anki/core/ConfigDefs.h

@@ -8,9 +8,10 @@ ANKI_CONFIG_OPTION(core_storagePerFrameMemorySize, 16_MB, 1_MB, 1_GB)
 ANKI_CONFIG_OPTION(core_vertexPerFrameMemorySize, 10_MB, 1_MB, 1_GB)
 ANKI_CONFIG_OPTION(core_textureBufferPerFrameMemorySize, 1_MB, 1_MB, 1_GB)
 
-ANKI_CONFIG_OPTION(width, 1280, 16, 16 * 1024, "Width")
-ANKI_CONFIG_OPTION(height, 768, 16, 16 * 1024, "Height")
+ANKI_CONFIG_OPTION(width, 1920, 16, 16 * 1024, "Width")
+ANKI_CONFIG_OPTION(height, 1080, 16, 16 * 1024, "Height")
 ANKI_CONFIG_OPTION(core_targetFps, 60u, 30u, MAX_U32, "Target FPS")
+
 ANKI_CONFIG_OPTION(core_mainThreadCount, max(2u, getCpuCoresCount() / 2u), 2u, 1024u)
 ANKI_CONFIG_OPTION(core_displayStats, 0, 0, 1)
 ANKI_CONFIG_OPTION(core_clearCaches, 0, 0, 1)

+ 1 - 1
src/anki/core/NativeWindowSdl.cpp

@@ -21,7 +21,7 @@ Error NativeWindow::init(NativeWindowInitInfo& init, HeapAllocator<U8>& alloc)
 
 	if(SDL_Init(INIT_SUBSYSTEMS) != 0)
 	{
-		ANKI_CORE_LOGE("SDL_Init() failed");
+		ANKI_CORE_LOGE("SDL_Init() failed: %s", SDL_GetError());
 		return Error::FUNCTION_FAILED;
 	}
 

+ 2 - 1
src/anki/gr/RenderGraph.cpp

@@ -886,7 +886,8 @@ void RenderGraph::iterateSurfsOrVolumes(const TexturePtr& tex, const TextureSubr
 	{
 		for(U32 layer = subresource.m_firstLayer; layer < subresource.m_firstLayer + subresource.m_layerCount; ++layer)
 		{
-			for(U32 face = subresource.m_firstFace; face < subresource.m_firstFace + subresource.m_faceCount; ++face)
+			for(U32 face = subresource.m_firstFace; face < U32(subresource.m_firstFace + subresource.m_faceCount);
+				++face)
 			{
 				// Compute surf or vol idx
 				const U32 faceCount = textureTypeIsCube(tex->getTextureType()) ? 6 : 1;

+ 2 - 1
src/anki/gr/vulkan/Buffer.cpp

@@ -13,10 +13,11 @@ namespace anki
 Buffer* Buffer::newInstance(GrManager* manager, const BufferInitInfo& init)
 {
 	BufferImpl* impl = manager->getAllocator().newInstance<BufferImpl>(manager, init.getName());
-	Error err = impl->init(init);
+	const Error err = impl->init(init);
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 2 - 1
src/anki/gr/vulkan/CommandBuffer.cpp

@@ -13,10 +13,11 @@ namespace anki
 CommandBuffer* CommandBuffer::newInstance(GrManager* manager, const CommandBufferInitInfo& init)
 {
 	CommandBufferImpl* impl = manager->getAllocator().newInstance<CommandBufferImpl>(manager, init.getName());
-	Error err = impl->init(init);
+	const Error err = impl->init(init);
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 25 - 18
src/anki/gr/vulkan/CommandBufferFactory.cpp

@@ -254,8 +254,6 @@ Error CommandBufferFactory::newCommandBuffer(ThreadId tid, CommandBufferFlag cmd
 
 	// Get the thread allocator
 	{
-		LockGuard<SpinLock> lock(m_threadAllocMtx);
-
 		class Comp
 		{
 		public:
@@ -271,32 +269,41 @@ Error CommandBufferFactory::newCommandBuffer(ThreadId tid, CommandBufferFlag cmd
 		};
 
 		// Find using binary search
-		auto it = binarySearch(m_threadAllocs.getBegin(), m_threadAllocs.getEnd(), tid, Comp());
-
-		if(it != m_threadAllocs.getEnd())
 		{
-			ANKI_ASSERT((*it)->m_tid == tid);
-			alloc = *it;
+			RLockGuard<RWMutex> lock(m_threadAllocMtx);
+			auto it = binarySearch(m_threadAllocs.getBegin(), m_threadAllocs.getEnd(), tid, Comp());
+			alloc = (it != m_threadAllocs.getEnd()) ? (*it) : nullptr;
 		}
-		else
+
+		if(ANKI_UNLIKELY(alloc == nullptr))
 		{
-			alloc = m_alloc.newInstance<CommandBufferThreadAllocator>(this, tid);
+			WLockGuard<RWMutex> lock(m_threadAllocMtx);
 
-			m_threadAllocs.resize(m_alloc, m_threadAllocs.getSize() + 1);
-			m_threadAllocs[m_threadAllocs.getSize() - 1] = alloc;
+			// Check again
+			auto it = binarySearch(m_threadAllocs.getBegin(), m_threadAllocs.getEnd(), tid, Comp());
+			alloc = (it != m_threadAllocs.getEnd()) ? (*it) : nullptr;
 
-			// Sort for fast find
-			std::sort(m_threadAllocs.getBegin(),
-				m_threadAllocs.getEnd(),
-				[](const CommandBufferThreadAllocator* a, const CommandBufferThreadAllocator* b) {
-					return a->m_tid < b->m_tid;
-				});
+			if(alloc == nullptr)
+			{
+				alloc = m_alloc.newInstance<CommandBufferThreadAllocator>(this, tid);
 
-			ANKI_CHECK(alloc->init());
+				m_threadAllocs.resize(m_alloc, m_threadAllocs.getSize() + 1);
+				m_threadAllocs[m_threadAllocs.getSize() - 1] = alloc;
+
+				// Sort for fast find
+				std::sort(m_threadAllocs.getBegin(),
+					m_threadAllocs.getEnd(),
+					[](const CommandBufferThreadAllocator* a, const CommandBufferThreadAllocator* b) {
+						return a->m_tid < b->m_tid;
+					});
+
+				ANKI_CHECK(alloc->init());
+			}
 		}
 	}
 
 	ANKI_ASSERT(alloc);
+	ANKI_ASSERT(alloc->m_tid == tid);
 	Bool createdNew;
 	ANKI_CHECK(alloc->newCommandBuffer(cmdbFlags, ptr, createdNew));
 	if(createdNew)

+ 1 - 1
src/anki/gr/vulkan/CommandBufferFactory.h

@@ -175,7 +175,7 @@ private:
 	uint32_t m_queueFamily;
 
 	DynamicArray<CommandBufferThreadAllocator*> m_threadAllocs;
-	SpinLock m_threadAllocMtx;
+	RWMutex m_threadAllocMtx;
 
 	Atomic<U32> m_createdCmdBufferCount = {0};
 };

+ 1 - 0
src/anki/gr/vulkan/DescriptorSet.cpp

@@ -788,6 +788,7 @@ Error DSLayoutCacheEntry::getOrCreateThreadAllocator(ThreadId tid, DSThreadAlloc
 	}
 
 	ANKI_ASSERT(alloc);
+	ANKI_ASSERT(alloc->m_tid == tid);
 	return Error::NONE;
 }
 

+ 2 - 1
src/anki/gr/vulkan/Framebuffer.cpp

@@ -13,10 +13,11 @@ namespace anki
 Framebuffer* Framebuffer::newInstance(GrManager* manager, const FramebufferInitInfo& init)
 {
 	FramebufferImpl* impl = manager->getAllocator().newInstance<FramebufferImpl>(manager, init.getName());
-	Error err = impl->init(init);
+	const Error err = impl->init(init);
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 1 - 0
src/anki/gr/vulkan/OcclusionQuery.cpp

@@ -17,6 +17,7 @@ OcclusionQuery* OcclusionQuery::newInstance(GrManager* manager)
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 2 - 1
src/anki/gr/vulkan/Sampler.cpp

@@ -13,10 +13,11 @@ namespace anki
 Sampler* Sampler::newInstance(GrManager* manager, const SamplerInitInfo& init)
 {
 	SamplerImpl* impl = manager->getAllocator().newInstance<SamplerImpl>(manager, init.getName());
-	Error err = impl->init(init);
+	const Error err = impl->init(init);
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 2 - 1
src/anki/gr/vulkan/Shader.cpp

@@ -13,10 +13,11 @@ namespace anki
 Shader* Shader::newInstance(GrManager* manager, const ShaderInitInfo& init)
 {
 	ShaderImpl* impl = manager->getAllocator().newInstance<ShaderImpl>(manager, init.getName());
-	Error err = impl->init(init);
+	const Error err = impl->init(init);
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 2 - 1
src/anki/gr/vulkan/ShaderProgram.cpp

@@ -14,10 +14,11 @@ namespace anki
 ShaderProgram* ShaderProgram::newInstance(GrManager* manager, const ShaderProgramInitInfo& init)
 {
 	ShaderProgramImpl* impl = manager->getAllocator().newInstance<ShaderProgramImpl>(manager, init.getName());
-	Error err = impl->init(init);
+	const Error err = impl->init(init);
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 2 - 1
src/anki/gr/vulkan/Texture.cpp

@@ -13,10 +13,11 @@ namespace anki
 Texture* Texture::newInstance(GrManager* manager, const TextureInitInfo& init)
 {
 	TextureImpl* impl = manager->getAllocator().newInstance<TextureImpl>(manager, init.getName());
-	Error err = impl->init(init);
+	const Error err = impl->init(init);
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 2 - 1
src/anki/gr/vulkan/TextureView.cpp

@@ -13,10 +13,11 @@ namespace anki
 TextureView* TextureView::newInstance(GrManager* manager, const TextureViewInitInfo& init)
 {
 	TextureViewImpl* impl = manager->getAllocator().newInstance<TextureViewImpl>(manager, init.getName());
-	Error err = impl->init(init);
+	const Error err = impl->init(init);
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 1 - 0
src/anki/gr/vulkan/TimestampQuery.cpp

@@ -17,6 +17,7 @@ TimestampQuery* TimestampQuery::newInstance(GrManager* manager)
 	if(err)
 	{
 		manager->getAllocator().deleteInstance(impl);
+		impl = nullptr;
 	}
 	return impl;
 }

+ 19 - 23
src/anki/math/Mat.h

@@ -384,7 +384,7 @@ public:
 		{
 			m_arr1[n] = b.m_arr1[n];
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	/// Copy.
@@ -394,7 +394,7 @@ public:
 		{
 			m_simd[i] = b.m_simd[i];
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	ANKI_ENABLE_METHOD(!HAS_SIMD)
@@ -426,7 +426,7 @@ public:
 		{
 			m_arr1[n] += b.m_arr1[n];
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	ANKI_ENABLE_METHOD(HAS_SIMD)
@@ -436,7 +436,7 @@ public:
 		{
 			m_simd[i] = _mm_add_ps(m_simd[i], b.m_simd[i]);
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	ANKI_ENABLE_METHOD(!HAS_SIMD)
@@ -468,7 +468,7 @@ public:
 		{
 			m_arr1[n] -= b.m_arr1[n];
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	ANKI_ENABLE_METHOD(HAS_SIMD)
@@ -478,7 +478,7 @@ public:
 		{
 			m_simd[i] = _mm_sub_ps(m_simd[i], b.m_simd[i]);
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	ANKI_ENABLE_METHOD(J == I && !HAS_MAT4_SIMD)
@@ -528,7 +528,7 @@ public:
 	TMat& operator*=(const TMat& b)
 	{
 		(*this) = (*this) * b;
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	Bool operator==(const TMat& b) const
@@ -574,7 +574,7 @@ public:
 		{
 			m_arr1[i] += f;
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	TMat operator-(const T f) const
@@ -593,7 +593,7 @@ public:
 		{
 			m_arr1[i] -= f;
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	TMat operator*(const T f) const
@@ -612,7 +612,7 @@ public:
 		{
 			m_arr1[i] *= f;
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 
 	TMat operator/(const T f) const
@@ -633,7 +633,7 @@ public:
 		{
 			m_arr1[i] /= f;
 		}
-		return static_cast<TMat&>(*this);
+		return *this;
 	}
 	/// @}
 
@@ -1494,10 +1494,9 @@ public:
 		return ((*this) * (1.0 - t)) + (b * t);
 	}
 
-	static const TMat& getZero()
+	static TMat getZero()
 	{
-		static const TMat zero(0.0);
-		return zero;
+		return TMat(0.0);
 	}
 
 	void setZero()
@@ -1506,24 +1505,21 @@ public:
 	}
 
 	ANKI_ENABLE_METHOD(I == 3 && J == 3)
-	static const TMat& getIdentity()
+	static TMat getIdentity()
 	{
-		static const TMat ident(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0);
-		return ident;
+		return TMat(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0);
 	}
 
 	ANKI_ENABLE_METHOD(I == 4 && J == 4)
-	static const TMat& getIdentity()
+	static TMat getIdentity()
 	{
-		static const TMat ident(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
-		return ident;
+		return TMat(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
 	}
 
 	ANKI_ENABLE_METHOD(I == 4 && J == 3)
-	static const TMat& getIdentity()
+	static TMat getIdentity()
 	{
-		static const TMat ident(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0);
-		return ident;
+		return TMat(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0);
 	}
 
 	void setIdentity()

+ 2 - 3
src/anki/math/Transform.h

@@ -151,10 +151,9 @@ public:
 		(*this) = getIdentity();
 	}
 
-	static const TTransform& getIdentity()
+	static TTransform getIdentity()
 	{
-		static const TTransform ident(TVec<T, 4>(0.0), TMat<T, 3, 4>::getIdentity(), 1.0);
-		return ident;
+		return TTransform(TVec<T, 4>(0.0), TMat<T, 3, 4>::getIdentity(), 1.0);
 	}
 
 	/// @copybrief combineTTransformations

+ 1 - 1
src/anki/math/Vec.h

@@ -2942,7 +2942,7 @@ public:
 	ANKI_ENABLE_METHOD(HAS_VEC4_SIMD)
 	TVec abs() const
 	{
-		static const __m128 signMask = _mm_set1_ps(-0.0f);
+		const __m128 signMask = _mm_set1_ps(-0.0f);
 		return TVec(_mm_andnot_ps(signMask, m_simd));
 	}
 

+ 1 - 1
src/anki/physics/Common.h

@@ -15,7 +15,7 @@
 #	pragma GCC diagnostic ignored "-Wall"
 #	pragma GCC diagnostic ignored "-Wconversion"
 #	pragma GCC diagnostic ignored "-Wfloat-conversion"
-#	if ANKI_COMPILER_GCC && __GNUC__ >= 9
+#	if(ANKI_COMPILER_GCC && __GNUC__ >= 9) || (ANKI_COMPILER_CLANG && __clang_major__ >= 10)
 #		pragma GCC diagnostic ignored "-Wdeprecated-copy"
 #	endif
 #endif

+ 2 - 0
src/anki/renderer/Common.h

@@ -20,6 +20,7 @@ namespace anki
 
 // Forward
 class Renderer;
+class RendererObject;
 class GBuffer;
 class GBufferPost;
 class ShadowMapping;
@@ -38,6 +39,7 @@ class DepthDownscale;
 class TemporalAA;
 class UiStage;
 class Ssr;
+class Ssgi;
 class VolumetricLightingAccumulation;
 class GlobalIllumination;
 class GenericCompute;

+ 4 - 1
src/anki/renderer/ConfigDefs.h

@@ -17,7 +17,10 @@ ANKI_CONFIG_OPTION(r_volumetricLightingAccumulationClusterFractionZ, 4, 1, 16)
 ANKI_CONFIG_OPTION(r_volumetricLightingAccumulationFinalClusterInZ, 26, 1, 256)
 
 ANKI_CONFIG_OPTION(r_ssrMaxSteps, 64, 1, 2048)
-ANKI_CONFIG_OPTION(r_ssrHistoryBlendFactor, 0.3, 0.0, MAX_F64)
+ANKI_CONFIG_OPTION(r_ssrDepthLod, 2, 0, 1000)
+
+ANKI_CONFIG_OPTION(r_ssgiMaxSteps, 32, 1, 2048)
+ANKI_CONFIG_OPTION(r_ssgiDepthLod, 2, 0, 1000)
 
 ANKI_CONFIG_OPTION(r_shadowMappingTileResolution, 128, 16, 2048)
 ANKI_CONFIG_OPTION(r_shadowMappingTileCountPerRowOrColumn, 16, 1, 256)

+ 28 - 9
src/anki/renderer/FinalComposite.cpp

@@ -48,19 +48,22 @@ Error FinalComposite::initInternal(const ConfigSet& config)
 	ShaderProgramResourceVariantInitInfo variantInitInfo(m_prog);
 	variantInitInfo.addMutation("BLUE_NOISE", 1);
 	variantInitInfo.addMutation("BLOOM_ENABLED", 1);
-	variantInitInfo.addMutation("DBG_ENABLED", 0);
 	variantInitInfo.addConstant("LUT_SIZE", U32(LUT_SIZE));
 	variantInitInfo.addConstant("LUT_SIZE", U32(LUT_SIZE));
 	variantInitInfo.addConstant("FB_SIZE", UVec2(m_r->getWidth(), m_r->getHeight()));
 	variantInitInfo.addConstant("MOTION_BLUR_SAMPLES", config.getNumberU32("r_motionBlurSamples"));
 
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_grProgs[0] = variant->getProgram();
-
-	variantInitInfo.addMutation("DBG_ENABLED", 1);
-	m_prog->getOrCreateVariant(variantInitInfo, variant);
-	m_grProgs[1] = variant->getProgram();
+	for(U32 dbg = 0; dbg < 2; ++dbg)
+	{
+		for(U32 dbgRt = 0; dbgRt < 2; ++dbgRt)
+		{
+			const ShaderProgramResourceVariant* variant;
+			variantInitInfo.addMutation("DBG_ENABLED", dbg);
+			variantInitInfo.addMutation("DBG_RENDER_TARGET_ENABLED", dbgRt);
+			m_prog->getOrCreateVariant(variantInitInfo, variant);
+			m_grProgs[dbg][dbgRt] = variant->getProgram();
+		}
+	}
 
 	return Error::NONE;
 }
@@ -91,8 +94,11 @@ void FinalComposite::run(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx
 {
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	const Bool dbgEnabled = m_r->getDbg().getEnabled();
+	RenderTargetHandle dbgRt;
+	Bool dbgRtValid;
+	m_r->getCurrentDebugRenderTarget(dbgRt, dbgRtValid);
 
-	cmdb->bindShaderProgram(m_grProgs[dbgEnabled]);
+	cmdb->bindShaderProgram(m_grProgs[dbgEnabled][dbgRtValid]);
 
 	// Bind stuff
 	rgraphCtx.bindUniformBuffer(0, 0, m_r->getTonemapping().getAverageLuminanceBuffer());
@@ -114,6 +120,11 @@ void FinalComposite::run(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx
 		rgraphCtx.bindColorTexture(0, 10, m_r->getDbg().getRt());
 	}
 
+	if(dbgRtValid)
+	{
+		rgraphCtx.bindColorTexture(0, 11, dbgRt);
+	}
+
 	struct PushConsts
 	{
 		Vec4 m_blueNoiseLayerPad3;
@@ -161,6 +172,14 @@ void FinalComposite::populateRenderGraph(RenderingContext& ctx)
 
 	pass.newDependency({m_r->getGBuffer().getColorRt(3), TextureUsageBit::SAMPLED_FRAGMENT});
 	pass.newDependency({m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_FRAGMENT});
+
+	RenderTargetHandle dbgRt;
+	Bool dbgRtValid;
+	m_r->getCurrentDebugRenderTarget(dbgRt, dbgRtValid);
+	if(dbgRtValid)
+	{
+		pass.newDependency({dbgRt, TextureUsageBit::SAMPLED_FRAGMENT});
+	}
 }
 
 } // end namespace anki

+ 1 - 1
src/anki/renderer/FinalComposite.h

@@ -35,7 +35,7 @@ private:
 	FramebufferDescription m_fbDescr;
 
 	ShaderProgramResourcePtr m_prog;
-	Array<ShaderProgramPtr, 2> m_grProgs; ///< One with Dbg and one without
+	Array2d<ShaderProgramPtr, 2, 2> m_grProgs; ///< [Debug on or off][Dbg render target on or off]
 
 	TextureResourcePtr m_lut; ///< Color grading lookup texture.
 	TextureResourcePtr m_blueNoise;

+ 3 - 0
src/anki/renderer/LightShading.cpp

@@ -14,6 +14,7 @@
 #include <anki/renderer/DepthDownscale.h>
 #include <anki/renderer/Ssao.h>
 #include <anki/renderer/Ssr.h>
+#include <anki/renderer/Ssgi.h>
 #include <anki/renderer/GlobalIllumination.h>
 #include <anki/core/ConfigSet.h>
 #include <anki/util/HighRezTimer.h>
@@ -135,6 +136,7 @@ void LightShading::run(RenderPassWorkContext& rgraphCtx)
 			0, 16, m_r->getGBuffer().getDepthRt(), TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
 		rgraphCtx.bindColorTexture(0, 17, m_r->getSsr().getRt());
 		rgraphCtx.bindColorTexture(0, 18, m_r->getSsao().getRt());
+		rgraphCtx.bindColorTexture(0, 19, m_r->getSsgi().getRt());
 
 		// Draw
 		drawQuad(cmdb);
@@ -203,6 +205,7 @@ void LightShading::populateRenderGraph(RenderingContext& ctx)
 		TextureSubresourceInfo(DepthStencilAspectBit::DEPTH)});
 	pass.newDependency({m_r->getShadowMapping().getShadowmapRt(), TextureUsageBit::SAMPLED_FRAGMENT});
 	pass.newDependency({m_r->getSsao().getRt(), TextureUsageBit::SAMPLED_FRAGMENT});
+	pass.newDependency({m_r->getSsgi().getRt(), TextureUsageBit::SAMPLED_FRAGMENT});
 
 	// Refl & indirect
 	pass.newDependency({m_r->getSsr().getRt(), TextureUsageBit::SAMPLED_FRAGMENT});

+ 60 - 0
src/anki/renderer/Renderer.cpp

@@ -28,6 +28,7 @@
 #include <anki/renderer/TemporalAA.h>
 #include <anki/renderer/UiStage.h>
 #include <anki/renderer/Ssr.h>
+#include <anki/renderer/Ssgi.h>
 #include <anki/renderer/VolumetricLightingAccumulation.h>
 #include <anki/renderer/GlobalIllumination.h>
 #include <anki/renderer/GenericCompute.h>
@@ -43,6 +44,12 @@ Renderer::Renderer()
 
 Renderer::~Renderer()
 {
+	for(DebugRtInfo& info : m_debugRts)
+	{
+		info.m_rtName.destroy(getAllocator());
+	}
+	m_debugRts.destroy(getAllocator());
+	m_currentDebugRtName.destroy(getAllocator());
 }
 
 Error Renderer::init(ThreadHive* hive,
@@ -168,6 +175,9 @@ Error Renderer::initInternal(const ConfigSet& config)
 	m_ssr.reset(m_alloc.newInstance<Ssr>(this));
 	ANKI_CHECK(m_ssr->init(config));
 
+	m_ssgi.reset(m_alloc.newInstance<Ssgi>(this));
+	ANKI_CHECK(m_ssgi->init(config));
+
 	m_tonemapping.reset(getAllocator().newInstance<Tonemapping>(this));
 	ANKI_CHECK(m_tonemapping->init(config));
 
@@ -313,6 +323,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	m_ssao->populateRenderGraph(ctx);
 	m_lensFlare->populateRenderGraph(ctx);
 	m_ssr->populateRenderGraph(ctx);
+	m_ssgi->populateRenderGraph(ctx);
 	m_lightShading->populateRenderGraph(ctx);
 	m_temporalAA->populateRenderGraph(ctx);
 	m_downscale->populateRenderGraph(ctx);
@@ -594,4 +605,53 @@ void Renderer::updateLightShadingUniforms(RenderingContext& ctx) const
 	}
 }
 
+void Renderer::registerDebugRenderTarget(RendererObject* obj, CString rtName) // Appends (obj, rtName) to m_debugRts
+{
+#if ANKI_ASSERTS_ENABLED
+	for(const DebugRtInfo& inf : m_debugRts) // Names have to be unique. Only checked when asserts are enabled
+	{
+		ANKI_ASSERT(inf.m_rtName != rtName && "Choose different name");
+	}
+#endif
+
+	ANKI_ASSERT(obj);
+	DebugRtInfo inf;
+	inf.m_obj = obj;
+	inf.m_rtName.create(getAllocator(), rtName); // The matching destroy() is in ~Renderer
+
+	m_debugRts.emplaceBack(getAllocator(), std::move(inf));
+}
+
+/// Resolve the debug render target that was selected with setCurrentDebugRenderTarget().
+/// @param[out] handle Filled by the RendererObject that registered the selected name. Left untouched when
+///                    handleValid comes back false.
+/// @param[out] handleValid False when nothing is selected or when the selected name was never registered.
+void Renderer::getCurrentDebugRenderTarget(RenderTargetHandle& handle, Bool& handleValid)
+{
+	if(ANKI_LIKELY(m_currentDebugRtName.isEmpty()))
+	{
+		handleValid = false;
+		return;
+	}
+
+	RendererObject* obj = nullptr;
+	for(const DebugRtInfo& inf : m_debugRts)
+	{
+		if(inf.m_rtName == m_currentDebugRtName)
+		{
+			// registerDebugRenderTarget() asserts that the names are unique so there is nothing more to look for
+			obj = inf.m_obj;
+			break;
+		}
+	}
+	ANKI_ASSERT(obj);
+
+	// setCurrentDebugRenderTarget() doesn't validate the name. Don't dereference null when asserts are compiled out
+	if(ANKI_UNLIKELY(obj == nullptr))
+	{
+		handleValid = false;
+		return;
+	}
+
+	obj->getDebugRenderTarget(m_currentDebugRtName, handle);
+	handleValid = true;
+}
+
+void Renderer::setCurrentDebugRenderTarget(CString rtName) // rtName is not checked against m_debugRts here
+{
+	m_currentDebugRtName.destroy(getAllocator()); // The previous selection is always dropped first
+
+	if(!rtName.isEmpty() && rtName.getLength() > 0) // NOTE(review): the 2 checks look equivalent - confirm
+	{
+		m_currentDebugRtName.create(getAllocator(), rtName);
+	}
+}
+
 } // end namespace anki

+ 34 - 0
src/anki/renderer/Renderer.h

@@ -195,6 +195,11 @@ public:
 		return *m_ssr;
 	}
 
+	Ssgi& getSsgi()
+	{
+		return *m_ssgi;
+	}
+
 	U32 getWidth() const
 	{
 		return m_width;
@@ -359,6 +364,25 @@ public:
 		return *m_threadHive;
 	}
 
+	/// @name Debug render targets
+	/// @{
+
+	/// Register a debug render target.
+	void registerDebugRenderTarget(RendererObject* obj, CString rtName);
+
+	/// Set the render target you want to show.
+	void setCurrentDebugRenderTarget(CString rtName);
+
+	/// Get the render target currently showing.
+	CString getCurrentDebugRenderTarget() const
+	{
+		return m_currentDebugRtName;
+	}
+
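+	/// Get the handle of the render target currently showing. Need to call it after the handle is set by the
+	/// RenderGraph. handleValid is false when no debug render target is selected.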
+	void getCurrentDebugRenderTarget(RenderTargetHandle& handle, Bool& handleValid);
+	/// @}
+
 private:
 	ResourceManager* m_resources = nullptr;
 	ThreadHive* m_threadHive = nullptr;
@@ -377,6 +401,7 @@ private:
 	UniquePtr<GBuffer> m_gbuffer; ///< Material rendering stage
 	UniquePtr<GBufferPost> m_gbufferPost;
 	UniquePtr<Ssr> m_ssr;
+	UniquePtr<Ssgi> m_ssgi;
 	UniquePtr<LightShading> m_lightShading; ///< Illumination rendering stage
 	UniquePtr<DepthDownscale> m_depth;
 	UniquePtr<ForwardShading> m_forwardShading; ///< Forward shading.
@@ -426,6 +451,15 @@ private:
 	RendererStats m_stats;
 	Bool m_statsEnabled = false;
 
+	class DebugRtInfo // One entry of m_debugRts, filled by registerDebugRenderTarget()
+	{
+	public:
+		RendererObject* m_obj; // The object that registered the name
+		String m_rtName; // Destroyed explicitly in ~Renderer
+	};
+	DynamicArray<DebugRtInfo> m_debugRts;
+	String m_currentDebugRtName;
+
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& initializer);
 
 	void initJitteredMats();

+ 5 - 0
src/anki/renderer/RendererObject.cpp

@@ -77,4 +77,9 @@ U32 RendererObject::computeNumberOfSecondLevelCommandBuffers(U32 drawcallCount)
 	return secondLevelCmdbCount;
 }
 
+void RendererObject::registerDebugRenderTarget(CString rtName) // Forwards to the Renderer with this as the object
+{
+	m_r->registerDebugRenderTarget(this, rtName);
+}
+
 } // end namespace anki

+ 7 - 0
src/anki/renderer/RendererObject.h

@@ -38,6 +38,11 @@ public:
 
 	HeapAllocator<U8> getAllocator() const;
 
+	virtual void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const // Base version only asserts
+	{
+		ANKI_ASSERT(!"Object doesn't support that"); // NOTE(review): registering objects presumably override - confirm
+	}
+
 protected:
 	Renderer* m_r; ///< Know your father
 
@@ -112,6 +117,8 @@ protected:
 		bindStorage(cmdb, set, binding, token);
 		return ptr;
 	}
+
+	void registerDebugRenderTarget(CString rtName);
 };
 /// @}
 

+ 274 - 0
src/anki/renderer/Ssgi.cpp

@@ -0,0 +1,274 @@
+// Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <anki/renderer/Ssgi.h>
+#include <anki/renderer/Renderer.h>
+#include <anki/renderer/DepthDownscale.h>
+#include <anki/renderer/GBuffer.h>
+#include <anki/renderer/DownscaleBlur.h>
+#include <anki/core/ConfigSet.h>
+#include <shaders/glsl_cpp_common/Ssgi.h>
+
+namespace anki
+{
+
+static constexpr U32 WRITE = 0; // Index into m_runCtx.m_intermediateRts
+static constexpr U32 READ = 1; // Index into m_runCtx.m_intermediateRts
+
+Ssgi::~Ssgi()
+{
+}
+
+Error Ssgi::init(const ConfigSet& cfg) // Runs initInternal() and logs an error if it fails
+{
+	const Error err = initInternal(cfg);
+	if(err)
+	{
+		ANKI_R_LOGE("Failed to initialize SSGI pass");
+	}
+	return err; // The result of initInternal() is returned as is
+}
+
+Error Ssgi::initInternal(const ConfigSet& cfg) // Reads the config, loads the 3 SSGI programs and creates the RTs
+{
+	const U32 width = m_r->getWidth();
+	const U32 height = m_r->getHeight();
+	ANKI_ASSERT((width % 2) == 0 && (height % 2) == 0 && "The algorithms won't work"); // Half size RTs are used below
+	ANKI_R_LOGI("Initializing SSGI pass");
+	m_main.m_maxSteps = cfg.getNumberU32("r_ssgiMaxSteps");
+	m_main.m_depthLod = min(cfg.getNumberU32("r_ssgiDepthLod"), m_r->getDepthDownscale().getMipmapCount() - 1); // Clamp
+	m_main.m_firstStepPixels = 32; // Hard-coded, unlike the 2 config driven values above
+
+	ANKI_CHECK(getResourceManager().loadResource("engine_data/BlueNoiseRgb816x16.png", m_main.m_noiseTex));
+
+	// Init main: description of the width/2 x height/2 RTs and one program per VARIANT
+	{
+		m_main.m_rtDescr =
+			m_r->create2DRenderTargetDescription(width / 2, height / 2, Format::B10G11R11_UFLOAT_PACK32, "SSGI_tmp");
+		m_main.m_rtDescr.bake();
+
+		ANKI_CHECK(getResourceManager().loadResource("shaders/Ssgi.ankiprog", m_main.m_prog));
+
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_main.m_prog);
+
+		for(U32 i = 0; i < 4; ++i)
+		{
+			variantInitInfo.addMutation("VARIANT", i); // NOTE(review): assumes it replaces the previous value - confirm
+
+			const ShaderProgramResourceVariant* variant;
+			m_main.m_prog->getOrCreateVariant(variantInitInfo, variant);
+			m_main.m_grProg[i] = variant->getProgram();
+		}
+	}
+
+	// Init denoise: 2 programs per VARIANT, stored as m_grProg[0 or 1][VARIANT]
+	{
+		ANKI_CHECK(getResourceManager().loadResource("shaders/SsgiDenoise.ankiprog", m_denoise.m_prog));
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_denoise.m_prog);
+		const ShaderProgramResourceVariant* variant;
+
+		variantInitInfo.addConstant("IN_TEXTURE_SIZE", UVec2(width / 2, height / 2));
+
+		for(U32 i = 0; i < 4; ++i)
+		{
+			variantInitInfo.addMutation("VARIANT", i);
+
+			variantInitInfo.addMutation("SAMPLE_COUNT", 11); // Slot [0]: 11 samples with ORIENTATION 0
+			variantInitInfo.addMutation("ORIENTATION", 0);
+			m_denoise.m_prog->getOrCreateVariant(variantInitInfo, variant);
+			m_denoise.m_grProg[0][i] = variant->getProgram();
+
+			variantInitInfo.addMutation("SAMPLE_COUNT", 15); // Slot [1]: 15 samples with ORIENTATION 1
+			variantInitInfo.addMutation("ORIENTATION", 1);
+			m_denoise.m_prog->getOrCreateVariant(variantInitInfo, variant);
+			m_denoise.m_grProg[1][i] = variant->getProgram();
+		}
+	}
+
+	// Init reconstruction ("m_recontruction" is how the member is spelled): one program per VARIANT and the output RT
+	{
+		ANKI_CHECK(getResourceManager().loadResource("shaders/SsgiReconstruct.ankiprog", m_recontruction.m_prog));
+		ShaderProgramResourceVariantInitInfo variantInitInfo(m_recontruction.m_prog);
+		variantInitInfo.addConstant("FB_SIZE", UVec2(m_r->getWidth(), m_r->getHeight()));
+		const ShaderProgramResourceVariant* variant;
+
+		for(U32 i = 0; i < 4; ++i)
+		{
+			variantInitInfo.addMutation("VARIANT", i);
+			m_recontruction.m_prog->getOrCreateVariant(variantInitInfo, variant);
+			m_recontruction.m_grProg[i] = variant->getProgram();
+		}
+
+		const TextureInitInfo initInfo = m_r->create2DRenderTargetInitInfo(width, // Full size, unlike "SSGI_tmp"
+			height,
+			Format::B10G11R11_UFLOAT_PACK32,
+			TextureUsageBit::SAMPLED_ALL | TextureUsageBit::IMAGE_COMPUTE_WRITE,
+			"SSGI");
+		m_recontruction.m_rt = m_r->createAndClearRenderTarget(initInfo); // Imported in populateRenderGraph()
+	}
+
+	return Error::NONE;
+}
+
+/// Register the SSGI compute passes of the current frame in the render graph: the main pass, a vertical blur, a
+/// horizontal blur and the reconstruction pass, in that order.
+/// @param ctx The rendering context of the frame. Its address is cached in m_runCtx so that run() can read the
+///            matrices later.
+void Ssgi::populateRenderGraph(RenderingContext& ctx)
+{
+	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+	m_runCtx.m_ctx = &ctx;
+
+	// Main pass
+	{
+		// Create RTs
+		// The final RT is a persistent texture that gets imported every frame. Only the very first import passes an
+		// explicit usage (NOTE(review): presumably because the graph has no tracked usage for it yet, confirm).
+		if(ANKI_LIKELY(m_recontruction.m_rtImportedOnce))
+		{
+			m_runCtx.m_finalRt = rgraph.importRenderTarget(m_recontruction.m_rt);
+		}
+		else
+		{
+			m_runCtx.m_finalRt = rgraph.importRenderTarget(m_recontruction.m_rt, TextureUsageBit::SAMPLED_FRAGMENT);
+			m_recontruction.m_rtImportedOnce = true;
+		}
+		m_runCtx.m_intermediateRts[WRITE] = rgraph.newRenderTarget(m_main.m_rtDescr);
+		m_runCtx.m_intermediateRts[READ] = rgraph.newRenderTarget(m_main.m_rtDescr);
+
+		// Create pass
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("SSGI");
+		rpass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) { static_cast<Ssgi*>(rgraphCtx.m_userData)->run(rgraphCtx); },
+			this,
+			0);
+
+		// The main pass writes intermediateRts[WRITE] and samples the final RT that was imported above
+		rpass.newDependency({m_runCtx.m_intermediateRts[WRITE], TextureUsageBit::IMAGE_COMPUTE_WRITE});
+		rpass.newDependency({m_runCtx.m_finalRt, TextureUsageBit::SAMPLED_COMPUTE});
+
+		// Must match the subresource that run() binds
+		TextureSubresourceInfo hizSubresource;
+		hizSubresource.m_firstMipmap = m_main.m_depthLod;
+		rpass.newDependency({m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_COMPUTE, hizSubresource});
+		rpass.newDependency({m_r->getGBuffer().getColorRt(2), TextureUsageBit::SAMPLED_COMPUTE});
+		rpass.newDependency({m_r->getDownscaleBlur().getRt(), TextureUsageBit::SAMPLED_COMPUTE});
+	}
+
+	// Blur vertical: intermediateRts[WRITE] -> intermediateRts[READ]
+	{
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("SSGI_blur_v");
+
+		rpass.newDependency({m_runCtx.m_intermediateRts[WRITE], TextureUsageBit::SAMPLED_COMPUTE});
+		rpass.newDependency({m_runCtx.m_intermediateRts[READ], TextureUsageBit::IMAGE_COMPUTE_WRITE});
+		rpass.newDependency({m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_COMPUTE});
+
+		rpass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) { static_cast<Ssgi*>(rgraphCtx.m_userData)->runVBlur(rgraphCtx); },
+			this,
+			0);
+	}
+
+	// Blur horizontal: intermediateRts[READ] -> intermediateRts[WRITE]
+	{
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("SSGI_blur_h");
+
+		rpass.newDependency({m_runCtx.m_intermediateRts[READ], TextureUsageBit::SAMPLED_COMPUTE});
+		rpass.newDependency({m_runCtx.m_intermediateRts[WRITE], TextureUsageBit::IMAGE_COMPUTE_WRITE});
+		rpass.newDependency({m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_COMPUTE});
+
+		rpass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) { static_cast<Ssgi*>(rgraphCtx.m_userData)->runHBlur(rgraphCtx); },
+			this,
+			0);
+	}
+
+	// Reconstruction: intermediateRts[WRITE] -> final RT
+	{
+		ComputeRenderPassDescription& rpass = rgraph.newComputeRenderPass("SSGI_recon");
+
+		rpass.newDependency({m_runCtx.m_intermediateRts[WRITE], TextureUsageBit::SAMPLED_COMPUTE});
+		rpass.newDependency({m_runCtx.m_finalRt, TextureUsageBit::IMAGE_COMPUTE_WRITE});
+		// runRecontruct() binds the GBuffer depth as well, so declare it just like the blur passes do
+		rpass.newDependency({m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_COMPUTE});
+
+		rpass.setWork(
+			[](RenderPassWorkContext& rgraphCtx) {
+				static_cast<Ssgi*>(rgraphCtx.m_userData)->runRecontruct(rgraphCtx);
+			},
+			this,
+			0);
+	}
+}
+
+/// Work callback of the main "SSGI" pass. Fills the uniforms, binds the inputs and dispatches over a half resolution
+/// grid, writing to intermediateRts[WRITE].
+void Ssgi::run(RenderPassWorkContext& rgraphCtx)
+{
+	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
+	RenderingContext& ctx = *m_runCtx.m_ctx;
+	const U32 fbWidth = m_r->getWidth();
+	const U32 fbHeight = m_r->getHeight();
+
+	// Same HiZ subresource that populateRenderGraph() declared
+	TextureSubresourceInfo hizSubresource;
+	hizSubresource.m_firstMipmap = m_main.m_depthLod;
+
+	// One program variant per frame, cycling every 4 frames
+	cmdb->bindShaderProgram(m_main.m_grProg[m_r->getFrameCount() % 4]);
+
+	// Output image
+	rgraphCtx.bindImage(0, 0, m_runCtx.m_intermediateRts[WRITE], TextureSubresourceInfo());
+
+	// Uniforms
+	SsgiUniforms* unis = allocateAndBindUniforms<SsgiUniforms*>(sizeof(SsgiUniforms), cmdb, 0, 1);
+
+	unis->m_framebufferSize = UVec2(fbWidth, fbHeight);
+	unis->m_depthBufferSize = UVec2(fbWidth, fbHeight) >> (m_main.m_depthLod + 1);
+
+	unis->m_frameCount = m_r->getFrameCount() & MAX_U32;
+	unis->m_maxSteps = m_main.m_maxSteps;
+	unis->m_firstStepPixels = m_main.m_firstStepPixels;
+
+	unis->m_projMat = ctx.m_matrices.m_projectionJitter;
+	unis->m_invProjMat = ctx.m_matrices.m_projectionJitter.getInverse();
+	unis->m_normalMat = Mat3x4(ctx.m_matrices.m_view.getRotationPart());
+	unis->m_prevViewProjMatMulInvViewProjMat =
+		ctx.m_prevMatrices.m_viewProjection * ctx.m_matrices.m_viewProjectionJitter.getInverse();
+
+	// Sampler and input textures
+	cmdb->bindSampler(0, 2, m_r->getSamplers().m_trilinearClamp);
+	rgraphCtx.bindColorTexture(0, 3, m_r->getGBuffer().getColorRt(2));
+	rgraphCtx.bindTexture(0, 4, m_r->getDepthDownscale().getHiZRt(), hizSubresource);
+	rgraphCtx.bindColorTexture(0, 5, m_r->getDownscaleBlur().getRt());
+	rgraphCtx.bindColorTexture(0, 6, m_runCtx.m_finalRt);
+
+	// Dispatch
+	dispatchPPCompute(cmdb, 16, 16, fbWidth / 2, fbHeight / 2);
+}
+
+/// Work callback of the "SSGI_blur_v" pass. Reads intermediateRts[WRITE] and the GBuffer depth, writes
+/// intermediateRts[READ].
+void Ssgi::runVBlur(RenderPassWorkContext& rgraphCtx)
+{
+	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
+	const auto variantIdx = m_r->getFrameCount() % 4;
+	const TextureSubresourceInfo depthSubresource(DepthStencilAspectBit::DEPTH);
+
+	// Row 0 of the denoise programs
+	cmdb->bindShaderProgram(m_denoise.m_grProg[0][variantIdx]);
+
+	// Inputs
+	cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
+	rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_intermediateRts[WRITE]);
+	rgraphCtx.bindTexture(0, 2, m_r->getGBuffer().getDepthRt(), depthSubresource);
+
+	// Output
+	rgraphCtx.bindImage(0, 3, m_runCtx.m_intermediateRts[READ], TextureSubresourceInfo());
+
+	dispatchPPCompute(cmdb, 8, 8, m_r->getWidth() / 2, m_r->getHeight() / 2);
+}
+
+/// Work callback of the "SSGI_blur_h" pass. Reads intermediateRts[READ] and the GBuffer depth, writes
+/// intermediateRts[WRITE].
+void Ssgi::runHBlur(RenderPassWorkContext& rgraphCtx)
+{
+	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
+	const auto variantIdx = m_r->getFrameCount() % 4;
+	const TextureSubresourceInfo depthSubresource(DepthStencilAspectBit::DEPTH);
+
+	// Row 1 of the denoise programs
+	cmdb->bindShaderProgram(m_denoise.m_grProg[1][variantIdx]);
+
+	// Inputs
+	cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
+	rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_intermediateRts[READ]);
+	rgraphCtx.bindTexture(0, 2, m_r->getGBuffer().getDepthRt(), depthSubresource);
+
+	// Output
+	rgraphCtx.bindImage(0, 3, m_runCtx.m_intermediateRts[WRITE], TextureSubresourceInfo());
+
+	dispatchPPCompute(cmdb, 8, 8, m_r->getWidth() / 2, m_r->getHeight() / 2);
+}
+
+/// Work callback of the "SSGI_recon" pass. Reads intermediateRts[WRITE] and the GBuffer depth, writes the final RT
+/// with a dispatch that covers the full renderer resolution.
+void Ssgi::runRecontruct(RenderPassWorkContext& rgraphCtx)
+{
+	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
+	const auto variantIdx = m_r->getFrameCount() % 4;
+	const TextureSubresourceInfo depthSubresource(DepthStencilAspectBit::DEPTH);
+
+	cmdb->bindShaderProgram(m_recontruction.m_grProg[variantIdx]);
+
+	// Inputs
+	cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
+	rgraphCtx.bindColorTexture(0, 1, m_runCtx.m_intermediateRts[WRITE]);
+	rgraphCtx.bindTexture(0, 2, m_r->getGBuffer().getDepthRt(), depthSubresource);
+
+	// Output
+	rgraphCtx.bindImage(0, 3, m_runCtx.m_finalRt, TextureSubresourceInfo());
+
+	dispatchPPCompute(cmdb, 16, 16, m_r->getWidth(), m_r->getHeight());
+}
+
+} // end namespace anki

+ 92 - 0
src/anki/renderer/Ssgi.h

@@ -0,0 +1,92 @@
+// Copyright (C) 2009-2020, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <anki/renderer/RendererObject.h>
+#include <anki/resource/TextureResource.h>
+#include <anki/Gr.h>
+
+namespace anki
+{
+
+/// @addtogroup renderer
+/// @{
+
+/// Screen space global illumination. Registers four compute passes per frame: main, vertical blur, horizontal blur and reconstruction.
+class Ssgi : public RendererObject
+{
+public:
+	Ssgi(Renderer* r)
+		: RendererObject(r)
+	{
+		registerDebugRenderTarget("SSGI"); // The same name getDebugRenderTarget() asserts on.
+	}
+
+	~Ssgi();
+
+	ANKI_USE_RESULT Error init(const ConfigSet& cfg);
+
+	/// Populate the rendergraph.
+	void populateRenderGraph(RenderingContext& ctx);
+
+	RenderTargetHandle getRt() const ///< Handle of the final RT; it is assigned by populateRenderGraph().
+	{
+		return m_runCtx.m_finalRt;
+	}
+
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const override
+	{
+		ANKI_ASSERT(rtName == "SSGI"); // Only "SSGI" is registered by the constructor.
+		handle = m_runCtx.m_finalRt;
+	}
+
+private:
+	class
+	{
+	public:
+		ShaderProgramResourcePtr m_prog;
+		Array<ShaderProgramPtr, 4> m_grProg; ///< run() picks one with frame count % 4.
+		RenderTargetDescription m_rtDescr; ///< Describes both intermediate RTs created every frame.
+		TextureResourcePtr m_noiseTex; ///< NOTE(review): not referenced by the pass callbacks; confirm it is still needed.
+		U32 m_maxSteps = 32; ///< Copied to SsgiUniforms in run().
+		U32 m_firstStepPixels = 16; ///< Copied to SsgiUniforms in run().
+		U32 m_depthLod = 0; ///< First mipmap of the HiZ RT that the main pass samples.
+	} m_main; ///< State of the main pass.
+
+	class
+	{
+	public:
+		ShaderProgramResourcePtr m_prog;
+		Array2d<ShaderProgramPtr, 2, 4> m_grProg; ///< Row 0 is bound by runVBlur(), row 1 by runHBlur(); column is frame count % 4.
+	} m_denoise; ///< State of the two blur passes.
+
+	class
+	{
+	public:
+		TexturePtr m_rt; ///< Persistent texture that is imported into the render graph every frame as the final RT.
+		ShaderProgramResourcePtr m_prog;
+		Array<ShaderProgramPtr, 4> m_grProg; ///< runRecontruct() picks one with frame count % 4.
+		Bool m_rtImportedOnce = false; ///< Set by the first import, which is the only one that passes an explicit usage.
+	} m_recontruction; ///< State of the reconstruction pass.
+
+	class
+	{
+	public:
+		Array<RenderTargetHandle, 2> m_intermediateRts; ///< Indexed with READ and WRITE.
+		RenderTargetHandle m_finalRt;
+		RenderingContext* m_ctx = nullptr; ///< Context of the current frame, used by run() for the matrices.
+	} m_runCtx; ///< Per-frame state written by populateRenderGraph().
+
+	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);
+
+	void run(RenderPassWorkContext& rgraphCtx); ///< Work callback of the main pass.
+	void runVBlur(RenderPassWorkContext& rgraphCtx); ///< Work callback of the vertical blur pass.
+	void runHBlur(RenderPassWorkContext& rgraphCtx); ///< Work callback of the horizontal blur pass.
+	void runRecontruct(RenderPassWorkContext& rgraphCtx); ///< Work callback of the reconstruction pass.
+};
+/// @}
+
+} // end namespace

+ 6 - 3
src/anki/renderer/Ssr.cpp

@@ -35,6 +35,8 @@ Error Ssr::initInternal(const ConfigSet& cfg)
 	const U32 height = m_r->getHeight();
 	ANKI_R_LOGI("Initializing SSR pass (%ux%u)", width, height);
 	m_maxSteps = cfg.getNumberU32("r_ssrMaxSteps");
+	m_depthLod = cfg.getNumberU32("r_ssrDepthLod");
+	m_firstStepPixels = 32;
 
 	ANKI_CHECK(getResourceManager().loadResource("engine_data/BlueNoiseRgb816x16.png", m_noiseTex));
 
@@ -84,7 +86,7 @@ void Ssr::populateRenderGraph(RenderingContext& ctx)
 	rpass.newDependency({m_r->getGBuffer().getColorRt(2), TextureUsageBit::SAMPLED_COMPUTE});
 
 	TextureSubresourceInfo hizSubresource;
-	hizSubresource.m_mipmapCount = m_r->getDepthDownscale().getMipmapCount();
+	hizSubresource.m_firstMipmap = m_depthLod;
 	rpass.newDependency({m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_COMPUTE, hizSubresource});
 
 	rpass.newDependency({m_r->getDownscaleBlur().getRt(), TextureUsageBit::SAMPLED_COMPUTE});
@@ -100,12 +102,13 @@ void Ssr::run(RenderPassWorkContext& rgraphCtx)
 
 	// Bind uniforms
 	SsrUniforms* unis = allocateAndBindUniforms<SsrUniforms*>(sizeof(SsrUniforms), cmdb, 0, 1);
-	unis->m_depthBufferSize = UVec2(m_r->getWidth(), m_r->getHeight()) >> 2u;
+	unis->m_depthBufferSize = UVec2(m_r->getWidth(), m_r->getHeight()) >> (m_depthLod + 1);
 	unis->m_framebufferSize = UVec2(m_r->getWidth(), m_r->getHeight());
 	unis->m_frameCount = m_r->getFrameCount() & MAX_U32;
 	unis->m_depthMipCount = m_r->getDepthDownscale().getMipmapCount();
 	unis->m_maxSteps = m_maxSteps;
 	unis->m_lightBufferMipCount = m_r->getDownscaleBlur().getMipmapCount();
+	unis->m_firstStepPixels = m_firstStepPixels;
 	unis->m_prevViewProjMatMulInvViewProjMat =
 		ctx.m_prevMatrices.m_viewProjection * ctx.m_matrices.m_viewProjectionJitter.getInverse();
 	unis->m_projMat = ctx.m_matrices.m_projectionJitter;
@@ -119,7 +122,7 @@ void Ssr::run(RenderPassWorkContext& rgraphCtx)
 	rgraphCtx.bindColorTexture(0, 4, m_r->getGBuffer().getColorRt(2));
 
 	TextureSubresourceInfo hizSubresource;
-	hizSubresource.m_mipmapCount = m_r->getDepthDownscale().getMipmapCount();
+	hizSubresource.m_firstMipmap = m_depthLod;
 	rgraphCtx.bindTexture(0, 5, m_r->getDepthDownscale().getHiZRt(), hizSubresource);
 
 	rgraphCtx.bindColorTexture(0, 6, m_r->getDownscaleBlur().getRt());

+ 9 - 0
src/anki/renderer/Ssr.h

@@ -20,6 +20,7 @@ public:
 	Ssr(Renderer* r)
 		: RendererObject(r)
 	{
+		registerDebugRenderTarget("SSR");
 	}
 
 	~Ssr();
@@ -43,6 +44,8 @@ private:
 
 	Array<U32, 2> m_workgroupSize = {};
 	U32 m_maxSteps = 32;
+	U32 m_depthLod = 0;
+	U32 m_firstStepPixels = 16;
 
 	class
 	{
@@ -54,6 +57,12 @@ private:
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);
 
 	void run(RenderPassWorkContext& rgraphCtx);
+
+	void getDebugRenderTarget(CString rtName, RenderTargetHandle& handle) const override
+	{
+		ANKI_ASSERT(rtName == "SSR"); // "SSR" is the only name the constructor registers.
+		handle = m_runCtx.m_rt; // Expose the RT handle held in m_runCtx.
+	}
 };
 /// @}
 

+ 57 - 4
src/anki/script/Renderer.cpp

@@ -23,7 +23,7 @@ static MainRenderer* getMainRenderer(lua_State* l)
 }
 
 LuaUserDataTypeInfo luaUserDataTypeInfoDbg = {
-	-2784798555522127122, "Dbg", LuaUserData::computeSizeForGarbageCollected<Dbg>(), nullptr, nullptr};
+	6963341295180544814, "Dbg", LuaUserData::computeSizeForGarbageCollected<Dbg>(), nullptr, nullptr};
 
 template<>
 const LuaUserDataTypeInfo& LuaUserData::getDataTypeInfoFor<Dbg>()
@@ -134,8 +134,11 @@ static inline void wrapDbg(lua_State* l)
 	lua_settop(l, 0);
 }
 
-LuaUserDataTypeInfo luaUserDataTypeInfoMainRenderer = {
-	919289102518575326, "MainRenderer", LuaUserData::computeSizeForGarbageCollected<MainRenderer>(), nullptr, nullptr};
+LuaUserDataTypeInfo luaUserDataTypeInfoMainRenderer = {-2700850970637484325,
+	"MainRenderer",
+	LuaUserData::computeSizeForGarbageCollected<MainRenderer>(),
+	nullptr,
+	nullptr};
 
 template<>
 const LuaUserDataTypeInfo& LuaUserData::getDataTypeInfoFor<MainRenderer>()
@@ -170,7 +173,7 @@ static inline int pwrapMainRenderergetAspectRatio(lua_State* l)
 	F32 ret = self->getAspectRatio();
 
 	// Push return value
-	lua_pushnumber(l, ret);
+	lua_pushnumber(l, lua_Number(ret));
 
 	return 1;
 }
@@ -188,11 +191,61 @@ static int wrapMainRenderergetAspectRatio(lua_State* l)
 	return 0;
 }
 
+/// Pre-wrap method MainRenderer::setCurrentDebugRenderTarget. Returns 0 on success and -1 when an argument check fails.
+static inline int pwrapMainRenderersetCurrentDebugRenderTarget(lua_State* l)
+{
+	LuaUserData* ud;
+	(void)ud;
+	void* voidp;
+	(void)voidp;
+	PtrSize size;
+	(void)size;
+
+	if(ANKI_UNLIKELY(LuaBinder::checkArgsCount(l, 2))) // Expect 2 arguments: self and the render target name.
+	{
+		return -1;
+	}
+
+	// Get "this" as "self"
+	if(LuaBinder::checkUserData(l, 1, luaUserDataTypeInfoMainRenderer, ud))
+	{
+		return -1;
+	}
+
+	MainRenderer* self = ud->getData<MainRenderer>();
+
+	// Pop arguments
+	const char* arg0;
+	if(ANKI_UNLIKELY(LuaBinder::checkString(l, 2, arg0)))
+	{
+		return -1;
+	}
+
+	// Call the method
+	self->getOffscreenRenderer().setCurrentDebugRenderTarget(arg0); // Forwarded to the offscreen renderer, not handled by MainRenderer.
+
+	return 0; // Nothing is pushed as a Lua return value.
+}
+
+/// Wrap method MainRenderer::setCurrentDebugRenderTarget. Raises a Lua error when the pre-wrap function fails.
+static int wrapMainRenderersetCurrentDebugRenderTarget(lua_State* l)
+{
+	int res = pwrapMainRenderersetCurrentDebugRenderTarget(l);
+	if(res >= 0) // A non-negative result is the number of Lua return values.
+	{
+		return res;
+	}
+
+	lua_error(l); // Does not return according to the Lua manual.
+	return 0; // Only reached in theory; keeps the function well-formed.
+}
+
 /// Wrap class MainRenderer. Creates the Lua class and registers its methods.
 static inline void wrapMainRenderer(lua_State* l)
 {
 	LuaBinder::createClass(l, &luaUserDataTypeInfoMainRenderer);
 	LuaBinder::pushLuaCFuncMethod(l, "getAspectRatio", wrapMainRenderergetAspectRatio);
+	LuaBinder::pushLuaCFuncMethod(l, "setCurrentDebugRenderTarget", wrapMainRenderersetCurrentDebugRenderTarget);
 	lua_settop(l, 0); // Empty the Lua stack.
 }
 

+ 8 - 2
src/anki/script/Renderer.xml

@@ -11,7 +11,7 @@
 #include <anki/Renderer.h>
 
 namespace anki {
-	
+
 static MainRenderer* getMainRenderer(lua_State* l)
 {
 	LuaBinder* binder = nullptr;
@@ -20,7 +20,7 @@ static MainRenderer* getMainRenderer(lua_State* l)
 	MainRenderer* r = binder->getOtherSystems().m_renderer;
 	ANKI_ASSERT(r);
 	return r;
-}	
+}
 ]]></head>
 
 	<classes>
@@ -41,6 +41,12 @@ static MainRenderer* getMainRenderer(lua_State* l)
 				<method name="getAspectRatio">
 					<return>F32</return>
 				</method>
+				<method name="setCurrentDebugRenderTarget">
+					<overrideCall>self->getOffscreenRenderer().setCurrentDebugRenderTarget(arg0);</overrideCall>
+					<args>
+						<arg>CString</arg>
+					</args>
+				</method>
 			</methods>
 		</class>
 	</classes>

+ 6 - 6
src/anki/util/Memory.h

@@ -132,14 +132,14 @@ private:
 
 /// A dummy interface to match the StackMemoryPool and ChainMemoryPool interfaces in order to be used by the same
 /// allocator template.
-class HeapMemoryPool : public BaseMemoryPool
+class HeapMemoryPool final : public BaseMemoryPool
 {
 public:
 	/// Default constructor.
 	HeapMemoryPool();
 
 	/// Destroy
-	~HeapMemoryPool() final;
+	~HeapMemoryPool();
 
 	/// The real constructor.
 	/// @param allocCb The allocation function callback
@@ -163,7 +163,7 @@ private:
 
 /// Thread safe memory pool. It's a preallocated memory pool that is used for memory allocations on top of that
 /// preallocated memory. It is mainly used by fast stack allocators
-class StackMemoryPool : public BaseMemoryPool
+class StackMemoryPool final : public BaseMemoryPool
 {
 public:
 	/// The type of the pool's snapshot
@@ -173,7 +173,7 @@ public:
 	StackMemoryPool();
 
 	/// Destroy
-	~StackMemoryPool() final;
+	~StackMemoryPool();
 
 	/// Create with parameters
 	/// @param allocCb The allocation function callback
@@ -269,14 +269,14 @@ private:
 };
 
 /// Chain memory pool. Almost similar to StackMemoryPool but more flexible and at the same time a bit slower.
-class ChainMemoryPool : public BaseMemoryPool
+class ChainMemoryPool final : public BaseMemoryPool
 {
 public:
 	/// Default constructor
 	ChainMemoryPool();
 
 	/// Destroy
-	~ChainMemoryPool() final;
+	~ChainMemoryPool();
 
 	/// Creates the pool.
 	/// @param allocCb The allocation function callback.

+ 2 - 2
src/anki/util/Thread.h

@@ -52,8 +52,8 @@ public:
 	{
 		if(name)
 		{
-			U len = std::strlen(name);
-			len = std::min<U>(len, sizeof(m_name) - 1);
+			PtrSize len = std::strlen(name);
+			len = std::min<PtrSize>(len, sizeof(m_name) - 1);
 			memcpy(&m_name[0], &name[0], len);
 			m_name[len] = '\0';
 		}